1 /*
2 * SPDX-FileCopyrightText: Copyright (c) 2020-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
3 * SPDX-License-Identifier: MIT
4 *
5 * Permission is hereby granted, free of charge, to any person obtaining a
6 * copy of this software and associated documentation files (the "Software"),
7 * to deal in the Software without restriction, including without limitation
8 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
9 * and/or sell copies of the Software, and to permit persons to whom the
10 * Software is furnished to do so, subject to the following conditions:
11 *
12 * The above copyright notice and this permission notice shall be included in
13 * all copies or substantial portions of the Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
21 * DEALINGS IN THE SOFTWARE.
22 */
23
24 #include "common_nvswitch.h"
25 #include "intr_nvswitch.h"
26 #include "regkey_nvswitch.h"
27 #include "soe/soe_nvswitch.h"
28 #include "cci/cci_nvswitch.h"
29
30 #include "ls10/ls10.h"
31 #include "ls10/minion_ls10.h"
32 #include "ls10/soe_ls10.h"
33
34 #include "nvswitch/ls10/dev_ctrl_ip.h"
35 #include "nvswitch/ls10/dev_pri_masterstation_ip.h"
36 #include "nvswitch/ls10/dev_pri_hub_sys_ip.h"
37 #include "nvswitch/ls10/dev_pri_hub_sysb_ip.h"
38 #include "nvswitch/ls10/dev_pri_hub_prt_ip.h"
39
40 #include "nvswitch/ls10/dev_npg_ip.h"
41 #include "nvswitch/ls10/dev_nport_ip.h"
42 #include "nvswitch/ls10/dev_route_ip.h"
43 #include "nvswitch/ls10/dev_ingress_ip.h"
44 #include "nvswitch/ls10/dev_sourcetrack_ip.h"
45 #include "nvswitch/ls10/dev_egress_ip.h"
46 #include "nvswitch/ls10/dev_tstate_ip.h"
47 #include "nvswitch/ls10/dev_multicasttstate_ip.h"
48 #include "nvswitch/ls10/dev_reductiontstate_ip.h"
49
50 #include "nvswitch/ls10/dev_nvlw_ip.h"
51 #include "nvswitch/ls10/dev_minion_ip.h"
52 #include "nvswitch/ls10/dev_minion_ip_addendum.h"
53 #include "nvswitch/ls10/dev_cpr_ip.h"
54 #include "nvswitch/ls10/dev_nvlipt_ip.h"
55 #include "nvswitch/ls10/dev_nvlipt_lnk_ip.h"
56 #include "nvswitch/ls10/dev_nvltlc_ip.h"
57 #include "nvswitch/ls10/dev_nvldl_ip.h"
58
59 #include "nvswitch/ls10/dev_nxbar_tcp_global_ip.h"
60 #include "nvswitch/ls10/dev_nxbar_tile_ip.h"
61 #include "nvswitch/ls10/dev_nxbar_tileout_ip.h"
62
63 #include "nvswitch/ls10/dev_ctrl_ip_addendum.h"
64
65 static void _nvswitch_create_deferred_link_errors_task_ls10(nvswitch_device *device, NvU32 nvlipt_instance, NvU32 link);
66
67 static void
_nvswitch_construct_ecc_error_event_ls10(INFOROM_NVS_ECC_ERROR_EVENT * err_event,NvU32 sxid,NvU32 linkId,NvBool bAddressValid,NvU32 address,NvBool bUncErr,NvU32 errorCount)68 _nvswitch_construct_ecc_error_event_ls10
69 (
70 INFOROM_NVS_ECC_ERROR_EVENT *err_event,
71 NvU32 sxid,
72 NvU32 linkId,
73 NvBool bAddressValid,
74 NvU32 address,
75 NvBool bUncErr,
76 NvU32 errorCount
77 )
78 {
79 err_event->sxid = sxid;
80 err_event->linkId = linkId;
81 err_event->bAddressValid = bAddressValid;
82 err_event->address = address;
83 err_event->bUncErr = bUncErr;
84 err_event->errorCount = errorCount;
85 }
86
87 static void
_nvswitch_initialize_minion_interrupts(nvswitch_device * device,NvU32 instance)88 _nvswitch_initialize_minion_interrupts
89 (
90 nvswitch_device *device,
91 NvU32 instance
92 )
93 {
94 NvU32 intrEn, localDiscoveredLinks, globalLink, i;
95 localDiscoveredLinks = 0;
96
97 // Tree 1 (non-stall) is disabled until there is a need
98 NVSWITCH_MINION_WR32_LS10(device, instance, _MINION, _MINION_INTR_NONSTALL_EN, 0);
99
100 // Tree 0 (stall) is where we route _all_ MINION interrupts for now
101 intrEn = DRF_DEF(_MINION, _MINION_INTR_STALL_EN, _FATAL, _ENABLE) |
102 DRF_DEF(_MINION, _MINION_INTR_STALL_EN, _NONFATAL, _ENABLE) |
103 DRF_DEF(_MINION, _MINION_INTR_STALL_EN, _FALCON_STALL, _ENABLE) |
104 DRF_DEF(_MINION, _MINION_INTR_STALL_EN, _FALCON_NOSTALL, _DISABLE);
105
106 for (i = 0; i < NVSWITCH_LINKS_PER_MINION_LS10; ++i)
107 {
108 // get the global link number of the link we are iterating over
109 globalLink = (instance * NVSWITCH_LINKS_PER_MINION_LS10) + i;
110
111 // the link is valid place bit in link mask
112 if (device->link[globalLink].valid)
113 {
114 localDiscoveredLinks |= NVBIT(i);
115 }
116 }
117
118 intrEn = FLD_SET_DRF_NUM(_MINION, _MINION_INTR_STALL_EN, _LINK,
119 localDiscoveredLinks, intrEn);
120
121 {
122 // Disable interrupts only if explicitly requested to. Default to enable.
123 if (device->regkeys.minion_intr != NV_SWITCH_REGKEY_MINION_INTERRUPTS_DISABLE)
124 {
125 NVSWITCH_MINION_WR32_LS10(device, instance, _MINION, _MINION_INTR_STALL_EN, intrEn);
126 }
127 }
128 }
129
//
// Enable the top-level NVLW, NVLIPT_COMMON, NVLIPT_LNK, MINION and CPR
// interrupt trees.  The general scheme is: tree 0 carries fatal sources,
// tree 1 carries nonfatal/correctable sources, tree 2 is left masked.
//
static void
_nvswitch_initialize_nvlipt_interrupts_ls10
(
    nvswitch_device *device
)
{
    NvU32 i;
    NvU32 regval = 0;

    //
    // NVLipt interrupt routing (NVLIPT_COMMON, NVLIPT_LNK, NVLDL, NVLTLC)
    // will be initialized by MINION NVLPROD flow
    //
    // We must enable interrupts at the top levels in NVLW, NVLIPT_COMMON,
    // NVLIPT_LNK and MINION
    //

    // NVLW common: fatal + INTR0 on tree 0
    regval = DRF_NUM(_NVLW_COMMON, _INTR_0_MASK, _FATAL, 0x1) |
             DRF_NUM(_NVLW_COMMON, _INTR_0_MASK, _NONFATAL, 0x0) |
             DRF_NUM(_NVLW_COMMON, _INTR_0_MASK, _CORRECTABLE, 0x0) |
             DRF_NUM(_NVLW_COMMON, _INTR_0_MASK, _INTR0, 0x1) |
             DRF_NUM(_NVLW_COMMON, _INTR_0_MASK, _INTR1, 0x0);
    NVSWITCH_BCAST_WR32_LS10(device, NVLW, _NVLW_COMMON, _INTR_0_MASK, regval);

    // NVLW common: nonfatal + correctable + INTR1 on tree 1
    regval = DRF_NUM(_NVLW_COMMON, _INTR_1_MASK, _FATAL, 0x0) |
             DRF_NUM(_NVLW_COMMON, _INTR_1_MASK, _NONFATAL, 0x1) |
             DRF_NUM(_NVLW_COMMON, _INTR_1_MASK, _CORRECTABLE, 0x1) |
             DRF_NUM(_NVLW_COMMON, _INTR_1_MASK, _INTR0, 0x0) |
             DRF_NUM(_NVLW_COMMON, _INTR_1_MASK, _INTR1, 0x1);
    NVSWITCH_BCAST_WR32_LS10(device, NVLW, _NVLW_COMMON, _INTR_1_MASK, regval);

    // NVLW common: tree 2 carries nothing
    regval = DRF_NUM(_NVLW_COMMON, _INTR_2_MASK, _FATAL, 0x0) |
             DRF_NUM(_NVLW_COMMON, _INTR_2_MASK, _NONFATAL, 0x0) |
             DRF_NUM(_NVLW_COMMON, _INTR_2_MASK, _CORRECTABLE, 0x0) |
             DRF_NUM(_NVLW_COMMON, _INTR_2_MASK, _INTR0, 0x0) |
             DRF_NUM(_NVLW_COMMON, _INTR_2_MASK, _INTR1, 0x0);
    NVSWITCH_BCAST_WR32_LS10(device, NVLW, _NVLW_COMMON, _INTR_2_MASK, regval);

    // NVLW link: same tree assignment as common, applied to each per-link mask
    for (i = 0; i < NV_NVLW_LINK_INTR_0_MASK__SIZE_1; i++)
    {
        regval = DRF_NUM(_NVLW_LINK, _INTR_0_MASK, _FATAL, 0x1) |
                 DRF_NUM(_NVLW_LINK, _INTR_0_MASK, _NONFATAL, 0x0) |
                 DRF_NUM(_NVLW_LINK, _INTR_0_MASK, _CORRECTABLE, 0x0) |
                 DRF_NUM(_NVLW_LINK, _INTR_0_MASK, _INTR0, 0x1) |
                 DRF_NUM(_NVLW_LINK, _INTR_0_MASK, _INTR1, 0x0);
        NVSWITCH_BCAST_WR32_LS10(device, NVLW, _NVLW_LINK, _INTR_0_MASK(i), regval);

        regval = DRF_NUM(_NVLW_LINK, _INTR_1_MASK, _FATAL, 0x0) |
                 DRF_NUM(_NVLW_LINK, _INTR_1_MASK, _NONFATAL, 0x1) |
                 DRF_NUM(_NVLW_LINK, _INTR_1_MASK, _CORRECTABLE, 0x1) |
                 DRF_NUM(_NVLW_LINK, _INTR_1_MASK, _INTR0, 0x0) |
                 DRF_NUM(_NVLW_LINK, _INTR_1_MASK, _INTR1, 0x1);
        NVSWITCH_BCAST_WR32_LS10(device, NVLW, _NVLW_LINK, _INTR_1_MASK(i), regval);

        regval = DRF_NUM(_NVLW_LINK, _INTR_2_MASK, _FATAL, 0x0) |
                 DRF_NUM(_NVLW_LINK, _INTR_2_MASK, _NONFATAL, 0x0) |
                 DRF_NUM(_NVLW_LINK, _INTR_2_MASK, _CORRECTABLE, 0x0) |
                 DRF_NUM(_NVLW_LINK, _INTR_2_MASK, _INTR0, 0x0) |
                 DRF_NUM(_NVLW_LINK, _INTR_2_MASK, _INTR1, 0x0);
        NVSWITCH_BCAST_WR32_LS10(device, NVLW, _NVLW_LINK, _INTR_2_MASK(i), regval);
    }

    // NVLIPT_COMMON: both interrupt outputs enabled
    regval = DRF_NUM(_NVLIPT_COMMON, _INTR_CONTROL_COMMON, _INT0_EN, 0x1) |
             DRF_NUM(_NVLIPT_COMMON, _INTR_CONTROL_COMMON, _INT1_EN, 0x1);

    NVSWITCH_BCAST_WR32_LS10(device, NVLIPT, _NVLIPT_COMMON, _INTR_CONTROL_COMMON, regval);

    // NVLIPT_LNK: both interrupt outputs enabled on every link
    regval = DRF_NUM(_NVLIPT_LNK, _INTR_CONTROL_LINK, _INT0_EN, 0x1) |
             DRF_NUM(_NVLIPT_LNK, _INTR_CONTROL_LINK, _INT1_EN, 0x1);
    NVSWITCH_BCAST_WR32_LS10(device, NVLIPT_LNK, _NVLIPT_LNK, _INTR_CONTROL_LINK, regval);

    // NVLIPT_LNK_INTR_1: route link-state-request-ready to INT1
    regval = DRF_NUM(_NVLIPT_LNK, _INTR_INT1_EN, _LINKSTATEREQUESTREADYSET, 0x1);
    NVSWITCH_BCAST_WR32_LS10(device, NVLIPT_LNK, _NVLIPT_LNK, _INTR_INT1_EN, regval);

    // MINION: program each present MINION engine's interrupt trees
    for (i = 0; i < NUM_MINION_ENGINE_LS10; ++i)
    {
        if (!NVSWITCH_ENG_VALID_LS10(device, MINION, i))
        {
            continue;
        }

        _nvswitch_initialize_minion_interrupts(device,i);
    }

    // CPR: enable engine-reset error logging (production default for the field)

    regval = NVSWITCH_ENG_RD32(device, CPR, _BCAST, 0, _CPR_SYS, _ERR_LOG_EN_0);
    regval = FLD_SET_DRF(_CPR_SYS, _ERR_LOG_EN_0, _ENGINE_RESET_ERR, __PROD, regval);
    NVSWITCH_ENG_WR32(device, CPR, _BCAST, 0, _CPR_SYS, _ERR_LOG_EN_0, regval);

    // CPR interrupt routed to tree 0 only; trees 1 and 2 masked for CPR
    regval = DRF_DEF(_CPR_SYS, _NVLW_INTR_0_MASK, _CPR_INTR, _ENABLE) |
             DRF_DEF(_CPR_SYS, _NVLW_INTR_0_MASK, _INTR0, _ENABLE);
    NVSWITCH_ENG_WR32(device, CPR, _BCAST, 0, _CPR_SYS, _NVLW_INTR_0_MASK, regval);

    regval = DRF_DEF(_CPR_SYS, _NVLW_INTR_1_MASK, _CPR_INTR, _DISABLE) |
             DRF_DEF(_CPR_SYS, _NVLW_INTR_1_MASK, _INTR1, _ENABLE);
    NVSWITCH_ENG_WR32(device, CPR, _BCAST, 0, _CPR_SYS, _NVLW_INTR_1_MASK, regval);

    regval = DRF_DEF(_CPR_SYS, _NVLW_INTR_2_MASK, _CPR_INTR, _DISABLE) |
             DRF_DEF(_CPR_SYS, _NVLW_INTR_2_MASK, _INTR2, _ENABLE);
    NVSWITCH_ENG_WR32(device, CPR, _BCAST, 0, _CPR_SYS, _NVLW_INTR_2_MASK, regval);
}
238
//
// Compute and cache the ROUTE-unit fatal and non-fatal error-report enable
// masks in chip_device->intr_mask.route.  No hardware write happens here;
// the cached masks are consumed elsewhere (not visible in this chunk).
//
static void
_nvswitch_initialize_route_interrupts
(
    nvswitch_device *device
)
{
    ls10_device *chip_device = NVSWITCH_GET_CHIP_DEVICE_LS10(device);

    // Fatal: buffer, parity and double-bit ECC errors in ROUTE state/RAMs
    chip_device->intr_mask.route.fatal =
        DRF_DEF(_ROUTE, _ERR_FATAL_REPORT_EN_0, _ROUTEBUFERR, _ENABLE) |
        DRF_DEF(_ROUTE, _ERR_FATAL_REPORT_EN_0, _GLT_ECC_DBE_ERR, _ENABLE) |
        DRF_DEF(_ROUTE, _ERR_FATAL_REPORT_EN_0, _PDCTRLPARERR, _ENABLE) |
        DRF_DEF(_ROUTE, _ERR_FATAL_REPORT_EN_0, _NVS_ECC_DBE_ERR, _ENABLE) |
        DRF_DEF(_ROUTE, _ERR_FATAL_REPORT_EN_0, _CDTPARERR, _ENABLE) |
        DRF_DEF(_ROUTE, _ERR_FATAL_REPORT_EN_0, _MCRID_ECC_DBE_ERR, _ENABLE) |
        DRF_DEF(_ROUTE, _ERR_FATAL_REPORT_EN_0, _EXTMCRID_ECC_DBE_ERR, _ENABLE) |
        DRF_DEF(_ROUTE, _ERR_FATAL_REPORT_EN_0, _RAM_ECC_DBE_ERR, _ENABLE);

    // Non-fatal: routing/policy errors and single-bit ECC limit events
    chip_device->intr_mask.route.nonfatal =
        DRF_DEF(_ROUTE, _ERR_NON_FATAL_REPORT_EN_0, _NOPORTDEFINEDERR, _ENABLE) |
        DRF_DEF(_ROUTE, _ERR_NON_FATAL_REPORT_EN_0, _INVALIDROUTEPOLICYERR, _ENABLE) |
        DRF_DEF(_ROUTE, _ERR_NON_FATAL_REPORT_EN_0, _GLT_ECC_LIMIT_ERR, _ENABLE) |
        DRF_DEF(_ROUTE, _ERR_NON_FATAL_REPORT_EN_0, _NVS_ECC_LIMIT_ERR, _ENABLE) |
        DRF_DEF(_ROUTE, _ERR_NON_FATAL_REPORT_EN_0, _MCRID_ECC_LIMIT_ERR, _ENABLE) |
        DRF_DEF(_ROUTE, _ERR_NON_FATAL_REPORT_EN_0, _EXTMCRID_ECC_LIMIT_ERR, _ENABLE) |
        DRF_DEF(_ROUTE, _ERR_NON_FATAL_REPORT_EN_0, _RAM_ECC_LIMIT_ERR, _ENABLE) |
        DRF_DEF(_ROUTE, _ERR_NON_FATAL_REPORT_EN_0, _INVALID_MCRID_ERR, _ENABLE);
    // NOTE: _MC_TRIGGER_ERR is debug-use only
}
268
//
// Compute and cache the INGRESS-unit error-report enable masks in
// chip_device->intr_mask.ingress[0..1] (index = report-enable register
// bank _EN_0 / _EN_1).  No hardware write happens here.
//
static void
_nvswitch_initialize_ingress_interrupts
(
    nvswitch_device *device
)
{
    ls10_device *chip_device = NVSWITCH_GET_CHIP_DEVICE_LS10(device);

    // Bank 0 fatal: decode errors, parity and double-bit ECC in ingress tables
    chip_device->intr_mask.ingress[0].fatal =
        DRF_DEF(_INGRESS, _ERR_FATAL_REPORT_EN_0, _CMDDECODEERR, _ENABLE) |
        DRF_DEF(_INGRESS, _ERR_FATAL_REPORT_EN_0, _EXTAREMAPTAB_ECC_DBE_ERR, _ENABLE) |
        DRF_DEF(_INGRESS, _ERR_FATAL_REPORT_EN_0, _NCISOC_HDR_ECC_DBE_ERR, _ENABLE) |
        DRF_DEF(_INGRESS, _ERR_FATAL_REPORT_EN_0, _INVALIDVCSET, _ENABLE) |
        DRF_DEF(_INGRESS, _ERR_FATAL_REPORT_EN_0, _REMAPTAB_ECC_DBE_ERR, _ENABLE) |
        DRF_DEF(_INGRESS, _ERR_FATAL_REPORT_EN_0, _RIDTAB_ECC_DBE_ERR, _ENABLE) |
        DRF_DEF(_INGRESS, _ERR_FATAL_REPORT_EN_0, _RLANTAB_ECC_DBE_ERR, _ENABLE) |
        DRF_DEF(_INGRESS, _ERR_FATAL_REPORT_EN_0, _NCISOC_PARITY_ERR, _ENABLE) |
        DRF_DEF(_INGRESS, _ERR_FATAL_REPORT_EN_0, _EXTBREMAPTAB_ECC_DBE_ERR, _ENABLE) |
        DRF_DEF(_INGRESS, _ERR_FATAL_REPORT_EN_0, _MCREMAPTAB_ECC_DBE_ERR, _ENABLE);

    // Bank 0 non-fatal: per-packet table/ACL/bounds faults and ECC limit events
    chip_device->intr_mask.ingress[0].nonfatal =
        DRF_DEF(_INGRESS, _ERR_NON_FATAL_REPORT_EN_0, _REQCONTEXTMISMATCHERR, _ENABLE) |
        DRF_DEF(_INGRESS, _ERR_NON_FATAL_REPORT_EN_0, _ACLFAIL, _ENABLE) |
        DRF_DEF(_INGRESS, _ERR_NON_FATAL_REPORT_EN_0, _NCISOC_HDR_ECC_LIMIT_ERR, _ENABLE) |
        DRF_DEF(_INGRESS, _ERR_NON_FATAL_REPORT_EN_0, _ADDRBOUNDSERR, _ENABLE) |
        DRF_DEF(_INGRESS, _ERR_NON_FATAL_REPORT_EN_0, _RIDTABCFGERR, _ENABLE) |
        DRF_DEF(_INGRESS, _ERR_NON_FATAL_REPORT_EN_0, _RLANTABCFGERR, _ENABLE) |
        DRF_DEF(_INGRESS, _ERR_NON_FATAL_REPORT_EN_0, _REMAPTAB_ECC_LIMIT_ERR, _ENABLE) |
        DRF_DEF(_INGRESS, _ERR_NON_FATAL_REPORT_EN_0, _RIDTAB_ECC_LIMIT_ERR, _ENABLE) |
        DRF_DEF(_INGRESS, _ERR_NON_FATAL_REPORT_EN_0, _RLANTAB_ECC_LIMIT_ERR, _ENABLE) |
        DRF_DEF(_INGRESS, _ERR_NON_FATAL_REPORT_EN_0, _ADDRTYPEERR, _ENABLE) |
        DRF_DEF(_INGRESS, _ERR_NON_FATAL_REPORT_EN_0, _EXTAREMAPTAB_INDEX_ERR, _ENABLE) |
        DRF_DEF(_INGRESS, _ERR_NON_FATAL_REPORT_EN_0, _EXTBREMAPTAB_INDEX_ERR, _ENABLE) |
        DRF_DEF(_INGRESS, _ERR_NON_FATAL_REPORT_EN_0, _MCREMAPTAB_INDEX_ERR, _ENABLE) |
        DRF_DEF(_INGRESS, _ERR_NON_FATAL_REPORT_EN_0, _EXTAREMAPTAB_REQCONTEXTMISMATCHERR, _ENABLE) |
        DRF_DEF(_INGRESS, _ERR_NON_FATAL_REPORT_EN_0, _EXTBREMAPTAB_REQCONTEXTMISMATCHERR, _ENABLE) |
        DRF_DEF(_INGRESS, _ERR_NON_FATAL_REPORT_EN_0, _MCREMAPTAB_REQCONTEXTMISMATCHERR, _ENABLE) |
        DRF_DEF(_INGRESS, _ERR_NON_FATAL_REPORT_EN_0, _EXTAREMAPTAB_ACLFAIL, _ENABLE) |
        DRF_DEF(_INGRESS, _ERR_NON_FATAL_REPORT_EN_0, _EXTBREMAPTAB_ACLFAIL, _ENABLE) |
        DRF_DEF(_INGRESS, _ERR_NON_FATAL_REPORT_EN_0, _MCREMAPTAB_ACLFAIL, _ENABLE) |
        DRF_DEF(_INGRESS, _ERR_NON_FATAL_REPORT_EN_0, _EXTAREMAPTAB_ADDRBOUNDSERR, _ENABLE) |
        DRF_DEF(_INGRESS, _ERR_NON_FATAL_REPORT_EN_0, _EXTBREMAPTAB_ADDRBOUNDSERR, _ENABLE) |
        DRF_DEF(_INGRESS, _ERR_NON_FATAL_REPORT_EN_0, _MCREMAPTAB_ADDRBOUNDSERR, _ENABLE);

    // Bank 1 has no fatal sources enabled
    chip_device->intr_mask.ingress[1].fatal = 0;

    // Bank 1 non-fatal: extended/multicast remap table events
    chip_device->intr_mask.ingress[1].nonfatal =
        DRF_DEF(_INGRESS, _ERR_NON_FATAL_REPORT_EN_1, _EXTAREMAPTAB_ECC_LIMIT_ERR, _ENABLE) |
        DRF_DEF(_INGRESS, _ERR_NON_FATAL_REPORT_EN_1, _EXTBREMAPTAB_ECC_LIMIT_ERR, _ENABLE) |
        DRF_DEF(_INGRESS, _ERR_NON_FATAL_REPORT_EN_1, _MCREMAPTAB_ECC_LIMIT_ERR, _ENABLE) |
        DRF_DEF(_INGRESS, _ERR_NON_FATAL_REPORT_EN_1, _MCCMDTOUCADDRERR, _ENABLE) |
        DRF_DEF(_INGRESS, _ERR_NON_FATAL_REPORT_EN_1, _READMCREFLECTMEMERR, _ENABLE) |
        DRF_DEF(_INGRESS, _ERR_NON_FATAL_REPORT_EN_1, _EXTAREMAPTAB_ADDRTYPEERR, _ENABLE) |
        DRF_DEF(_INGRESS, _ERR_NON_FATAL_REPORT_EN_1, _EXTBREMAPTAB_ADDRTYPEERR, _ENABLE) |
        DRF_DEF(_INGRESS, _ERR_NON_FATAL_REPORT_EN_1, _MCREMAPTAB_ADDRTYPEERR, _ENABLE);
}
325
//
// Compute and cache the EGRESS-unit error-report enable masks in
// chip_device->intr_mask.egress[0..1] (index = report-enable register
// bank _EN_0 / _EN_1).  No hardware write happens here.
//
static void
_nvswitch_initialize_egress_interrupts
(
    nvswitch_device *device
)
{
    ls10_device *chip_device = NVSWITCH_GET_CHIP_DEVICE_LS10(device);

    // Bank 0 fatal: buffer/credit/sequence and double-bit ECC errors
    chip_device->intr_mask.egress[0].fatal =
        DRF_DEF(_EGRESS, _ERR_FATAL_REPORT_EN_0, _EGRESSBUFERR, _ENABLE) |
        DRF_DEF(_EGRESS, _ERR_FATAL_REPORT_EN_0, _PKTROUTEERR, _ENABLE) |
        DRF_DEF(_EGRESS, _ERR_FATAL_REPORT_EN_0, _SEQIDERR, _ENABLE) |
        DRF_DEF(_EGRESS, _ERR_FATAL_REPORT_EN_0, _NXBAR_HDR_ECC_DBE_ERR, _ENABLE) |
        DRF_DEF(_EGRESS, _ERR_FATAL_REPORT_EN_0, _RAM_OUT_HDR_ECC_DBE_ERR, _ENABLE) |
        DRF_DEF(_EGRESS, _ERR_FATAL_REPORT_EN_0, _NCISOCCREDITOVFL, _ENABLE) |
        DRF_DEF(_EGRESS, _ERR_FATAL_REPORT_EN_0, _REQTGTIDMISMATCHERR, _ENABLE) |
        DRF_DEF(_EGRESS, _ERR_FATAL_REPORT_EN_0, _RSPREQIDMISMATCHERR, _ENABLE) |
        DRF_DEF(_EGRESS, _ERR_FATAL_REPORT_EN_0, _NXBAR_HDR_PARITY_ERR, _ENABLE) |
        DRF_DEF(_EGRESS, _ERR_FATAL_REPORT_EN_0, _NCISOC_CREDIT_PARITY_ERR, _ENABLE) |
        DRF_DEF(_EGRESS, _ERR_FATAL_REPORT_EN_0, _NXBAR_FLITTYPE_MISMATCH_ERR, _ENABLE) |
        DRF_DEF(_EGRESS, _ERR_FATAL_REPORT_EN_0, _CREDIT_TIME_OUT_ERR, _ENABLE) |
        DRF_DEF(_EGRESS, _ERR_FATAL_REPORT_EN_0, _INVALIDVCSET_ERR, _ENABLE) |
        DRF_DEF(_EGRESS, _ERR_FATAL_REPORT_EN_0, _NXBAR_SIDEBAND_PD_PARITY_ERR, _ENABLE) |
        DRF_DEF(_EGRESS, _ERR_FATAL_REPORT_EN_0, _URRSPERR, _ENABLE) |
        DRF_DEF(_EGRESS, _ERR_FATAL_REPORT_EN_0, _HWRSPERR, _ENABLE);

    // Bank 0 non-fatal: ECC limit events and priv-response errors; _RFU kept off
    chip_device->intr_mask.egress[0].nonfatal =
        DRF_DEF(_EGRESS, _ERR_NON_FATAL_REPORT_EN_0, _NXBAR_HDR_ECC_LIMIT_ERR, _ENABLE) |
        DRF_DEF(_EGRESS, _ERR_NON_FATAL_REPORT_EN_0, _RAM_OUT_HDR_ECC_LIMIT_ERR, _ENABLE) |
        DRF_DEF(_EGRESS, _ERR_NON_FATAL_REPORT_EN_0, _PRIVRSPERR, _ENABLE) |
        DRF_DEF(_EGRESS, _ERR_NON_FATAL_REPORT_EN_0, _RFU, _DISABLE);

    // Bank 1 fatal: multicast/reduction control-store double-bit ECC errors
    chip_device->intr_mask.egress[1].fatal =
        DRF_DEF(_EGRESS, _ERR_FATAL_REPORT_EN_1, _MCRSPCTRLSTORE_ECC_DBE_ERR, _ENABLE) |
        DRF_DEF(_EGRESS, _ERR_FATAL_REPORT_EN_1, _RBCTRLSTORE_ECC_DBE_ERR, _ENABLE) |
        DRF_DEF(_EGRESS, _ERR_FATAL_REPORT_EN_1, _MCREDSGT_ECC_DBE_ERR, _ENABLE) |
        DRF_DEF(_EGRESS, _ERR_FATAL_REPORT_EN_1, _MCRSP_RAM_HDR_ECC_DBE_ERR, _ENABLE);

    //
    // Bank 1 non-fatal.
    // NOTE(review): several DBE/parity sources appear under the non-fatal
    // EN_1 mask here — confirm this matches the intended severity routing.
    //
    chip_device->intr_mask.egress[1].nonfatal =
        DRF_DEF(_EGRESS, _ERR_NON_FATAL_REPORT_EN_1, _NXBAR_REDUCTION_HDR_ECC_LIMIT_ERR, _ENABLE) |
        DRF_DEF(_EGRESS, _ERR_NON_FATAL_REPORT_EN_1, _MCRSPCTRLSTORE_ECC_LIMIT_ERR, _ENABLE) |
        DRF_DEF(_EGRESS, _ERR_NON_FATAL_REPORT_EN_1, _RBCTRLSTORE_ECC_LIMIT_ERR, _ENABLE) |
        DRF_DEF(_EGRESS, _ERR_NON_FATAL_REPORT_EN_1, _MCREDSGT_ECC_LIMIT_ERR, _ENABLE) |
        DRF_DEF(_EGRESS, _ERR_NON_FATAL_REPORT_EN_1, _MCREDBUF_ECC_LIMIT_ERR, _ENABLE) |
        DRF_DEF(_EGRESS, _ERR_NON_FATAL_REPORT_EN_1, _MCRSP_RAM_HDR_ECC_LIMIT_ERR, _ENABLE) |
        DRF_DEF(_EGRESS, _ERR_NON_FATAL_REPORT_EN_1, _NXBAR_REDUCTION_HDR_ECC_DBE_ERR, _ENABLE) |
        DRF_DEF(_EGRESS, _ERR_NON_FATAL_REPORT_EN_1, _NXBAR_REDUCTION_HDR_PARITY_ERR, _ENABLE) |
        DRF_DEF(_EGRESS, _ERR_NON_FATAL_REPORT_EN_1, _NXBAR_REDUCTION_FLITTYPE_MISMATCH_ERR, _ENABLE) |
        DRF_DEF(_EGRESS, _ERR_NON_FATAL_REPORT_EN_1, _MCREDBUF_ECC_DBE_ERR, _ENABLE) |
        DRF_DEF(_EGRESS, _ERR_NON_FATAL_REPORT_EN_1, _MCRSP_CNT_ERR, _ENABLE) |
        DRF_DEF(_EGRESS, _ERR_NON_FATAL_REPORT_EN_1, _RBRSP_CNT_ERR, _ENABLE);
}
379
//
// Compute and cache the TSTATE-unit fatal and non-fatal error-report enable
// masks in chip_device->intr_mask.tstate.  No hardware write happens here.
//
static void
_nvswitch_initialize_tstate_interrupts
(
    nvswitch_device *device
)
{
    ls10_device *chip_device = NVSWITCH_GET_CHIP_DEVICE_LS10(device);

    // Fatal: tagpool/crumbstore buffer and double-bit ECC errors, timeouts
    chip_device->intr_mask.tstate.fatal =
        DRF_DEF(_TSTATE, _ERR_FATAL_REPORT_EN_0, _TAGPOOLBUFERR, _ENABLE) |
        DRF_DEF(_TSTATE, _ERR_FATAL_REPORT_EN_0, _TAGPOOL_ECC_DBE_ERR, _ENABLE) |
        DRF_DEF(_TSTATE, _ERR_FATAL_REPORT_EN_0, _CRUMBSTOREBUFERR, _ENABLE) |
        DRF_DEF(_TSTATE, _ERR_FATAL_REPORT_EN_0, _CRUMBSTORE_ECC_DBE_ERR, _ENABLE) |
        DRF_DEF(_TSTATE, _ERR_FATAL_REPORT_EN_0, _ATO_ERR, _ENABLE) |
        DRF_DEF(_TSTATE, _ERR_FATAL_REPORT_EN_0, _CAMRSP_ERR, _ENABLE);

    // Non-fatal: single-bit ECC limit events
    chip_device->intr_mask.tstate.nonfatal =
        DRF_DEF(_TSTATE, _ERR_NON_FATAL_REPORT_EN_0, _TAGPOOL_ECC_LIMIT_ERR, _ENABLE) |
        DRF_DEF(_TSTATE, _ERR_NON_FATAL_REPORT_EN_0, _CRUMBSTORE_ECC_LIMIT_ERR, _ENABLE);
}
400
//
// Compute and cache the SOURCETRACK-unit fatal and non-fatal error-report
// enable masks in chip_device->intr_mask.sourcetrack.  No hardware write
// happens here.
//
static void
_nvswitch_initialize_sourcetrack_interrupts
(
    nvswitch_device *device
)
{
    ls10_device *chip_device = NVSWITCH_GET_CHIP_DEVICE_LS10(device);

    // Fatal: crumbstore DBE, duplicate/invalid tags and tracking timeouts
    chip_device->intr_mask.sourcetrack.fatal =
        DRF_DEF(_SOURCETRACK, _ERR_FATAL_REPORT_EN_0, _CREQ_TCEN0_CRUMBSTORE_ECC_DBE_ERR, _ENABLE) |
        DRF_DEF(_SOURCETRACK, _ERR_FATAL_REPORT_EN_0, _DUP_CREQ_TCEN0_TAG_ERR, _ENABLE) |
        DRF_DEF(_SOURCETRACK, _ERR_FATAL_REPORT_EN_0, _INVALID_TCEN0_RSP_ERR, _ENABLE) |
        DRF_DEF(_SOURCETRACK, _ERR_FATAL_REPORT_EN_0, _INVALID_TCEN1_RSP_ERR, _ENABLE) |
        DRF_DEF(_SOURCETRACK, _ERR_FATAL_REPORT_EN_0, _SOURCETRACK_TIME_OUT_ERR, _ENABLE);

    // Non-fatal: single-bit ECC limit event only
    chip_device->intr_mask.sourcetrack.nonfatal =
        DRF_DEF(_SOURCETRACK, _ERR_NON_FATAL_REPORT_EN_0, _CREQ_TCEN0_CRUMBSTORE_ECC_LIMIT_ERR, _ENABLE);
}
419
//
// Compute and cache the MULTICASTTSTATE-unit fatal and non-fatal
// error-report enable masks in chip_device->intr_mask.mc_tstate.
// No hardware write happens here.
//
static void
_nvswitch_initialize_multicast_tstate_interrupts
(
    nvswitch_device *device
)
{
    ls10_device *chip_device = NVSWITCH_GET_CHIP_DEVICE_LS10(device);

    // Fatal: double-bit ECC and crumbstore buffer-overwrite errors
    chip_device->intr_mask.mc_tstate.fatal =
        DRF_DEF(_MULTICASTTSTATE, _ERR_FATAL_REPORT_EN_0, _TAGPOOL_ECC_DBE_ERR, _ENABLE) |
        DRF_DEF(_MULTICASTTSTATE, _ERR_FATAL_REPORT_EN_0, _CRUMBSTORE_BUF_OVERWRITE_ERR, _ENABLE) |
        DRF_DEF(_MULTICASTTSTATE, _ERR_FATAL_REPORT_EN_0, _CRUMBSTORE_ECC_DBE_ERR, _ENABLE);

    // Non-fatal: ECC limit events and multicast timeout
    chip_device->intr_mask.mc_tstate.nonfatal =
        DRF_DEF(_MULTICASTTSTATE, _ERR_NON_FATAL_REPORT_EN_0, _TAGPOOL_ECC_LIMIT_ERR, _ENABLE) |
        DRF_DEF(_MULTICASTTSTATE, _ERR_NON_FATAL_REPORT_EN_0, _CRUMBSTORE_ECC_LIMIT_ERR, _ENABLE) |
        DRF_DEF(_MULTICASTTSTATE, _ERR_NON_FATAL_REPORT_EN_0, _CRUMBSTORE_MCTO_ERR, _ENABLE);
}
438
//
// Compute and cache the REDUCTIONTSTATE-unit fatal and non-fatal
// error-report enable masks in chip_device->intr_mask.red_tstate.
// No hardware write happens here.
//
static void
_nvswitch_initialize_reduction_tstate_interrupts
(
    nvswitch_device *device
)
{
    ls10_device *chip_device = NVSWITCH_GET_CHIP_DEVICE_LS10(device);

    // Fatal: double-bit ECC and crumbstore buffer-overwrite errors
    chip_device->intr_mask.red_tstate.fatal =
        DRF_DEF(_REDUCTIONTSTATE, _ERR_FATAL_REPORT_EN_0, _TAGPOOL_ECC_DBE_ERR, _ENABLE) |
        DRF_DEF(_REDUCTIONTSTATE, _ERR_FATAL_REPORT_EN_0, _CRUMBSTORE_BUF_OVERWRITE_ERR, _ENABLE) |
        DRF_DEF(_REDUCTIONTSTATE, _ERR_FATAL_REPORT_EN_0, _CRUMBSTORE_ECC_DBE_ERR, _ENABLE);

    // Non-fatal: ECC limit events and reduction timeout
    chip_device->intr_mask.red_tstate.nonfatal =
        DRF_DEF(_REDUCTIONTSTATE, _ERR_NON_FATAL_REPORT_EN_0, _TAGPOOL_ECC_LIMIT_ERR, _ENABLE) |
        DRF_DEF(_REDUCTIONTSTATE, _ERR_NON_FATAL_REPORT_EN_0, _CRUMBSTORE_ECC_LIMIT_ERR, _ENABLE) |
        DRF_DEF(_REDUCTIONTSTATE, _ERR_NON_FATAL_REPORT_EN_0, _CRUMBSTORE_RTO_ERR, _ENABLE);
}
457
//
// Initialize the cached per-unit NPORT error-report masks (route, ingress,
// egress, tstate, sourcetrack, multicast/reduction tstate).  The common
// NPORT error-control enable write has been delegated to SOE (see below).
//
void
_nvswitch_initialize_nport_interrupts_ls10
(
    nvswitch_device *device
)
{
    // Moving this L2 register access to SOE. Refer bug #3747687
#if 0
    NvU32 val;

    val =
        DRF_NUM(_NPORT, _ERR_CONTROL_COMMON_NPORT, _CORRECTABLEENABLE, 1) |
        DRF_NUM(_NPORT, _ERR_CONTROL_COMMON_NPORT, _FATALENABLE, 1) |
        DRF_NUM(_NPORT, _ERR_CONTROL_COMMON_NPORT, _NONFATALENABLE, 1);
    NVSWITCH_NPORT_BCAST_WR32_LS10(device, _NPORT, _ERR_CONTROL_COMMON_NPORT, val);
#endif // 0

    // Each helper only fills chip_device->intr_mask.* caches
    _nvswitch_initialize_route_interrupts(device);
    _nvswitch_initialize_ingress_interrupts(device);
    _nvswitch_initialize_egress_interrupts(device);
    _nvswitch_initialize_tstate_interrupts(device);
    _nvswitch_initialize_sourcetrack_interrupts(device);
    _nvswitch_initialize_multicast_tstate_interrupts(device);
    _nvswitch_initialize_reduction_tstate_interrupts(device);
}
483
//
// Compute and cache the NXBAR TILE and TILEOUT fatal error masks in
// chip_device->intr_mask.{tile,tileout}.  The actual register writes are
// delegated to SOE (see the disabled writes below, bug #3747687).
//
void
_nvswitch_initialize_nxbar_interrupts_ls10
(
    nvswitch_device *device
)
{
    ls10_device *chip_device = NVSWITCH_GET_CHIP_DEVICE_LS10(device);
    NvU32 report_fatal;   // reused for the TILE mask, then the TILEOUT mask

    // TILE: all crossbar ingress/egress integrity errors are fatal
    report_fatal =
        DRF_NUM(_NXBAR_TILE, _ERR_FATAL_INTR_EN, _INGRESS_BUFFER_OVERFLOW, 1) |
        DRF_NUM(_NXBAR_TILE, _ERR_FATAL_INTR_EN, _INGRESS_BUFFER_UNDERFLOW, 1) |
        DRF_NUM(_NXBAR_TILE, _ERR_FATAL_INTR_EN, _EGRESS_CREDIT_OVERFLOW, 1) |
        DRF_NUM(_NXBAR_TILE, _ERR_FATAL_INTR_EN, _EGRESS_CREDIT_UNDERFLOW, 1) |
        DRF_NUM(_NXBAR_TILE, _ERR_FATAL_INTR_EN, _INGRESS_NON_BURSTY_PKT, 1) |
        DRF_NUM(_NXBAR_TILE, _ERR_FATAL_INTR_EN, _INGRESS_NON_STICKY_PKT, 1) |
        DRF_NUM(_NXBAR_TILE, _ERR_FATAL_INTR_EN, _INGRESS_BURST_GT_9_DATA_VC, 1) |
        DRF_NUM(_NXBAR_TILE, _ERR_FATAL_INTR_EN, _INGRESS_PKT_INVALID_DST, 1) |
        DRF_NUM(_NXBAR_TILE, _ERR_FATAL_INTR_EN, _INGRESS_PKT_PARITY_ERROR, 1) |
        DRF_NUM(_NXBAR_TILE, _ERR_FATAL_INTR_EN, _INGRESS_SIDEBAND_PARITY_ERROR, 1) |
        DRF_NUM(_NXBAR_TILE, _ERR_FATAL_INTR_EN, _INGRESS_REDUCTION_PKT_ERROR, 1);

    // Moving this L2 register access to SOE. Refer bug #3747687
#if 0
    NVSWITCH_BCAST_WR32_LS10(device, NXBAR, _NXBAR_TILE, _ERR_FATAL_INTR_EN, report_fatal);
#endif // 0

    chip_device->intr_mask.tile.fatal = report_fatal;
    chip_device->intr_mask.tile.nonfatal = 0;

    // TILEOUT: same classes of errors on the tile-output side
    report_fatal =
        DRF_NUM(_NXBAR_TILEOUT, _ERR_FATAL_INTR_EN, _INGRESS_BUFFER_OVERFLOW, 1) |
        DRF_NUM(_NXBAR_TILEOUT, _ERR_FATAL_INTR_EN, _INGRESS_BUFFER_UNDERFLOW, 1) |
        DRF_NUM(_NXBAR_TILEOUT, _ERR_FATAL_INTR_EN, _EGRESS_CREDIT_OVERFLOW, 1) |
        DRF_NUM(_NXBAR_TILEOUT, _ERR_FATAL_INTR_EN, _EGRESS_CREDIT_UNDERFLOW, 1) |
        DRF_NUM(_NXBAR_TILEOUT, _ERR_FATAL_INTR_EN, _INGRESS_NON_BURSTY_PKT, 1) |
        DRF_NUM(_NXBAR_TILEOUT, _ERR_FATAL_INTR_EN, _INGRESS_NON_STICKY_PKT, 1) |
        DRF_NUM(_NXBAR_TILEOUT, _ERR_FATAL_INTR_EN, _INGRESS_BURST_GT_9_DATA_VC, 1) |
        DRF_NUM(_NXBAR_TILEOUT, _ERR_FATAL_INTR_EN, _EGRESS_CDT_PARITY_ERROR, 1);

    // Moving this L2 register access to SOE. Refer bug #3747687
#if 0
    NVSWITCH_BCAST_WR32_LS10(device, NXBAR, _NXBAR_TILEOUT, _ERR_FATAL_INTR_EN, report_fatal);
#endif // 0

    chip_device->intr_mask.tileout.fatal = report_fatal;
    chip_device->intr_mask.tileout.nonfatal = 0;
}
532
533 /*
534 * @brief Service MINION Falcon interrupts on the requested interrupt tree
 * Falcon Interrupts are a little unique in how they are handled:
536 * IRQSTAT is used to read in interrupt status from FALCON
537 * IRQMASK is used to read in mask of interrupts
538 * IRQDEST is used to read in enabled interrupts that are routed to the HOST
539 *
540 * IRQSTAT & IRQMASK gives the pending interrupting on this minion
541 *
542 * @param[in] device MINION on this device
543 * @param[in] instance MINION instance
544 *
545 */
546 NvlStatus
nvswitch_minion_service_falcon_interrupts_ls10(nvswitch_device * device,NvU32 instance)547 nvswitch_minion_service_falcon_interrupts_ls10
548 (
549 nvswitch_device *device,
550 NvU32 instance
551 )
552 {
553 ls10_device *chip_device = NVSWITCH_GET_CHIP_DEVICE_LS10(device);
554 NVSWITCH_INTERRUPT_LOG_TYPE report = { 0 };
555 NvU32 pending, bit, unhandled, intr, link;
556
557 link = instance * NVSWITCH_LINKS_PER_MINION_LS10;
558 report.raw_pending = NVSWITCH_MINION_RD32_LS10(device, instance, _CMINION, _FALCON_IRQSTAT);
559 report.raw_enable = chip_device->intr_minion_dest;
560 report.mask = NVSWITCH_MINION_RD32_LS10(device, instance, _CMINION, _FALCON_IRQMASK);
561
562 pending = report.raw_pending & report.mask;
563
564 if (pending == 0)
565 {
566 return -NVL_NOT_FOUND;
567 }
568
569 unhandled = pending;
570
571 bit = DRF_NUM(_CMINION_FALCON, _IRQSTAT, _WDTMR, 1);
572 if (nvswitch_test_flags(pending, bit))
573 {
574 NVSWITCH_REPORT_FATAL(_HW_MINION_WATCHDOG, "MINION Watchdog timer ran out", NV_TRUE);
575 nvswitch_clear_flags(&unhandled, bit);
576 }
577
578 bit = DRF_NUM(_CMINION_FALCON, _IRQSTAT, _HALT, 1);
579 if (nvswitch_test_flags(pending, bit))
580 {
581 NVSWITCH_REPORT_FATAL(_HW_MINION_HALT, "MINION HALT", NV_TRUE);
582 nvswitch_clear_flags(&unhandled, bit);
583 }
584
585 bit = DRF_NUM(_CMINION_FALCON, _IRQSTAT, _EXTERR, 1);
586 if (nvswitch_test_flags(pending, bit))
587 {
588 NVSWITCH_REPORT_FATAL(_HW_MINION_EXTERR, "MINION EXTERR", NV_TRUE);
589 nvswitch_clear_flags(&unhandled, bit);
590 }
591
592 bit = DRF_NUM(_CMINION_FALCON, _IRQSTAT, _SWGEN0, 1);
593 if (nvswitch_test_flags(pending, bit))
594 {
595 NVSWITCH_PRINT(device, INFO,
596 "%s: Received MINION Falcon SWGEN0 interrupt on MINION %d.\n",
597 __FUNCTION__, instance);
598 nvswitch_clear_flags(&unhandled, bit);
599 }
600
601 bit = DRF_NUM(_CMINION_FALCON, _IRQSTAT, _SWGEN1, 1);
602 if (nvswitch_test_flags(pending, bit))
603 {
604 NVSWITCH_PRINT(device, INFO,
605 "%s: Received MINION Falcon SWGEN1 interrupt on MINION %d.\n",
606 __FUNCTION__, instance);
607 nvswitch_clear_flags(&unhandled, bit);
608 }
609
610 NVSWITCH_UNHANDLED_CHECK(device, unhandled);
611
612 if (device->link[link].fatal_error_occurred)
613 {
614 intr = NVSWITCH_MINION_RD32_LS10(device, instance, _MINION, _MINION_INTR_STALL_EN);
615 intr = FLD_SET_DRF(_MINION, _MINION_INTR_STALL_EN, _FATAL, _DISABLE, intr);
616 intr = FLD_SET_DRF(_MINION, _MINION_INTR_STALL_EN, _FALCON_STALL, _DISABLE, intr);
617 intr = FLD_SET_DRF(_MINION, _MINION_INTR_STALL_EN, _FATAL, _DISABLE, intr);
618 intr = FLD_SET_DRF(_MINION, _MINION_INTR_STALL_EN, _NONFATAL, _DISABLE, intr);
619 NVSWITCH_MINION_WR32_LS10(device, instance, _MINION, _MINION_INTR_STALL_EN, intr);
620 }
621
622 // Write to IRQSCLR to clear status of interrupt
623 NVSWITCH_MINION_WR32_LS10(device, instance, _CMINION, _FALCON_IRQSCLR, pending);
624
625 if (unhandled != 0)
626 {
627 return -NVL_MORE_PROCESSING_REQUIRED;
628 }
629
630 return NVL_SUCCESS;
631 }
632
633 /*
634 * @Brief : Send priv ring command and wait for completion
635 *
636 * @Description :
637 *
638 * @param[in] device a reference to the device to initialize
639 * @param[in] cmd encoded priv ring command
640 */
641 static NvlStatus
_nvswitch_ring_master_cmd_ls10(nvswitch_device * device,NvU32 cmd)642 _nvswitch_ring_master_cmd_ls10
643 (
644 nvswitch_device *device,
645 NvU32 cmd
646 )
647 {
648 NvU32 value;
649 NVSWITCH_TIMEOUT timeout;
650 NvBool keepPolling;
651
652 NVSWITCH_ENG_WR32(device, PRI_MASTER_RS, , 0, _PPRIV_MASTER, _RING_COMMAND, cmd);
653
654 nvswitch_timeout_create(NVSWITCH_INTERVAL_5MSEC_IN_NS, &timeout);
655 do
656 {
657 keepPolling = (nvswitch_timeout_check(&timeout)) ? NV_FALSE : NV_TRUE;
658
659 value = NVSWITCH_ENG_RD32(device, PRI_MASTER_RS, , 0, _PPRIV_MASTER, _RING_COMMAND);
660 if (FLD_TEST_DRF(_PPRIV_MASTER, _RING_COMMAND, _CMD, _NO_CMD, value))
661 {
662 break;
663 }
664
665 nvswitch_os_sleep(1);
666 }
667 while (keepPolling);
668
669 if (!FLD_TEST_DRF(_PPRIV_MASTER, _RING_COMMAND, _CMD, _NO_CMD, value))
670 {
671 NVSWITCH_PRINT(device, ERROR,
672 "%s: Timeout waiting for RING_COMMAND == NO_CMD (cmd=0x%x).\n",
673 __FUNCTION__, cmd);
674 return -NVL_INITIALIZATION_TOTAL_FAILURE;
675 }
676
677 return NVL_SUCCESS;
678 }
679
static NvlStatus
_nvswitch_service_priv_ring_ls10
(
    nvswitch_device *device
)
{
    NvU32 pending, i;
    NVSWITCH_PRI_ERROR_LOG_TYPE pri_error;
    NvlStatus status = NVL_SUCCESS;

    //
    // Service PRI ring (privileged register bus) write-error interrupts.
    // Reads the ringmaster's summary register, decodes per-cluster write
    // errors (SYS, SYSB, and each PRT hub), reports them as nonfatal, and
    // finally ACKs the interrupt at the ringmaster.
    //
    // Returns:
    //   NVL_SUCCESS                     - all pending errors decoded and ACKed
    //   -NVL_NOT_FOUND                  - no PRI ring interrupt was pending
    //   -NVL_MORE_PROCESSING_REQUIRED   - unrecognized bits remain pending
    //
    pending = NVSWITCH_ENG_RD32(device, PRI_MASTER_RS, , 0, _PPRIV_MASTER, _RING_INTERRUPT_STATUS0);
    if (pending == 0)
    {
        return -NVL_NOT_FOUND;
    }

    //
    // SYS
    //

    if (FLD_TEST_DRF_NUM(_PPRIV_MASTER, _RING_INTERRUPT_STATUS0,
                         _GBL_WRITE_ERROR_SYS, 1, pending))
    {
        // Capture the faulting address/data/info/code from the SYS PRI hub.
        pri_error.addr = NVSWITCH_ENG_RD32(device, SYS_PRI_HUB, , 0, _PPRIV_SYS, _PRIV_ERROR_ADR);
        pri_error.data = NVSWITCH_ENG_RD32(device, SYS_PRI_HUB, , 0, _PPRIV_SYS, _PRIV_ERROR_WRDAT);
        pri_error.info = NVSWITCH_ENG_RD32(device, SYS_PRI_HUB, , 0, _PPRIV_SYS, _PRIV_ERROR_INFO);
        pri_error.code = NVSWITCH_ENG_RD32(device, SYS_PRI_HUB, , 0, _PPRIV_SYS, _PRIV_ERROR_CODE);

        NVSWITCH_REPORT_PRI_ERROR_NONFATAL(_HW_HOST_PRIV_ERROR, "PRI WRITE SYS error", NVSWITCH_PPRIV_WRITE_SYS, 0, pri_error);

        NVSWITCH_PRINT(device, ERROR,
            "SYS PRI write error addr: 0x%08x data: 0x%08x info: 0x%08x code: 0x%08x\n",
            pri_error.addr, pri_error.data,
            pri_error.info, pri_error.code);

        // Clear the handled bit from the local pending mask.
        pending = FLD_SET_DRF_NUM(_PPRIV_MASTER, _RING_INTERRUPT_STATUS0,
            _GBL_WRITE_ERROR_SYS, 0, pending);
    }

    //
    // SYSB
    //

    if (FLD_TEST_DRF_NUM(_PPRIV_MASTER, _RING_INTERRUPT_STATUS0,
                         _GBL_WRITE_ERROR_SYSB, 1, pending))
    {
        // SYSB hub exposes the same _PPRIV_SYS error-log register layout.
        pri_error.addr = NVSWITCH_ENG_RD32(device, SYSB_PRI_HUB, , 0, _PPRIV_SYS, _PRIV_ERROR_ADR);
        pri_error.data = NVSWITCH_ENG_RD32(device, SYSB_PRI_HUB, , 0, _PPRIV_SYS, _PRIV_ERROR_WRDAT);
        pri_error.info = NVSWITCH_ENG_RD32(device, SYSB_PRI_HUB, , 0, _PPRIV_SYS, _PRIV_ERROR_INFO);
        pri_error.code = NVSWITCH_ENG_RD32(device, SYSB_PRI_HUB, , 0, _PPRIV_SYS, _PRIV_ERROR_CODE);

        // Instance 1 distinguishes SYSB from SYS (instance 0) in the report.
        NVSWITCH_REPORT_PRI_ERROR_NONFATAL(_HW_HOST_PRIV_ERROR, "PRI WRITE SYSB error", NVSWITCH_PPRIV_WRITE_SYS, 1, pri_error);

        NVSWITCH_PRINT(device, ERROR,
            "SYSB PRI write error addr: 0x%08x data: 0x%08x info: 0x%08x code: 0x%08x\n",
            pri_error.addr, pri_error.data,
            pri_error.info, pri_error.code);

        pending = FLD_SET_DRF_NUM(_PPRIV_MASTER, _RING_INTERRUPT_STATUS0,
            _GBL_WRITE_ERROR_SYSB, 0, pending);
    }

    //
    // per-PRT
    //

    // Each PRT hub has its own bit inside the _GBL_WRITE_ERROR_FBP field.
    for (i = 0; i < NUM_PRT_PRI_HUB_ENGINE_LS10; i++)
    {
        if (DRF_VAL(_PPRIV_MASTER, _RING_INTERRUPT_STATUS0,
            _GBL_WRITE_ERROR_FBP, pending) & NVBIT(i))
        {
            pri_error.addr = NVSWITCH_ENG_RD32(device, PRT_PRI_HUB, , i, _PPRIV_PRT, _PRIV_ERROR_ADR);
            pri_error.data = NVSWITCH_ENG_RD32(device, PRT_PRI_HUB, , i, _PPRIV_PRT, _PRIV_ERROR_WRDAT);
            pri_error.info = NVSWITCH_ENG_RD32(device, PRT_PRI_HUB, , i, _PPRIV_PRT, _PRIV_ERROR_INFO);
            pri_error.code = NVSWITCH_ENG_RD32(device, PRT_PRI_HUB, , i, _PPRIV_PRT, _PRIV_ERROR_CODE);

            NVSWITCH_REPORT_PRI_ERROR_NONFATAL(_HW_HOST_PRIV_ERROR, "PRI WRITE PRT error", NVSWITCH_PPRIV_WRITE_PRT, i, pri_error);

            NVSWITCH_PRINT(device, ERROR,
                "PRT%d PRI write error addr: 0x%08x data: 0x%08x info: 0x%08x code: 0x%08x\n",
                i, pri_error.addr, pri_error.data, pri_error.info, pri_error.code);

            pending &= ~DRF_NUM(_PPRIV_MASTER, _RING_INTERRUPT_STATUS0,
                _GBL_WRITE_ERROR_FBP, NVBIT(i));
        }
    }

    // Anything still set is an interrupt source this handler does not know.
    if (pending != 0)
    {
        NVSWITCH_PRINT_SXID(device, NVSWITCH_ERR_HW_HOST_PRIV_ERROR,
            "Fatal, Unexpected PRI error\n");
        NVSWITCH_LOG_FATAL_DATA(device, _HW, _HW_HOST_PRIV_ERROR, 2, 0, NV_FALSE, &pending);

        NVSWITCH_PRINT(device, ERROR,
            "Unexpected PRI error 0x%08x\n", pending);
        return -NVL_MORE_PROCESSING_REQUIRED;
    }

    // acknowledge the interrupt to the ringmaster
    status = _nvswitch_ring_master_cmd_ls10(device,
        DRF_DEF(_PPRIV_MASTER, _RING_COMMAND, _CMD, _ACK_INTERRUPT));
    if (status != NVL_SUCCESS)
    {
        NVSWITCH_PRINT(device, ERROR, "Timeout ACK'ing PRI error\n");
        //
        // Don't return error code -- there is nothing kernel SW can do about it if ACK failed.
        // Likely it is PLM protected and SOE needs to handle it.
        //
    }

    return NVL_SUCCESS;
}
792
793 static NvlStatus
_nvswitch_collect_nport_error_info_ls10(nvswitch_device * device,NvU32 link,NVSWITCH_RAW_ERROR_LOG_TYPE * data,NvU32 * idx,NvU32 register_start,NvU32 register_end)794 _nvswitch_collect_nport_error_info_ls10
795 (
796 nvswitch_device *device,
797 NvU32 link,
798 NVSWITCH_RAW_ERROR_LOG_TYPE *data,
799 NvU32 *idx,
800 NvU32 register_start,
801 NvU32 register_end
802 )
803 {
804 NvU32 register_block_size;
805 NvU32 i = *idx;
806
807 if ((register_start > register_end) ||
808 (register_start % sizeof(NvU32) != 0) ||
809 (register_end % sizeof(NvU32) != 0))
810 {
811 return -NVL_BAD_ARGS;
812 }
813
814 register_block_size = (register_end - register_start)/sizeof(NvU32) + 1;
815 if ((i + register_block_size > NVSWITCH_RAW_ERROR_LOG_DATA_SIZE) ||
816 (register_block_size > NVSWITCH_RAW_ERROR_LOG_DATA_SIZE))
817 {
818 return -NVL_BAD_ARGS;
819 }
820
821 do
822 {
823 data->data[i] = NVSWITCH_ENG_OFF_RD32(device, NPORT, , link, register_start);
824 register_start += sizeof(NvU32);
825 i++;
826
827 }
828 while (register_start <= register_end);
829
830 *idx = i;
831 return NVL_SUCCESS;
832 }
833
//
// Capture the NPORT error-log registers requested by 'collect_flags'
// (NVSWITCH_RAW_ERROR_LOG_DATA_FLAG_*) for the given link into 'data'.
// HEADER/MISC blocks are captured only while the corresponding
// *_ERR_HEADER_LOG_VALID register reports HEADERVALID0.  Blocks that were
// actually captured are recorded in data->flags; unused tail entries of
// data->data[] are zeroed.
//
static void
_nvswitch_collect_error_info_ls10
(
    nvswitch_device *device,
    NvU32 link,
    NvU32 collect_flags,  // NVSWITCH_RAW_ERROR_LOG_DATA_FLAG_*
    NVSWITCH_RAW_ERROR_LOG_TYPE *data
)
{
    NvU32 val;
    NvU32 i = 0;
    NvlStatus status = NVL_SUCCESS;

    //
    // The requested data 'collect_flags' is captured, if valid.
    // if the error log buffer fills, then the currently captured data block
    // could be truncated and subsequent blocks will be skipped.
    // The 'flags' field in the log structure describes which blocks are
    // actually captured.
    // Captured blocks are packed, in order.
    //

    data->flags = 0;

    // ROUTE
    if (collect_flags & NVSWITCH_RAW_ERROR_LOG_DATA_FLAG_ROUTE_TIME)
    {
        status = _nvswitch_collect_nport_error_info_ls10(device, link, data, &i,
                     NV_ROUTE_ERR_TIMESTAMP_LOG,
                     NV_ROUTE_ERR_TIMESTAMP_LOG);
        if (status == NVL_SUCCESS)
        {
            data->flags |= NVSWITCH_RAW_ERROR_LOG_DATA_FLAG_ROUTE_TIME;
            NVSWITCH_PRINT(device, INFO,
                "ROUTE: TIMESTAMP: 0x%08x\n", data->data[i-1]);
        }
    }

    // MISC/HEADER logs are only meaningful while the header-valid bit is set.
    val = NVSWITCH_ENG_RD32(device, NPORT, , link, _ROUTE, _ERR_HEADER_LOG_VALID);
    if (FLD_TEST_DRF_NUM(_ROUTE, _ERR_HEADER_LOG_VALID, _HEADERVALID0, 1, val))
    {
        if (collect_flags & NVSWITCH_RAW_ERROR_LOG_DATA_FLAG_ROUTE_MISC)
        {
            status = _nvswitch_collect_nport_error_info_ls10(device, link, data, &i,
                         NV_ROUTE_ERR_MISC_LOG_0,
                         NV_ROUTE_ERR_MISC_LOG_0);
            if (status == NVL_SUCCESS)
            {
                data->flags |= NVSWITCH_RAW_ERROR_LOG_DATA_FLAG_ROUTE_MISC;
                NVSWITCH_PRINT(device, INFO,
                    "ROUTE: MISC: 0x%08x\n", data->data[i-1]);
            }
        }

        if (collect_flags & NVSWITCH_RAW_ERROR_LOG_DATA_FLAG_ROUTE_HDR)
        {
            // 7-word header block (LOG_4..LOG_10).
            status = _nvswitch_collect_nport_error_info_ls10(device, link, data, &i,
                         NV_ROUTE_ERR_HEADER_LOG_4,
                         NV_ROUTE_ERR_HEADER_LOG_10);
            if (status == NVL_SUCCESS)
            {
                data->flags |= NVSWITCH_RAW_ERROR_LOG_DATA_FLAG_ROUTE_HDR;
                NVSWITCH_PRINT(device, INFO,
                    "ROUTE: HEADER: 0x%08x, 0x%08x, 0x%08x, 0x%08x, 0x%08x, 0x%08x, 0x%08x,\n",
                    data->data[i-7], data->data[i-6], data->data[i-5], data->data[i-4],
                    data->data[i-3], data->data[i-2], data->data[i-1]);
            }
        }
    }

    // INGRESS
    if (collect_flags & NVSWITCH_RAW_ERROR_LOG_DATA_FLAG_INGRESS_TIME)
    {
        status = _nvswitch_collect_nport_error_info_ls10(device, link, data, &i,
                     NV_INGRESS_ERR_TIMESTAMP_LOG,
                     NV_INGRESS_ERR_TIMESTAMP_LOG);
        if (status == NVL_SUCCESS)
        {
            data->flags |= NVSWITCH_RAW_ERROR_LOG_DATA_FLAG_INGRESS_TIME;
            NVSWITCH_PRINT(device, INFO,
                "INGRESS: TIMESTAMP: 0x%08x\n", data->data[i-1]);
        }
    }

    val = NVSWITCH_ENG_RD32(device, NPORT, , link, _INGRESS, _ERR_HEADER_LOG_VALID);
    if (FLD_TEST_DRF_NUM(_INGRESS, _ERR_HEADER_LOG_VALID, _HEADERVALID0, 1, val))
    {
        if (collect_flags & NVSWITCH_RAW_ERROR_LOG_DATA_FLAG_INGRESS_MISC)
        {
            status = _nvswitch_collect_nport_error_info_ls10(device, link, data, &i,
                         NV_INGRESS_ERR_MISC_LOG_0,
                         NV_INGRESS_ERR_MISC_LOG_0);
            if (status == NVL_SUCCESS)
            {
                data->flags |= NVSWITCH_RAW_ERROR_LOG_DATA_FLAG_INGRESS_MISC;
                NVSWITCH_PRINT(device, INFO,
                    "INGRESS: MISC: 0x%08x\n", data->data[i-1]);
            }
        }

        if (collect_flags & NVSWITCH_RAW_ERROR_LOG_DATA_FLAG_INGRESS_HDR)
        {
            // 6-word header block (LOG_4..LOG_9) -- one shorter than ROUTE/EGRESS.
            status = _nvswitch_collect_nport_error_info_ls10(device, link, data, &i,
                         NV_INGRESS_ERR_HEADER_LOG_4,
                         NV_INGRESS_ERR_HEADER_LOG_9);
            if (status == NVL_SUCCESS)
            {
                data->flags |= NVSWITCH_RAW_ERROR_LOG_DATA_FLAG_INGRESS_HDR;
                NVSWITCH_PRINT(device, INFO,
                    "INGRESS: HEADER: 0x%08x, 0x%08x, 0x%08x, 0x%08x, 0x%08x, 0x%08x,\n",
                    data->data[i-6], data->data[i-5], data->data[i-4], data->data[i-3],
                    data->data[i-2], data->data[i-1]);
            }
        }
    }

    // EGRESS
    if (collect_flags & NVSWITCH_RAW_ERROR_LOG_DATA_FLAG_EGRESS_TIME)
    {
        status = _nvswitch_collect_nport_error_info_ls10(device, link, data, &i,
                     NV_EGRESS_ERR_TIMESTAMP_LOG,
                     NV_EGRESS_ERR_TIMESTAMP_LOG);
        if (status == NVL_SUCCESS)
        {
            data->flags |= NVSWITCH_RAW_ERROR_LOG_DATA_FLAG_EGRESS_TIME;
            NVSWITCH_PRINT(device, INFO,
                "EGRESS: TIMESTAMP: 0x%08x\n", data->data[i-1]);
        }
    }

    val = NVSWITCH_ENG_RD32(device, NPORT, , link, _EGRESS, _ERR_HEADER_LOG_VALID);
    if (FLD_TEST_DRF_NUM(_EGRESS, _ERR_HEADER_LOG_VALID, _HEADERVALID0, 1, val))
    {
        if (collect_flags & NVSWITCH_RAW_ERROR_LOG_DATA_FLAG_EGRESS_MISC)
        {
            status = _nvswitch_collect_nport_error_info_ls10(device, link, data, &i,
                         NV_EGRESS_ERR_MISC_LOG_0,
                         NV_EGRESS_ERR_MISC_LOG_0);
            if (status == NVL_SUCCESS)
            {
                data->flags |= NVSWITCH_RAW_ERROR_LOG_DATA_FLAG_EGRESS_MISC;
                NVSWITCH_PRINT(device, INFO,
                    "EGRESS: MISC: 0x%08x\n", data->data[i-1]);
            }
        }

        if (collect_flags & NVSWITCH_RAW_ERROR_LOG_DATA_FLAG_EGRESS_HDR)
        {
            status = _nvswitch_collect_nport_error_info_ls10(device, link, data, &i,
                         NV_EGRESS_ERR_HEADER_LOG_4,
                         NV_EGRESS_ERR_HEADER_LOG_10);
            if (status == NVL_SUCCESS)
            {
                data->flags |= NVSWITCH_RAW_ERROR_LOG_DATA_FLAG_EGRESS_HDR;
                NVSWITCH_PRINT(device, INFO,
                    "EGRESS: HEADER: 0x%08x, 0x%08x, 0x%08x, 0x%08x, 0x%08x, 0x%08x, 0x%08x\n",
                    data->data[i-7], data->data[i-6], data->data[i-5], data->data[i-4],
                    data->data[i-3], data->data[i-2], data->data[i-1]);
            }
        }
    }

    // EGRESS multicast error logs (separate MC_* register set).
    if (collect_flags & NVSWITCH_RAW_ERROR_LOG_DATA_FLAG_EGRESS_MC_TIME)
    {
        status = _nvswitch_collect_nport_error_info_ls10(device, link, data, &i,
                     NV_EGRESS_MC_ERR_TIMESTAMP_LOG,
                     NV_EGRESS_MC_ERR_TIMESTAMP_LOG);
        if (status == NVL_SUCCESS)
        {
            data->flags |= NVSWITCH_RAW_ERROR_LOG_DATA_FLAG_EGRESS_MC_TIME;
            NVSWITCH_PRINT(device, INFO,
                "EGRESS: TIME MC: 0x%08x\n", data->data[i-1]);
        }
    }

    val = NVSWITCH_ENG_RD32(device, NPORT, , link, _EGRESS, _MC_ERR_HEADER_LOG_VALID);
    if (FLD_TEST_DRF_NUM(_EGRESS, _MC_ERR_HEADER_LOG_VALID, _HEADERVALID0, 1, val))
    {
        if (collect_flags & NVSWITCH_RAW_ERROR_LOG_DATA_FLAG_EGRESS_MC_MISC)
        {
            status = _nvswitch_collect_nport_error_info_ls10(device, link, data, &i,
                         NV_EGRESS_MC_ERR_MISC_LOG_0,
                         NV_EGRESS_MC_ERR_MISC_LOG_0);
            if (status == NVL_SUCCESS)
            {
                data->flags |= NVSWITCH_RAW_ERROR_LOG_DATA_FLAG_EGRESS_MC_MISC;
                NVSWITCH_PRINT(device, INFO,
                    "EGRESS: MISC MC: 0x%08x\n", data->data[i-1]);
            }
        }

        if (collect_flags & NVSWITCH_RAW_ERROR_LOG_DATA_FLAG_EGRESS_MC_HDR)
        {
            status = _nvswitch_collect_nport_error_info_ls10(device, link, data, &i,
                         NV_EGRESS_MC_ERR_HEADER_LOG_4,
                         NV_EGRESS_MC_ERR_HEADER_LOG_10);
            if (status == NVL_SUCCESS)
            {
                data->flags |= NVSWITCH_RAW_ERROR_LOG_DATA_FLAG_EGRESS_MC_HDR;
                NVSWITCH_PRINT(device, INFO,
                    "EGRESS MC: HEADER: 0x%08x, 0x%08x, 0x%08x, 0x%08x, 0x%08x, 0x%08x, 0x%08x\n",
                    data->data[i-7], data->data[i-6], data->data[i-5], data->data[i-4],
                    data->data[i-3], data->data[i-2], data->data[i-1]);
            }
        }
    }

    // Multicast / reduction TSTATE timestamps (no header-valid gate).
    if (collect_flags & NVSWITCH_RAW_ERROR_LOG_DATA_FLAG_MC_TIME)
    {
        status = _nvswitch_collect_nport_error_info_ls10(device, link, data, &i,
                     NV_MULTICASTTSTATE_ERR_TIMESTAMP_LOG,
                     NV_MULTICASTTSTATE_ERR_TIMESTAMP_LOG);
        if (status == NVL_SUCCESS)
        {
            data->flags |= NVSWITCH_RAW_ERROR_LOG_DATA_FLAG_MC_TIME;
            NVSWITCH_PRINT(device, INFO,
                "MC TSTATE MC: 0x%08x\n",
                data->data[i-1]);
        }
    }

    if (collect_flags & NVSWITCH_RAW_ERROR_LOG_DATA_FLAG_RED_TIME)
    {
        status = _nvswitch_collect_nport_error_info_ls10(device, link, data, &i,
                     NV_REDUCTIONTSTATE_ERR_TIMESTAMP_LOG,
                     NV_REDUCTIONTSTATE_ERR_TIMESTAMP_LOG);
        if (status == NVL_SUCCESS)
        {
            data->flags |= NVSWITCH_RAW_ERROR_LOG_DATA_FLAG_RED_TIME;
            NVSWITCH_PRINT(device, INFO,
                "MC TSTATE RED: 0x%08x\n",
                data->data[i-1]);
        }
    }

    // Zero-fill the remainder so consumers see deterministic contents.
    while (i < NVSWITCH_RAW_ERROR_LOG_DATA_SIZE)
    {
        data->data[i++] = 0;
    }
}
1074
//
// Service fatal ROUTE-unit interrupts for one NPORT (link).
//
// For each pending fatal bit: capture the relevant error-log registers,
// report the error with containment information, and (for ECC errors) log
// an event to the InfoROM.  After a fatal error the remaining enabled
// interrupts are masked off to avoid an interrupt storm, then the FIRST
// and STATUS registers are cleared.
//
// Returns:
//   NVL_SUCCESS                    - all pending bits handled
//   -NVL_NOT_FOUND                 - nothing pending for this unit
//   -NVL_MORE_PROCESSING_REQUIRED  - unrecognized bits were pending
//
static NvlStatus
_nvswitch_service_route_fatal_ls10
(
    nvswitch_device *device,
    NvU32 link
)
{
    ls10_device *chip_device = NVSWITCH_GET_CHIP_DEVICE_LS10(device);
    NVSWITCH_INTERRUPT_LOG_TYPE report = { 0 };
    NvU32 pending, bit, contain, unhandled;
    NVSWITCH_RAW_ERROR_LOG_TYPE data = {0, { 0 }};
    INFOROM_NVS_ECC_ERROR_EVENT err_event = {0};

    // Only service bits that are both enabled and in the fatal intr mask.
    report.raw_pending = NVSWITCH_ENG_RD32(device, NPORT, , link, _ROUTE, _ERR_STATUS_0);
    report.raw_enable = NVSWITCH_ENG_RD32(device, NPORT, , link, _ROUTE, _ERR_FATAL_REPORT_EN_0);
    report.mask = report.raw_enable & chip_device->intr_mask.route.fatal;
    pending = report.raw_pending & report.mask;

    if (pending == 0)
    {
        return -NVL_NOT_FOUND;
    }

    unhandled = pending;

    report.raw_first = NVSWITCH_ENG_RD32(device, NPORT, , link, _ROUTE, _ERR_FIRST_0);
    contain = NVSWITCH_ENG_RD32(device, NPORT, , link, _ROUTE, _ERR_CONTAIN_EN_0);

    bit = DRF_NUM(_ROUTE, _ERR_STATUS_0, _ROUTEBUFERR, 1);
    if (nvswitch_test_flags(pending, bit))
    {
        _nvswitch_collect_error_info_ls10(device, link,
            NVSWITCH_RAW_ERROR_LOG_DATA_FLAG_ROUTE_TIME,
            &data);
        NVSWITCH_REPORT_CONTAIN(_HW_NPORT_ROUTE_ROUTEBUFERR, "route buffer over/underflow", NV_FALSE);
        NVSWITCH_REPORT_CONTAIN_DATA(_HW_NPORT_ROUTE_ROUTEBUFERR, data);
        nvswitch_clear_flags(&unhandled, bit);
    }

    bit = DRF_NUM(_ROUTE, _ERR_STATUS_0, _GLT_ECC_DBE_ERR, 1);
    if (nvswitch_test_flags(pending, bit))
    {
        NvBool bAddressValid = NV_FALSE;
        NvU32 address = 0;
        // The GLT ECC error address is only meaningful when the HW marks
        // it valid; otherwise report address 0 / invalid.
        NvU32 addressValid = NVSWITCH_ENG_RD32(device, NPORT, , link, _ROUTE,
                _ERR_GLT_ECC_ERROR_ADDRESS_VALID);

        if (FLD_TEST_DRF(_ROUTE_ERR_GLT, _ECC_ERROR_ADDRESS_VALID, _VALID, _VALID,
                         addressValid))
        {
            address = NVSWITCH_ENG_RD32(device, NPORT, , link, _ROUTE,
                                               _ERR_GLT_ECC_ERROR_ADDRESS);
            bAddressValid = NV_TRUE;
        }

        _nvswitch_collect_error_info_ls10(device, link,
            NVSWITCH_RAW_ERROR_LOG_DATA_FLAG_ROUTE_TIME |
            NVSWITCH_RAW_ERROR_LOG_DATA_FLAG_ROUTE_MISC |
            NVSWITCH_RAW_ERROR_LOG_DATA_FLAG_ROUTE_HDR,
            &data);
        NVSWITCH_REPORT_CONTAIN(_HW_NPORT_ROUTE_GLT_ECC_DBE_ERR, "route GLT DBE", NV_FALSE);
        NVSWITCH_REPORT_CONTAIN_DATA(_HW_NPORT_ROUTE_GLT_ECC_DBE_ERR, data);
        nvswitch_clear_flags(&unhandled, bit);

        _nvswitch_construct_ecc_error_event_ls10(&err_event,
            NVSWITCH_ERR_HW_NPORT_ROUTE_GLT_ECC_DBE_ERR, link, bAddressValid,
            address, NV_TRUE, 1);

        nvswitch_inforom_ecc_log_err_event(device, &err_event);
    }

    bit = DRF_NUM(_ROUTE, _ERR_STATUS_0, _PDCTRLPARERR, 1);
    if (nvswitch_test_flags(pending, bit))
    {
        _nvswitch_collect_error_info_ls10(device, link,
            NVSWITCH_RAW_ERROR_LOG_DATA_FLAG_ROUTE_TIME |
            NVSWITCH_RAW_ERROR_LOG_DATA_FLAG_ROUTE_MISC |
            NVSWITCH_RAW_ERROR_LOG_DATA_FLAG_ROUTE_HDR,
            &data);
        NVSWITCH_REPORT_CONTAIN(_HW_NPORT_ROUTE_PDCTRLPARERR, "route parity", NV_FALSE);
        NVSWITCH_REPORT_CONTAIN_DATA(_HW_NPORT_ROUTE_PDCTRLPARERR, data);
        nvswitch_clear_flags(&unhandled, bit);
    }

    bit = DRF_NUM(_ROUTE, _ERR_STATUS_0, _NVS_ECC_DBE_ERR, 1);
    if (nvswitch_test_flags(pending, bit))
    {
        _nvswitch_collect_error_info_ls10(device, link,
            NVSWITCH_RAW_ERROR_LOG_DATA_FLAG_ROUTE_TIME |
            NVSWITCH_RAW_ERROR_LOG_DATA_FLAG_ROUTE_MISC |
            NVSWITCH_RAW_ERROR_LOG_DATA_FLAG_ROUTE_HDR,
            &data);
        NVSWITCH_REPORT_CONTAIN(_HW_NPORT_ROUTE_NVS_ECC_DBE_ERR, "route incoming DBE", NV_FALSE);
        NVSWITCH_REPORT_CONTAIN_DATA(_HW_NPORT_ROUTE_NVS_ECC_DBE_ERR, data);
        nvswitch_clear_flags(&unhandled, bit);

        _nvswitch_construct_ecc_error_event_ls10(&err_event,
            NVSWITCH_ERR_HW_NPORT_ROUTE_NVS_ECC_DBE_ERR, link, NV_FALSE, 0,
            NV_TRUE, 1);

        nvswitch_inforom_ecc_log_err_event(device, &err_event);

        // Clear associated LIMIT_ERR interrupt
        if (report.raw_pending & DRF_NUM(_ROUTE, _ERR_STATUS_0, _NVS_ECC_LIMIT_ERR, 1))
        {
            NVSWITCH_ENG_WR32(device, NPORT, , link, _ROUTE, _ERR_STATUS_0,
                DRF_NUM(_ROUTE, _ERR_STATUS_0, _NVS_ECC_LIMIT_ERR, 1));
        }
    }

    bit = DRF_NUM(_ROUTE, _ERR_STATUS_0, _CDTPARERR, 1);
    if (nvswitch_test_flags(pending, bit))
    {
        _nvswitch_collect_error_info_ls10(device, link,
            NVSWITCH_RAW_ERROR_LOG_DATA_FLAG_ROUTE_TIME,
            &data);
        NVSWITCH_REPORT_CONTAIN(_HW_NPORT_ROUTE_CDTPARERR, "route credit parity", NV_FALSE);
        NVSWITCH_REPORT_CONTAIN_DATA(_HW_NPORT_ROUTE_CDTPARERR, data);
        nvswitch_clear_flags(&unhandled, bit);

        _nvswitch_construct_ecc_error_event_ls10(&err_event,
            NVSWITCH_ERR_HW_NPORT_ROUTE_CDTPARERR, link, NV_FALSE, 0,
            NV_TRUE, 1);

        nvswitch_inforom_ecc_log_err_event(device, &err_event);
    }

    bit = DRF_NUM(_ROUTE, _ERR_STATUS_0, _MCRID_ECC_DBE_ERR, 1);
    if (nvswitch_test_flags(pending, bit))
    {
        _nvswitch_collect_error_info_ls10(device, link,
            NVSWITCH_RAW_ERROR_LOG_DATA_FLAG_ROUTE_TIME |
            NVSWITCH_RAW_ERROR_LOG_DATA_FLAG_ROUTE_MISC |
            NVSWITCH_RAW_ERROR_LOG_DATA_FLAG_ROUTE_HDR,
            &data);
        NVSWITCH_REPORT_CONTAIN(_HW_NPORT_ROUTE_MCRID_ECC_DBE_ERR, "MC route ECC", NV_FALSE);
        NVSWITCH_REPORT_CONTAIN_DATA(_HW_NPORT_ROUTE_MCRID_ECC_DBE_ERR, data);
        nvswitch_clear_flags(&unhandled, bit);

        _nvswitch_construct_ecc_error_event_ls10(&err_event,
            NVSWITCH_ERR_HW_NPORT_ROUTE_MCRID_ECC_DBE_ERR, link, NV_FALSE, 0,
            NV_TRUE, 1);

        nvswitch_inforom_ecc_log_err_event(device, &err_event);
    }

    bit = DRF_NUM(_ROUTE, _ERR_STATUS_0, _EXTMCRID_ECC_DBE_ERR, 1);
    if (nvswitch_test_flags(pending, bit))
    {
        _nvswitch_collect_error_info_ls10(device, link,
            NVSWITCH_RAW_ERROR_LOG_DATA_FLAG_ROUTE_TIME |
            NVSWITCH_RAW_ERROR_LOG_DATA_FLAG_ROUTE_MISC |
            NVSWITCH_RAW_ERROR_LOG_DATA_FLAG_ROUTE_HDR,
            &data);
        NVSWITCH_REPORT_CONTAIN(_HW_NPORT_ROUTE_EXTMCRID_ECC_DBE_ERR, "Extd MC route ECC", NV_FALSE);
        NVSWITCH_REPORT_CONTAIN_DATA(_HW_NPORT_ROUTE_EXTMCRID_ECC_DBE_ERR, data);
        nvswitch_clear_flags(&unhandled, bit);

        _nvswitch_construct_ecc_error_event_ls10(&err_event,
            NVSWITCH_ERR_HW_NPORT_ROUTE_EXTMCRID_ECC_DBE_ERR, link, NV_FALSE, 0,
            NV_TRUE, 1);

        nvswitch_inforom_ecc_log_err_event(device, &err_event);
    }

    bit = DRF_NUM(_ROUTE, _ERR_STATUS_0, _RAM_ECC_DBE_ERR, 1);
    if (nvswitch_test_flags(pending, bit))
    {
        _nvswitch_collect_error_info_ls10(device, link,
            NVSWITCH_RAW_ERROR_LOG_DATA_FLAG_ROUTE_TIME |
            NVSWITCH_RAW_ERROR_LOG_DATA_FLAG_ROUTE_MISC |
            NVSWITCH_RAW_ERROR_LOG_DATA_FLAG_ROUTE_HDR,
            &data);
        NVSWITCH_REPORT_CONTAIN(_HW_NPORT_ROUTE_RAM_ECC_DBE_ERR, "route RAM ECC", NV_FALSE);
        NVSWITCH_REPORT_CONTAIN_DATA(_HW_NPORT_ROUTE_RAM_ECC_DBE_ERR, data);
        nvswitch_clear_flags(&unhandled, bit);

        _nvswitch_construct_ecc_error_event_ls10(&err_event,
            NVSWITCH_ERR_HW_NPORT_ROUTE_RAM_ECC_DBE_ERR, link, NV_FALSE, 0,
            NV_TRUE, 1);

        nvswitch_inforom_ecc_log_err_event(device, &err_event);
    }

    NVSWITCH_UNHANDLED_CHECK(device, unhandled);

    // Disable interrupts that have occurred after fatal error.
    // This helps prevent an interrupt storm if HW keeps triggering unnecessary stream of interrupts.
    if (device->link[link].fatal_error_occurred)
    {
        // When SOE is present it owns the (PLM-protected) enable register.
        if (nvswitch_is_soe_supported(device))
        {
            nvswitch_soe_disable_nport_fatal_interrupts_ls10(device, link,
                report.raw_enable & ~pending, RM_SOE_CORE_NPORT_ROUTE_INTERRUPT);
        }
        else
        {
            NVSWITCH_ENG_WR32(device, NPORT, , link, _ROUTE, _ERR_FATAL_REPORT_EN_0,
                report.raw_enable & ~pending);
        }
    }

    // Clear the "first error" latch only for bits we actually serviced.
    if (report.raw_first & report.mask)
    {
        NVSWITCH_ENG_WR32(device, NPORT, , link, _ROUTE, _ERR_FIRST_0,
            report.raw_first & report.mask);
    }
    NVSWITCH_ENG_WR32(device, NPORT, , link, _ROUTE, _ERR_STATUS_0, pending);

    if (unhandled != 0)
    {
        return -NVL_MORE_PROCESSING_REQUIRED;
    }

    return NVL_SUCCESS;
}
1291
1292 static NvlStatus
_nvswitch_service_route_nonfatal_ls10(nvswitch_device * device,NvU32 link)1293 _nvswitch_service_route_nonfatal_ls10
1294 (
1295 nvswitch_device *device,
1296 NvU32 link
1297 )
1298 {
1299 ls10_device *chip_device = NVSWITCH_GET_CHIP_DEVICE_LS10(device);
1300 NVSWITCH_INTERRUPT_LOG_TYPE report = { 0 };
1301 NvU32 pending, bit, unhandled;
1302 NVSWITCH_RAW_ERROR_LOG_TYPE data = {0, { 0 }};
1303 INFOROM_NVS_ECC_ERROR_EVENT err_event = {0};
1304
1305 report.raw_pending = NVSWITCH_ENG_RD32(device, NPORT, , link, _ROUTE, _ERR_STATUS_0);
1306 report.raw_enable = NVSWITCH_ENG_RD32(device, NPORT, , link, _ROUTE, _ERR_NON_FATAL_REPORT_EN_0);
1307 report.mask = report.raw_enable & chip_device->intr_mask.route.nonfatal;
1308 pending = report.raw_pending & report.mask;
1309
1310 if (pending == 0)
1311 {
1312 return -NVL_NOT_FOUND;
1313 }
1314
1315 unhandled = pending;
1316 report.raw_first = NVSWITCH_ENG_RD32(device, NPORT, , link, _ROUTE, _ERR_FIRST_0);
1317
1318 bit = DRF_NUM(_ROUTE, _ERR_STATUS_0, _NOPORTDEFINEDERR, 1);
1319 if (nvswitch_test_flags(pending, bit))
1320 {
1321 _nvswitch_collect_error_info_ls10(device, link,
1322 NVSWITCH_RAW_ERROR_LOG_DATA_FLAG_ROUTE_TIME |
1323 NVSWITCH_RAW_ERROR_LOG_DATA_FLAG_ROUTE_MISC |
1324 NVSWITCH_RAW_ERROR_LOG_DATA_FLAG_ROUTE_HDR,
1325 &data);
1326 NVSWITCH_REPORT_NONFATAL(_HW_NPORT_ROUTE_NOPORTDEFINEDERR, "route undefined route");
1327 NVSWITCH_REPORT_DATA(_HW_NPORT_ROUTE_NOPORTDEFINEDERR, data);
1328 nvswitch_clear_flags(&unhandled, bit);
1329 }
1330
1331 bit = DRF_NUM(_ROUTE, _ERR_STATUS_0, _INVALIDROUTEPOLICYERR, 1);
1332 if (nvswitch_test_flags(pending, bit))
1333 {
1334 _nvswitch_collect_error_info_ls10(device, link,
1335 NVSWITCH_RAW_ERROR_LOG_DATA_FLAG_ROUTE_TIME |
1336 NVSWITCH_RAW_ERROR_LOG_DATA_FLAG_ROUTE_MISC |
1337 NVSWITCH_RAW_ERROR_LOG_DATA_FLAG_ROUTE_HDR,
1338 &data);
1339 NVSWITCH_REPORT_NONFATAL(_HW_NPORT_ROUTE_INVALIDROUTEPOLICYERR, "route invalid policy");
1340 NVSWITCH_REPORT_DATA(_HW_NPORT_ROUTE_INVALIDROUTEPOLICYERR, data);
1341 nvswitch_clear_flags(&unhandled, bit);
1342 }
1343
1344 bit = DRF_NUM(_ROUTE, _ERR_STATUS_0, _NVS_ECC_LIMIT_ERR, 1);
1345 if (nvswitch_test_flags(pending, bit))
1346 {
1347 // Ignore LIMIT error if DBE is pending
1348 if (!(nvswitch_test_flags(report.raw_pending,
1349 DRF_NUM(_ROUTE, _ERR_STATUS_0, _NVS_ECC_DBE_ERR, 1))))
1350 {
1351 report.data[0] = NVSWITCH_ENG_RD32(device, NPORT, , link, _ROUTE, _ERR_NVS_ECC_ERROR_COUNTER);
1352 _nvswitch_collect_error_info_ls10(device, link,
1353 NVSWITCH_RAW_ERROR_LOG_DATA_FLAG_ROUTE_TIME,
1354 &data);
1355 NVSWITCH_REPORT_NONFATAL(_HW_NPORT_ROUTE_NVS_ECC_LIMIT_ERR, "route incoming ECC limit");
1356 NVSWITCH_REPORT_DATA(_HW_NPORT_ROUTE_NVS_ECC_LIMIT_ERR, data);
1357
1358 _nvswitch_construct_ecc_error_event_ls10(&err_event,
1359 NVSWITCH_ERR_HW_NPORT_ROUTE_NVS_ECC_LIMIT_ERR, link, NV_FALSE, 0,
1360 NV_FALSE, 1);
1361
1362 nvswitch_inforom_ecc_log_err_event(device, &err_event);
1363 }
1364
1365 nvswitch_clear_flags(&unhandled, bit);
1366 }
1367
1368 bit = DRF_NUM(_ROUTE, _ERR_STATUS_0, _GLT_ECC_LIMIT_ERR, 1);
1369 if (nvswitch_test_flags(pending, bit))
1370 {
1371 // Ignore LIMIT error if DBE is pending
1372 if (!(nvswitch_test_flags(report.raw_pending,
1373 DRF_NUM(_ROUTE, _ERR_STATUS_0, _GLT_ECC_DBE_ERR, 1))))
1374 {
1375 report.data[0] = NVSWITCH_ENG_RD32(device, NPORT, , link, _ROUTE, _ERR_GLT_ECC_ERROR_COUNTER);
1376 _nvswitch_collect_error_info_ls10(device, link,
1377 NVSWITCH_RAW_ERROR_LOG_DATA_FLAG_ROUTE_TIME,
1378 &data);
1379 NVSWITCH_REPORT_NONFATAL(_HW_NPORT_ROUTE_NVS_ECC_LIMIT_ERR, "GLT ECC limit");
1380 NVSWITCH_REPORT_DATA(_HW_NPORT_ROUTE_GLT_ECC_LIMIT_ERR, data);
1381
1382 _nvswitch_construct_ecc_error_event_ls10(&err_event,
1383 NVSWITCH_ERR_HW_NPORT_ROUTE_GLT_ECC_LIMIT_ERR, link, NV_FALSE, 0,
1384 NV_FALSE, 1);
1385
1386 nvswitch_inforom_ecc_log_err_event(device, &err_event);
1387 }
1388
1389 nvswitch_clear_flags(&unhandled, bit);
1390 }
1391
1392 bit = DRF_NUM(_ROUTE, _ERR_STATUS_0, _MCRID_ECC_LIMIT_ERR, 1);
1393 if (nvswitch_test_flags(pending, bit))
1394 {
1395 // Ignore LIMIT error if DBE is pending
1396 if (!(nvswitch_test_flags(report.raw_pending,
1397 DRF_NUM(_ROUTE, _ERR_STATUS_0, _MCRID_ECC_DBE_ERR, 1))))
1398 {
1399 report.data[0] = NVSWITCH_ENG_RD32(device, NPORT, , link, _ROUTE, _ERR_MCRID_ECC_ERROR_COUNTER);
1400 _nvswitch_collect_error_info_ls10(device, link,
1401 NVSWITCH_RAW_ERROR_LOG_DATA_FLAG_ROUTE_TIME,
1402 &data);
1403 NVSWITCH_REPORT_NONFATAL(_HW_NPORT_ROUTE_NVS_ECC_LIMIT_ERR, "MCRID ECC limit");
1404 NVSWITCH_REPORT_DATA(_HW_NPORT_ROUTE_MCRID_ECC_LIMIT_ERR, data);
1405
1406 _nvswitch_construct_ecc_error_event_ls10(&err_event,
1407 NVSWITCH_ERR_HW_NPORT_ROUTE_MCRID_ECC_LIMIT_ERR, link, NV_FALSE, 0,
1408 NV_FALSE, 1);
1409
1410 nvswitch_inforom_ecc_log_err_event(device, &err_event);
1411 }
1412
1413 nvswitch_clear_flags(&unhandled, bit);
1414 }
1415
1416 bit = DRF_NUM(_ROUTE, _ERR_STATUS_0, _EXTMCRID_ECC_LIMIT_ERR, 1);
1417 if (nvswitch_test_flags(pending, bit))
1418 {
1419 // Ignore LIMIT error if DBE is pending
1420 if (!(nvswitch_test_flags(report.raw_pending,
1421 DRF_NUM(_ROUTE, _ERR_STATUS_0, _EXTMCRID_ECC_DBE_ERR, 1))))
1422 {
1423 report.data[0] = NVSWITCH_ENG_RD32(device, NPORT, , link, _ROUTE, _ERR_EXTMCRID_ECC_ERROR_COUNTER);
1424 _nvswitch_collect_error_info_ls10(device, link,
1425 NVSWITCH_RAW_ERROR_LOG_DATA_FLAG_ROUTE_TIME,
1426 &data);
1427 NVSWITCH_REPORT_NONFATAL(_HW_NPORT_ROUTE_NVS_ECC_LIMIT_ERR, "EXTMCRID ECC limit");
1428 NVSWITCH_REPORT_DATA(_HW_NPORT_ROUTE_EXTMCRID_ECC_LIMIT_ERR, data);
1429
1430 _nvswitch_construct_ecc_error_event_ls10(&err_event,
1431 NVSWITCH_ERR_HW_NPORT_ROUTE_EXTMCRID_ECC_LIMIT_ERR, link, NV_FALSE, 0,
1432 NV_FALSE, 1);
1433
1434 nvswitch_inforom_ecc_log_err_event(device, &err_event);
1435 }
1436
1437 nvswitch_clear_flags(&unhandled, bit);
1438 }
1439
1440 bit = DRF_NUM(_ROUTE, _ERR_STATUS_0, _RAM_ECC_LIMIT_ERR, 1);
1441 if (nvswitch_test_flags(pending, bit))
1442 {
1443 // Ignore LIMIT error if DBE is pending
1444 if (!(nvswitch_test_flags(report.raw_pending,
1445 DRF_NUM(_ROUTE, _ERR_STATUS_0, _RAM_ECC_DBE_ERR, 1))))
1446 {
1447 report.data[0] = NVSWITCH_ENG_RD32(device, NPORT, , link, _ROUTE, _ERR_RAM_ECC_ERROR_COUNTER);
1448 _nvswitch_collect_error_info_ls10(device, link,
1449 NVSWITCH_RAW_ERROR_LOG_DATA_FLAG_ROUTE_TIME,
1450 &data);
1451 NVSWITCH_REPORT_NONFATAL(_HW_NPORT_ROUTE_RAM_ECC_LIMIT_ERR, "RAM ECC limit");
1452 NVSWITCH_REPORT_DATA(_HW_NPORT_ROUTE_RAM_ECC_LIMIT_ERR, data);
1453
1454 _nvswitch_construct_ecc_error_event_ls10(&err_event,
1455 NVSWITCH_ERR_HW_NPORT_ROUTE_RAM_ECC_LIMIT_ERR, link, NV_FALSE, 0,
1456 NV_FALSE, 1);
1457
1458 nvswitch_inforom_ecc_log_err_event(device, &err_event);
1459 }
1460
1461 nvswitch_clear_flags(&unhandled, bit);
1462 }
1463
1464 bit = DRF_NUM(_ROUTE, _ERR_STATUS_0, _INVALID_MCRID_ERR, 1);
1465 if (nvswitch_test_flags(pending, bit))
1466 {
1467 _nvswitch_collect_error_info_ls10(device, link,
1468 NVSWITCH_RAW_ERROR_LOG_DATA_FLAG_ROUTE_TIME,
1469 &data);
1470 NVSWITCH_REPORT_NONFATAL(_HW_NPORT_ROUTE_INVALID_MCRID_ERR, "invalid MC route");
1471 NVSWITCH_REPORT_DATA(_HW_NPORT_ROUTE_INVALID_MCRID_ERR, data);
1472 nvswitch_clear_flags(&unhandled, bit);
1473 }
1474
1475 NVSWITCH_UNHANDLED_CHECK(device, unhandled);
1476
1477 // Disable interrupts that have occurred after fatal error.
1478 // This helps prevent an interrupt storm if HW keeps triggering unnecessary stream of interrupts.
1479 if (device->link[link].fatal_error_occurred)
1480 {
1481 NVSWITCH_ENG_WR32(device, NPORT, , link, _ROUTE, _ERR_NON_FATAL_REPORT_EN_0,
1482 report.raw_enable & ~pending);
1483 }
1484
1485 if (report.raw_first & report.mask)
1486 {
1487 NVSWITCH_ENG_WR32(device, NPORT, , link, _ROUTE, _ERR_FIRST_0,
1488 report.raw_first & report.mask);
1489 }
1490
1491 NVSWITCH_ENG_WR32(device, NPORT, , link, _ROUTE, _ERR_STATUS_0, pending);
1492
1493 //
1494 // Note, when traffic is flowing, if we reset ERR_COUNT before ERR_STATUS
1495 // register, we won't see an interrupt again until counter wraps around.
1496 // In that case, we will miss writing back many ECC victim entries. Hence,
1497 // always clear _ERR_COUNT only after _ERR_STATUS register is cleared!
1498 //
1499 NVSWITCH_ENG_WR32(device, NPORT, , link, _ROUTE, _ERR_NVS_ECC_ERROR_COUNTER, 0x0);
1500
1501 if (unhandled != 0)
1502 {
1503 return -NVL_MORE_PROCESSING_REQUIRED;
1504 }
1505
1506 return NVL_SUCCESS;
1507 }
1508
1509 //
1510 // Ingress
1511 //
1512
1513 static NvlStatus
_nvswitch_service_ingress_fatal_ls10(nvswitch_device * device,NvU32 link)1514 _nvswitch_service_ingress_fatal_ls10
1515 (
1516 nvswitch_device *device,
1517 NvU32 link
1518 )
1519 {
1520 ls10_device *chip_device = NVSWITCH_GET_CHIP_DEVICE_LS10(device);
1521 NVSWITCH_INTERRUPT_LOG_TYPE report = { 0 };
1522 NvU32 pending, bit, contain, unhandled;
1523 NVSWITCH_RAW_ERROR_LOG_TYPE data = {0, { 0 }};
1524 INFOROM_NVS_ECC_ERROR_EVENT err_event = {0};
1525
1526 report.raw_pending = NVSWITCH_ENG_RD32(device, NPORT, , link, _INGRESS, _ERR_STATUS_0);
1527 report.raw_enable = NVSWITCH_ENG_RD32(device, NPORT, , link, _INGRESS, _ERR_FATAL_REPORT_EN_0);
1528 report.mask = report.raw_enable & chip_device->intr_mask.ingress[0].fatal;
1529 pending = report.raw_pending & report.mask;
1530
1531 if (pending == 0)
1532 {
1533 return -NVL_NOT_FOUND;
1534 }
1535
1536 unhandled = pending;
1537 report.raw_first = NVSWITCH_ENG_RD32(device, NPORT, , link, _INGRESS, _ERR_FIRST_0);
1538 contain = NVSWITCH_ENG_RD32(device, NPORT, , link, _INGRESS, _ERR_CONTAIN_EN_0);
1539 _nvswitch_collect_error_info_ls10(device, link,
1540 NVSWITCH_RAW_ERROR_LOG_DATA_FLAG_INGRESS_TIME |
1541 NVSWITCH_RAW_ERROR_LOG_DATA_FLAG_INGRESS_MISC |
1542 NVSWITCH_RAW_ERROR_LOG_DATA_FLAG_INGRESS_HDR,
1543 &data);
1544
1545 bit = DRF_NUM(_INGRESS, _ERR_STATUS_0, _CMDDECODEERR, 1);
1546 if (nvswitch_test_flags(pending, bit))
1547 {
1548 NVSWITCH_REPORT_CONTAIN(_HW_NPORT_INGRESS_CMDDECODEERR, "ingress invalid command", NV_FALSE);
1549 NVSWITCH_REPORT_CONTAIN_DATA(_HW_NPORT_INGRESS_CMDDECODEERR, data);
1550 nvswitch_clear_flags(&unhandled, bit);
1551 }
1552
1553 bit = DRF_NUM(_INGRESS, _ERR_STATUS_0, _EXTAREMAPTAB_ECC_DBE_ERR, 1);
1554 if (nvswitch_test_flags(pending, bit))
1555 {
1556 report.data[0] = NVSWITCH_ENG_RD32(device, NPORT, , link, _INGRESS, _ERR_EXTAREMAPTAB_ECC_ERROR_COUNTER);
1557 report.data[1] = NVSWITCH_ENG_RD32(device, NPORT, , link, _INGRESS, _ERR_EXTAREMAPTAB_ECC_ERROR_ADDRESS);
1558 report.data[2] = NVSWITCH_ENG_RD32(device, NPORT, , link, _INGRESS, _ERR_EXTAREMAPTAB_ECC_ERROR_ADDRESS_VALID);
1559 NVSWITCH_REPORT_CONTAIN(_HW_NPORT_INGRESS_EXTAREMAPTAB_ECC_DBE_ERR, "ingress ExtA remap DBE", NV_FALSE);
1560 NVSWITCH_REPORT_CONTAIN_DATA(_HW_NPORT_INGRESS_EXTAREMAPTAB_ECC_DBE_ERR, data);
1561 nvswitch_clear_flags(&unhandled, bit);
1562
1563 _nvswitch_construct_ecc_error_event_ls10(&err_event,
1564 NVSWITCH_ERR_HW_NPORT_INGRESS_EXTAREMAPTAB_ECC_DBE_ERR, link, NV_FALSE, 0,
1565 NV_TRUE, 1);
1566
1567 nvswitch_inforom_ecc_log_err_event(device, &err_event);
1568 }
1569
1570 bit = DRF_NUM(_INGRESS, _ERR_STATUS_0, _NCISOC_HDR_ECC_DBE_ERR, 1);
1571 if (nvswitch_test_flags(pending, bit))
1572 {
1573 report.data[0] = NVSWITCH_ENG_RD32(device, NPORT, , link, _INGRESS, _ERR_NCISOC_HDR_ECC_ERROR_COUNTER);
1574 NVSWITCH_REPORT_CONTAIN(_HW_NPORT_INGRESS_NCISOC_HDR_ECC_DBE_ERR, "ingress header DBE", NV_FALSE);
1575 NVSWITCH_REPORT_CONTAIN_DATA(_HW_NPORT_INGRESS_NCISOC_HDR_ECC_DBE_ERR, data);
1576 nvswitch_clear_flags(&unhandled, bit);
1577
1578 _nvswitch_construct_ecc_error_event_ls10(&err_event,
1579 NVSWITCH_ERR_HW_NPORT_INGRESS_NCISOC_HDR_ECC_DBE_ERR, link, NV_FALSE, 0,
1580 NV_TRUE, 1);
1581
1582 nvswitch_inforom_ecc_log_err_event(device, &err_event);
1583
1584 // Clear associated LIMIT_ERR interrupt
1585 if (report.raw_pending & DRF_NUM(_INGRESS, _ERR_STATUS_0, _NCISOC_HDR_ECC_LIMIT_ERR, 1))
1586 {
1587 NVSWITCH_ENG_WR32(device, NPORT, , link, _INGRESS, _ERR_STATUS_0,
1588 DRF_NUM(_INGRESS, _ERR_STATUS_0, _NCISOC_HDR_ECC_LIMIT_ERR, 1));
1589 }
1590 }
1591
1592 bit = DRF_NUM(_INGRESS, _ERR_STATUS_0, _INVALIDVCSET, 1);
1593 if (nvswitch_test_flags(pending, bit))
1594 {
1595 NVSWITCH_REPORT_CONTAIN(_HW_NPORT_INGRESS_INVALIDVCSET, "ingress invalid VCSet", NV_FALSE);
1596 NVSWITCH_REPORT_CONTAIN_DATA(_HW_NPORT_INGRESS_INVALIDVCSET, data);
1597 nvswitch_clear_flags(&unhandled, bit);
1598 }
1599
1600 bit = DRF_NUM(_INGRESS, _ERR_STATUS_0, _REMAPTAB_ECC_DBE_ERR, 1);
1601 if (nvswitch_test_flags(pending, bit))
1602 {
1603 NvBool bAddressValid = NV_FALSE;
1604 NvU32 address = 0;
1605 NvU32 addressValid = NVSWITCH_ENG_RD32(device, NPORT, , link, _INGRESS,
1606 _ERR_REMAPTAB_ECC_ERROR_ADDRESS);
1607
1608 if (FLD_TEST_DRF(_INGRESS_ERR_REMAPTAB, _ECC_ERROR_ADDRESS_VALID, _VALID, _VALID,
1609 addressValid))
1610 {
1611 address = NVSWITCH_ENG_RD32(device, NPORT, , link, _INGRESS,
1612 _ERR_REMAPTAB_ECC_ERROR_ADDRESS);
1613 bAddressValid = NV_TRUE;
1614 }
1615
1616 report.data[0] = NVSWITCH_ENG_RD32(device, NPORT, , link, _INGRESS, _ERR_REMAPTAB_ECC_ERROR_COUNTER);
1617 report.data[1] = NVSWITCH_ENG_RD32(device, NPORT, , link, _INGRESS, _ERR_REMAPTAB_ECC_ERROR_ADDRESS);
1618 report.data[2] = NVSWITCH_ENG_RD32(device, NPORT, , link, _INGRESS, _ERR_REMAPTAB_ECC_ERROR_ADDRESS_VALID);
1619 NVSWITCH_REPORT_CONTAIN(_HW_NPORT_INGRESS_REMAPTAB_ECC_DBE_ERR, "ingress Remap DBE", NV_FALSE);
1620 NVSWITCH_REPORT_CONTAIN_DATA(_HW_NPORT_INGRESS_REMAPTAB_ECC_DBE_ERR, data);
1621 nvswitch_clear_flags(&unhandled, bit);
1622
1623 _nvswitch_construct_ecc_error_event_ls10(&err_event,
1624 NVSWITCH_ERR_HW_NPORT_INGRESS_REMAPTAB_ECC_DBE_ERR, link, bAddressValid,
1625 address, NV_TRUE, 1);
1626
1627 nvswitch_inforom_ecc_log_err_event(device, &err_event);
1628 }
1629
1630 bit = DRF_NUM(_INGRESS, _ERR_STATUS_0, _RIDTAB_ECC_DBE_ERR, 1);
1631 if (nvswitch_test_flags(pending, bit))
1632 {
1633 NvBool bAddressValid = NV_FALSE;
1634 NvU32 address = 0;
1635 NvU32 addressValid = NVSWITCH_ENG_RD32(device, NPORT, , link, _INGRESS,
1636 _ERR_RIDTAB_ECC_ERROR_ADDRESS_VALID);
1637
1638 if (FLD_TEST_DRF(_INGRESS_ERR_RIDTAB, _ECC_ERROR_ADDRESS_VALID, _VALID, _VALID,
1639 addressValid))
1640 {
1641 address = NVSWITCH_ENG_RD32(device, NPORT, , link, _INGRESS,
1642 _ERR_RIDTAB_ECC_ERROR_ADDRESS);
1643 bAddressValid = NV_TRUE;
1644 }
1645
1646 report.data[0] = NVSWITCH_ENG_RD32(device, NPORT, , link, _INGRESS, _ERR_RIDTAB_ECC_ERROR_COUNTER);
1647 report.data[1] = NVSWITCH_ENG_RD32(device, NPORT, , link, _INGRESS, _ERR_RIDTAB_ECC_ERROR_ADDRESS);
1648 report.data[2] = NVSWITCH_ENG_RD32(device, NPORT, , link, _INGRESS, _ERR_RIDTAB_ECC_ERROR_ADDRESS_VALID);
1649 NVSWITCH_REPORT_CONTAIN(_HW_NPORT_INGRESS_RIDTAB_ECC_DBE_ERR, "ingress RID DBE", NV_FALSE);
1650 NVSWITCH_REPORT_CONTAIN_DATA(_HW_NPORT_INGRESS_RIDTAB_ECC_DBE_ERR, data);
1651 nvswitch_clear_flags(&unhandled, bit);
1652
1653 _nvswitch_construct_ecc_error_event_ls10(&err_event,
1654 NVSWITCH_ERR_HW_NPORT_INGRESS_RIDTAB_ECC_DBE_ERR, link, bAddressValid,
1655 address, NV_TRUE, 1);
1656
1657 nvswitch_inforom_ecc_log_err_event(device, &err_event);
1658 }
1659
1660 bit = DRF_NUM(_INGRESS, _ERR_STATUS_0, _RLANTAB_ECC_DBE_ERR, 1);
1661 if (nvswitch_test_flags(pending, bit))
1662 {
1663 NvBool bAddressValid = NV_FALSE;
1664 NvU32 address = 0;
1665 NvU32 addressValid = NVSWITCH_ENG_RD32(device, NPORT, , link, _INGRESS,
1666 _ERR_RLANTAB_ECC_ERROR_ADDRESS_VALID);
1667
1668 if (FLD_TEST_DRF(_INGRESS_ERR_RLANTAB, _ECC_ERROR_ADDRESS_VALID, _VALID, _VALID,
1669 addressValid))
1670 {
1671 address = NVSWITCH_ENG_RD32(device, NPORT, , link, _INGRESS,
1672 _ERR_RLANTAB_ECC_ERROR_ADDRESS);
1673 bAddressValid = NV_TRUE;
1674 }
1675
1676 report.data[0] = NVSWITCH_ENG_RD32(device, NPORT, , link, _INGRESS, _ERR_RLANTAB_ECC_ERROR_COUNTER);
1677 report.data[1] = NVSWITCH_ENG_RD32(device, NPORT, , link, _INGRESS, _ERR_RLANTAB_ECC_ERROR_ADDRESS);
1678 report.data[2] = NVSWITCH_ENG_RD32(device, NPORT, , link, _INGRESS, _ERR_RLANTAB_ECC_ERROR_ADDRESS_VALID);
1679 NVSWITCH_REPORT_CONTAIN(_HW_NPORT_INGRESS_RLANTAB_ECC_DBE_ERR, "ingress RLAN DBE", NV_FALSE);
1680 NVSWITCH_REPORT_CONTAIN_DATA(_HW_NPORT_INGRESS_RLANTAB_ECC_DBE_ERR, data);
1681 nvswitch_clear_flags(&unhandled, bit);
1682
1683 _nvswitch_construct_ecc_error_event_ls10(&err_event,
1684 NVSWITCH_ERR_HW_NPORT_INGRESS_RLANTAB_ECC_DBE_ERR, link, bAddressValid,
1685 address, NV_TRUE, 1);
1686
1687 nvswitch_inforom_ecc_log_err_event(device, &err_event);
1688 }
1689
1690 bit = DRF_NUM(_INGRESS, _ERR_STATUS_0, _NCISOC_PARITY_ERR, 1);
1691 if (nvswitch_test_flags(pending, bit))
1692 {
1693 NVSWITCH_REPORT_CONTAIN(_HW_NPORT_INGRESS_NCISOC_PARITY_ERR, "ingress control parity", NV_FALSE);
1694 NVSWITCH_REPORT_CONTAIN_DATA(_HW_NPORT_INGRESS_NCISOC_PARITY_ERR, data);
1695 nvswitch_clear_flags(&unhandled, bit);
1696
1697 _nvswitch_construct_ecc_error_event_ls10(&err_event,
1698 NVSWITCH_ERR_HW_NPORT_INGRESS_NCISOC_PARITY_ERR, link, NV_FALSE, 0,
1699 NV_TRUE, 1);
1700
1701 nvswitch_inforom_ecc_log_err_event(device, &err_event);
1702 }
1703
1704 bit = DRF_NUM(_INGRESS, _ERR_STATUS_0, _EXTBREMAPTAB_ECC_DBE_ERR, 1);
1705 if (nvswitch_test_flags(pending, bit))
1706 {
1707 report.data[0] = NVSWITCH_ENG_RD32(device, NPORT, , link, _INGRESS, _ERR_EXTBREMAPTAB_ECC_ERROR_COUNTER);
1708 report.data[1] = NVSWITCH_ENG_RD32(device, NPORT, , link, _INGRESS, _ERR_EXTBREMAPTAB_ECC_ERROR_ADDRESS);
1709 report.data[2] = NVSWITCH_ENG_RD32(device, NPORT, , link, _INGRESS, _ERR_EXTBREMAPTAB_ECC_ERROR_ADDRESS_VALID);
1710 NVSWITCH_REPORT_CONTAIN(_HW_NPORT_INGRESS_EXTBREMAPTAB_ECC_DBE_ERR, "ingress ExtB remap DBE", NV_FALSE);
1711 NVSWITCH_REPORT_CONTAIN_DATA(_HW_NPORT_INGRESS_EXTBREMAPTAB_ECC_DBE_ERR, data);
1712 nvswitch_clear_flags(&unhandled, bit);
1713
1714 _nvswitch_construct_ecc_error_event_ls10(&err_event,
1715 NVSWITCH_ERR_HW_NPORT_INGRESS_EXTBREMAPTAB_ECC_DBE_ERR, link, NV_FALSE, 0,
1716 NV_TRUE, 1);
1717
1718 nvswitch_inforom_ecc_log_err_event(device, &err_event);
1719 }
1720
1721 bit = DRF_NUM(_INGRESS, _ERR_STATUS_0, _MCREMAPTAB_ECC_DBE_ERR, 1);
1722 if (nvswitch_test_flags(pending, bit))
1723 {
1724 report.data[0] = NVSWITCH_ENG_RD32(device, NPORT, , link, _INGRESS, _ERR_MCREMAPTAB_ECC_ERROR_COUNTER);
1725 report.data[1] = NVSWITCH_ENG_RD32(device, NPORT, , link, _INGRESS, _ERR_MCREMAPTAB_ECC_ERROR_ADDRESS);
1726 report.data[2] = NVSWITCH_ENG_RD32(device, NPORT, , link, _INGRESS, _ERR_MCREMAPTAB_ECC_ERROR_ADDRESS_VALID);
1727 NVSWITCH_REPORT_CONTAIN(_HW_NPORT_INGRESS_MCREMAPTAB_ECC_DBE_ERR, "ingress MC remap DBE", NV_FALSE);
1728 NVSWITCH_REPORT_CONTAIN_DATA(_HW_NPORT_INGRESS_MCREMAPTAB_ECC_DBE_ERR, data);
1729 nvswitch_clear_flags(&unhandled, bit);
1730
1731 _nvswitch_construct_ecc_error_event_ls10(&err_event,
1732 NVSWITCH_ERR_HW_NPORT_INGRESS_MCREMAPTAB_ECC_DBE_ERR, link, NV_FALSE, 0,
1733 NV_TRUE, 1);
1734
1735 nvswitch_inforom_ecc_log_err_event(device, &err_event);
1736 }
1737
1738 NVSWITCH_UNHANDLED_CHECK(device, unhandled);
1739
1740 // Disable interrupts that have occurred after fatal error.
1741 // This helps prevent an interrupt storm if HW keeps triggering unnecessary stream of interrupts.
1742 if (device->link[link].fatal_error_occurred)
1743 {
1744 if (nvswitch_is_soe_supported(device))
1745 {
1746 nvswitch_soe_disable_nport_fatal_interrupts_ls10(device, link,
1747 report.raw_enable & ~pending, RM_SOE_CORE_NPORT_INGRESS_INTERRUPT);
1748 }
1749 else
1750 {
1751 NVSWITCH_ENG_WR32(device, NPORT, , link, _INGRESS, _ERR_FATAL_REPORT_EN_0,
1752 report.raw_enable & ~pending);
1753 }
1754 }
1755
1756 if (report.raw_first & report.mask)
1757 {
1758 NVSWITCH_ENG_WR32(device, NPORT, , link, _INGRESS, _ERR_FIRST_0,
1759 report.raw_first & report.mask);
1760 }
1761
1762 NVSWITCH_ENG_WR32(device, NPORT, , link, _INGRESS, _ERR_STATUS_0, pending);
1763
1764 if (unhandled != 0)
1765 {
1766 return -NVL_MORE_PROCESSING_REQUIRED;
1767 }
1768
1769 return NVL_SUCCESS;
1770 }
1771
//
// Service non-fatal NPORT INGRESS interrupts for the given link.
//
// The INGRESS unit reports errors through two status registers,
// _ERR_STATUS_0 and _ERR_STATUS_1, each with its own non-fatal
// report-enable register and _ERR_FIRST_* capture register.  Both are
// serviced in a single pass: pending_0/pending_1 record the masked
// pending bits of each register, and both are cleared together at the
// end (the status registers are write-1-to-clear, as in the other
// handlers in this file).  raw_pending_0 is kept so the _ERR_STATUS_1
// ECC LIMIT handlers can suppress reporting when the corresponding
// DBE bit is pending in _ERR_STATUS_0.
//
// Returns:
//   NVL_SUCCESS                     - all pending bits were recognized
//   -NVL_NOT_FOUND                  - nothing pending in either register
//   -NVL_MORE_PROCESSING_REQUIRED   - an unexpected bit was left unhandled
//
static NvlStatus
_nvswitch_service_ingress_nonfatal_ls10
(
    nvswitch_device *device,
    NvU32 link
)
{
    ls10_device *chip_device = NVSWITCH_GET_CHIP_DEVICE_LS10(device);
    NVSWITCH_INTERRUPT_LOG_TYPE report = { 0 };
    NvU32 pending, bit, unhandled;
    NvU32 pending_0, pending_1;
    NvU32 raw_pending_0;
    NVSWITCH_RAW_ERROR_LOG_TYPE data = {0, { 0 }};
    INFOROM_NVS_ECC_ERROR_EVENT err_event = {0};
    NvlStatus status = NVL_SUCCESS;

    //
    // _ERR_STATUS_0
    //
    report.raw_pending = NVSWITCH_ENG_RD32(device, NPORT, , link, _INGRESS, _ERR_STATUS_0);
    report.raw_enable = NVSWITCH_ENG_RD32(device, NPORT, , link, _INGRESS, _ERR_NON_FATAL_REPORT_EN_0);
    report.mask = report.raw_enable & chip_device->intr_mask.ingress[0].nonfatal;

    // Saved before masking: the STATUS_1 LIMIT handlers below check DBE
    // bits in the raw (unmasked) STATUS_0 value.
    raw_pending_0 = report.raw_pending;
    pending = (report.raw_pending & report.mask);
    pending_0 = pending;

    if (pending == 0)
    {
        // Nothing in STATUS_0; still need to service STATUS_1.
        goto _nvswitch_service_ingress_nonfatal_ls10_err_status_1;
    }

    unhandled = pending;
    report.raw_first = NVSWITCH_ENG_RD32(device, NPORT, , link, _INGRESS, _ERR_FIRST_0);
    _nvswitch_collect_error_info_ls10(device, link,
        NVSWITCH_RAW_ERROR_LOG_DATA_FLAG_INGRESS_TIME |
        NVSWITCH_RAW_ERROR_LOG_DATA_FLAG_INGRESS_MISC |
        NVSWITCH_RAW_ERROR_LOG_DATA_FLAG_INGRESS_HDR,
        &data);

    bit = DRF_NUM(_INGRESS, _ERR_STATUS_0, _REQCONTEXTMISMATCHERR, 1);
    if (nvswitch_test_flags(pending, bit))
    {
        NVSWITCH_REPORT_NONFATAL(_HW_NPORT_INGRESS_REQCONTEXTMISMATCHERR, "ingress request context mismatch");
        NVSWITCH_REPORT_DATA(_HW_NPORT_INGRESS_REQCONTEXTMISMATCHERR, data);
        nvswitch_clear_flags(&unhandled, bit);
    }

    bit = DRF_NUM(_INGRESS, _ERR_STATUS_0, _ACLFAIL, 1);
    if (nvswitch_test_flags(pending, bit))
    {
        NVSWITCH_REPORT_NONFATAL(_HW_NPORT_INGRESS_ACLFAIL, "ingress invalid ACL");
        NVSWITCH_REPORT_DATA(_HW_NPORT_INGRESS_ACLFAIL, data);
        nvswitch_clear_flags(&unhandled, bit);
    }

    bit = DRF_NUM(_INGRESS, _ERR_STATUS_0, _NCISOC_HDR_ECC_LIMIT_ERR, 1);
    if (nvswitch_test_flags(pending, bit))
    {
        // Ignore LIMIT error if DBE is pending
        if (!(nvswitch_test_flags(report.raw_pending,
                DRF_NUM(_INGRESS, _ERR_STATUS_0, _NCISOC_HDR_ECC_DBE_ERR, 1))))
        {
            report.data[0] = NVSWITCH_ENG_RD32(device, NPORT, , link, _INGRESS, _ERR_NCISOC_HDR_ECC_ERROR_COUNTER);
            NVSWITCH_REPORT_NONFATAL(_HW_NPORT_INGRESS_NCISOC_HDR_ECC_LIMIT_ERR, "ingress header ECC");
            NVSWITCH_REPORT_DATA(_HW_NPORT_INGRESS_NCISOC_HDR_ECC_LIMIT_ERR, data);

            _nvswitch_construct_ecc_error_event_ls10(&err_event,
                NVSWITCH_ERR_HW_NPORT_INGRESS_NCISOC_HDR_ECC_LIMIT_ERR, link, NV_FALSE, 0,
                NV_FALSE, 1);

            nvswitch_inforom_ecc_log_err_event(device, &err_event);
        }

        nvswitch_clear_flags(&unhandled, bit);
    }

    bit = DRF_NUM(_INGRESS, _ERR_STATUS_0, _ADDRBOUNDSERR, 1);
    if (nvswitch_test_flags(pending, bit))
    {
        NVSWITCH_REPORT_NONFATAL(_HW_NPORT_INGRESS_ADDRBOUNDSERR, "ingress address bounds");
        NVSWITCH_REPORT_DATA(_HW_NPORT_INGRESS_ADDRBOUNDSERR, data);
        nvswitch_clear_flags(&unhandled, bit);
    }

    bit = DRF_NUM(_INGRESS, _ERR_STATUS_0, _RIDTABCFGERR, 1);
    if (nvswitch_test_flags(pending, bit))
    {
        NVSWITCH_REPORT_NONFATAL(_HW_NPORT_INGRESS_RIDTABCFGERR, "ingress RID packet");
        NVSWITCH_REPORT_DATA(_HW_NPORT_INGRESS_RIDTABCFGERR, data);
        nvswitch_clear_flags(&unhandled, bit);
    }

    bit = DRF_NUM(_INGRESS, _ERR_STATUS_0, _RLANTABCFGERR, 1);
    if (nvswitch_test_flags(pending, bit))
    {
        NVSWITCH_REPORT_NONFATAL(_HW_NPORT_INGRESS_RLANTABCFGERR, "ingress RLAN packet");
        NVSWITCH_REPORT_DATA(_HW_NPORT_INGRESS_RLANTABCFGERR, data);
        nvswitch_clear_flags(&unhandled, bit);
    }


    bit = DRF_NUM(_INGRESS, _ERR_STATUS_0, _REMAPTAB_ECC_LIMIT_ERR, 1);
    if (nvswitch_test_flags(pending, bit))
    {
        // Ignore LIMIT error if DBE is pending
        if (!(nvswitch_test_flags(report.raw_pending,
                DRF_NUM(_INGRESS, _ERR_STATUS_0, _REMAPTAB_ECC_DBE_ERR, 1))))
        {
            report.data[0] = NVSWITCH_ENG_RD32(device, NPORT, , link, _INGRESS, _ERR_REMAPTAB_ECC_ERROR_COUNTER);
            NVSWITCH_REPORT_NONFATAL(_HW_NPORT_INGRESS_REMAPTAB_ECC_LIMIT_ERR, "ingress remap ECC");
            NVSWITCH_REPORT_DATA(_HW_NPORT_INGRESS_REMAPTAB_ECC_LIMIT_ERR, data);

            _nvswitch_construct_ecc_error_event_ls10(&err_event,
                NVSWITCH_ERR_HW_NPORT_INGRESS_REMAPTAB_ECC_LIMIT_ERR, link, NV_FALSE, 0,
                NV_FALSE, 1);

            nvswitch_inforom_ecc_log_err_event(device, &err_event);
        }

        nvswitch_clear_flags(&unhandled, bit);
    }

    bit = DRF_NUM(_INGRESS, _ERR_STATUS_0, _RIDTAB_ECC_LIMIT_ERR, 1);
    if (nvswitch_test_flags(pending, bit))
    {
        // Ignore LIMIT error if DBE is pending
        if (!(nvswitch_test_flags(report.raw_pending,
                DRF_NUM(_INGRESS, _ERR_STATUS_0, _RIDTAB_ECC_DBE_ERR, 1))))
        {
            report.data[0] = NVSWITCH_ENG_RD32(device, NPORT, , link, _INGRESS, _ERR_RIDTAB_ECC_ERROR_COUNTER);
            NVSWITCH_REPORT_NONFATAL(_HW_NPORT_INGRESS_RIDTAB_ECC_LIMIT_ERR, "ingress RID ECC");
            NVSWITCH_REPORT_DATA(_HW_NPORT_INGRESS_RIDTAB_ECC_LIMIT_ERR, data);

            _nvswitch_construct_ecc_error_event_ls10(&err_event,
                NVSWITCH_ERR_HW_NPORT_INGRESS_RIDTAB_ECC_LIMIT_ERR, link, NV_FALSE, 0,
                NV_FALSE, 1);

            nvswitch_inforom_ecc_log_err_event(device, &err_event);
        }

        nvswitch_clear_flags(&unhandled, bit);
    }

    bit = DRF_NUM(_INGRESS, _ERR_STATUS_0, _RLANTAB_ECC_LIMIT_ERR, 1);
    if (nvswitch_test_flags(pending, bit))
    {
        // Ignore LIMIT error if DBE is pending
        if (!(nvswitch_test_flags(report.raw_pending,
                DRF_NUM(_INGRESS, _ERR_STATUS_0, _RLANTAB_ECC_DBE_ERR, 1))))
        {
            report.data[0] = NVSWITCH_ENG_RD32(device, NPORT, , link, _INGRESS, _ERR_RLANTAB_ECC_ERROR_COUNTER);
            NVSWITCH_REPORT_NONFATAL(_HW_NPORT_INGRESS_RLANTAB_ECC_LIMIT_ERR, "ingress RLAN ECC");
            NVSWITCH_REPORT_DATA(_HW_NPORT_INGRESS_RLANTAB_ECC_LIMIT_ERR, data);

            _nvswitch_construct_ecc_error_event_ls10(&err_event,
                NVSWITCH_ERR_HW_NPORT_INGRESS_RLANTAB_ECC_LIMIT_ERR, link, NV_FALSE, 0,
                NV_FALSE, 1);

            nvswitch_inforom_ecc_log_err_event(device, &err_event);
        }

        nvswitch_clear_flags(&unhandled, bit);
    }


    bit = DRF_NUM(_INGRESS, _ERR_STATUS_0, _ADDRTYPEERR, 1);
    if (nvswitch_test_flags(pending, bit))
    {
        NVSWITCH_REPORT_NONFATAL(_HW_NPORT_INGRESS_ADDRTYPEERR, "ingress illegal address");
        NVSWITCH_REPORT_DATA(_HW_NPORT_INGRESS_ADDRTYPEERR, data);
        nvswitch_clear_flags(&unhandled, bit);
    }


    bit = DRF_NUM(_INGRESS, _ERR_STATUS_0, _EXTAREMAPTAB_INDEX_ERR, 1);
    if (nvswitch_test_flags(pending, bit))
    {
        NVSWITCH_REPORT_NONFATAL(_HW_NPORT_INGRESS_EXTAREMAPTAB_INDEX_ERR, "ingress ExtA remap index");
        NVSWITCH_REPORT_DATA(_HW_NPORT_INGRESS_EXTAREMAPTAB_INDEX_ERR, data);
        nvswitch_clear_flags(&unhandled, bit);
    }

    bit = DRF_NUM(_INGRESS, _ERR_STATUS_0, _EXTBREMAPTAB_INDEX_ERR, 1);
    if (nvswitch_test_flags(pending, bit))
    {
        NVSWITCH_REPORT_NONFATAL(_HW_NPORT_INGRESS_EXTBREMAPTAB_INDEX_ERR, "ingress ExtB remap index");
        NVSWITCH_REPORT_DATA(_HW_NPORT_INGRESS_EXTBREMAPTAB_INDEX_ERR, data);
        nvswitch_clear_flags(&unhandled, bit);
    }

    bit = DRF_NUM(_INGRESS, _ERR_STATUS_0, _MCREMAPTAB_INDEX_ERR, 1);
    if (nvswitch_test_flags(pending, bit))
    {
        NVSWITCH_REPORT_NONFATAL(_HW_NPORT_INGRESS_MCREMAPTAB_INDEX_ERR, "ingress MC remap index");
        NVSWITCH_REPORT_DATA(_HW_NPORT_INGRESS_MCREMAPTAB_INDEX_ERR, data);
        nvswitch_clear_flags(&unhandled, bit);
    }

    bit = DRF_NUM(_INGRESS, _ERR_STATUS_0, _EXTAREMAPTAB_REQCONTEXTMISMATCHERR, 1);
    if (nvswitch_test_flags(pending, bit))
    {
        NVSWITCH_REPORT_NONFATAL(_HW_NPORT_INGRESS_EXTAREMAPTAB_REQCONTEXTMISMATCHERR, "ingress ExtA request context mismatch");
        NVSWITCH_REPORT_DATA(_HW_NPORT_INGRESS_EXTAREMAPTAB_REQCONTEXTMISMATCHERR, data);
        nvswitch_clear_flags(&unhandled, bit);
    }

    bit = DRF_NUM(_INGRESS, _ERR_STATUS_0, _EXTBREMAPTAB_REQCONTEXTMISMATCHERR, 1);
    if (nvswitch_test_flags(pending, bit))
    {
        NVSWITCH_REPORT_NONFATAL(_HW_NPORT_INGRESS_EXTBREMAPTAB_REQCONTEXTMISMATCHERR, "ingress ExtB request context mismatch");
        NVSWITCH_REPORT_DATA(_HW_NPORT_INGRESS_EXTBREMAPTAB_REQCONTEXTMISMATCHERR, data);
        nvswitch_clear_flags(&unhandled, bit);
    }

    bit = DRF_NUM(_INGRESS, _ERR_STATUS_0, _MCREMAPTAB_REQCONTEXTMISMATCHERR, 1);
    if (nvswitch_test_flags(pending, bit))
    {
        NVSWITCH_REPORT_NONFATAL(_HW_NPORT_INGRESS_MCREMAPTAB_REQCONTEXTMISMATCHERR, "ingress MC request context mismatch");
        NVSWITCH_REPORT_DATA(_HW_NPORT_INGRESS_MCREMAPTAB_REQCONTEXTMISMATCHERR, data);
        nvswitch_clear_flags(&unhandled, bit);
    }

    bit = DRF_NUM(_INGRESS, _ERR_STATUS_0, _EXTAREMAPTAB_ACLFAIL, 1);
    if (nvswitch_test_flags(pending, bit))
    {
        NVSWITCH_REPORT_NONFATAL(_HW_NPORT_INGRESS_EXTAREMAPTAB_ACLFAIL, "ingress invalid ExtA ACL");
        NVSWITCH_REPORT_DATA(_HW_NPORT_INGRESS_EXTAREMAPTAB_ACLFAIL, data);
        nvswitch_clear_flags(&unhandled, bit);
    }

    bit = DRF_NUM(_INGRESS, _ERR_STATUS_0, _EXTBREMAPTAB_ACLFAIL, 1);
    if (nvswitch_test_flags(pending, bit))
    {
        NVSWITCH_REPORT_NONFATAL(_HW_NPORT_INGRESS_EXTBREMAPTAB_ACLFAIL, "ingress invalid ExtB ACL");
        NVSWITCH_REPORT_DATA(_HW_NPORT_INGRESS_EXTBREMAPTAB_ACLFAIL, data);
        nvswitch_clear_flags(&unhandled, bit);
    }

    bit = DRF_NUM(_INGRESS, _ERR_STATUS_0, _MCREMAPTAB_ACLFAIL, 1);
    if (nvswitch_test_flags(pending, bit))
    {
        NVSWITCH_REPORT_NONFATAL(_HW_NPORT_INGRESS_MCREMAPTAB_ACLFAIL, "ingress invalid MC ACL");
        NVSWITCH_REPORT_DATA(_HW_NPORT_INGRESS_MCREMAPTAB_ACLFAIL, data);
        nvswitch_clear_flags(&unhandled, bit);
    }

    bit = DRF_NUM(_INGRESS, _ERR_STATUS_0, _EXTAREMAPTAB_ADDRBOUNDSERR, 1);
    if (nvswitch_test_flags(pending, bit))
    {
        NVSWITCH_REPORT_NONFATAL(_HW_NPORT_INGRESS_EXTAREMAPTAB_ADDRBOUNDSERR, "ingress ExtA address bounds");
        NVSWITCH_REPORT_DATA(_HW_NPORT_INGRESS_EXTAREMAPTAB_ADDRBOUNDSERR, data);
        nvswitch_clear_flags(&unhandled, bit);
    }

    bit = DRF_NUM(_INGRESS, _ERR_STATUS_0, _EXTBREMAPTAB_ADDRBOUNDSERR, 1);
    if (nvswitch_test_flags(pending, bit))
    {
        NVSWITCH_REPORT_NONFATAL(_HW_NPORT_INGRESS_EXTBREMAPTAB_ADDRBOUNDSERR, "ingress ExtB address bounds");
        NVSWITCH_REPORT_DATA(_HW_NPORT_INGRESS_EXTBREMAPTAB_ADDRBOUNDSERR, data);
        nvswitch_clear_flags(&unhandled, bit);
    }

    bit = DRF_NUM(_INGRESS, _ERR_STATUS_0, _MCREMAPTAB_ADDRBOUNDSERR, 1);
    if (nvswitch_test_flags(pending, bit))
    {
        NVSWITCH_REPORT_NONFATAL(_HW_NPORT_INGRESS_MCREMAPTAB_ADDRBOUNDSERR, "ingress MC address bounds");
        NVSWITCH_REPORT_DATA(_HW_NPORT_INGRESS_MCREMAPTAB_ADDRBOUNDSERR, data);
        nvswitch_clear_flags(&unhandled, bit);
    }

    NVSWITCH_UNHANDLED_CHECK(device, unhandled);

    // Disable interrupts that have occurred after fatal error.
    // This helps prevent an interrupt storm if HW keeps triggering unnecessary stream of interrupts.
    if (device->link[link].fatal_error_occurred)
    {
        NVSWITCH_ENG_WR32(device, NPORT, , link, _INGRESS, _ERR_NON_FATAL_REPORT_EN_0,
            report.raw_enable & ~pending);
    }

    if (report.raw_first & report.mask)
    {
        NVSWITCH_ENG_WR32(device, NPORT, , link, _INGRESS, _ERR_FIRST_0,
            report.raw_first & report.mask);
    }

    // Don't return yet: STATUS_1 must still be serviced. Record the
    // STATUS_0 outcome so it survives the second pass.
    if (unhandled != 0)
    {
        status = -NVL_MORE_PROCESSING_REQUIRED;
    }

_nvswitch_service_ingress_nonfatal_ls10_err_status_1:
    //
    // _ERR_STATUS_1
    //
    report.raw_pending = NVSWITCH_ENG_RD32(device, NPORT, , link, _INGRESS, _ERR_STATUS_1);
    report.raw_enable = NVSWITCH_ENG_RD32(device, NPORT, , link, _INGRESS, _ERR_NON_FATAL_REPORT_EN_1);
    report.mask = report.raw_enable & chip_device->intr_mask.ingress[1].nonfatal;

    pending = (report.raw_pending & report.mask);
    pending_1 = pending;

    if ((pending_0 == 0) && (pending_1 == 0))
    {
        return -NVL_NOT_FOUND;
    }

    unhandled = pending;
    report.raw_first = NVSWITCH_ENG_RD32(device, NPORT, , link, _INGRESS, _ERR_FIRST_1);

    bit = DRF_NUM(_INGRESS, _ERR_STATUS_1, _EXTAREMAPTAB_ECC_LIMIT_ERR, 1);
    if (nvswitch_test_flags(pending, bit))
    {
        // Ignore LIMIT error if DBE is pending
        if (!(nvswitch_test_flags(raw_pending_0,
                DRF_NUM(_INGRESS, _ERR_STATUS_0, _EXTAREMAPTAB_ECC_DBE_ERR, 1))))
        {
            report.data[0] = NVSWITCH_ENG_RD32(device, NPORT, , link, _INGRESS, _ERR_EXTAREMAPTAB_ECC_ERROR_COUNTER);
            NVSWITCH_REPORT_NONFATAL(_HW_NPORT_INGRESS_EXTAREMAPTAB_ECC_LIMIT_ERR, "ingress ExtA remap ECC");
            NVSWITCH_REPORT_DATA(_HW_NPORT_INGRESS_EXTAREMAPTAB_ECC_LIMIT_ERR, data);

            _nvswitch_construct_ecc_error_event_ls10(&err_event,
                NVSWITCH_ERR_HW_NPORT_INGRESS_EXTAREMAPTAB_ECC_LIMIT_ERR, link, NV_FALSE, 0,
                NV_FALSE, 1);

            nvswitch_inforom_ecc_log_err_event(device, &err_event);
        }

        nvswitch_clear_flags(&unhandled, bit);
    }

    bit = DRF_NUM(_INGRESS, _ERR_STATUS_1, _EXTBREMAPTAB_ECC_LIMIT_ERR, 1);
    if (nvswitch_test_flags(pending, bit))
    {
        // Ignore LIMIT error if DBE is pending
        if (!(nvswitch_test_flags(raw_pending_0,
                DRF_NUM(_INGRESS, _ERR_STATUS_0, _EXTBREMAPTAB_ECC_DBE_ERR, 1))))
        {
            report.data[0] = NVSWITCH_ENG_RD32(device, NPORT, , link, _INGRESS, _ERR_EXTBREMAPTAB_ECC_ERROR_COUNTER);
            NVSWITCH_REPORT_NONFATAL(_HW_NPORT_INGRESS_EXTBREMAPTAB_ECC_LIMIT_ERR, "ingress ExtB remap ECC");
            NVSWITCH_REPORT_DATA(_HW_NPORT_INGRESS_EXTBREMAPTAB_ECC_LIMIT_ERR, data);

            _nvswitch_construct_ecc_error_event_ls10(&err_event,
                NVSWITCH_ERR_HW_NPORT_INGRESS_EXTBREMAPTAB_ECC_LIMIT_ERR, link, NV_FALSE, 0,
                NV_FALSE, 1);

            nvswitch_inforom_ecc_log_err_event(device, &err_event);
        }

        nvswitch_clear_flags(&unhandled, bit);
    }

    bit = DRF_NUM(_INGRESS, _ERR_STATUS_1, _MCREMAPTAB_ECC_LIMIT_ERR, 1);
    if (nvswitch_test_flags(pending, bit))
    {
        // Ignore LIMIT error if DBE is pending
        if (!(nvswitch_test_flags(raw_pending_0,
                DRF_NUM(_INGRESS, _ERR_STATUS_0, _MCREMAPTAB_ECC_DBE_ERR, 1))))
        {
            report.data[0] = NVSWITCH_ENG_RD32(device, NPORT, , link, _INGRESS, _ERR_MCREMAPTAB_ECC_ERROR_COUNTER);
            NVSWITCH_REPORT_NONFATAL(_HW_NPORT_INGRESS_MCREMAPTAB_ECC_LIMIT_ERR, "ingress MC remap ECC");
            NVSWITCH_REPORT_DATA(_HW_NPORT_INGRESS_MCREMAPTAB_ECC_LIMIT_ERR, data);

            _nvswitch_construct_ecc_error_event_ls10(&err_event,
                NVSWITCH_ERR_HW_NPORT_INGRESS_MCREMAPTAB_ECC_LIMIT_ERR, link, NV_FALSE, 0,
                NV_FALSE, 1);

            nvswitch_inforom_ecc_log_err_event(device, &err_event);
        }

        nvswitch_clear_flags(&unhandled, bit);
    }

    bit = DRF_NUM(_INGRESS, _ERR_STATUS_1, _MCCMDTOUCADDRERR, 1);
    if (nvswitch_test_flags(pending, bit))
    {
        NVSWITCH_REPORT_NONFATAL(_HW_NPORT_INGRESS_MCCMDTOUCADDRERR, "ingress MC command to uc");
        NVSWITCH_REPORT_DATA(_HW_NPORT_INGRESS_MCCMDTOUCADDRERR, data);
        nvswitch_clear_flags(&unhandled, bit);
    }

    bit = DRF_NUM(_INGRESS, _ERR_STATUS_1, _READMCREFLECTMEMERR, 1);
    if (nvswitch_test_flags(pending, bit))
    {
        NVSWITCH_REPORT_NONFATAL(_HW_NPORT_INGRESS_READMCREFLECTMEMERR, "ingress read reflective");
        NVSWITCH_REPORT_DATA(_HW_NPORT_INGRESS_READMCREFLECTMEMERR, data);
        nvswitch_clear_flags(&unhandled, bit);
    }

    bit = DRF_NUM(_INGRESS, _ERR_STATUS_1, _EXTAREMAPTAB_ADDRTYPEERR, 1);
    if (nvswitch_test_flags(pending, bit))
    {
        NVSWITCH_REPORT_NONFATAL(_HW_NPORT_INGRESS_EXTAREMAPTAB_ADDRTYPEERR, "ingress ExtA address type");
        NVSWITCH_REPORT_DATA(_HW_NPORT_INGRESS_EXTAREMAPTAB_ADDRTYPEERR, data);
        nvswitch_clear_flags(&unhandled, bit);
    }

    bit = DRF_NUM(_INGRESS, _ERR_STATUS_1, _EXTBREMAPTAB_ADDRTYPEERR, 1);
    if (nvswitch_test_flags(pending, bit))
    {
        NVSWITCH_REPORT_NONFATAL(_HW_NPORT_INGRESS_EXTBREMAPTAB_ADDRTYPEERR, "ingress ExtB address type");
        NVSWITCH_REPORT_DATA(_HW_NPORT_INGRESS_EXTBREMAPTAB_ADDRTYPEERR, data);
        nvswitch_clear_flags(&unhandled, bit);
    }

    bit = DRF_NUM(_INGRESS, _ERR_STATUS_1, _MCREMAPTAB_ADDRTYPEERR, 1);
    if (nvswitch_test_flags(pending, bit))
    {
        NVSWITCH_REPORT_NONFATAL(_HW_NPORT_INGRESS_MCREMAPTAB_ADDRTYPEERR, "ingress MC address type");
        NVSWITCH_REPORT_DATA(_HW_NPORT_INGRESS_MCREMAPTAB_ADDRTYPEERR, data);
        nvswitch_clear_flags(&unhandled, bit);
    }

    NVSWITCH_UNHANDLED_CHECK(device, unhandled);

    // Disable interrupts that have occurred after fatal error.
    // This helps prevent an interrupt storm if HW keeps triggering unnecessary stream of interrupts.
    if (device->link[link].fatal_error_occurred)
    {
        NVSWITCH_ENG_WR32(device, NPORT, , link, _INGRESS, _ERR_NON_FATAL_REPORT_EN_1,
            report.raw_enable & ~pending);
    }

    if (report.raw_first & report.mask)
    {
        NVSWITCH_ENG_WR32(device, NPORT, , link, _INGRESS, _ERR_FIRST_1,
            report.raw_first & report.mask);
    }

    // Acknowledge the handled bits in both status registers
    // (write-1-to-clear), now that both passes are complete.
    NVSWITCH_ENG_WR32(device, NPORT, , link, _INGRESS, _ERR_STATUS_0, pending_0);
    NVSWITCH_ENG_WR32(device, NPORT, , link, _INGRESS, _ERR_STATUS_1, pending_1);

    if (unhandled != 0)
    {
        status = -NVL_MORE_PROCESSING_REQUIRED;
    }

    return status;
}
2212
2213 //
2214 // Tstate
2215 //
2216
//
// Service non-fatal NPORT TSTATE interrupts for the given link.
//
// Handles the ECC single-bit "limit" (threshold) errors for the tag
// store (TAGPOOL) and crumbstore RAMs.  A LIMIT error is skipped when
// the corresponding DBE bit is also pending in the raw status, since
// the fatal handler reports the DBE.  For each reported error the ECC
// error counter is captured and then reset to _INIT, and the failing
// RAM address is logged only when the hardware's ADDRESS_VALID register
// says it is valid.
//
// NOTE(review): the TAGPOOL path collects EGRESS-flavored error-log
// data while the CRUMBSTORE path collects INGRESS-flavored data —
// looks intentional (tag store is shared with the egress path) but
// confirm against the error-logging spec.
//
// Returns:
//   NVL_SUCCESS                     - all pending bits were recognized
//   -NVL_NOT_FOUND                  - nothing pending
//   -NVL_MORE_PROCESSING_REQUIRED   - an unexpected bit was left unhandled
//
static NvlStatus
_nvswitch_service_tstate_nonfatal_ls10
(
    nvswitch_device *device,
    NvU32 link
)
{
    ls10_device *chip_device = NVSWITCH_GET_CHIP_DEVICE_LS10(device);
    NVSWITCH_INTERRUPT_LOG_TYPE report = { 0 };
    NvU32 pending, bit, unhandled;
    NVSWITCH_RAW_ERROR_LOG_TYPE data = {0, { 0 }};
    INFOROM_NVS_ECC_ERROR_EVENT err_event = {0};

    report.raw_pending = NVSWITCH_ENG_RD32(device, NPORT, , link, _TSTATE, _ERR_STATUS_0);
    report.raw_enable = NVSWITCH_ENG_RD32(device, NPORT, , link, _TSTATE, _ERR_NON_FATAL_REPORT_EN_0);
    report.mask = report.raw_enable & chip_device->intr_mask.tstate.nonfatal;
    report.data[0] = NVSWITCH_ENG_RD32(device, NPORT, , link, _TSTATE, _ERR_MISC_LOG_0);
    pending = report.raw_pending & report.mask;

    if (pending == 0)
    {
        return -NVL_NOT_FOUND;
    }

    unhandled = pending;
    report.raw_first = NVSWITCH_ENG_RD32(device, NPORT, , link, _TSTATE, _ERR_FIRST_0);

    bit = DRF_NUM(_TSTATE, _ERR_STATUS_0, _TAGPOOL_ECC_LIMIT_ERR, 1);
    if (nvswitch_test_flags(pending, bit))
    {
        // Ignore LIMIT error if DBE is pending
        if(!(nvswitch_test_flags(report.raw_pending,
                DRF_NUM(_TSTATE, _ERR_STATUS_0, _TAGPOOL_ECC_DBE_ERR, 1))))
        {
            NvBool bAddressValid = NV_FALSE;
            NvU32 address = 0;
            NvU32 addressValid = NVSWITCH_ENG_RD32(device, NPORT, , link, _TSTATE,
                    _ERR_TAGPOOL_ECC_ERROR_ADDRESS_VALID);

            // Only trust the error address if HW flagged it valid.
            if (FLD_TEST_DRF(_TSTATE_ERR_TAGPOOL, _ECC_ERROR_ADDRESS_VALID, _VALID, _VALID,
                    addressValid))
            {
                address = NVSWITCH_ENG_RD32(device, NPORT, , link, _TSTATE,
                        _ERR_TAGPOOL_ECC_ERROR_ADDRESS);
                bAddressValid = NV_TRUE;
            }

            // Capture the counter, then reset it so the threshold re-arms.
            report.data[1] = NVSWITCH_ENG_RD32(device, NPORT, , link, _TSTATE, _ERR_TAGPOOL_ECC_ERROR_COUNTER);
            NVSWITCH_ENG_WR32(device, NPORT, , link, _TSTATE, _ERR_TAGPOOL_ECC_ERROR_COUNTER,
                DRF_DEF(_TSTATE, _ERR_TAGPOOL_ECC_ERROR_COUNTER, _ERROR_COUNT, _INIT));
            NVSWITCH_REPORT_NONFATAL(_HW_NPORT_TSTATE_TAGPOOL_ECC_LIMIT_ERR, "TS tag store single-bit threshold");
            _nvswitch_collect_error_info_ls10(device, link,
                NVSWITCH_RAW_ERROR_LOG_DATA_FLAG_EGRESS_TIME |
                NVSWITCH_RAW_ERROR_LOG_DATA_FLAG_EGRESS_MISC |
                NVSWITCH_RAW_ERROR_LOG_DATA_FLAG_EGRESS_HDR,
                &data);
            NVSWITCH_REPORT_DATA(_HW_NPORT_TSTATE_TAGPOOL_ECC_LIMIT_ERR, data);

            _nvswitch_construct_ecc_error_event_ls10(&err_event,
                NVSWITCH_ERR_HW_NPORT_TSTATE_TAGPOOL_ECC_LIMIT_ERR, link,
                bAddressValid, address, NV_FALSE, 1);

            nvswitch_inforom_ecc_log_err_event(device, &err_event);
        }

        nvswitch_clear_flags(&unhandled, bit);
    }

    bit = DRF_NUM(_TSTATE, _ERR_STATUS_0, _CRUMBSTORE_ECC_LIMIT_ERR, 1);
    if (nvswitch_test_flags(pending, bit))
    {
        // Ignore LIMIT error if DBE is pending
        if(!(nvswitch_test_flags(report.raw_pending,
                DRF_NUM(_TSTATE, _ERR_STATUS_0, _CRUMBSTORE_ECC_DBE_ERR, 1))))
        {
            NvBool bAddressValid = NV_FALSE;
            NvU32 address = 0;
            NvU32 addressValid = NVSWITCH_ENG_RD32(device, NPORT, , link, _TSTATE,
                    _ERR_CRUMBSTORE_ECC_ERROR_ADDRESS_VALID);

            // Only trust the error address if HW flagged it valid.
            if (FLD_TEST_DRF(_TSTATE_ERR_CRUMBSTORE, _ECC_ERROR_ADDRESS_VALID, _VALID, _VALID,
                    addressValid))
            {
                address = NVSWITCH_ENG_RD32(device, NPORT, , link, _TSTATE,
                        _ERR_CRUMBSTORE_ECC_ERROR_ADDRESS);
                bAddressValid = NV_TRUE;
            }

            // Capture the counter, then reset it so the threshold re-arms.
            report.data[1] = NVSWITCH_ENG_RD32(device, NPORT, , link, _TSTATE, _ERR_CRUMBSTORE_ECC_ERROR_COUNTER);
            NVSWITCH_ENG_WR32(device, NPORT, , link, _TSTATE, _ERR_CRUMBSTORE_ECC_ERROR_COUNTER,
                DRF_DEF(_TSTATE, _ERR_CRUMBSTORE_ECC_ERROR_COUNTER, _ERROR_COUNT, _INIT));
            NVSWITCH_REPORT_NONFATAL(_HW_NPORT_TSTATE_CRUMBSTORE_ECC_LIMIT_ERR, "TS crumbstore single-bit threshold");
            _nvswitch_collect_error_info_ls10(device, link,
                NVSWITCH_RAW_ERROR_LOG_DATA_FLAG_INGRESS_TIME |
                NVSWITCH_RAW_ERROR_LOG_DATA_FLAG_INGRESS_MISC |
                NVSWITCH_RAW_ERROR_LOG_DATA_FLAG_INGRESS_HDR,
                &data);
            NVSWITCH_REPORT_DATA(_HW_NPORT_TSTATE_CRUMBSTORE_ECC_LIMIT_ERR, data);

            _nvswitch_construct_ecc_error_event_ls10(&err_event,
                NVSWITCH_ERR_HW_NPORT_TSTATE_CRUMBSTORE_ECC_LIMIT_ERR, link,
                bAddressValid, address, NV_FALSE, 1);

            nvswitch_inforom_ecc_log_err_event(device, &err_event);
        }

        nvswitch_clear_flags(&unhandled, bit);
    }

    NVSWITCH_UNHANDLED_CHECK(device, unhandled);

    // Disable interrupts that have occurred after fatal error.
    // This helps prevent an interrupt storm if HW keeps triggering unnecessary stream of interrupts.
    if (device->link[link].fatal_error_occurred)
    {
        NVSWITCH_ENG_WR32(device, NPORT, , link, _TSTATE, _ERR_NON_FATAL_REPORT_EN_0,
            report.raw_enable & ~pending);
    }

    if (report.raw_first & report.mask)
    {
        NVSWITCH_ENG_WR32(device, NPORT, , link, _TSTATE, _ERR_FIRST_0,
            report.raw_first & report.mask);
    }

    // Acknowledge the handled bits (write-1-to-clear).
    NVSWITCH_ENG_WR32(device, NPORT, , link, _TSTATE, _ERR_STATUS_0, pending);

    if (unhandled != 0)
    {
        return -NVL_MORE_PROCESSING_REQUIRED;
    }

    return NVL_SUCCESS;
}
2351
//
// Service fatal TSTATE interrupt sources for NPORT instance "link".
//
// Flow: read _TSTATE_ERR_STATUS_0, mask it with the fatal REPORT_EN
// register and the SW-side intr_mask, then service each recognized bit:
// report/log it, capture raw packet context, and for ECC DBEs also log an
// InfoROM ECC event and clear the matching LIMIT_ERR status bit (so the
// already-counted single-bit-threshold interrupt is not reported again).
//
// Returns:
//   -NVL_NOT_FOUND                 no masked fatal TSTATE interrupt pending
//   -NVL_MORE_PROCESSING_REQUIRED  a pending bit was not recognized
//   NVL_SUCCESS                    all pending bits serviced
//
static NvlStatus
_nvswitch_service_tstate_fatal_ls10
(
    nvswitch_device *device,
    NvU32 link
)
{
    ls10_device *chip_device = NVSWITCH_GET_CHIP_DEVICE_LS10(device);
    NVSWITCH_INTERRUPT_LOG_TYPE report = { 0 };
    NvU32 pending, bit, contain, unhandled;
    NVSWITCH_RAW_ERROR_LOG_TYPE data = {0, { 0 }};
    INFOROM_NVS_ECC_ERROR_EVENT err_event = {0};

    // Only bits that are both enabled in HW and selected by the SW fatal
    // mask are serviced here; everything else is left pending.
    report.raw_pending = NVSWITCH_ENG_RD32(device, NPORT, , link, _TSTATE, _ERR_STATUS_0);
    report.raw_enable = NVSWITCH_ENG_RD32(device, NPORT, , link, _TSTATE, _ERR_FATAL_REPORT_EN_0);
    report.mask = report.raw_enable & chip_device->intr_mask.tstate.fatal;
    report.data[0] = NVSWITCH_ENG_RD32(device, NPORT, , link, _TSTATE, _ERR_MISC_LOG_0);
    pending = report.raw_pending & report.mask;

    if (pending == 0)
    {
        return -NVL_NOT_FOUND;
    }

    unhandled = pending;
    report.raw_first = NVSWITCH_ENG_RD32(device, NPORT, , link, _TSTATE, _ERR_FIRST_0);
    // CONTAIN_EN is read once here; the NVSWITCH_REPORT_CONTAIN macros use
    // it to decide whether the error was containment-enabled.
    contain = NVSWITCH_ENG_RD32(device, NPORT, , link, _TSTATE, _ERR_CONTAIN_EN_0);

    bit = DRF_NUM(_TSTATE, _ERR_STATUS_0, _TAGPOOLBUFERR, 1);
    if (nvswitch_test_flags(pending, bit))
    {
        NVSWITCH_REPORT_CONTAIN(_HW_NPORT_TSTATE_TAGPOOLBUFERR, "TS pointer crossover", NV_FALSE);
        _nvswitch_collect_error_info_ls10(device, link,
            NVSWITCH_RAW_ERROR_LOG_DATA_FLAG_EGRESS_TIME |
            NVSWITCH_RAW_ERROR_LOG_DATA_FLAG_EGRESS_MISC |
            NVSWITCH_RAW_ERROR_LOG_DATA_FLAG_EGRESS_HDR,
            &data);
        NVSWITCH_REPORT_CONTAIN_DATA(_HW_NPORT_TSTATE_TAGPOOLBUFERR, data);
        nvswitch_clear_flags(&unhandled, bit);
    }

    bit = DRF_NUM(_TSTATE, _ERR_STATUS_0, _TAGPOOL_ECC_DBE_ERR, 1);
    if (nvswitch_test_flags(pending, bit))
    {
        NvBool bAddressValid = NV_FALSE;
        NvU32 address = 0;
        NvU32 addressValid = NVSWITCH_ENG_RD32(device, NPORT, , link, _TSTATE,
                _ERR_TAGPOOL_ECC_ERROR_ADDRESS_VALID);

        // The ECC error address is only meaningful when HW flags it valid.
        if (FLD_TEST_DRF(_TSTATE_ERR_TAGPOOL, _ECC_ERROR_ADDRESS_VALID, _VALID, _VALID,
                addressValid))
        {
            address = NVSWITCH_ENG_RD32(device, NPORT, , link, _TSTATE,
                _ERR_TAGPOOL_ECC_ERROR_ADDRESS);
            bAddressValid = NV_TRUE;
        }

        // Snapshot the ECC error counter, then reset it to its INIT value.
        report.data[1] = NVSWITCH_ENG_RD32(device, NPORT, , link, _TSTATE, _ERR_TAGPOOL_ECC_ERROR_COUNTER);
        NVSWITCH_ENG_WR32(device, NPORT, , link, _TSTATE, _ERR_TAGPOOL_ECC_ERROR_COUNTER,
            DRF_DEF(_TSTATE, _ERR_TAGPOOL_ECC_ERROR_COUNTER, _ERROR_COUNT, _INIT));
        NVSWITCH_REPORT_CONTAIN(_HW_NPORT_TSTATE_TAGPOOL_ECC_DBE_ERR, "TS tag store fatal ECC", NV_FALSE);
        _nvswitch_collect_error_info_ls10(device, link,
            NVSWITCH_RAW_ERROR_LOG_DATA_FLAG_EGRESS_TIME |
            NVSWITCH_RAW_ERROR_LOG_DATA_FLAG_EGRESS_MISC |
            NVSWITCH_RAW_ERROR_LOG_DATA_FLAG_EGRESS_HDR,
            &data);
        NVSWITCH_REPORT_CONTAIN_DATA(_HW_NPORT_TSTATE_TAGPOOL_ECC_DBE_ERR, data);
        nvswitch_clear_flags(&unhandled, bit);

        // Record the double-bit error (bDbe = NV_TRUE) in the InfoROM.
        _nvswitch_construct_ecc_error_event_ls10(&err_event,
            NVSWITCH_ERR_HW_NPORT_TSTATE_TAGPOOL_ECC_DBE_ERR, link, bAddressValid,
            address, NV_TRUE, 1);

        nvswitch_inforom_ecc_log_err_event(device, &err_event);

        // Clear associated LIMIT_ERR interrupt
        if (report.raw_pending & DRF_NUM(_TSTATE, _ERR_STATUS_0, _TAGPOOL_ECC_LIMIT_ERR, 1))
        {
            NVSWITCH_ENG_WR32(device, NPORT, , link, _TSTATE, _ERR_STATUS_0,
                DRF_NUM(_TSTATE, _ERR_STATUS_0, _TAGPOOL_ECC_LIMIT_ERR, 1));
        }
    }

    bit = DRF_NUM(_TSTATE, _ERR_STATUS_0, _CRUMBSTOREBUFERR, 1);
    if (nvswitch_test_flags(pending, bit))
    {
        NVSWITCH_REPORT_CONTAIN(_HW_NPORT_TSTATE_CRUMBSTOREBUFERR, "TS crumbstore", NV_FALSE);
        _nvswitch_collect_error_info_ls10(device, link,
            NVSWITCH_RAW_ERROR_LOG_DATA_FLAG_EGRESS_TIME |
            NVSWITCH_RAW_ERROR_LOG_DATA_FLAG_EGRESS_MISC |
            NVSWITCH_RAW_ERROR_LOG_DATA_FLAG_EGRESS_HDR,
            &data);
        NVSWITCH_REPORT_CONTAIN_DATA(_HW_NPORT_TSTATE_CRUMBSTOREBUFERR, data);
        nvswitch_clear_flags(&unhandled, bit);
    }

    bit = DRF_NUM(_TSTATE, _ERR_STATUS_0, _CRUMBSTORE_ECC_DBE_ERR, 1);
    if (nvswitch_test_flags(pending, bit))
    {
        NvBool bAddressValid = NV_FALSE;
        NvU32 address = 0;
        NvU32 addressValid = NVSWITCH_ENG_RD32(device, NPORT, , link, _TSTATE,
                _ERR_CRUMBSTORE_ECC_ERROR_ADDRESS_VALID);

        if (FLD_TEST_DRF(_TSTATE_ERR_CRUMBSTORE, _ECC_ERROR_ADDRESS_VALID, _VALID, _VALID,
                addressValid))
        {
            address = NVSWITCH_ENG_RD32(device, NPORT, , link, _TSTATE,
                _ERR_CRUMBSTORE_ECC_ERROR_ADDRESS);
            bAddressValid = NV_TRUE;
        }

        report.data[1] = NVSWITCH_ENG_RD32(device, NPORT, , link, _TSTATE, _ERR_CRUMBSTORE_ECC_ERROR_COUNTER);
        NVSWITCH_ENG_WR32(device, NPORT, , link, _TSTATE, _ERR_CRUMBSTORE_ECC_ERROR_COUNTER,
            DRF_DEF(_TSTATE, _ERR_CRUMBSTORE_ECC_ERROR_COUNTER, _ERROR_COUNT, _INIT));
        NVSWITCH_REPORT_CONTAIN(_HW_NPORT_TSTATE_CRUMBSTORE_ECC_DBE_ERR, "TS crumbstore fatal ECC", NV_FALSE);
        // NOTE(review): crumbstore errors capture INGRESS-side packet info,
        // unlike the tagpool handlers above which capture EGRESS-side; this
        // matches the nonfatal crumbstore path — presumed intentional.
        _nvswitch_collect_error_info_ls10(device, link,
            NVSWITCH_RAW_ERROR_LOG_DATA_FLAG_INGRESS_TIME |
            NVSWITCH_RAW_ERROR_LOG_DATA_FLAG_INGRESS_MISC |
            NVSWITCH_RAW_ERROR_LOG_DATA_FLAG_INGRESS_HDR,
            &data);
        NVSWITCH_REPORT_CONTAIN_DATA(_HW_NPORT_TSTATE_CRUMBSTORE_ECC_DBE_ERR, data);
        nvswitch_clear_flags(&unhandled, bit);

        _nvswitch_construct_ecc_error_event_ls10(&err_event,
            NVSWITCH_ERR_HW_NPORT_TSTATE_CRUMBSTORE_ECC_DBE_ERR, link, bAddressValid,
            address, NV_TRUE, 1);

        nvswitch_inforom_ecc_log_err_event(device, &err_event);

        // Clear associated LIMIT_ERR interrupt
        if (report.raw_pending & DRF_NUM(_TSTATE, _ERR_STATUS_0, _CRUMBSTORE_ECC_LIMIT_ERR, 1))
        {
            NVSWITCH_ENG_WR32(device, NPORT, , link, _TSTATE, _ERR_STATUS_0,
                DRF_NUM(_TSTATE, _ERR_STATUS_0, _CRUMBSTORE_ECC_LIMIT_ERR, 1));
        }
    }

    bit = DRF_NUM(_TSTATE, _ERR_STATUS_0, _ATO_ERR, 1);
    if (nvswitch_test_flags(pending, bit))
    {
        // Capture debug state only if ATO was the first error recorded.
        if (FLD_TEST_DRF_NUM(_TSTATE, _ERR_FIRST_0, _ATO_ERR, 1, report.raw_first))
        {
            report.data[1] = NVSWITCH_ENG_RD32(device, NPORT, , link, _TSTATE, _ERR_DEBUG);
        }
        NVSWITCH_REPORT_CONTAIN(_HW_NPORT_TSTATE_ATO_ERR, "TS ATO timeout", NV_FALSE);
        nvswitch_clear_flags(&unhandled, bit);
    }

    bit = DRF_NUM(_TSTATE, _ERR_STATUS_0, _CAMRSP_ERR, 1);
    if (nvswitch_test_flags(pending, bit))
    {
        NVSWITCH_REPORT_CONTAIN(_HW_NPORT_TSTATE_CAMRSP_ERR, "Rsp Tag value out of range", NV_FALSE);
        _nvswitch_collect_error_info_ls10(device, link,
            NVSWITCH_RAW_ERROR_LOG_DATA_FLAG_INGRESS_TIME |
            NVSWITCH_RAW_ERROR_LOG_DATA_FLAG_INGRESS_MISC |
            NVSWITCH_RAW_ERROR_LOG_DATA_FLAG_INGRESS_HDR,
            &data);
        NVSWITCH_REPORT_CONTAIN_DATA(_HW_NPORT_TSTATE_CAMRSP_ERR, data);
        nvswitch_clear_flags(&unhandled, bit);
    }

    NVSWITCH_UNHANDLED_CHECK(device, unhandled);

    // Disable interrupts that have occurred after fatal error.
    // This helps prevent an interrupt storm if HW keeps triggering unnecessary stream of interrupts.
    if (device->link[link].fatal_error_occurred)
    {
        // When SOE is available the disable is routed through it; otherwise
        // the driver writes the REPORT_EN register directly.
        if (nvswitch_is_soe_supported(device))
        {
            nvswitch_soe_disable_nport_fatal_interrupts_ls10(device, link,
                report.raw_enable & ~pending, RM_SOE_CORE_NPORT_TSTATE_INTERRUPT);
        }
        else
        {
            NVSWITCH_ENG_WR32(device, NPORT, , link, _TSTATE, _ERR_FATAL_REPORT_EN_0,
                report.raw_enable & ~pending);
        }
    }

    // Clear the FIRST-error snapshot for the serviced bits, then ack the
    // pending status bits themselves.
    if (report.raw_first & report.mask)
    {
        NVSWITCH_ENG_WR32(device, NPORT, , link, _TSTATE, _ERR_FIRST_0,
            report.raw_first & report.mask);
    }

    NVSWITCH_ENG_WR32(device, NPORT, , link, _TSTATE, _ERR_STATUS_0, pending);

    if (unhandled != 0)
    {
        return -NVL_MORE_PROCESSING_REQUIRED;
    }

    return NVL_SUCCESS;
}
2547
2548 //
2549 // Egress
2550 //
2551
2552 static NvlStatus
_nvswitch_service_egress_nonfatal_ls10(nvswitch_device * device,NvU32 link)2553 _nvswitch_service_egress_nonfatal_ls10
2554 (
2555 nvswitch_device *device,
2556 NvU32 link
2557 )
2558 {
2559 ls10_device *chip_device = NVSWITCH_GET_CHIP_DEVICE_LS10(device);
2560 NVSWITCH_INTERRUPT_LOG_TYPE report = { 0 };
2561 NvU32 pending, bit, unhandled;
2562 NvU32 pending_0, pending_1;
2563 NVSWITCH_RAW_ERROR_LOG_TYPE data = {0, { 0 }};
2564 INFOROM_NVS_ECC_ERROR_EVENT err_event = {0};
2565 NvlStatus status = NVL_SUCCESS;
2566
2567 report.raw_pending = NVSWITCH_ENG_RD32(device, NPORT, , link, _EGRESS, _ERR_STATUS_0);
2568 report.raw_enable = NVSWITCH_ENG_RD32(device, NPORT, , link, _EGRESS, _ERR_NON_FATAL_REPORT_EN_0);
2569 report.mask = report.raw_enable & chip_device->intr_mask.egress[0].nonfatal;
2570 pending = report.raw_pending & report.mask;
2571 pending_0 = pending;
2572
2573 if (pending == 0)
2574 {
2575 goto _nvswitch_service_egress_nonfatal_ls10_err_status_1;
2576 }
2577
2578 unhandled = pending;
2579 report.raw_first = NVSWITCH_ENG_RD32(device, NPORT, , link, _EGRESS, _ERR_FIRST_0);
2580 _nvswitch_collect_error_info_ls10(device, link,
2581 NVSWITCH_RAW_ERROR_LOG_DATA_FLAG_EGRESS_TIME |
2582 NVSWITCH_RAW_ERROR_LOG_DATA_FLAG_EGRESS_MISC |
2583 NVSWITCH_RAW_ERROR_LOG_DATA_FLAG_EGRESS_HDR,
2584 &data);
2585
2586 bit = DRF_NUM(_EGRESS, _ERR_STATUS_0, _NXBAR_HDR_ECC_LIMIT_ERR, 1);
2587 if (nvswitch_test_flags(pending, bit))
2588 {
2589 // Ignore LIMIT error if DBE is pending
2590 if (!(nvswitch_test_flags(report.raw_pending,
2591 DRF_NUM(_EGRESS, _ERR_STATUS_0, _NXBAR_HDR_ECC_DBE_ERR, 1))))
2592 {
2593 report.data[0] = NVSWITCH_ENG_RD32(device, NPORT, , link, _EGRESS, _ERR_NXBAR_ECC_ERROR_COUNTER);
2594 NVSWITCH_REPORT_NONFATAL(_HW_NPORT_EGRESS_NXBAR_HDR_ECC_LIMIT_ERR, "egress input ECC error limit");
2595 NVSWITCH_REPORT_DATA(_HW_NPORT_EGRESS_NXBAR_HDR_ECC_LIMIT_ERR, data);
2596
2597 _nvswitch_construct_ecc_error_event_ls10(&err_event,
2598 NVSWITCH_ERR_HW_NPORT_EGRESS_NXBAR_HDR_ECC_LIMIT_ERR, link, NV_FALSE, 0,
2599 NV_FALSE, 1);
2600
2601 nvswitch_inforom_ecc_log_err_event(device, &err_event);
2602 }
2603
2604 nvswitch_clear_flags(&unhandled, bit);
2605 }
2606
2607 bit = DRF_NUM(_EGRESS, _ERR_STATUS_0, _RAM_OUT_HDR_ECC_LIMIT_ERR, 1);
2608 if (nvswitch_test_flags(pending, bit))
2609 {
2610 // Ignore LIMIT error if DBE is pending
2611 if(!(nvswitch_test_flags(report.raw_pending,
2612 DRF_NUM(_EGRESS, _ERR_STATUS_0, _RAM_OUT_HDR_ECC_DBE_ERR, 1))))
2613 {
2614 NvBool bAddressValid = NV_FALSE;
2615 NvU32 address = 0;
2616 NvU32 addressValid = NVSWITCH_ENG_RD32(device, NPORT, , link, _EGRESS,
2617 _ERR_RAM_OUT_ECC_ERROR_ADDRESS_VALID);
2618
2619 if (FLD_TEST_DRF(_EGRESS_ERR_RAM_OUT, _ECC_ERROR_ADDRESS_VALID, _VALID, _VALID,
2620 addressValid))
2621 {
2622 address = NVSWITCH_ENG_RD32(device, NPORT, , link, _EGRESS,
2623 _ERR_RAM_OUT_ECC_ERROR_ADDRESS);
2624 bAddressValid = NV_TRUE;
2625 }
2626
2627 report.data[0] = NVSWITCH_ENG_RD32(device, NPORT, , link, _EGRESS, _ERR_RAM_OUT_ECC_ERROR_COUNTER);
2628 report.data[1] = NVSWITCH_ENG_RD32(device, NPORT, , link, _EGRESS, _ERR_RAM_OUT_ECC_ERROR_ADDRESS);
2629 NVSWITCH_REPORT_NONFATAL(_HW_NPORT_EGRESS_RAM_OUT_HDR_ECC_LIMIT_ERR, "egress output ECC error limit");
2630 NVSWITCH_REPORT_DATA(_HW_NPORT_EGRESS_RAM_OUT_HDR_ECC_LIMIT_ERR, data);
2631
2632 _nvswitch_construct_ecc_error_event_ls10(&err_event,
2633 NVSWITCH_ERR_HW_NPORT_EGRESS_RAM_OUT_HDR_ECC_LIMIT_ERR, link, bAddressValid, address,
2634 NV_FALSE, 1);
2635
2636 nvswitch_inforom_ecc_log_err_event(device, &err_event);
2637 }
2638
2639 nvswitch_clear_flags(&unhandled, bit);
2640 }
2641
2642 bit = DRF_NUM(_EGRESS, _ERR_STATUS_0, _PRIVRSPERR, 1);
2643 if (nvswitch_test_flags(pending, bit))
2644 {
2645 NVSWITCH_REPORT_NONFATAL(_HW_NPORT_EGRESS_PRIVRSPERR, "egress non-posted PRIV error");
2646 NVSWITCH_REPORT_DATA(_HW_NPORT_EGRESS_PRIVRSPERR, data);
2647 nvswitch_clear_flags(&unhandled, bit);
2648 }
2649
2650 NVSWITCH_UNHANDLED_CHECK(device, unhandled);
2651
2652 // Disable interrupts that have occurred after fatal error.
2653 // This helps prevent an interrupt storm if HW keeps triggering unnecessary stream of interrupts.
2654 if (device->link[link].fatal_error_occurred)
2655 {
2656 NVSWITCH_ENG_WR32(device, NPORT, , link, _EGRESS, _ERR_NON_FATAL_REPORT_EN_0,
2657 report.raw_enable & ~pending);
2658 }
2659
2660 if (report.raw_first & report.mask)
2661 {
2662 NVSWITCH_ENG_WR32(device, NPORT, , link, _EGRESS, _ERR_FIRST_0,
2663 report.raw_first & report.mask);
2664 }
2665
2666 NVSWITCH_ENG_WR32(device, NPORT, , link, _EGRESS, _ERR_STATUS_0, pending);
2667
2668 // HACK: Clear all pending interrupts!
2669 NVSWITCH_ENG_WR32(device, NPORT, , link, _EGRESS, _ERR_STATUS_0, 0xFFFFFFFF);
2670
2671 if (unhandled != 0)
2672 {
2673 status = -NVL_MORE_PROCESSING_REQUIRED;
2674 }
2675
2676 _nvswitch_service_egress_nonfatal_ls10_err_status_1:
2677 report.raw_pending = NVSWITCH_ENG_RD32(device, NPORT, , link, _EGRESS, _ERR_STATUS_1);
2678 report.raw_enable = NVSWITCH_ENG_RD32(device, NPORT, , link, _EGRESS, _ERR_NON_FATAL_REPORT_EN_1);
2679 report.mask = report.raw_enable & chip_device->intr_mask.egress[1].nonfatal;
2680 pending = report.raw_pending & report.mask;
2681 pending_1 = pending;
2682
2683 if ((pending_0 == 0) && (pending_1 == 0))
2684 {
2685 return -NVL_NOT_FOUND;
2686 }
2687
2688 unhandled = pending;
2689 report.raw_first = NVSWITCH_ENG_RD32(device, NPORT, , link, _EGRESS, _ERR_FIRST_1);
2690
2691 bit = DRF_NUM(_EGRESS, _ERR_STATUS_1, _NXBAR_REDUCTION_HDR_ECC_LIMIT_ERR, 1);
2692 if (nvswitch_test_flags(pending, bit))
2693 {
2694 // Ignore LIMIT error if DBE is pending
2695 if (!(nvswitch_test_flags(report.raw_pending,
2696 DRF_NUM(_EGRESS, _ERR_STATUS_1, _NXBAR_REDUCTION_HDR_ECC_DBE_ERR, 1))))
2697 {
2698 NVSWITCH_REPORT_NONFATAL(_HW_NPORT_EGRESS_NXBAR_REDUCTION_HDR_ECC_LIMIT_ERR, "egress reduction header ECC error limit");
2699 NVSWITCH_REPORT_DATA(_HW_NPORT_EGRESS_NXBAR_HDR_ECC_LIMIT_ERR, data);
2700
2701 _nvswitch_construct_ecc_error_event_ls10(&err_event,
2702 NVSWITCH_ERR_HW_NPORT_EGRESS_NXBAR_REDUCTION_HDR_ECC_LIMIT_ERR, link, NV_FALSE, 0,
2703 NV_FALSE, 1);
2704
2705 nvswitch_inforom_ecc_log_err_event(device, &err_event);
2706 }
2707
2708 nvswitch_clear_flags(&unhandled, bit);
2709 }
2710
2711 bit = DRF_NUM(_EGRESS, _ERR_STATUS_1, _MCRSPCTRLSTORE_ECC_LIMIT_ERR, 1);
2712 if (nvswitch_test_flags(pending, bit))
2713 {
2714 // Ignore LIMIT error if DBE is pending
2715 if (!(nvswitch_test_flags(report.raw_pending,
2716 DRF_NUM(_EGRESS, _ERR_STATUS_1, _MCRSPCTRLSTORE_ECC_DBE_ERR, 1))))
2717 {
2718 NVSWITCH_REPORT_NONFATAL(_HW_NPORT_EGRESS_MCRSPCTRLSTORE_ECC_LIMIT_ERR, "egress MC response ECC error limit");
2719 NVSWITCH_REPORT_DATA(_HW_NPORT_EGRESS_MCRSPCTRLSTORE_ECC_LIMIT_ERR, data);
2720
2721 _nvswitch_construct_ecc_error_event_ls10(&err_event,
2722 NVSWITCH_ERR_HW_NPORT_EGRESS_MCRSPCTRLSTORE_ECC_LIMIT_ERR, link, NV_FALSE, 0,
2723 NV_FALSE, 1);
2724
2725 nvswitch_inforom_ecc_log_err_event(device, &err_event);
2726 }
2727
2728 nvswitch_clear_flags(&unhandled, bit);
2729 }
2730
2731 bit = DRF_NUM(_EGRESS, _ERR_STATUS_1, _RBCTRLSTORE_ECC_LIMIT_ERR, 1);
2732 if (nvswitch_test_flags(pending, bit))
2733 {
2734 // Ignore LIMIT error if DBE is pending
2735 if (!(nvswitch_test_flags(report.raw_pending,
2736 DRF_NUM(_EGRESS, _ERR_STATUS_1, _RBCTRLSTORE_ECC_DBE_ERR, 1))))
2737 {
2738 NVSWITCH_REPORT_NONFATAL(_HW_NPORT_EGRESS_RBCTRLSTORE_ECC_LIMIT_ERR, "egress RB ECC error limit");
2739 NVSWITCH_REPORT_DATA(_HW_NPORT_EGRESS_RBCTRLSTORE_ECC_LIMIT_ERR, data);
2740
2741 _nvswitch_construct_ecc_error_event_ls10(&err_event,
2742 NVSWITCH_ERR_HW_NPORT_EGRESS_RBCTRLSTORE_ECC_LIMIT_ERR, link, NV_FALSE, 0,
2743 NV_FALSE, 1);
2744
2745 nvswitch_inforom_ecc_log_err_event(device, &err_event);
2746 }
2747
2748 nvswitch_clear_flags(&unhandled, bit);
2749 }
2750
2751 bit = DRF_NUM(_EGRESS, _ERR_STATUS_1, _MCREDSGT_ECC_LIMIT_ERR, 1);
2752 if (nvswitch_test_flags(pending, bit))
2753 {
2754 // Ignore LIMIT error if DBE is pending
2755 if (!(nvswitch_test_flags(report.raw_pending,
2756 DRF_NUM(_EGRESS, _ERR_STATUS_1, _MCREDSGT_ECC_DBE_ERR, 1))))
2757 {
2758 NVSWITCH_REPORT_NONFATAL(_HW_NPORT_EGRESS_MCREDSGT_ECC_LIMIT_ERR, "egress RSG ECC error limit");
2759 NVSWITCH_REPORT_DATA(_HW_NPORT_EGRESS_MCREDSGT_ECC_LIMIT_ERR, data);
2760
2761 _nvswitch_construct_ecc_error_event_ls10(&err_event,
2762 NVSWITCH_ERR_HW_NPORT_EGRESS_MCREDSGT_ECC_LIMIT_ERR, link, NV_FALSE, 0,
2763 NV_FALSE, 1);
2764
2765 nvswitch_inforom_ecc_log_err_event(device, &err_event);
2766 }
2767
2768 nvswitch_clear_flags(&unhandled, bit);
2769 }
2770
2771 bit = DRF_NUM(_EGRESS, _ERR_STATUS_1, _MCREDBUF_ECC_LIMIT_ERR, 1);
2772 if (nvswitch_test_flags(pending, bit))
2773 {
2774 // Ignore LIMIT error if DBE is pending
2775 if (!(nvswitch_test_flags(report.raw_pending,
2776 DRF_NUM(_EGRESS, _ERR_STATUS_1, _MCREDBUF_ECC_DBE_ERR, 1))))
2777 {
2778 NVSWITCH_REPORT_NONFATAL(_HW_NPORT_EGRESS_MCREDBUF_ECC_LIMIT_ERR, "egress MCRB ECC error limit");
2779 NVSWITCH_REPORT_DATA(_HW_NPORT_EGRESS_MCREDBUF_ECC_LIMIT_ERR, data);
2780
2781 _nvswitch_construct_ecc_error_event_ls10(&err_event,
2782 NVSWITCH_ERR_HW_NPORT_EGRESS_MCREDBUF_ECC_LIMIT_ERR, link, NV_FALSE, 0,
2783 NV_FALSE, 1);
2784
2785 nvswitch_inforom_ecc_log_err_event(device, &err_event);
2786 }
2787
2788 nvswitch_clear_flags(&unhandled, bit);
2789 }
2790
2791 bit = DRF_NUM(_EGRESS, _ERR_STATUS_1, _MCRSP_RAM_HDR_ECC_LIMIT_ERR, 1);
2792 if (nvswitch_test_flags(pending, bit))
2793 {
2794 // Ignore LIMIT error if DBE is pending
2795 if (!(nvswitch_test_flags(report.raw_pending,
2796 DRF_NUM(_EGRESS, _ERR_STATUS_1, _MCRSP_RAM_HDR_ECC_DBE_ERR, 1))))
2797 {
2798 NVSWITCH_REPORT_NONFATAL(_HW_NPORT_EGRESS_MCRSP_RAM_HDR_ECC_LIMIT_ERR, "egress MC header ECC error limit");
2799 NVSWITCH_REPORT_DATA(_HW_NPORT_EGRESS_MCRSP_RAM_HDR_ECC_LIMIT_ERR, data);
2800
2801 _nvswitch_construct_ecc_error_event_ls10(&err_event,
2802 NVSWITCH_ERR_HW_NPORT_EGRESS_MCRSP_RAM_HDR_ECC_LIMIT_ERR, link, NV_FALSE, 0,
2803 NV_FALSE, 1);
2804
2805 nvswitch_inforom_ecc_log_err_event(device, &err_event);
2806 }
2807
2808 nvswitch_clear_flags(&unhandled, bit);
2809 }
2810
2811 bit = DRF_NUM(_EGRESS, _ERR_STATUS_1, _NXBAR_REDUCTION_HDR_ECC_DBE_ERR, 1);
2812 if (nvswitch_test_flags(pending, bit))
2813 {
2814 NVSWITCH_REPORT_NONFATAL(_HW_NPORT_EGRESS_NXBAR_REDUCTION_HDR_ECC_DBE_ERR, "egress reduction header ECC DBE error");
2815 NVSWITCH_REPORT_DATA(_HW_NPORT_EGRESS_NXBAR_REDUCTION_HDR_ECC_DBE_ERR, data);
2816 nvswitch_clear_flags(&unhandled, bit);
2817
2818 _nvswitch_construct_ecc_error_event_ls10(&err_event,
2819 NVSWITCH_ERR_HW_NPORT_EGRESS_NXBAR_REDUCTION_HDR_ECC_DBE_ERR, link, NV_FALSE, 0,
2820 NV_TRUE, 1);
2821
2822 nvswitch_inforom_ecc_log_err_event(device, &err_event);
2823
2824 // Clear associated LIMIT_ERR interrupt
2825 if (report.raw_pending & DRF_NUM(_EGRESS, _ERR_STATUS_1, _NXBAR_REDUCTION_HDR_ECC_LIMIT_ERR, 1))
2826 {
2827 NVSWITCH_ENG_WR32(device, NPORT, , link, _EGRESS, _ERR_STATUS_1,
2828 DRF_NUM(_EGRESS, _ERR_STATUS_1, _NXBAR_REDUCTION_HDR_ECC_LIMIT_ERR, 1));
2829 }
2830 }
2831
2832 bit = DRF_NUM(_EGRESS, _ERR_STATUS_1, _NXBAR_REDUCTION_HDR_PARITY_ERR, 1);
2833 if (nvswitch_test_flags(pending, bit))
2834 {
2835 NVSWITCH_REPORT_NONFATAL(_HW_NPORT_EGRESS_NXBAR_REDUCTION_HDR_PARITY_ERR, "egress reduction header parity error");
2836 NVSWITCH_REPORT_DATA(_HW_NPORT_EGRESS_NXBAR_REDUCTION_HDR_PARITY_ERR, data);
2837 nvswitch_clear_flags(&unhandled, bit);
2838 }
2839
2840 bit = DRF_NUM(_EGRESS, _ERR_STATUS_1, _NXBAR_REDUCTION_FLITTYPE_MISMATCH_ERR, 1);
2841 if (nvswitch_test_flags(pending, bit))
2842 {
2843 NVSWITCH_REPORT_NONFATAL(_HW_NPORT_EGRESS_NXBAR_REDUCTION_FLITTYPE_MISMATCH_ERR, "egress reduction flit mismatch error");
2844 NVSWITCH_REPORT_DATA(_HW_NPORT_EGRESS_NXBAR_REDUCTION_FLITTYPE_MISMATCH_ERR, data);
2845 nvswitch_clear_flags(&unhandled, bit);
2846 }
2847
2848 bit = DRF_NUM(_EGRESS, _ERR_STATUS_1, _MCREDBUF_ECC_DBE_ERR, 1);
2849 if (nvswitch_test_flags(pending, bit))
2850 {
2851 NVSWITCH_REPORT_NONFATAL(_HW_NPORT_EGRESS_MCREDBUF_ECC_DBE_ERR, "egress reduction buffer ECC DBE error");
2852 NVSWITCH_REPORT_DATA(_HW_NPORT_EGRESS_MCREDBUF_ECC_DBE_ERR, data);
2853 nvswitch_clear_flags(&unhandled, bit);
2854
2855 _nvswitch_construct_ecc_error_event_ls10(&err_event,
2856 NVSWITCH_ERR_HW_NPORT_EGRESS_MCREDBUF_ECC_DBE_ERR, link, NV_FALSE, 0,
2857 NV_TRUE, 1);
2858
2859 nvswitch_inforom_ecc_log_err_event(device, &err_event);
2860
2861 // Clear associated LIMIT_ERR interrupt
2862 if (report.raw_pending & DRF_NUM(_EGRESS, _ERR_STATUS_1, _MCREDBUF_ECC_LIMIT_ERR, 1))
2863 {
2864 NVSWITCH_ENG_WR32(device, NPORT, , link, _EGRESS, _ERR_STATUS_1,
2865 DRF_NUM(_EGRESS, _ERR_STATUS_1, _MCREDBUF_ECC_LIMIT_ERR, 1));
2866 }
2867 }
2868
2869 bit = DRF_NUM(_EGRESS, _ERR_STATUS_1, _MCRSP_CNT_ERR, 1);
2870 if (nvswitch_test_flags(pending, bit))
2871 {
2872 _nvswitch_collect_error_info_ls10(device, link,
2873 NVSWITCH_RAW_ERROR_LOG_DATA_FLAG_EGRESS_MC_TIME |
2874 NVSWITCH_RAW_ERROR_LOG_DATA_FLAG_EGRESS_MC_MISC |
2875 NVSWITCH_RAW_ERROR_LOG_DATA_FLAG_EGRESS_MC_HDR,
2876 &data);
2877 NVSWITCH_REPORT_NONFATAL(_HW_NPORT_EGRESS_MCRSP_CNT_ERR, "egress MC response count error");
2878 NVSWITCH_REPORT_DATA(_HW_NPORT_EGRESS_MCRSP_CNT_ERR, data);
2879 nvswitch_clear_flags(&unhandled, bit);
2880 }
2881
2882 bit = DRF_NUM(_EGRESS, _ERR_STATUS_1, _RBRSP_CNT_ERR, 1);
2883 if (nvswitch_test_flags(pending, bit))
2884 {
2885 _nvswitch_collect_error_info_ls10(device, link,
2886 NVSWITCH_RAW_ERROR_LOG_DATA_FLAG_EGRESS_MC_TIME |
2887 NVSWITCH_RAW_ERROR_LOG_DATA_FLAG_EGRESS_MC_MISC |
2888 NVSWITCH_RAW_ERROR_LOG_DATA_FLAG_EGRESS_MC_HDR,
2889 &data);
2890 NVSWITCH_REPORT_NONFATAL(_HW_NPORT_EGRESS_RBRSP_CNT_ERR, "egress reduction response count error");
2891 NVSWITCH_REPORT_DATA(_HW_NPORT_EGRESS_RBRSP_CNT_ERR, data);
2892 nvswitch_clear_flags(&unhandled, bit);
2893 }
2894
2895 NVSWITCH_UNHANDLED_CHECK(device, unhandled);
2896
2897 // Disable interrupts that have occurred after fatal error.
2898 // This helps prevent an interrupt storm if HW keeps triggering unnecessary stream of interrupts.
2899 if (device->link[link].fatal_error_occurred)
2900 {
2901 NVSWITCH_ENG_WR32(device, NPORT, , link, _EGRESS, _ERR_NON_FATAL_REPORT_EN_1,
2902 report.raw_enable & ~pending);
2903 }
2904
2905 if (report.raw_first & report.mask)
2906 {
2907 NVSWITCH_ENG_WR32(device, NPORT, , link, _EGRESS, _ERR_FIRST_1,
2908 report.raw_first & report.mask);
2909 }
2910
2911 NVSWITCH_ENG_WR32(device, NPORT, , link, _EGRESS, _ERR_STATUS_1, pending);
2912
2913 // Clear all pending interrupts!
2914 NVSWITCH_ENG_WR32(device, NPORT, , link, _EGRESS, _ERR_STATUS_1, 0xFFFFFFFF);
2915
2916 if (unhandled != 0)
2917 {
2918 status = -NVL_MORE_PROCESSING_REQUIRED;
2919 }
2920
2921 return status;
2922 }
2923
2924 static NvlStatus
_nvswitch_service_egress_fatal_ls10(nvswitch_device * device,NvU32 link)2925 _nvswitch_service_egress_fatal_ls10
2926 (
2927 nvswitch_device *device,
2928 NvU32 link
2929 )
2930 {
2931 ls10_device *chip_device = NVSWITCH_GET_CHIP_DEVICE_LS10(device);
2932 NVSWITCH_INTERRUPT_LOG_TYPE report = { 0 };
2933 NvU32 pending, bit, contain, unhandled;
2934 NvU32 pending_0, pending_1;
2935 NVSWITCH_RAW_ERROR_LOG_TYPE data = {0, { 0 }};
2936 NVSWITCH_RAW_ERROR_LOG_TYPE credit_data = {0, { 0 }};
2937 NVSWITCH_RAW_ERROR_LOG_TYPE buffer_data = {0, { 0 }};
2938 INFOROM_NVS_ECC_ERROR_EVENT err_event = {0};
2939 NvlStatus status = NVL_SUCCESS;
2940
2941 report.raw_pending = NVSWITCH_ENG_RD32(device, NPORT, , link, _EGRESS, _ERR_STATUS_0);
2942 report.raw_enable = NVSWITCH_ENG_RD32(device, NPORT, , link, _EGRESS, _ERR_FATAL_REPORT_EN_0);
2943 report.mask = report.raw_enable & chip_device->intr_mask.egress[0].fatal;
2944 pending = report.raw_pending & report.mask;
2945 pending_0 = pending;
2946
2947 if (pending == 0)
2948 {
2949 goto _nvswitch_service_egress_fatal_ls10_err_status_1;
2950 }
2951
2952 unhandled = pending;
2953 report.raw_first = NVSWITCH_ENG_RD32(device, NPORT, , link, _EGRESS, _ERR_FIRST_0);
2954 contain = NVSWITCH_ENG_RD32(device, NPORT, , link, _EGRESS, _ERR_CONTAIN_EN_0);
2955 _nvswitch_collect_error_info_ls10(device, link,
2956 NVSWITCH_RAW_ERROR_LOG_DATA_FLAG_EGRESS_TIME |
2957 NVSWITCH_RAW_ERROR_LOG_DATA_FLAG_EGRESS_MISC |
2958 NVSWITCH_RAW_ERROR_LOG_DATA_FLAG_EGRESS_HDR,
2959 &data);
2960
2961 bit = DRF_NUM(_EGRESS, _ERR_STATUS_0, _EGRESSBUFERR, 1);
2962 if (nvswitch_test_flags(pending, bit))
2963 {
2964 NVSWITCH_REPORT_CONTAIN(_HW_NPORT_EGRESS_EGRESSBUFERR, "egress crossbar overflow", NV_TRUE);
2965 NVSWITCH_REPORT_CONTAIN_DATA(_HW_NPORT_EGRESS_EGRESSBUFERR, data);
2966
2967 buffer_data.data[0] = NVSWITCH_ENG_RD32(device, NPORT, , link, _EGRESS, _BUFFER_POINTERS0);
2968 buffer_data.data[1] = NVSWITCH_ENG_RD32(device, NPORT, , link, _EGRESS, _BUFFER_POINTERS1);
2969 buffer_data.data[2] = NVSWITCH_ENG_RD32(device, NPORT, , link, _EGRESS, _BUFFER_POINTERS2);
2970 buffer_data.data[3] = NVSWITCH_ENG_RD32(device, NPORT, , link, _EGRESS, _BUFFER_POINTERS3);
2971 buffer_data.data[4] = NVSWITCH_ENG_RD32(device, NPORT, , link, _EGRESS, _BUFFER_POINTERS4);
2972 buffer_data.data[5] = NVSWITCH_ENG_RD32(device, NPORT, , link, _EGRESS, _BUFFER_POINTERS5);
2973 buffer_data.data[6] = NVSWITCH_ENG_RD32(device, NPORT, , link, _EGRESS, _BUFFER_POINTERS6);
2974 buffer_data.data[7] = NVSWITCH_ENG_RD32(device, NPORT, , link, _EGRESS, _BUFFER_POINTERS7);
2975 NVSWITCH_REPORT_CONTAIN_DATA(_HW_NPORT_EGRESS_EGRESSBUFERR, buffer_data);
2976 nvswitch_clear_flags(&unhandled, bit);
2977 }
2978
2979 bit = DRF_NUM(_EGRESS, _ERR_STATUS_0, _PKTROUTEERR, 1);
2980 if (nvswitch_test_flags(pending, bit))
2981 {
2982 NVSWITCH_REPORT_CONTAIN(_HW_NPORT_EGRESS_PKTROUTEERR, "egress packet route", NV_TRUE);
2983 NVSWITCH_REPORT_CONTAIN_DATA(_HW_NPORT_EGRESS_PKTROUTEERR, data);
2984 nvswitch_clear_flags(&unhandled, bit);
2985 }
2986
2987 bit = DRF_NUM(_EGRESS, _ERR_STATUS_0, _SEQIDERR, 1);
2988 if (nvswitch_test_flags(pending, bit))
2989 {
2990 NVSWITCH_REPORT_CONTAIN(_HW_NPORT_EGRESS_SEQIDERR, "egress sequence ID error", NV_TRUE);
2991 NVSWITCH_REPORT_CONTAIN_DATA(_HW_NPORT_EGRESS_SEQIDERR, data);
2992 nvswitch_clear_flags(&unhandled, bit);
2993 }
2994
2995 bit = DRF_NUM(_EGRESS, _ERR_STATUS_0, _NXBAR_HDR_ECC_DBE_ERR, 1);
2996 if (nvswitch_test_flags(pending, bit))
2997 {
2998 NVSWITCH_REPORT_CONTAIN(_HW_NPORT_EGRESS_NXBAR_HDR_ECC_DBE_ERR, "egress input ECC DBE error", NV_FALSE);
2999 NVSWITCH_REPORT_CONTAIN_DATA(_HW_NPORT_EGRESS_NXBAR_HDR_ECC_DBE_ERR, data);
3000 nvswitch_clear_flags(&unhandled, bit);
3001
3002 _nvswitch_construct_ecc_error_event_ls10(&err_event,
3003 NVSWITCH_ERR_HW_NPORT_EGRESS_NXBAR_HDR_ECC_DBE_ERR, link, NV_FALSE, 0,
3004 NV_TRUE, 1);
3005
3006 nvswitch_inforom_ecc_log_err_event(device, &err_event);
3007
3008 // Clear associated LIMIT_ERR interrupt
3009 if (report.raw_pending & DRF_NUM(_EGRESS, _ERR_STATUS_0, _NXBAR_HDR_ECC_LIMIT_ERR, 1))
3010 {
3011 NVSWITCH_ENG_WR32(device, NPORT, , link, _EGRESS, _ERR_STATUS_0,
3012 DRF_NUM(_EGRESS, _ERR_STATUS_0, _NXBAR_HDR_ECC_LIMIT_ERR, 1));
3013 }
3014 }
3015
3016 bit = DRF_NUM(_EGRESS, _ERR_STATUS_0, _RAM_OUT_HDR_ECC_DBE_ERR, 1);
3017 if (nvswitch_test_flags(pending, bit))
3018 {
3019 NvBool bAddressValid = NV_FALSE;
3020 NvU32 address = 0;
3021 NvU32 addressValid = NVSWITCH_ENG_RD32(device, NPORT, , link, _EGRESS,
3022 _ERR_RAM_OUT_ECC_ERROR_ADDRESS_VALID);
3023
3024 if (FLD_TEST_DRF(_EGRESS_ERR_RAM_OUT, _ECC_ERROR_ADDRESS_VALID, _VALID, _VALID,
3025 addressValid))
3026 {
3027 address = NVSWITCH_ENG_RD32(device, NPORT, , link, _EGRESS,
3028 _ERR_RAM_OUT_ECC_ERROR_ADDRESS);
3029 bAddressValid = NV_TRUE;
3030 }
3031
3032 NVSWITCH_REPORT_CONTAIN(_HW_NPORT_EGRESS_RAM_OUT_HDR_ECC_DBE_ERR, "egress output ECC DBE error", NV_FALSE);
3033 NVSWITCH_REPORT_CONTAIN_DATA(_HW_NPORT_EGRESS_RAM_OUT_HDR_ECC_DBE_ERR, data);
3034 nvswitch_clear_flags(&unhandled, bit);
3035
3036 _nvswitch_construct_ecc_error_event_ls10(&err_event,
3037 NVSWITCH_ERR_HW_NPORT_EGRESS_RAM_OUT_HDR_ECC_DBE_ERR, link, bAddressValid,
3038 address, NV_TRUE, 1);
3039
3040 nvswitch_inforom_ecc_log_err_event(device, &err_event);
3041
3042 // Clear associated LIMIT_ERR interrupt
3043 if (report.raw_pending & DRF_NUM(_EGRESS, _ERR_STATUS_0, _RAM_OUT_HDR_ECC_LIMIT_ERR, 1))
3044 {
3045 NVSWITCH_ENG_WR32(device, NPORT, , link, _EGRESS, _ERR_STATUS_0,
3046 DRF_NUM(_EGRESS, _ERR_STATUS_0, _RAM_OUT_HDR_ECC_LIMIT_ERR, 1));
3047 }
3048 }
3049
3050 bit = DRF_NUM(_EGRESS, _ERR_STATUS_0, _NCISOCCREDITOVFL, 1);
3051 if (nvswitch_test_flags(pending, bit))
3052 {
3053 NVSWITCH_REPORT_CONTAIN(_HW_NPORT_EGRESS_NCISOCCREDITOVFL, "egress credit overflow", NV_FALSE);
3054 NVSWITCH_REPORT_CONTAIN_DATA(_HW_NPORT_EGRESS_NCISOCCREDITOVFL, data);
3055
3056 credit_data.data[0] = NVSWITCH_ENG_RD32(device, NPORT, , link, _EGRESS, _NCISOC_CREDIT0);
3057 credit_data.data[1] = NVSWITCH_ENG_RD32(device, NPORT, , link, _EGRESS, _NCISOC_CREDIT1);
3058 credit_data.data[2] = NVSWITCH_ENG_RD32(device, NPORT, , link, _EGRESS, _NCISOC_CREDIT2);
3059 credit_data.data[3] = NVSWITCH_ENG_RD32(device, NPORT, , link, _EGRESS, _NCISOC_CREDIT3);
3060 credit_data.data[4] = NVSWITCH_ENG_RD32(device, NPORT, , link, _EGRESS, _NCISOC_CREDIT4);
3061 credit_data.data[5] = NVSWITCH_ENG_RD32(device, NPORT, , link, _EGRESS, _NCISOC_CREDIT5);
3062 credit_data.data[6] = NVSWITCH_ENG_RD32(device, NPORT, , link, _EGRESS, _NCISOC_CREDIT6);
3063 credit_data.data[7] = NVSWITCH_ENG_RD32(device, NPORT, , link, _EGRESS, _NCISOC_CREDIT7);
3064 NVSWITCH_REPORT_CONTAIN_DATA(_HW_NPORT_EGRESS_NCISOCCREDITOVFL, credit_data);
3065 nvswitch_clear_flags(&unhandled, bit);
3066 }
3067
3068 bit = DRF_NUM(_EGRESS, _ERR_STATUS_0, _REQTGTIDMISMATCHERR, 1);
3069 if (nvswitch_test_flags(pending, bit))
3070 {
3071 NVSWITCH_REPORT_CONTAIN(_HW_NPORT_EGRESS_REQTGTIDMISMATCHERR, "egress destination request ID error", NV_FALSE);
3072 NVSWITCH_REPORT_CONTAIN_DATA(_HW_NPORT_EGRESS_REQTGTIDMISMATCHERR, data);
3073 nvswitch_clear_flags(&unhandled, bit);
3074 }
3075
3076 bit = DRF_NUM(_EGRESS, _ERR_STATUS_0, _RSPREQIDMISMATCHERR, 1);
3077 if (nvswitch_test_flags(pending, bit))
3078 {
3079 NVSWITCH_REPORT_CONTAIN(_HW_NPORT_EGRESS_RSPREQIDMISMATCHERR, "egress destination response ID error", NV_FALSE);
3080 NVSWITCH_REPORT_CONTAIN_DATA(_HW_NPORT_EGRESS_RSPREQIDMISMATCHERR, data);
3081 nvswitch_clear_flags(&unhandled, bit);
3082 }
3083
3084 bit = DRF_NUM(_EGRESS, _ERR_STATUS_0, _URRSPERR, 1);
3085 if (nvswitch_test_flags(pending, bit))
3086 {
3087 NVSWITCH_REPORT_CONTAIN(_HW_NPORT_EGRESS_DROPNPURRSPERR, "egress non-posted UR error", NV_FALSE);
3088 NVSWITCH_REPORT_CONTAIN_DATA(_HW_NPORT_EGRESS_DROPNPURRSPERR, data);
3089 nvswitch_clear_flags(&unhandled, bit);
3090 }
3091
3092 bit = DRF_NUM(_EGRESS, _ERR_STATUS_0, _HWRSPERR, 1);
3093 if (nvswitch_test_flags(pending, bit))
3094 {
3095 NVSWITCH_REPORT_CONTAIN(_HW_NPORT_EGRESS_HWRSPERR, "egress non-posted HW error", NV_FALSE);
3096 NVSWITCH_REPORT_CONTAIN_DATA(_HW_NPORT_EGRESS_HWRSPERR, data);
3097 nvswitch_clear_flags(&unhandled, bit);
3098 }
3099
3100 bit = DRF_NUM(_EGRESS, _ERR_STATUS_0, _NXBAR_HDR_PARITY_ERR, 1);
3101 if (nvswitch_test_flags(pending, bit))
3102 {
3103 NVSWITCH_REPORT_CONTAIN(_HW_NPORT_EGRESS_NXBAR_HDR_PARITY_ERR, "egress control parity error", NV_FALSE);
3104 NVSWITCH_REPORT_CONTAIN_DATA(_HW_NPORT_EGRESS_NXBAR_HDR_PARITY_ERR, data);
3105 nvswitch_clear_flags(&unhandled, bit);
3106
3107 _nvswitch_construct_ecc_error_event_ls10(&err_event,
3108 NVSWITCH_ERR_HW_NPORT_EGRESS_NXBAR_HDR_PARITY_ERR, link, NV_FALSE, 0,
3109 NV_TRUE, 1);
3110
3111 nvswitch_inforom_ecc_log_err_event(device, &err_event);
3112 }
3113
3114 bit = DRF_NUM(_EGRESS, _ERR_STATUS_0, _NCISOC_CREDIT_PARITY_ERR, 1);
3115 if (nvswitch_test_flags(pending, bit))
3116 {
3117 NVSWITCH_REPORT_CONTAIN(_HW_NPORT_EGRESS_NCISOC_CREDIT_PARITY_ERR, "egress credit parity error", NV_FALSE);
3118 NVSWITCH_REPORT_CONTAIN_DATA(_HW_NPORT_EGRESS_NCISOC_CREDIT_PARITY_ERR, data);
3119
3120 credit_data.data[0] = NVSWITCH_ENG_RD32(device, NPORT, , link, _EGRESS, _NCISOC_CREDIT0);
3121 credit_data.data[1] = NVSWITCH_ENG_RD32(device, NPORT, , link, _EGRESS, _NCISOC_CREDIT1);
3122 credit_data.data[2] = NVSWITCH_ENG_RD32(device, NPORT, , link, _EGRESS, _NCISOC_CREDIT2);
3123 credit_data.data[3] = NVSWITCH_ENG_RD32(device, NPORT, , link, _EGRESS, _NCISOC_CREDIT3);
3124 credit_data.data[4] = NVSWITCH_ENG_RD32(device, NPORT, , link, _EGRESS, _NCISOC_CREDIT4);
3125 credit_data.data[5] = NVSWITCH_ENG_RD32(device, NPORT, , link, _EGRESS, _NCISOC_CREDIT5);
3126 credit_data.data[6] = NVSWITCH_ENG_RD32(device, NPORT, , link, _EGRESS, _NCISOC_CREDIT6);
3127 credit_data.data[7] = NVSWITCH_ENG_RD32(device, NPORT, , link, _EGRESS, _NCISOC_CREDIT7);
3128 NVSWITCH_REPORT_CONTAIN_DATA(_HW_NPORT_EGRESS_NCISOC_CREDIT_PARITY_ERR, credit_data);
3129 nvswitch_clear_flags(&unhandled, bit);
3130
3131 _nvswitch_construct_ecc_error_event_ls10(&err_event,
3132 NVSWITCH_ERR_HW_NPORT_EGRESS_NCISOC_CREDIT_PARITY_ERR, link, NV_FALSE, 0,
3133 NV_TRUE, 1);
3134
3135 nvswitch_inforom_ecc_log_err_event(device, &err_event);
3136 }
3137
3138 bit = DRF_NUM(_EGRESS, _ERR_STATUS_0, _NXBAR_FLITTYPE_MISMATCH_ERR, 1);
3139 if (nvswitch_test_flags(pending, bit))
3140 {
3141 NVSWITCH_REPORT_CONTAIN(_HW_NPORT_EGRESS_NXBAR_FLITTYPE_MISMATCH_ERR, "egress flit type mismatch", NV_FALSE);
3142 NVSWITCH_REPORT_CONTAIN_DATA(_HW_NPORT_EGRESS_NXBAR_FLITTYPE_MISMATCH_ERR, data);
3143 nvswitch_clear_flags(&unhandled, bit);
3144 }
3145
3146 bit = DRF_NUM(_EGRESS, _ERR_STATUS_0, _CREDIT_TIME_OUT_ERR, 1);
3147 if (nvswitch_test_flags(pending, bit))
3148 {
3149 NVSWITCH_REPORT_CONTAIN(_HW_NPORT_EGRESS_CREDIT_TIME_OUT_ERR, "egress credit timeout", NV_FALSE);
3150 NVSWITCH_REPORT_CONTAIN_DATA(_HW_NPORT_EGRESS_CREDIT_TIME_OUT_ERR, data);
3151 nvswitch_clear_flags(&unhandled, bit);
3152 }
3153
3154 bit = DRF_NUM(_EGRESS, _ERR_STATUS_0, _NXBAR_SIDEBAND_PD_PARITY_ERR, 1);
3155 if (nvswitch_test_flags(pending, bit))
3156 {
3157 NVSWITCH_REPORT_CONTAIN(_HW_NPORT_EGRESS_NXBAR_SIDEBAND_PD_PARITY_ERR, "egress crossbar SB parity", NV_FALSE);
3158 NVSWITCH_REPORT_CONTAIN_DATA(_HW_NPORT_EGRESS_NXBAR_SIDEBAND_PD_PARITY_ERR, data);
3159 nvswitch_clear_flags(&unhandled, bit);
3160 }
3161
3162 bit = DRF_NUM(_EGRESS, _ERR_STATUS_0, _INVALIDVCSET_ERR, 1);
3163 if (nvswitch_test_flags(pending, bit))
3164 {
3165 NVSWITCH_REPORT_CONTAIN(_HW_NPORT_EGRESS_INVALIDVCSET_ERR, "egress invalid VC set", NV_FALSE);
3166 NVSWITCH_REPORT_CONTAIN_DATA(_HW_NPORT_EGRESS_INVALIDVCSET_ERR, data);
3167 nvswitch_clear_flags(&unhandled, bit);
3168 }
3169
3170 NVSWITCH_UNHANDLED_CHECK(device, unhandled);
3171
3172 // Disable interrupts that have occurred after fatal error.
3173 // This helps prevent an interrupt storm if HW keeps triggering unnecessary stream of interrupts.
3174 if (device->link[link].fatal_error_occurred)
3175 {
3176 if (nvswitch_is_soe_supported(device))
3177 {
3178 nvswitch_soe_disable_nport_fatal_interrupts_ls10(device, link,
3179 report.raw_enable & ~pending, RM_SOE_CORE_NPORT_EGRESS_0_INTERRUPT);
3180 }
3181 else
3182 {
3183 NVSWITCH_ENG_WR32(device, NPORT, , link, _EGRESS, _ERR_FATAL_REPORT_EN_0,
3184 report.raw_enable & ~pending);
3185 }
3186 }
3187
3188 if (report.raw_first & report.mask)
3189 {
3190 NVSWITCH_ENG_WR32(device, NPORT, , link, _EGRESS, _ERR_FIRST_0,
3191 report.raw_first & report.mask);
3192 }
3193
3194 NVSWITCH_ENG_WR32(device, NPORT, , link, _EGRESS, _ERR_STATUS_0, pending);
3195
3196 if (unhandled != 0)
3197 {
3198 status = -NVL_MORE_PROCESSING_REQUIRED;
3199 }
3200
3201 _nvswitch_service_egress_fatal_ls10_err_status_1:
3202 report.raw_pending = NVSWITCH_ENG_RD32(device, NPORT, , link, _EGRESS, _ERR_STATUS_1);
3203 report.raw_enable = NVSWITCH_ENG_RD32(device, NPORT, , link, _EGRESS, _ERR_FATAL_REPORT_EN_1);
3204 report.mask = report.raw_enable & chip_device->intr_mask.egress[1].fatal;
3205 pending = report.raw_pending & report.mask;
3206 pending_1 = pending;
3207
3208 if ((pending_0 == 0) && (pending_1 == 0))
3209 {
3210 return -NVL_NOT_FOUND;
3211 }
3212
3213 unhandled = pending;
3214 report.raw_first = NVSWITCH_ENG_RD32(device, NPORT, , link, _EGRESS, _ERR_FIRST_1);
3215 contain = NVSWITCH_ENG_RD32(device, NPORT, , link, _EGRESS, _ERR_CONTAIN_EN_1);
3216
3217 bit = DRF_NUM(_EGRESS, _ERR_STATUS_1, _MCRSPCTRLSTORE_ECC_DBE_ERR, 1);
3218 if (nvswitch_test_flags(pending, bit))
3219 {
3220 NVSWITCH_REPORT_CONTAIN(_HW_NPORT_EGRESS_MCRSPCTRLSTORE_ECC_DBE_ERR, "egress MC response ECC DBE error", NV_FALSE);
3221 NVSWITCH_REPORT_CONTAIN_DATA(_HW_NPORT_EGRESS_MCRSPCTRLSTORE_ECC_DBE_ERR, data);
3222 nvswitch_clear_flags(&unhandled, bit);
3223
3224 _nvswitch_construct_ecc_error_event_ls10(&err_event,
3225 NVSWITCH_ERR_HW_NPORT_EGRESS_MCRSPCTRLSTORE_ECC_DBE_ERR, link, NV_FALSE, 0,
3226 NV_TRUE, 1);
3227
3228 nvswitch_inforom_ecc_log_err_event(device, &err_event);
3229
3230 // Clear associated LIMIT_ERR interrupt
3231 if (report.raw_pending & DRF_NUM(_EGRESS, _ERR_STATUS_1, _MCRSPCTRLSTORE_ECC_LIMIT_ERR, 1))
3232 {
3233 NVSWITCH_ENG_WR32(device, NPORT, , link, _EGRESS, _ERR_STATUS_1,
3234 DRF_NUM(_EGRESS, _ERR_STATUS_1, _MCRSPCTRLSTORE_ECC_LIMIT_ERR, 1));
3235 }
3236 }
3237
3238 bit = DRF_NUM(_EGRESS, _ERR_STATUS_1, _RBCTRLSTORE_ECC_DBE_ERR, 1);
3239 if (nvswitch_test_flags(pending, bit))
3240 {
3241 NVSWITCH_REPORT_CONTAIN(_HW_NPORT_EGRESS_RBCTRLSTORE_ECC_DBE_ERR, "egress reduction ECC DBE error", NV_FALSE);
3242 NVSWITCH_REPORT_CONTAIN_DATA(_HW_NPORT_EGRESS_RBCTRLSTORE_ECC_DBE_ERR, data);
3243 nvswitch_clear_flags(&unhandled, bit);
3244
3245 _nvswitch_construct_ecc_error_event_ls10(&err_event,
3246 NVSWITCH_ERR_HW_NPORT_EGRESS_RBCTRLSTORE_ECC_DBE_ERR, link, NV_FALSE, 0,
3247 NV_TRUE, 1);
3248
3249 nvswitch_inforom_ecc_log_err_event(device, &err_event);
3250
3251 // Clear associated LIMIT_ERR interrupt
3252 if (report.raw_pending & DRF_NUM(_EGRESS, _ERR_STATUS_1, _RBCTRLSTORE_ECC_LIMIT_ERR, 1))
3253 {
3254 NVSWITCH_ENG_WR32(device, NPORT, , link, _EGRESS, _ERR_STATUS_1,
3255 DRF_NUM(_EGRESS, _ERR_STATUS_1, _RBCTRLSTORE_ECC_LIMIT_ERR, 1));
3256 }
3257 }
3258
3259 bit = DRF_NUM(_EGRESS, _ERR_STATUS_1, _MCREDSGT_ECC_DBE_ERR, 1);
3260 if (nvswitch_test_flags(pending, bit))
3261 {
3262 NVSWITCH_REPORT_CONTAIN(_HW_NPORT_EGRESS_MCREDSGT_ECC_DBE_ERR, "egress MC SG ECC DBE error", NV_FALSE);
3263 NVSWITCH_REPORT_CONTAIN_DATA(_HW_NPORT_EGRESS_MCREDSGT_ECC_DBE_ERR, data);
3264 nvswitch_clear_flags(&unhandled, bit);
3265
3266 _nvswitch_construct_ecc_error_event_ls10(&err_event,
3267 NVSWITCH_ERR_HW_NPORT_EGRESS_MCREDSGT_ECC_DBE_ERR, link, NV_FALSE, 0,
3268 NV_TRUE, 1);
3269
3270 nvswitch_inforom_ecc_log_err_event(device, &err_event);
3271
3272 // Clear associated LIMIT_ERR interrupt
3273 if (report.raw_pending & DRF_NUM(_EGRESS, _ERR_STATUS_1, _MCREDSGT_ECC_LIMIT_ERR, 1))
3274 {
3275 NVSWITCH_ENG_WR32(device, NPORT, , link, _EGRESS, _ERR_STATUS_1,
3276 DRF_NUM(_EGRESS, _ERR_STATUS_1, _MCREDSGT_ECC_LIMIT_ERR, 1));
3277 }
3278 }
3279
3280 bit = DRF_NUM(_EGRESS, _ERR_STATUS_1, _MCRSP_RAM_HDR_ECC_DBE_ERR, 1);
3281 if (nvswitch_test_flags(pending, bit))
3282 {
3283 NVSWITCH_REPORT_CONTAIN(_HW_NPORT_EGRESS_MCRSP_RAM_HDR_ECC_DBE_ERR, "egress MC ram ECC DBE error", NV_FALSE);
3284 NVSWITCH_REPORT_CONTAIN_DATA(_HW_NPORT_EGRESS_MCRSP_RAM_HDR_ECC_DBE_ERR, data);
3285 nvswitch_clear_flags(&unhandled, bit);
3286
3287 _nvswitch_construct_ecc_error_event_ls10(&err_event,
3288 NVSWITCH_ERR_HW_NPORT_EGRESS_MCRSP_RAM_HDR_ECC_DBE_ERR, link, NV_FALSE, 0,
3289 NV_TRUE, 1);
3290
3291 nvswitch_inforom_ecc_log_err_event(device, &err_event);
3292
3293 // Clear associated LIMIT_ERR interrupt
3294 if (report.raw_pending & DRF_NUM(_EGRESS, _ERR_STATUS_1, _MCRSP_RAM_HDR_ECC_LIMIT_ERR, 1))
3295 {
3296 NVSWITCH_ENG_WR32(device, NPORT, , link, _EGRESS, _ERR_STATUS_1,
3297 DRF_NUM(_EGRESS, _ERR_STATUS_1, _MCRSP_RAM_HDR_ECC_LIMIT_ERR, 1));
3298 }
3299 }
3300
3301 NVSWITCH_UNHANDLED_CHECK(device, unhandled);
3302
3303 // Disable interrupts that have occurred after fatal error.
3304 // This helps prevent an interrupt storm if HW keeps triggering unnecessary stream of interrupts.
3305 if (device->link[link].fatal_error_occurred)
3306 {
3307 if (nvswitch_is_soe_supported(device))
3308 {
3309 nvswitch_soe_disable_nport_fatal_interrupts_ls10(device, link,
3310 report.raw_enable & ~pending, RM_SOE_CORE_NPORT_EGRESS_1_INTERRUPT);
3311 }
3312 else
3313 {
3314 NVSWITCH_ENG_WR32(device, NPORT, , link, _EGRESS, _ERR_FATAL_REPORT_EN_1,
3315 report.raw_enable & ~pending);
3316 }
3317 }
3318
3319 if (report.raw_first & report.mask)
3320 {
3321 NVSWITCH_ENG_WR32(device, NPORT, , link, _EGRESS, _ERR_FIRST_1,
3322 report.raw_first & report.mask);
3323 }
3324
3325 NVSWITCH_ENG_WR32(device, NPORT, , link, _EGRESS, _ERR_STATUS_1, pending);
3326
3327 // Clear all pending interrupts!
3328 NVSWITCH_ENG_WR32(device, NPORT, , link, _EGRESS, _ERR_STATUS_1, 0xFFFFFFFF);
3329
3330 if (unhandled != 0)
3331 {
3332 status = -NVL_MORE_PROCESSING_REQUIRED;
3333 }
3334
3335 return status;
3336 }
3337
//
// Service non-fatal SOURCETRACK (NPORT source-tracking) interrupts for one
// link.
//
// Flow: read _SOURCETRACK _ERR_STATUS_0, mask it against the enabled
// non-fatal interrupt set, report/log each recognized error bit (including
// an InfoROM ECC event), then acknowledge the handled bits.
//
// Returns:
//   NVL_SUCCESS                    - all pending enabled interrupts serviced
//   -NVL_NOT_FOUND                 - no enabled non-fatal interrupt pending
//   -NVL_MORE_PROCESSING_REQUIRED  - some pending bits were not recognized
//
static NvlStatus
_nvswitch_service_sourcetrack_nonfatal_ls10
(
    nvswitch_device *device,
    NvU32 link
)
{
    ls10_device *chip_device = NVSWITCH_GET_CHIP_DEVICE_LS10(device);
    NVSWITCH_INTERRUPT_LOG_TYPE report = { 0 };
    NvU32 pending, bit, unhandled;
    INFOROM_NVS_ECC_ERROR_EVENT err_event = {0};

    report.raw_pending = NVSWITCH_ENG_RD32(device, NPORT, , link,
                            _SOURCETRACK, _ERR_STATUS_0);
    report.raw_enable = NVSWITCH_ENG_RD32(device, NPORT, , link,
                            _SOURCETRACK, _ERR_NON_FATAL_REPORT_EN_0);
    report.mask = report.raw_enable & chip_device->intr_mask.sourcetrack.nonfatal;

    // Only service errors that are both pending in HW and enabled for
    // non-fatal reporting.
    pending = report.raw_pending & report.mask;

    if (pending == 0)
    {
        return -NVL_NOT_FOUND;
    }

    unhandled = pending;
    report.raw_first = NVSWITCH_ENG_RD32(device, NPORT, , link, _SOURCETRACK, _ERR_FIRST_0);

    bit = DRF_NUM(_SOURCETRACK, _ERR_STATUS_0, _CREQ_TCEN0_CRUMBSTORE_ECC_LIMIT_ERR, 1);
    if (nvswitch_test_flags(pending, bit))
    {
        //
        // Ignore LIMIT error if DBE is pending: the fatal (DBE) handler
        // reports the failure and clears the LIMIT interrupt itself, so
        // reporting it here would be redundant.
        //
        if (!(nvswitch_test_flags(report.raw_pending,
                DRF_NUM(_SOURCETRACK, _ERR_STATUS_0, _CREQ_TCEN0_CRUMBSTORE_ECC_DBE_ERR, 1))))
        {
            NvBool bAddressValid = NV_FALSE;
            NvU32 address = 0;
            NvU32 addressValid = NVSWITCH_ENG_RD32(device, NPORT, , link, _SOURCETRACK,
                    _ERR_CREQ_TCEN0_CRUMBSTORE_ECC_ERROR_ADDRESS_VALID);

            // The latched ECC error address is only meaningful when HW
            // flags it valid.
            if (FLD_TEST_DRF(_SOURCETRACK_ERR_CREQ_TCEN0_CRUMBSTORE, _ECC_ERROR_ADDRESS_VALID,
                    _VALID, _VALID, addressValid))
            {
                address = NVSWITCH_ENG_RD32(device, NPORT, , link, _SOURCETRACK,
                        _ERR_CREQ_TCEN0_CRUMBSTORE_ECC_ERROR_ADDRESS);
                bAddressValid = NV_TRUE;
            }

            // Snapshot counter/address/valid registers into the report data
            // for the error log.
            report.data[0] = NVSWITCH_ENG_RD32(device, NPORT, , link, _SOURCETRACK,
                                _ERR_CREQ_TCEN0_CRUMBSTORE_ECC_ERROR_COUNTER);
            report.data[1] = NVSWITCH_ENG_RD32(device, NPORT, , link, _SOURCETRACK,
                                _ERR_CREQ_TCEN0_CRUMBSTORE_ECC_ERROR_ADDRESS);
            report.data[2] = NVSWITCH_ENG_RD32(device, NPORT, , link, _SOURCETRACK,
                                _ERR_CREQ_TCEN0_CRUMBSTORE_ECC_ERROR_ADDRESS_VALID);
            NVSWITCH_REPORT_NONFATAL(_HW_NPORT_SOURCETRACK_CREQ_TCEN0_CRUMBSTORE_ECC_LIMIT_ERR,
                                     "sourcetrack TCEN0 crumbstore ECC limit err");

            _nvswitch_construct_ecc_error_event_ls10(&err_event,
                NVSWITCH_ERR_HW_NPORT_SOURCETRACK_CREQ_TCEN0_CRUMBSTORE_ECC_LIMIT_ERR, link,
                bAddressValid, address, NV_FALSE, 1);

            nvswitch_inforom_ecc_log_err_event(device, &err_event);
        }

        // The bit counts as handled even when reporting was suppressed in
        // favor of the DBE path.
        nvswitch_clear_flags(&unhandled, bit);
    }

    NVSWITCH_UNHANDLED_CHECK(device, unhandled);

    //
    // Disable interrupts that have occurred after fatal error.
    // This helps prevent an interrupt storm if HW keeps triggering unnecessary stream of interrupts.
    //
    if (device->link[link].fatal_error_occurred)
    {
        NVSWITCH_ENG_WR32(device, NPORT, , link, _SOURCETRACK, _ERR_NON_FATAL_REPORT_EN_0,
            report.raw_enable & ~pending);
    }

    // Clear the handled bits out of the first-error latch so a subsequent
    // error can be captured as "first" (NOTE(review): W1C semantics assumed
    // from usage -- confirm against the register manual).
    if (report.raw_first & report.mask)
    {
        NVSWITCH_ENG_WR32(device, NPORT, , link, _SOURCETRACK, _ERR_FIRST_0,
            report.raw_first & report.mask);
    }

    // Acknowledge the serviced interrupt bits.
    NVSWITCH_ENG_WR32(device, NPORT, , link, _SOURCETRACK, _ERR_STATUS_0, pending);

    if (unhandled != 0)
    {
        return -NVL_MORE_PROCESSING_REQUIRED;
    }

    return NVL_SUCCESS;
}
3432
//
// Service fatal SOURCETRACK (NPORT source-tracking) interrupts for one link.
//
// Flow: read _SOURCETRACK _ERR_STATUS_0, mask it against the enabled fatal
// interrupt set, report each recognized error bit (with containment
// reporting and, for ECC DBE, an InfoROM event), then acknowledge the
// handled bits. When SOE is available, further fatal interrupts for this
// unit are disabled through SOE; otherwise the report-enable register is
// written directly.
//
// Returns:
//   NVL_SUCCESS                    - all pending enabled interrupts serviced
//   -NVL_NOT_FOUND                 - no enabled fatal interrupt pending
//   -NVL_MORE_PROCESSING_REQUIRED  - some pending bits were not recognized
//
static NvlStatus
_nvswitch_service_sourcetrack_fatal_ls10
(
    nvswitch_device *device,
    NvU32 link
)
{
    ls10_device *chip_device = NVSWITCH_GET_CHIP_DEVICE_LS10(device);
    NVSWITCH_INTERRUPT_LOG_TYPE report = { 0 };
    NvU32 pending, bit, contain, unhandled;
    INFOROM_NVS_ECC_ERROR_EVENT err_event = {0};

    report.raw_pending = NVSWITCH_ENG_RD32(device, NPORT, , link,
                            _SOURCETRACK, _ERR_STATUS_0);
    report.raw_enable = NVSWITCH_ENG_RD32(device, NPORT, , link,
                            _SOURCETRACK, _ERR_FATAL_REPORT_EN_0);
    report.mask = report.raw_enable & chip_device->intr_mask.sourcetrack.fatal;
    pending = report.raw_pending & report.mask;

    if (pending == 0)
    {
        return -NVL_NOT_FOUND;
    }

    unhandled = pending;
    report.raw_first = NVSWITCH_ENG_RD32(device, NPORT, , link, _SOURCETRACK, _ERR_FIRST_0);
    // Containment enables; not referenced by name below, presumably consumed
    // by the NVSWITCH_REPORT_CONTAIN macro expansion -- TODO confirm.
    contain = NVSWITCH_ENG_RD32(device, NPORT, , link, _SOURCETRACK, _ERR_CONTAIN_EN_0);

    bit = DRF_NUM(_SOURCETRACK, _ERR_STATUS_0, _CREQ_TCEN0_CRUMBSTORE_ECC_DBE_ERR, 1);
    if (nvswitch_test_flags(pending, bit))
    {
        NvBool bAddressValid = NV_FALSE;
        NvU32 address = 0;
        NvU32 addressValid = NVSWITCH_ENG_RD32(device, NPORT, , link, _SOURCETRACK,
                _ERR_CREQ_TCEN0_CRUMBSTORE_ECC_ERROR_ADDRESS_VALID);

        // The latched ECC error address is only meaningful when HW flags it
        // valid.
        if (FLD_TEST_DRF(_SOURCETRACK_ERR_CREQ_TCEN0_CRUMBSTORE, _ECC_ERROR_ADDRESS_VALID,
                _VALID, _VALID, addressValid))
        {
            address = NVSWITCH_ENG_RD32(device, NPORT, , link, _SOURCETRACK,
                    _ERR_CREQ_TCEN0_CRUMBSTORE_ECC_ERROR_ADDRESS);
            bAddressValid = NV_TRUE;
        }

        // Snapshot address/valid registers into the report data for the
        // error log.
        report.data[0] = NVSWITCH_ENG_RD32(device, NPORT, , link, _SOURCETRACK,
                            _ERR_CREQ_TCEN0_CRUMBSTORE_ECC_ERROR_ADDRESS);
        report.data[1] = NVSWITCH_ENG_RD32(device, NPORT, , link, _SOURCETRACK,
                            _ERR_CREQ_TCEN0_CRUMBSTORE_ECC_ERROR_ADDRESS_VALID);
        NVSWITCH_REPORT_CONTAIN(_HW_NPORT_SOURCETRACK_CREQ_TCEN0_CRUMBSTORE_ECC_DBE_ERR,
                                "sourcetrack TCEN0 crumbstore DBE", NV_FALSE);
        nvswitch_clear_flags(&unhandled, bit);

        _nvswitch_construct_ecc_error_event_ls10(&err_event,
            NVSWITCH_ERR_HW_NPORT_SOURCETRACK_CREQ_TCEN0_CRUMBSTORE_ECC_DBE_ERR,
            link, bAddressValid, address, NV_TRUE, 1);

        nvswitch_inforom_ecc_log_err_event(device, &err_event);

        // Clear associated LIMIT_ERR interrupt: the DBE report supersedes
        // the single-bit limit report for the same RAM.
        if (report.raw_pending & DRF_NUM(_SOURCETRACK, _ERR_STATUS_0, _CREQ_TCEN0_CRUMBSTORE_ECC_LIMIT_ERR, 1))
        {
            NVSWITCH_ENG_WR32(device, NPORT, , link, _SOURCETRACK, _ERR_STATUS_0,
                DRF_NUM(_SOURCETRACK, _ERR_STATUS_0, _CREQ_TCEN0_CRUMBSTORE_ECC_LIMIT_ERR, 1));
        }
    }

    bit = DRF_NUM(_SOURCETRACK, _ERR_STATUS_0, _DUP_CREQ_TCEN0_TAG_ERR, 1);
    if (nvswitch_test_flags(pending, bit))
    {
        NVSWITCH_REPORT_CONTAIN(_HW_NPORT_SOURCETRACK_DUP_CREQ_TCEN0_TAG_ERR,
                                "sourcetrack duplicate CREQ", NV_FALSE);
        nvswitch_clear_flags(&unhandled, bit);
    }

    bit = DRF_NUM(_SOURCETRACK, _ERR_STATUS_0, _INVALID_TCEN0_RSP_ERR, 1);
    if (nvswitch_test_flags(pending, bit))
    {
        NVSWITCH_REPORT_CONTAIN(_HW_NPORT_SOURCETRACK_INVALID_TCEN0_RSP_ERR,
                                "sourcetrack invalid TCEN0 CREQ", NV_FALSE);
        nvswitch_clear_flags(&unhandled, bit);
    }

    bit = DRF_NUM(_SOURCETRACK, _ERR_STATUS_0, _INVALID_TCEN1_RSP_ERR, 1);
    if (nvswitch_test_flags(pending, bit))
    {
        NVSWITCH_REPORT_CONTAIN(_HW_NPORT_SOURCETRACK_INVALID_TCEN1_RSP_ERR,
                                "sourcetrack invalid TCEN1 CREQ", NV_FALSE);
        nvswitch_clear_flags(&unhandled, bit);
    }

    bit = DRF_NUM(_SOURCETRACK, _ERR_STATUS_0, _SOURCETRACK_TIME_OUT_ERR, 1);
    if (nvswitch_test_flags(pending, bit))
    {
        NVSWITCH_REPORT_CONTAIN(_HW_NPORT_SOURCETRACK_SOURCETRACK_TIME_OUT_ERR,
                                "sourcetrack timeout error", NV_FALSE);
        nvswitch_clear_flags(&unhandled, bit);
    }

    NVSWITCH_UNHANDLED_CHECK(device, unhandled);

    //
    // Disable interrupts that have occurred after fatal error.
    // This helps prevent an interrupt storm if HW keeps triggering unnecessary stream of interrupts.
    //
    if (device->link[link].fatal_error_occurred)
    {
        if (nvswitch_is_soe_supported(device))
        {
            // Route the disable through SOE when it owns the register.
            nvswitch_soe_disable_nport_fatal_interrupts_ls10(device, link,
                report.raw_enable & ~pending, RM_SOE_CORE_NPORT_SOURCETRACK_INTERRUPT);
        }
        else
        {
            NVSWITCH_ENG_WR32(device, NPORT, , link, _SOURCETRACK, _ERR_FATAL_REPORT_EN_0,
                report.raw_enable & ~pending);
        }
    }

    // Clear the handled bits out of the first-error latch so a subsequent
    // error can be captured as "first" (NOTE(review): W1C semantics assumed
    // from usage -- confirm against the register manual).
    if (report.raw_first & report.mask)
    {
        NVSWITCH_ENG_WR32(device, NPORT, , link, _SOURCETRACK, _ERR_FIRST_0,
            report.raw_first & report.mask);
    }

    // Acknowledge the serviced interrupt bits.
    NVSWITCH_ENG_WR32(device, NPORT, , link, _SOURCETRACK, _ERR_STATUS_0, pending);

    if (unhandled != 0)
    {
        return -NVL_MORE_PROCESSING_REQUIRED;
    }

    return NVL_SUCCESS;

}
3567
3568 //
3569 // Multicast Tstate
3570 //
3571
//
// Service non-fatal MULTICASTTSTATE (multicast Tstate) interrupts for one
// link.
//
// Flow: read _MULTICASTTSTATE _ERR_STATUS_0, mask it against the enabled
// non-fatal interrupt set, collect per-error context into a raw-error data
// buffer, report/log each recognized error bit (including InfoROM ECC
// events for the limit errors), then acknowledge the handled bits.
//
// Returns:
//   NVL_SUCCESS                    - all pending enabled interrupts serviced
//   -NVL_NOT_FOUND                 - no enabled non-fatal interrupt pending
//   -NVL_MORE_PROCESSING_REQUIRED  - some pending bits were not recognized
//
static NvlStatus
_nvswitch_service_multicast_nonfatal_ls10
(
    nvswitch_device *device,
    NvU32 link
)
{
    ls10_device *chip_device = NVSWITCH_GET_CHIP_DEVICE_LS10(device);
    NVSWITCH_INTERRUPT_LOG_TYPE report = { 0 };
    NvU32 pending, bit, unhandled;
    NVSWITCH_RAW_ERROR_LOG_TYPE data = {0, { 0 }};
    INFOROM_NVS_ECC_ERROR_EVENT err_event = {0};

    report.raw_pending = NVSWITCH_ENG_RD32(device, NPORT, , link, _MULTICASTTSTATE, _ERR_STATUS_0);
    report.raw_enable = NVSWITCH_ENG_RD32(device, NPORT, , link, _MULTICASTTSTATE, _ERR_NON_FATAL_REPORT_EN_0);
    report.mask = report.raw_enable & chip_device->intr_mask.mc_tstate.nonfatal;
    pending = report.raw_pending & report.mask;

    if (pending == 0)
    {
        return -NVL_NOT_FOUND;
    }

    unhandled = pending;
    report.raw_first = NVSWITCH_ENG_RD32(device, NPORT, , link, _MULTICASTTSTATE, _ERR_FIRST_0);
    // Baseline context for all reports below: multicast timing information.
    _nvswitch_collect_error_info_ls10(device, link,
        NVSWITCH_RAW_ERROR_LOG_DATA_FLAG_MC_TIME,
        &data);

    bit = DRF_NUM(_MULTICASTTSTATE, _ERR_STATUS_0, _TAGPOOL_ECC_LIMIT_ERR, 1);
    if (nvswitch_test_flags(pending, bit))
    {
        //
        // Ignore LIMIT error if DBE is pending: the fatal (DBE) handler
        // reports the failure and clears the LIMIT interrupt itself.
        //
        if(!(nvswitch_test_flags(report.raw_pending,
                DRF_NUM(_MULTICASTTSTATE, _ERR_STATUS_0, _TAGPOOL_ECC_DBE_ERR, 1))))
        {
            NvBool bAddressValid = NV_FALSE;
            NvU32 address = 0;
            NvU32 addressValid = NVSWITCH_ENG_RD32(device, NPORT, , link, _MULTICASTTSTATE,
                    _ERR_TAGPOOL_ECC_ERROR_ADDRESS_VALID);

            // The latched ECC error address is only meaningful when HW
            // flags it valid.
            if (FLD_TEST_DRF(_MULTICASTTSTATE_ERR_TAGPOOL, _ECC_ERROR_ADDRESS_VALID, _VALID, _VALID,
                    addressValid))
            {
                address = NVSWITCH_ENG_RD32(device, NPORT, , link, _MULTICASTTSTATE,
                        _ERR_TAGPOOL_ECC_ERROR_ADDRESS);
                bAddressValid = NV_TRUE;
            }

            // Record the error count, then reset the HW counter so the
            // limit threshold re-arms.
            report.data[1] = NVSWITCH_ENG_RD32(device, NPORT, , link, _MULTICASTTSTATE, _ERR_TAGPOOL_ECC_ERROR_COUNTER);
            NVSWITCH_ENG_WR32(device, NPORT, , link, _MULTICASTTSTATE, _ERR_TAGPOOL_ECC_ERROR_COUNTER,
                DRF_DEF(_MULTICASTTSTATE, _ERR_TAGPOOL_ECC_ERROR_COUNTER, _ERROR_COUNT, _INIT));
            NVSWITCH_REPORT_NONFATAL(_HW_NPORT_MULTICASTTSTATE_TAGPOOL_ECC_LIMIT_ERR, "MC TS tag store single-bit threshold");
            NVSWITCH_REPORT_DATA(_HW_NPORT_MULTICASTTSTATE_TAGPOOL_ECC_LIMIT_ERR, data);

            _nvswitch_construct_ecc_error_event_ls10(&err_event,
                NVSWITCH_ERR_HW_NPORT_MULTICASTTSTATE_TAGPOOL_ECC_LIMIT_ERR, link,
                bAddressValid, address, NV_FALSE, 1);

            nvswitch_inforom_ecc_log_err_event(device, &err_event);
        }

        // The bit counts as handled even when reporting was suppressed in
        // favor of the DBE path.
        nvswitch_clear_flags(&unhandled, bit);
    }

    bit = DRF_NUM(_MULTICASTTSTATE, _ERR_STATUS_0, _CRUMBSTORE_ECC_LIMIT_ERR, 1);
    if (nvswitch_test_flags(pending, bit))
    {
        // Ignore LIMIT error if DBE is pending (same rationale as above).
        if(!(nvswitch_test_flags(report.raw_pending,
                DRF_NUM(_MULTICASTTSTATE, _ERR_STATUS_0, _CRUMBSTORE_ECC_DBE_ERR, 1))))
        {
            NvBool bAddressValid = NV_FALSE;
            NvU32 address = 0;
            NvU32 addressValid = NVSWITCH_ENG_RD32(device, NPORT, , link, _MULTICASTTSTATE,
                    _ERR_CRUMBSTORE_ECC_ERROR_ADDRESS_VALID);

            if (FLD_TEST_DRF(_MULTICASTTSTATE_ERR_CRUMBSTORE, _ECC_ERROR_ADDRESS_VALID, _VALID, _VALID,
                    addressValid))
            {
                address = NVSWITCH_ENG_RD32(device, NPORT, , link, _MULTICASTTSTATE,
                        _ERR_CRUMBSTORE_ECC_ERROR_ADDRESS);
                bAddressValid = NV_TRUE;
            }

            // Record the error count, then reset the HW counter so the
            // limit threshold re-arms.
            report.data[1] = NVSWITCH_ENG_RD32(device, NPORT, , link, _MULTICASTTSTATE, _ERR_CRUMBSTORE_ECC_ERROR_COUNTER);
            NVSWITCH_ENG_WR32(device, NPORT, , link, _MULTICASTTSTATE, _ERR_CRUMBSTORE_ECC_ERROR_COUNTER,
                DRF_DEF(_MULTICASTTSTATE, _ERR_CRUMBSTORE_ECC_ERROR_COUNTER, _ERROR_COUNT, _INIT));
            NVSWITCH_REPORT_NONFATAL(_HW_NPORT_MULTICASTTSTATE_CRUMBSTORE_ECC_LIMIT_ERR, "MC TS crumbstore single-bit threshold");
            NVSWITCH_REPORT_DATA(_HW_NPORT_MULTICASTTSTATE_CRUMBSTORE_ECC_LIMIT_ERR, data);

            _nvswitch_construct_ecc_error_event_ls10(&err_event,
                NVSWITCH_ERR_HW_NPORT_MULTICASTTSTATE_CRUMBSTORE_ECC_LIMIT_ERR, link,
                bAddressValid, address, NV_FALSE, 1);

            nvswitch_inforom_ecc_log_err_event(device, &err_event);
        }

        nvswitch_clear_flags(&unhandled, bit);
    }

    bit = DRF_NUM(_MULTICASTTSTATE, _ERR_STATUS_0, _CRUMBSTORE_MCTO_ERR, 1);
    if (nvswitch_test_flags(pending, bit))
    {
        NVSWITCH_REPORT_NONFATAL(_HW_NPORT_MULTICASTTSTATE_CRUMBSTORE_MCTO_ERR, "MC TS crumbstore MCTO");
        // Augment the report with egress timing/misc/header context for the
        // timeout diagnosis.
        _nvswitch_collect_error_info_ls10(device, link,
            NVSWITCH_RAW_ERROR_LOG_DATA_FLAG_EGRESS_TIME |
            NVSWITCH_RAW_ERROR_LOG_DATA_FLAG_EGRESS_MISC |
            NVSWITCH_RAW_ERROR_LOG_DATA_FLAG_EGRESS_HDR,
            &data);
        NVSWITCH_REPORT_DATA(_HW_NPORT_MULTICASTTSTATE_CRUMBSTORE_MCTO_ERR, data);

        nvswitch_clear_flags(&unhandled, bit);
    }

    NVSWITCH_UNHANDLED_CHECK(device, unhandled);

    // Disable interrupts that have occurred after fatal error.
    // This helps prevent an interrupt storm if HW keeps triggering unnecessary stream of interrupts.
    if (device->link[link].fatal_error_occurred)
    {
        NVSWITCH_ENG_WR32(device, NPORT, , link, _MULTICASTTSTATE, _ERR_NON_FATAL_REPORT_EN_0,
            report.raw_enable & ~pending);
    }

    // Clear the handled bits out of the first-error latch so a subsequent
    // error can be captured as "first" (NOTE(review): W1C semantics assumed
    // from usage -- confirm against the register manual).
    if (report.raw_first & report.mask)
    {
        NVSWITCH_ENG_WR32(device, NPORT, , link, _MULTICASTTSTATE, _ERR_FIRST_0,
            report.raw_first & report.mask);
    }

    // Acknowledge the serviced interrupt bits.
    NVSWITCH_ENG_WR32(device, NPORT, , link, _MULTICASTTSTATE, _ERR_STATUS_0, pending);

    if (unhandled != 0)
    {
        return -NVL_MORE_PROCESSING_REQUIRED;
    }

    return NVL_SUCCESS;
}
3712
//
// Service fatal MULTICASTTSTATE (multicast Tstate) interrupts for one link.
//
// Flow: read _MULTICASTTSTATE _ERR_STATUS_0, mask it against the enabled
// fatal interrupt set, collect multicast timing context, report each
// recognized error bit (with containment reporting and InfoROM ECC events
// for the DBEs), then acknowledge the handled bits. When SOE is available,
// further fatal interrupts for this unit are disabled through SOE.
//
// Returns:
//   NVL_SUCCESS                    - all pending enabled interrupts serviced
//   -NVL_NOT_FOUND                 - no enabled fatal interrupt pending
//   -NVL_MORE_PROCESSING_REQUIRED  - some pending bits were not recognized
//
static NvlStatus
_nvswitch_service_multicast_fatal_ls10
(
    nvswitch_device *device,
    NvU32 link
)
{
    ls10_device *chip_device = NVSWITCH_GET_CHIP_DEVICE_LS10(device);
    NVSWITCH_INTERRUPT_LOG_TYPE report = { 0 };
    NvU32 pending, bit, contain, unhandled;
    NVSWITCH_RAW_ERROR_LOG_TYPE data = {0, { 0 }};
    INFOROM_NVS_ECC_ERROR_EVENT err_event = {0};

    report.raw_pending = NVSWITCH_ENG_RD32(device, NPORT, , link, _MULTICASTTSTATE, _ERR_STATUS_0);
    report.raw_enable = NVSWITCH_ENG_RD32(device, NPORT, , link, _MULTICASTTSTATE, _ERR_FATAL_REPORT_EN_0);
    report.mask = report.raw_enable & chip_device->intr_mask.mc_tstate.fatal;
    pending = report.raw_pending & report.mask;

    if (pending == 0)
    {
        return -NVL_NOT_FOUND;
    }

    unhandled = pending;
    report.raw_first = NVSWITCH_ENG_RD32(device, NPORT, , link, _MULTICASTTSTATE, _ERR_FIRST_0);
    // Containment enables; not referenced by name below, presumably consumed
    // by the NVSWITCH_REPORT_CONTAIN macro expansion -- TODO confirm.
    contain = NVSWITCH_ENG_RD32(device, NPORT, , link, _MULTICASTTSTATE, _ERR_CONTAIN_EN_0);
    // Baseline context for all reports below: multicast timing information.
    _nvswitch_collect_error_info_ls10(device, link,
        NVSWITCH_RAW_ERROR_LOG_DATA_FLAG_MC_TIME,
        &data);

    bit = DRF_NUM(_MULTICASTTSTATE, _ERR_STATUS_0, _TAGPOOL_ECC_DBE_ERR, 1);
    if (nvswitch_test_flags(pending, bit))
    {
        NvBool bAddressValid = NV_FALSE;
        NvU32 address = 0;
        NvU32 addressValid = NVSWITCH_ENG_RD32(device, NPORT, , link, _MULTICASTTSTATE,
                _ERR_TAGPOOL_ECC_ERROR_ADDRESS_VALID);

        // The latched ECC error address is only meaningful when HW flags it
        // valid.
        if (FLD_TEST_DRF(_MULTICASTTSTATE_ERR_TAGPOOL, _ECC_ERROR_ADDRESS_VALID, _VALID, _VALID,
                addressValid))
        {
            address = NVSWITCH_ENG_RD32(device, NPORT, , link, _MULTICASTTSTATE,
                    _ERR_TAGPOOL_ECC_ERROR_ADDRESS);
            bAddressValid = NV_TRUE;
        }

        // Record the error count, then reset the HW counter.
        report.data[1] = NVSWITCH_ENG_RD32(device, NPORT, , link, _MULTICASTTSTATE, _ERR_TAGPOOL_ECC_ERROR_COUNTER);
        NVSWITCH_ENG_WR32(device, NPORT, , link, _MULTICASTTSTATE, _ERR_TAGPOOL_ECC_ERROR_COUNTER,
            DRF_DEF(_MULTICASTTSTATE, _ERR_TAGPOOL_ECC_ERROR_COUNTER, _ERROR_COUNT, _INIT));
        NVSWITCH_REPORT_CONTAIN(_HW_NPORT_MULTICASTTSTATE_TAGPOOL_ECC_DBE_ERR, "MC TS tag store fatal ECC", NV_FALSE);
        NVSWITCH_REPORT_CONTAIN_DATA(_HW_NPORT_MULTICASTTSTATE_TAGPOOL_ECC_DBE_ERR, data);
        nvswitch_clear_flags(&unhandled, bit);

        _nvswitch_construct_ecc_error_event_ls10(&err_event,
            NVSWITCH_ERR_HW_NPORT_MULTICASTTSTATE_TAGPOOL_ECC_DBE_ERR, link, bAddressValid,
            address, NV_TRUE, 1);

        nvswitch_inforom_ecc_log_err_event(device, &err_event);

        // Clear associated LIMIT_ERR interrupt: the DBE report supersedes
        // the single-bit limit report for the same RAM.
        if (report.raw_pending & DRF_NUM(_MULTICASTTSTATE, _ERR_STATUS_0, _TAGPOOL_ECC_LIMIT_ERR, 1))
        {
            NVSWITCH_ENG_WR32(device, NPORT, , link, _MULTICASTTSTATE, _ERR_STATUS_0,
                DRF_NUM(_MULTICASTTSTATE, _ERR_STATUS_0, _TAGPOOL_ECC_LIMIT_ERR, 1));
        }
    }

    bit = DRF_NUM(_MULTICASTTSTATE, _ERR_STATUS_0, _CRUMBSTORE_ECC_DBE_ERR, 1);
    if (nvswitch_test_flags(pending, bit))
    {
        NvBool bAddressValid = NV_FALSE;
        NvU32 address = 0;
        NvU32 addressValid = NVSWITCH_ENG_RD32(device, NPORT, , link, _MULTICASTTSTATE,
                _ERR_CRUMBSTORE_ECC_ERROR_ADDRESS_VALID);

        if (FLD_TEST_DRF(_MULTICASTTSTATE_ERR_CRUMBSTORE, _ECC_ERROR_ADDRESS_VALID, _VALID, _VALID,
                addressValid))
        {
            address = NVSWITCH_ENG_RD32(device, NPORT, , link, _MULTICASTTSTATE,
                    _ERR_CRUMBSTORE_ECC_ERROR_ADDRESS);
            bAddressValid = NV_TRUE;
        }

        // Record the error count, then reset the HW counter.
        report.data[1] = NVSWITCH_ENG_RD32(device, NPORT, , link, _MULTICASTTSTATE, _ERR_CRUMBSTORE_ECC_ERROR_COUNTER);
        NVSWITCH_ENG_WR32(device, NPORT, , link, _MULTICASTTSTATE, _ERR_CRUMBSTORE_ECC_ERROR_COUNTER,
            DRF_DEF(_MULTICASTTSTATE, _ERR_CRUMBSTORE_ECC_ERROR_COUNTER, _ERROR_COUNT, _INIT));
        NVSWITCH_REPORT_CONTAIN(_HW_NPORT_MULTICASTTSTATE_CRUMBSTORE_ECC_DBE_ERR, "MC TS crumbstore fatal ECC", NV_FALSE);
        NVSWITCH_REPORT_CONTAIN_DATA(_HW_NPORT_MULTICASTTSTATE_CRUMBSTORE_ECC_DBE_ERR, data);
        nvswitch_clear_flags(&unhandled, bit);

        _nvswitch_construct_ecc_error_event_ls10(&err_event,
            NVSWITCH_ERR_HW_NPORT_MULTICASTTSTATE_CRUMBSTORE_ECC_DBE_ERR, link, bAddressValid,
            address, NV_TRUE, 1);

        nvswitch_inforom_ecc_log_err_event(device, &err_event);

        // Clear associated LIMIT_ERR interrupt (same rationale as above).
        if (report.raw_pending & DRF_NUM(_MULTICASTTSTATE, _ERR_STATUS_0, _CRUMBSTORE_ECC_LIMIT_ERR, 1))
        {
            NVSWITCH_ENG_WR32(device, NPORT, , link, _MULTICASTTSTATE, _ERR_STATUS_0,
                DRF_NUM(_MULTICASTTSTATE, _ERR_STATUS_0, _CRUMBSTORE_ECC_LIMIT_ERR, 1));
        }
    }

    bit = DRF_NUM(_MULTICASTTSTATE, _ERR_STATUS_0, _CRUMBSTORE_BUF_OVERWRITE_ERR, 1);
    if (nvswitch_test_flags(pending, bit))
    {
        NVSWITCH_REPORT_CONTAIN(_HW_NPORT_MULTICASTTSTATE_CRUMBSTORE_BUF_OVERWRITE_ERR, "MC crumbstore overwrite", NV_FALSE);
        NVSWITCH_REPORT_CONTAIN_DATA(_HW_NPORT_MULTICASTTSTATE_CRUMBSTORE_BUF_OVERWRITE_ERR, data);
        nvswitch_clear_flags(&unhandled, bit);
    }

    NVSWITCH_UNHANDLED_CHECK(device, unhandled);

    // Disable interrupts that have occurred after fatal error.
    // This helps prevent an interrupt storm if HW keeps triggering unnecessary stream of interrupts.
    if (device->link[link].fatal_error_occurred)
    {
        if (nvswitch_is_soe_supported(device))
        {
            // Route the disable through SOE when it owns the register.
            nvswitch_soe_disable_nport_fatal_interrupts_ls10(device, link,
                report.raw_enable & ~pending, RM_SOE_CORE_NPORT_MULTICAST_INTERRUPT);
        }
        else
        {
            NVSWITCH_ENG_WR32(device, NPORT, , link, _MULTICASTTSTATE, _ERR_FATAL_REPORT_EN_0,
                report.raw_enable & ~pending);
        }
    }

    // Clear the handled bits out of the first-error latch so a subsequent
    // error can be captured as "first" (NOTE(review): W1C semantics assumed
    // from usage -- confirm against the register manual).
    if (report.raw_first & report.mask)
    {
        NVSWITCH_ENG_WR32(device, NPORT, , link, _MULTICASTTSTATE, _ERR_FIRST_0,
            report.raw_first & report.mask);
    }

    // Acknowledge the serviced interrupt bits.
    NVSWITCH_ENG_WR32(device, NPORT, , link, _MULTICASTTSTATE, _ERR_STATUS_0, pending);

    if (unhandled != 0)
    {
        return -NVL_MORE_PROCESSING_REQUIRED;
    }

    return NVL_SUCCESS;
}
3858
3859 //
3860 // Reduction Tstate
3861 //
3862
3863 static NvlStatus
_nvswitch_service_reduction_nonfatal_ls10(nvswitch_device * device,NvU32 link)3864 _nvswitch_service_reduction_nonfatal_ls10
3865 (
3866 nvswitch_device *device,
3867 NvU32 link
3868 )
3869 {
3870 ls10_device *chip_device = NVSWITCH_GET_CHIP_DEVICE_LS10(device);
3871 NVSWITCH_INTERRUPT_LOG_TYPE report = { 0 };
3872 NvU32 pending, bit, unhandled;
3873 NVSWITCH_RAW_ERROR_LOG_TYPE data = {0, { 0 }};
3874 INFOROM_NVS_ECC_ERROR_EVENT err_event = {0};
3875
3876 report.raw_pending = NVSWITCH_ENG_RD32(device, NPORT, , link, _REDUCTIONTSTATE, _ERR_STATUS_0);
3877 report.raw_enable = NVSWITCH_ENG_RD32(device, NPORT, , link, _REDUCTIONTSTATE, _ERR_NON_FATAL_REPORT_EN_0);
3878 report.mask = report.raw_enable & chip_device->intr_mask.mc_tstate.nonfatal;
3879 pending = report.raw_pending & report.mask;
3880
3881 if (pending == 0)
3882 {
3883 return -NVL_NOT_FOUND;
3884 }
3885
3886 unhandled = pending;
3887 report.raw_first = NVSWITCH_ENG_RD32(device, NPORT, , link, _REDUCTIONTSTATE, _ERR_FIRST_0);
3888 _nvswitch_collect_error_info_ls10(device, link,
3889 NVSWITCH_RAW_ERROR_LOG_DATA_FLAG_RED_TIME,
3890 &data);
3891
3892 bit = DRF_NUM(_REDUCTIONTSTATE, _ERR_STATUS_0, _TAGPOOL_ECC_LIMIT_ERR, 1);
3893 if (nvswitch_test_flags(pending, bit))
3894 {
3895 // Ignore LIMIT error if DBE is pending
3896 if(!(nvswitch_test_flags(report.raw_pending,
3897 DRF_NUM(_REDUCTIONTSTATE, _ERR_STATUS_0, _TAGPOOL_ECC_DBE_ERR, 1))))
3898 {
3899 NvBool bAddressValid = NV_FALSE;
3900 NvU32 address = 0;
3901 NvU32 addressValid = NVSWITCH_ENG_RD32(device, NPORT, , link, _REDUCTIONTSTATE,
3902 _ERR_TAGPOOL_ECC_ERROR_ADDRESS_VALID);
3903
3904 if (FLD_TEST_DRF(_REDUCTIONTSTATE_ERR_TAGPOOL, _ECC_ERROR_ADDRESS_VALID, _VALID, _VALID,
3905 addressValid))
3906 {
3907 address = NVSWITCH_ENG_RD32(device, NPORT, , link, _REDUCTIONTSTATE,
3908 _ERR_TAGPOOL_ECC_ERROR_ADDRESS);
3909 bAddressValid = NV_TRUE;
3910 }
3911
3912 report.data[1] = NVSWITCH_ENG_RD32(device, NPORT, , link, _REDUCTIONTSTATE, _ERR_TAGPOOL_ECC_ERROR_COUNTER);
3913 NVSWITCH_ENG_WR32(device, NPORT, , link, _REDUCTIONTSTATE, _ERR_TAGPOOL_ECC_ERROR_COUNTER,
3914 DRF_DEF(_REDUCTIONTSTATE, _ERR_TAGPOOL_ECC_ERROR_COUNTER, _ERROR_COUNT, _INIT));
3915 NVSWITCH_REPORT_NONFATAL(_HW_NPORT_REDUCTIONTSTATE_TAGPOOL_ECC_LIMIT_ERR, "Red TS tag store single-bit threshold");
3916 NVSWITCH_REPORT_DATA(_HW_NPORT_REDUCTIONTSTATE_TAGPOOL_ECC_LIMIT_ERR, data);
3917
3918 _nvswitch_construct_ecc_error_event_ls10(&err_event,
3919 NVSWITCH_ERR_HW_NPORT_REDUCTIONTSTATE_TAGPOOL_ECC_LIMIT_ERR, link,
3920 bAddressValid, address, NV_FALSE, 1);
3921
3922 nvswitch_inforom_ecc_log_err_event(device, &err_event);
3923 }
3924
3925 nvswitch_clear_flags(&unhandled, bit);
3926 }
3927
3928 bit = DRF_NUM(_REDUCTIONTSTATE, _ERR_STATUS_0, _CRUMBSTORE_ECC_LIMIT_ERR, 1);
3929 if (nvswitch_test_flags(pending, bit))
3930 {
3931 // Ignore LIMIT error if DBE is pending
3932 if(!(nvswitch_test_flags(report.raw_pending,
3933 DRF_NUM(_REDUCTIONTSTATE, _ERR_STATUS_0, _CRUMBSTORE_ECC_DBE_ERR, 1))))
3934 {
3935 NvBool bAddressValid = NV_FALSE;
3936 NvU32 address = 0;
3937 NvU32 addressValid = NVSWITCH_ENG_RD32(device, NPORT, , link, _REDUCTIONTSTATE,
3938 _ERR_CRUMBSTORE_ECC_ERROR_ADDRESS_VALID);
3939
3940 if (FLD_TEST_DRF(_REDUCTIONTSTATE_ERR_CRUMBSTORE, _ECC_ERROR_ADDRESS_VALID, _VALID, _VALID,
3941 addressValid))
3942 {
3943 address = NVSWITCH_ENG_RD32(device, NPORT, , link, _REDUCTIONTSTATE,
3944 _ERR_CRUMBSTORE_ECC_ERROR_ADDRESS);
3945 bAddressValid = NV_TRUE;
3946 }
3947
3948 report.data[1] = NVSWITCH_ENG_RD32(device, NPORT, , link, _REDUCTIONTSTATE, _ERR_CRUMBSTORE_ECC_ERROR_COUNTER);
3949 NVSWITCH_ENG_WR32(device, NPORT, , link, _REDUCTIONTSTATE, _ERR_CRUMBSTORE_ECC_ERROR_COUNTER,
3950 DRF_DEF(_REDUCTIONTSTATE, _ERR_CRUMBSTORE_ECC_ERROR_COUNTER, _ERROR_COUNT, _INIT));
3951 NVSWITCH_REPORT_NONFATAL(_HW_NPORT_REDUCTIONTSTATE_CRUMBSTORE_ECC_LIMIT_ERR, "Red TS crumbstore single-bit threshold");
3952 NVSWITCH_REPORT_DATA(_HW_NPORT_REDUCTIONTSTATE_CRUMBSTORE_ECC_LIMIT_ERR, data);
3953
3954 _nvswitch_construct_ecc_error_event_ls10(&err_event,
3955 NVSWITCH_ERR_HW_NPORT_REDUCTIONTSTATE_CRUMBSTORE_ECC_LIMIT_ERR, link,
3956 bAddressValid, address, NV_FALSE, 1);
3957
3958 nvswitch_inforom_ecc_log_err_event(device, &err_event);
3959 }
3960
3961 nvswitch_clear_flags(&unhandled, bit);
3962 }
3963
3964 bit = DRF_NUM(_REDUCTIONTSTATE, _ERR_STATUS_0, _CRUMBSTORE_RTO_ERR, 1);
3965 if (nvswitch_test_flags(pending, bit))
3966 {
3967 NVSWITCH_REPORT_NONFATAL(_HW_NPORT_REDUCTIONTSTATE_CRUMBSTORE_RTO_ERR, "Red TS crumbstore RTO");
3968 NVSWITCH_REPORT_DATA(_HW_NPORT_REDUCTIONTSTATE_CRUMBSTORE_RTO_ERR, data);
3969
3970 nvswitch_clear_flags(&unhandled, bit);
3971 }
3972
3973 NVSWITCH_UNHANDLED_CHECK(device, unhandled);
3974
3975 // Disable interrupts that have occurred after fatal error.
3976 // This helps prevent an interrupt storm if HW keeps triggering unnecessary stream of interrupts.
3977 if (device->link[link].fatal_error_occurred)
3978 {
3979 NVSWITCH_ENG_WR32(device, NPORT, , link, _REDUCTIONTSTATE, _ERR_NON_FATAL_REPORT_EN_0,
3980 report.raw_enable & ~pending);
3981 }
3982
3983 if (report.raw_first & report.mask)
3984 {
3985 NVSWITCH_ENG_WR32(device, NPORT, , link, _REDUCTIONTSTATE, _ERR_FIRST_0,
3986 report.raw_first & report.mask);
3987 }
3988
3989 NVSWITCH_ENG_WR32(device, NPORT, , link, _REDUCTIONTSTATE, _ERR_STATUS_0, pending);
3990
3991 if (unhandled != 0)
3992 {
3993 return -NVL_MORE_PROCESSING_REQUIRED;
3994 }
3995
3996 return NVL_SUCCESS;
3997 }
3998
3999 static NvlStatus
_nvswitch_service_reduction_fatal_ls10(nvswitch_device * device,NvU32 link)4000 _nvswitch_service_reduction_fatal_ls10
4001 (
4002 nvswitch_device *device,
4003 NvU32 link
4004 )
4005 {
4006 ls10_device *chip_device = NVSWITCH_GET_CHIP_DEVICE_LS10(device);
4007 NVSWITCH_INTERRUPT_LOG_TYPE report = { 0 };
4008 NvU32 pending, bit, contain, unhandled;
4009 NVSWITCH_RAW_ERROR_LOG_TYPE data = {0, { 0 }};
4010 INFOROM_NVS_ECC_ERROR_EVENT err_event = {0};
4011
4012 report.raw_pending = NVSWITCH_ENG_RD32(device, NPORT, , link, _REDUCTIONTSTATE, _ERR_STATUS_0);
4013 report.raw_enable = NVSWITCH_ENG_RD32(device, NPORT, , link, _REDUCTIONTSTATE, _ERR_FATAL_REPORT_EN_0);
4014 report.mask = report.raw_enable & chip_device->intr_mask.mc_tstate.fatal;
4015 pending = report.raw_pending & report.mask;
4016
4017 if (pending == 0)
4018 {
4019 return -NVL_NOT_FOUND;
4020 }
4021
4022 unhandled = pending;
4023 report.raw_first = NVSWITCH_ENG_RD32(device, NPORT, , link, _REDUCTIONTSTATE, _ERR_FIRST_0);
4024 contain = NVSWITCH_ENG_RD32(device, NPORT, , link, _REDUCTIONTSTATE, _ERR_CONTAIN_EN_0);
4025 _nvswitch_collect_error_info_ls10(device, link,
4026 NVSWITCH_RAW_ERROR_LOG_DATA_FLAG_RED_TIME,
4027 &data);
4028
4029 bit = DRF_NUM(_REDUCTIONTSTATE, _ERR_STATUS_0, _TAGPOOL_ECC_DBE_ERR, 1);
4030 if (nvswitch_test_flags(pending, bit))
4031 {
4032 NvBool bAddressValid = NV_FALSE;
4033 NvU32 address = 0;
4034 NvU32 addressValid = NVSWITCH_ENG_RD32(device, NPORT, , link, _REDUCTIONTSTATE,
4035 _ERR_TAGPOOL_ECC_ERROR_ADDRESS_VALID);
4036
4037 if (FLD_TEST_DRF(_REDUCTIONTSTATE_ERR_TAGPOOL, _ECC_ERROR_ADDRESS_VALID, _VALID, _VALID,
4038 addressValid))
4039 {
4040 address = NVSWITCH_ENG_RD32(device, NPORT, , link, _REDUCTIONTSTATE,
4041 _ERR_TAGPOOL_ECC_ERROR_ADDRESS);
4042 bAddressValid = NV_TRUE;
4043 }
4044
4045 report.data[1] = NVSWITCH_ENG_RD32(device, NPORT, , link, _REDUCTIONTSTATE, _ERR_TAGPOOL_ECC_ERROR_COUNTER);
4046 NVSWITCH_ENG_WR32(device, NPORT, , link, _REDUCTIONTSTATE, _ERR_TAGPOOL_ECC_ERROR_COUNTER,
4047 DRF_DEF(_REDUCTIONTSTATE, _ERR_TAGPOOL_ECC_ERROR_COUNTER, _ERROR_COUNT, _INIT));
4048 NVSWITCH_REPORT_CONTAIN(_HW_NPORT_REDUCTIONTSTATE_TAGPOOL_ECC_DBE_ERR, "Red TS tag store fatal ECC", NV_FALSE);
4049 _nvswitch_collect_error_info_ls10(device, link,
4050 NVSWITCH_RAW_ERROR_LOG_DATA_FLAG_EGRESS_TIME |
4051 NVSWITCH_RAW_ERROR_LOG_DATA_FLAG_EGRESS_MISC |
4052 NVSWITCH_RAW_ERROR_LOG_DATA_FLAG_EGRESS_HDR,
4053 &data);
4054 NVSWITCH_REPORT_CONTAIN_DATA(_HW_NPORT_REDUCTIONTSTATE_TAGPOOL_ECC_DBE_ERR, data);
4055 nvswitch_clear_flags(&unhandled, bit);
4056
4057 _nvswitch_construct_ecc_error_event_ls10(&err_event,
4058 NVSWITCH_ERR_HW_NPORT_REDUCTIONTSTATE_TAGPOOL_ECC_DBE_ERR, link, bAddressValid,
4059 address, NV_TRUE, 1);
4060
4061 nvswitch_inforom_ecc_log_err_event(device, &err_event);
4062
4063 // Clear associated LIMIT_ERR interrupt
4064 if (report.raw_pending & DRF_NUM(_REDUCTIONTSTATE, _ERR_STATUS_0, _TAGPOOL_ECC_LIMIT_ERR, 1))
4065 {
4066 NVSWITCH_ENG_WR32(device, NPORT, , link, _REDUCTIONTSTATE, _ERR_STATUS_0,
4067 DRF_NUM(_REDUCTIONTSTATE, _ERR_STATUS_0, _TAGPOOL_ECC_LIMIT_ERR, 1));
4068 }
4069 }
4070
4071 bit = DRF_NUM(_REDUCTIONTSTATE, _ERR_STATUS_0, _CRUMBSTORE_ECC_DBE_ERR, 1);
4072 if (nvswitch_test_flags(pending, bit))
4073 {
4074 NvBool bAddressValid = NV_FALSE;
4075 NvU32 address = 0;
4076 NvU32 addressValid = NVSWITCH_ENG_RD32(device, NPORT, , link, _REDUCTIONTSTATE,
4077 _ERR_CRUMBSTORE_ECC_ERROR_ADDRESS_VALID);
4078
4079 if (FLD_TEST_DRF(_REDUCTIONTSTATE_ERR_CRUMBSTORE, _ECC_ERROR_ADDRESS_VALID, _VALID, _VALID,
4080 addressValid))
4081 {
4082 address = NVSWITCH_ENG_RD32(device, NPORT, , link, _REDUCTIONTSTATE,
4083 _ERR_CRUMBSTORE_ECC_ERROR_ADDRESS);
4084 bAddressValid = NV_TRUE;
4085 }
4086
4087 report.data[1] = NVSWITCH_ENG_RD32(device, NPORT, , link, _REDUCTIONTSTATE, _ERR_CRUMBSTORE_ECC_ERROR_COUNTER);
4088 NVSWITCH_ENG_WR32(device, NPORT, , link, _REDUCTIONTSTATE, _ERR_CRUMBSTORE_ECC_ERROR_COUNTER,
4089 DRF_DEF(_REDUCTIONTSTATE, _ERR_CRUMBSTORE_ECC_ERROR_COUNTER, _ERROR_COUNT, _INIT));
4090 NVSWITCH_REPORT_CONTAIN(_HW_NPORT_REDUCTIONTSTATE_CRUMBSTORE_ECC_DBE_ERR, "Red TS crumbstore fatal ECC", NV_FALSE);
4091 _nvswitch_collect_error_info_ls10(device, link,
4092 NVSWITCH_RAW_ERROR_LOG_DATA_FLAG_INGRESS_TIME |
4093 NVSWITCH_RAW_ERROR_LOG_DATA_FLAG_INGRESS_MISC |
4094 NVSWITCH_RAW_ERROR_LOG_DATA_FLAG_INGRESS_HDR,
4095 &data);
4096 NVSWITCH_REPORT_CONTAIN_DATA(_HW_NPORT_REDUCTIONTSTATE_CRUMBSTORE_ECC_DBE_ERR, data);
4097 nvswitch_clear_flags(&unhandled, bit);
4098
4099 _nvswitch_construct_ecc_error_event_ls10(&err_event,
4100 NVSWITCH_ERR_HW_NPORT_REDUCTIONTSTATE_CRUMBSTORE_ECC_DBE_ERR, link, bAddressValid,
4101 address, NV_TRUE, 1);
4102
4103 nvswitch_inforom_ecc_log_err_event(device, &err_event);
4104
4105 // Clear associated LIMIT_ERR interrupt
4106 if (report.raw_pending & DRF_NUM(_REDUCTIONTSTATE, _ERR_STATUS_0, _CRUMBSTORE_ECC_LIMIT_ERR, 1))
4107 {
4108 NVSWITCH_ENG_WR32(device, NPORT, , link, _REDUCTIONTSTATE, _ERR_STATUS_0,
4109 DRF_NUM(_REDUCTIONTSTATE, _ERR_STATUS_0, _CRUMBSTORE_ECC_LIMIT_ERR, 1));
4110 }
4111 }
4112
4113 bit = DRF_NUM(_REDUCTIONTSTATE, _ERR_STATUS_0, _CRUMBSTORE_BUF_OVERWRITE_ERR, 1);
4114 if (nvswitch_test_flags(pending, bit))
4115 {
4116 NVSWITCH_REPORT_CONTAIN(_HW_NPORT_REDUCTIONTSTATE_CRUMBSTORE_BUF_OVERWRITE_ERR, "Red crumbstore overwrite", NV_FALSE);
4117 _nvswitch_collect_error_info_ls10(device, link,
4118 NVSWITCH_RAW_ERROR_LOG_DATA_FLAG_INGRESS_TIME |
4119 NVSWITCH_RAW_ERROR_LOG_DATA_FLAG_INGRESS_MISC |
4120 NVSWITCH_RAW_ERROR_LOG_DATA_FLAG_INGRESS_HDR,
4121 &data);
4122 NVSWITCH_REPORT_CONTAIN_DATA(_HW_NPORT_REDUCTIONTSTATE_CRUMBSTORE_BUF_OVERWRITE_ERR, data);
4123 nvswitch_clear_flags(&unhandled, bit);
4124 }
4125
4126 NVSWITCH_UNHANDLED_CHECK(device, unhandled);
4127
4128 // Disable interrupts that have occurred after fatal error.
4129 // This helps prevent an interrupt storm if HW keeps triggering unnecessary stream of interrupts.
4130 if (device->link[link].fatal_error_occurred)
4131 {
4132 if (nvswitch_is_soe_supported(device))
4133 {
4134 nvswitch_soe_disable_nport_fatal_interrupts_ls10(device, link,
4135 report.raw_enable & ~pending, RM_SOE_CORE_NPORT_REDUCTION_INTERRUPT);
4136 }
4137 else
4138 {
4139 NVSWITCH_ENG_WR32(device, NPORT, , link, _REDUCTIONTSTATE, _ERR_FATAL_REPORT_EN_0,
4140 report.raw_enable & ~pending);
4141 }
4142 }
4143
4144 if (report.raw_first & report.mask)
4145 {
4146 NVSWITCH_ENG_WR32(device, NPORT, , link, _REDUCTIONTSTATE, _ERR_FIRST_0,
4147 report.raw_first & report.mask);
4148 }
4149
4150 NVSWITCH_ENG_WR32(device, NPORT, , link, _REDUCTIONTSTATE, _ERR_STATUS_0, pending);
4151
4152 if (unhandled != 0)
4153 {
4154 return -NVL_MORE_PROCESSING_REQUIRED;
4155 }
4156
4157 return NVL_SUCCESS;
4158 }
4159
4160 static NvlStatus
_nvswitch_service_nport_fatal_ls10(nvswitch_device * device,NvU32 link)4161 _nvswitch_service_nport_fatal_ls10
4162 (
4163 nvswitch_device *device,
4164 NvU32 link
4165 )
4166 {
4167 NvlStatus status[7];
4168
4169 status[0] = _nvswitch_service_route_fatal_ls10(device, link);
4170 status[1] = _nvswitch_service_ingress_fatal_ls10(device, link);
4171 status[2] = _nvswitch_service_egress_fatal_ls10(device, link);
4172 status[3] = _nvswitch_service_tstate_fatal_ls10(device, link);
4173 status[4] = _nvswitch_service_sourcetrack_fatal_ls10(device, link);
4174 status[5] = _nvswitch_service_multicast_fatal_ls10(device, link);
4175 status[6] = _nvswitch_service_reduction_fatal_ls10(device, link);
4176
4177 if ((status[0] != NVL_SUCCESS) &&
4178 (status[1] != NVL_SUCCESS) &&
4179 (status[2] != NVL_SUCCESS) &&
4180 (status[3] != NVL_SUCCESS) &&
4181 (status[4] != NVL_SUCCESS) &&
4182 (status[5] != NVL_SUCCESS) &&
4183 (status[6] != NVL_SUCCESS))
4184 {
4185 return -NVL_MORE_PROCESSING_REQUIRED;
4186 }
4187
4188 return NVL_SUCCESS;
4189 }
4190
4191 static NvlStatus
_nvswitch_service_npg_fatal_ls10(nvswitch_device * device,NvU32 npg)4192 _nvswitch_service_npg_fatal_ls10
4193 (
4194 nvswitch_device *device,
4195 NvU32 npg
4196 )
4197 {
4198 NvU32 pending, mask, bit, unhandled;
4199 NvU32 nport;
4200 NvU32 link;
4201
4202 pending = NVSWITCH_ENG_RD32(device, NPG, , npg, _NPG, _NPG_INTERRUPT_STATUS);
4203
4204 if (pending == 0)
4205 {
4206 return -NVL_NOT_FOUND;
4207 }
4208
4209 mask =
4210 DRF_DEF(_NPG, _NPG_INTERRUPT_STATUS, _DEV0_INT_STATUS, _FATAL) |
4211 DRF_DEF(_NPG, _NPG_INTERRUPT_STATUS, _DEV1_INT_STATUS, _FATAL) |
4212 DRF_DEF(_NPG, _NPG_INTERRUPT_STATUS, _DEV2_INT_STATUS, _FATAL) |
4213 DRF_DEF(_NPG, _NPG_INTERRUPT_STATUS, _DEV3_INT_STATUS, _FATAL);
4214 pending &= mask;
4215 unhandled = pending;
4216
4217 for (nport = 0; nport < NVSWITCH_NPORT_PER_NPG_LS10; nport++)
4218 {
4219 switch (nport)
4220 {
4221 case 0:
4222 bit = DRF_DEF(_NPG, _NPG_INTERRUPT_STATUS, _DEV0_INT_STATUS, _FATAL);
4223 break;
4224 case 1:
4225 bit = DRF_DEF(_NPG, _NPG_INTERRUPT_STATUS, _DEV1_INT_STATUS, _FATAL);
4226 break;
4227 case 2:
4228 bit = DRF_DEF(_NPG, _NPG_INTERRUPT_STATUS, _DEV2_INT_STATUS, _FATAL);
4229 break;
4230 case 3:
4231 bit = DRF_DEF(_NPG, _NPG_INTERRUPT_STATUS, _DEV3_INT_STATUS, _FATAL);
4232 break;
4233 }
4234 if (nvswitch_test_flags(pending, bit))
4235 {
4236 link = NPORT_TO_LINK_LS10(device, npg, nport);
4237 if (NVSWITCH_ENG_IS_VALID(device, NPORT, link))
4238 {
4239 if (_nvswitch_service_nport_fatal_ls10(device, link) == NVL_SUCCESS)
4240 {
4241 nvswitch_clear_flags(&unhandled, bit);
4242 }
4243 }
4244 }
4245 }
4246
4247 NVSWITCH_UNHANDLED_CHECK(device, unhandled);
4248
4249 if (unhandled != 0)
4250 {
4251 return -NVL_MORE_PROCESSING_REQUIRED;
4252 }
4253
4254 return NVL_SUCCESS;
4255 }
4256
4257 static NvlStatus
_nvswitch_service_nport_nonfatal_ls10(nvswitch_device * device,NvU32 link)4258 _nvswitch_service_nport_nonfatal_ls10
4259 (
4260 nvswitch_device *device,
4261 NvU32 link
4262 )
4263 {
4264 NvlStatus status[7];
4265
4266 status[0] = _nvswitch_service_route_nonfatal_ls10(device, link);
4267 status[1] = _nvswitch_service_ingress_nonfatal_ls10(device, link);
4268 status[2] = _nvswitch_service_egress_nonfatal_ls10(device, link);
4269 status[3] = _nvswitch_service_tstate_nonfatal_ls10(device, link);
4270 status[4] = _nvswitch_service_sourcetrack_nonfatal_ls10(device, link);
4271 status[5] = _nvswitch_service_multicast_nonfatal_ls10(device, link);
4272 status[6] = _nvswitch_service_reduction_nonfatal_ls10(device, link);
4273
4274 if ((status[0] != NVL_SUCCESS) &&
4275 (status[1] != NVL_SUCCESS) &&
4276 (status[2] != NVL_SUCCESS) &&
4277 (status[3] != NVL_SUCCESS) &&
4278 (status[4] != NVL_SUCCESS) &&
4279 (status[5] != NVL_SUCCESS) &&
4280 (status[6] != NVL_SUCCESS))
4281 {
4282 return -NVL_MORE_PROCESSING_REQUIRED;
4283 }
4284
4285 return NVL_SUCCESS;
4286 }
4287
4288 static NvlStatus
_nvswitch_service_npg_nonfatal_ls10(nvswitch_device * device,NvU32 npg)4289 _nvswitch_service_npg_nonfatal_ls10
4290 (
4291 nvswitch_device *device,
4292 NvU32 npg
4293 )
4294 {
4295 NvU32 pending, mask, bit, unhandled;
4296 NvU32 nport;
4297 NvU32 link;
4298
4299 pending = NVSWITCH_ENG_RD32(device, NPG, , npg, _NPG, _NPG_INTERRUPT_STATUS);
4300
4301 if (pending == 0)
4302 {
4303 return -NVL_NOT_FOUND;
4304 }
4305
4306 mask =
4307 DRF_DEF(_NPG, _NPG_INTERRUPT_STATUS, _DEV0_INT_STATUS, _NONFATAL) |
4308 DRF_DEF(_NPG, _NPG_INTERRUPT_STATUS, _DEV1_INT_STATUS, _NONFATAL) |
4309 DRF_DEF(_NPG, _NPG_INTERRUPT_STATUS, _DEV2_INT_STATUS, _NONFATAL) |
4310 DRF_DEF(_NPG, _NPG_INTERRUPT_STATUS, _DEV3_INT_STATUS, _NONFATAL);
4311 pending &= mask;
4312 unhandled = pending;
4313
4314 for (nport = 0; nport < NVSWITCH_NPORT_PER_NPG_LS10; nport++)
4315 {
4316 switch (nport)
4317 {
4318 case 0:
4319 bit = DRF_DEF(_NPG, _NPG_INTERRUPT_STATUS, _DEV0_INT_STATUS, _NONFATAL);
4320 break;
4321 case 1:
4322 bit = DRF_DEF(_NPG, _NPG_INTERRUPT_STATUS, _DEV1_INT_STATUS, _NONFATAL);
4323 break;
4324 case 2:
4325 bit = DRF_DEF(_NPG, _NPG_INTERRUPT_STATUS, _DEV2_INT_STATUS, _NONFATAL);
4326 break;
4327 case 3:
4328 bit = DRF_DEF(_NPG, _NPG_INTERRUPT_STATUS, _DEV3_INT_STATUS, _NONFATAL);
4329 break;
4330 }
4331 if (nvswitch_test_flags(pending, bit))
4332 {
4333 link = NPORT_TO_LINK_LS10(device, npg, nport);
4334 if (NVSWITCH_ENG_IS_VALID(device, NPORT, link))
4335 {
4336 if (_nvswitch_service_nport_nonfatal_ls10(device, link) == NVL_SUCCESS)
4337 {
4338 nvswitch_clear_flags(&unhandled, bit);
4339 }
4340 }
4341 }
4342 }
4343
4344 NVSWITCH_UNHANDLED_CHECK(device, unhandled);
4345
4346 if (unhandled != 0)
4347 {
4348 return -NVL_MORE_PROCESSING_REQUIRED;
4349 }
4350
4351 return NVL_SUCCESS;
4352 }
4353
//
// Service fatal NVLDL (NVLink data-link) interrupts for every enabled link
// in intrLinkMask that belongs to the given NVLIPT instance, then report the
// set of links that saw runtime errors to SMBPBI.
//
// Returns NVL_SUCCESS if at least one link was serviced successfully,
// -NVL_BAD_ARGS if the supplied mask contains no enabled link local to this
// NVLIPT instance, otherwise -NVL_MORE_PROCESSING_REQUIRED.
//
static NvlStatus
_nvswitch_service_nvldl_fatal_ls10
(
    nvswitch_device *device,
    NvU32 nvlipt_instance,
    NvU64 intrLinkMask
)
{
    NvU64 enabledLinkMask, localLinkMask, localIntrLinkMask, runtimeErrorMask = 0;
    NvU32 i;
    nvlink_link *link;
    // Clocks that must be running for a link to be serviced below.
    NvU32 clocksMask = NVSWITCH_PER_LINK_CLOCK_SET(RXCLK) | NVSWITCH_PER_LINK_CLOCK_SET(TXCLK);
    NvlStatus status = -NVL_MORE_PROCESSING_REQUIRED;
    NVSWITCH_LINK_TRAINING_ERROR_INFO linkTrainingErrorInfo = { 0 };
    NVSWITCH_LINK_RUNTIME_ERROR_INFO linkRuntimeErrorInfo = { 0 };

    //
    // The passed in interruptLinkMask should contain a link that is part of the
    // given nvlipt instance
    //
    enabledLinkMask = nvswitch_get_enabled_link_mask(device);
    localLinkMask = NVSWITCH_NVLIPT_GET_LOCAL_LINK_MASK64_LS10(nvlipt_instance);
    // Restrict servicing to links that are local to this instance, requested
    // by the caller, and enabled on this device.
    localIntrLinkMask = localLinkMask & intrLinkMask & enabledLinkMask;

    if (localIntrLinkMask == 0)
    {
        NVSWITCH_PRINT(device, ERROR, "%s: Bad link mask provided for link interrupt servicing!\n", __FUNCTION__);
        NVSWITCH_ASSERT(0);
        return -NVL_BAD_ARGS;
    }

    FOR_EACH_INDEX_IN_MASK(64, i, localIntrLinkMask)
    {
        link = nvswitch_get_link(device, i);
        if (link == NULL)
        {
            // An interrupt on an invalid link should never occur
            NVSWITCH_ASSERT(link != NULL);
            continue;
        }

        // Cross-check that this link really belongs to the given instance.
        if (NVSWITCH_GET_LINK_ENG_INST(device, i, NVLIPT) != nvlipt_instance)
        {
            NVSWITCH_ASSERT(0);
            break;
        }

        // Skip links that are in reset or whose RX/TX clocks are not on;
        // their NVLDL state cannot be serviced.
        if (nvswitch_is_link_in_reset(device, link) ||
            !nvswitch_are_link_clocks_on_ls10(device, link, clocksMask))
        {
            continue;
        }

        // Delegate per-link servicing to the HAL; record success per link.
        if (device->hal.nvswitch_service_nvldl_fatal_link(device, nvlipt_instance, i) == NVL_SUCCESS)
        {
            runtimeErrorMask |= NVBIT64(i);
            status = NVL_SUCCESS;
        }
    }
    FOR_EACH_INDEX_IN_MASK_END;

    linkTrainingErrorInfo.isValid = NV_FALSE;
    linkRuntimeErrorInfo.isValid = NV_TRUE;
    linkRuntimeErrorInfo.mask0   = runtimeErrorMask;

    // Check runtimeErrorMask is non-zero before consuming it further.
    if ((runtimeErrorMask != 0) &&
        (nvswitch_smbpbi_set_link_error_info(device,
            &linkTrainingErrorInfo, &linkRuntimeErrorInfo) != NVL_SUCCESS))
    {
        // Failure to report to SMBPBI is logged but does not change status.
        NVSWITCH_PRINT(device, ERROR,
                       "%s: NVLDL[0x%x, 0x%llx]: Unable to send Runtime Error bitmask: 0x%llx,\n",
                       __FUNCTION__,
                       nvlipt_instance, localIntrLinkMask,
                       runtimeErrorMask);
    }

    return status;
}
4433
//
// Service fatal NVLTLC TX_SYS interrupts for the given link: NCISOC parity
// and ECC errors, TX poison detection, and TX response status errors.
// Parity and HDR ECC DBE errors are additionally logged to the InfoROM
// NVLink error object.
//
// Returns NVL_SUCCESS when all pending interrupts were handled,
// -NVL_NOT_FOUND when none were pending, or -NVL_MORE_PROCESSING_REQUIRED
// when unrecognized bits remain.
//
static NvlStatus
_nvswitch_service_nvltlc_tx_sys_fatal_ls10
(
    nvswitch_device *device,
    NvU32 nvlipt_instance,
    NvU32 link
)
{
    NvU32 pending, bit, unhandled;
    NVSWITCH_INTERRUPT_LOG_TYPE report = { 0 };
    INFOROM_NVLINK_ERROR_EVENT error_event = { 0 };

    report.raw_pending = NVSWITCH_LINK_RD32_LS10(device, link, NVLTLC, _NVLTLC_TX_SYS, _ERR_STATUS_0);
    report.raw_enable = NVSWITCH_LINK_RD32_LS10(device, link, NVLTLC, _NVLTLC_TX_SYS, _ERR_FATAL_REPORT_EN_0);
    report.mask = report.raw_enable;
    pending = report.raw_pending & report.mask;

    // Pre-fill the InfoROM event with the location of this link.
    error_event.nvliptInstance = (NvU8) nvlipt_instance;
    error_event.localLinkIdx = (NvU8) NVSWITCH_NVLIPT_GET_LOCAL_LINK_ID_LS10(link);

    if (pending == 0)
    {
        return -NVL_NOT_FOUND;
    }

    unhandled = pending;
    report.raw_first = NVSWITCH_LINK_RD32_LS10(device, link, NVLTLC, _NVLTLC_TX_SYS, _ERR_FIRST_0);

    bit = DRF_NUM(_NVLTLC_TX_SYS, _ERR_STATUS_0, _NCISOC_PARITY_ERR, 1);
    if (nvswitch_test_flags(pending, bit))
    {
        NVSWITCH_REPORT_FATAL(_HW_NVLTLC_TX_SYS_NCISOC_PARITY_ERR, "NCISOC Parity Error", NV_FALSE);
        nvswitch_clear_flags(&unhandled, bit);
        {
            error_event.error = INFOROM_NVLINK_TLC_TX_NCISOC_PARITY_ERR_FATAL;
            nvswitch_inforom_nvlink_log_error_event(device, &error_event);
        }
    }

    bit = DRF_NUM(_NVLTLC_TX_SYS, _ERR_STATUS_0, _NCISOC_HDR_ECC_DBE_ERR, 1);
    if (nvswitch_test_flags(pending, bit))
    {
        NVSWITCH_REPORT_FATAL(_HW_NVLTLC_TX_SYS_NCISOC_HDR_ECC_DBE_ERR, "NCISOC HDR ECC DBE Error", NV_FALSE);
        nvswitch_clear_flags(&unhandled, bit);
        {
            error_event.error = INFOROM_NVLINK_TLC_TX_NCISOC_HDR_ECC_DBE_FATAL;
            nvswitch_inforom_nvlink_log_error_event(device, &error_event);
        }
    }

    // NOTE(review): unlike the PARITY/HDR DBE cases above, the DAT DBE and
    // ECC LIMIT cases below do not log an InfoROM event — confirm intentional.
    bit = DRF_NUM(_NVLTLC_TX_SYS, _ERR_STATUS_0, _NCISOC_DAT_ECC_DBE_ERR, 1);
    if (nvswitch_test_flags(pending, bit))
    {
        NVSWITCH_REPORT_FATAL(_HW_NVLTLC_TX_SYS_NCISOC_DAT_ECC_DBE_ERR, "NCISOC DAT ECC DBE Error", NV_FALSE);
        nvswitch_clear_flags(&unhandled, bit);
    }

    bit = DRF_NUM(_NVLTLC_TX_SYS, _ERR_STATUS_0, _NCISOC_ECC_LIMIT_ERR, 1);
    if (nvswitch_test_flags(pending, bit))
    {
        NVSWITCH_REPORT_FATAL(_HW_NVLTLC_TX_SYS_NCISOC_ECC_LIMIT_ERR, "NCISOC ECC Limit Error", NV_FALSE);
        nvswitch_clear_flags(&unhandled, bit);
    }

    bit = DRF_NUM(_NVLTLC_TX_SYS, _ERR_STATUS_0, _TXPOISONDET, 1);
    if (nvswitch_test_flags(pending, bit))
    {
        NVSWITCH_REPORT_FATAL(_HW_NVLTLC_TXPOISONDET, "Poison Error", NV_FALSE);
        nvswitch_clear_flags(&unhandled, bit);
    }

    bit = DRF_NUM(_NVLTLC_TX_SYS, _ERR_STATUS_0, _TXRSPSTATUS_HW_ERR, 1);
    if (nvswitch_test_flags(pending, bit))
    {
        NVSWITCH_REPORT_FATAL(_HW_NVLTLC_TX_SYS_TXRSPSTATUS_HW_ERR, "TX Response Status HW Error", NV_FALSE);
        nvswitch_clear_flags(&unhandled, bit);
    }

    bit = DRF_NUM(_NVLTLC_TX_SYS, _ERR_STATUS_0, _TXRSPSTATUS_UR_ERR, 1);
    if (nvswitch_test_flags(pending, bit))
    {
        NVSWITCH_REPORT_FATAL(_HW_NVLTLC_TX_SYS_TXRSPSTATUS_UR_ERR, "TX Response Status UR Error", NV_FALSE);
        nvswitch_clear_flags(&unhandled, bit);
    }

    bit = DRF_NUM(_NVLTLC_TX_SYS, _ERR_STATUS_0, _TXRSPSTATUS_PRIV_ERR, 1);
    if (nvswitch_test_flags(pending, bit))
    {
        NVSWITCH_REPORT_FATAL(_HW_NVLTLC_TX_SYS_TXRSPSTATUS_PRIV_ERR, "TX Response Status PRIV Error", NV_FALSE);
        nvswitch_clear_flags(&unhandled, bit);
    }

    // Clear the handled bits of the FIRST register.
    if (report.raw_first & report.mask)
    {
        NVSWITCH_LINK_WR32_LS10(device, link, NVLTLC, _NVLTLC_TX_SYS, _ERR_FIRST_0,
            report.raw_first & report.mask);
    }

    NVSWITCH_UNHANDLED_CHECK(device, unhandled);

    // Disable interrupts that have occurred after fatal error.
    if (device->link[link].fatal_error_occurred)
    {
        NVSWITCH_LINK_WR32_LS10(device, link, NVLTLC, _NVLTLC_TX_SYS, _ERR_FATAL_REPORT_EN_0,
                report.raw_enable & ~pending);
    }

    // Acknowledge the serviced interrupts (write-1-to-clear status).
    NVSWITCH_LINK_WR32_LS10(device, link, NVLTLC, _NVLTLC_TX_SYS, _ERR_STATUS_0, pending);

    if (unhandled != 0)
    {
        NVSWITCH_PRINT(device, WARN,
                "%s: Unhandled NVLTLC_TX_SYS interrupts, link: %d pending: 0x%x enabled: 0x%x.\n",
                 __FUNCTION__, link, pending, report.raw_enable);
        return -NVL_MORE_PROCESSING_REQUIRED;
    }

    return NVL_SUCCESS;
}
4553
//
// Service fatal NVLTLC RX_SYS interrupts for the given link: NCISOC parity
// and HDR/DAT0/DAT1 RAM ECC DBE/LIMIT errors. RAM ECC DBE errors are also
// logged to the InfoROM NVLink error object.
//
// Returns NVL_SUCCESS when all pending interrupts were handled,
// -NVL_NOT_FOUND when none were pending, or -NVL_MORE_PROCESSING_REQUIRED
// when unrecognized bits remain.
//
static NvlStatus
_nvswitch_service_nvltlc_rx_sys_fatal_ls10
(
    nvswitch_device *device,
    NvU32 nvlipt_instance,
    NvU32 link
)
{
    NvU32 pending, bit, unhandled;
    NVSWITCH_INTERRUPT_LOG_TYPE report = { 0 };
    INFOROM_NVLINK_ERROR_EVENT error_event = { 0 };

    report.raw_pending = NVSWITCH_LINK_RD32_LS10(device, link, NVLTLC, _NVLTLC_RX_SYS, _ERR_STATUS_0);
    report.raw_enable = NVSWITCH_LINK_RD32_LS10(device, link, NVLTLC, _NVLTLC_RX_SYS, _ERR_FATAL_REPORT_EN_0);
    report.mask = report.raw_enable;
    pending = report.raw_pending & report.mask;

    // Pre-fill the InfoROM event with the location of this link.
    error_event.nvliptInstance = (NvU8) nvlipt_instance;
    error_event.localLinkIdx = (NvU8) NVSWITCH_NVLIPT_GET_LOCAL_LINK_ID_LS10(link);

    if (pending == 0)
    {
        return -NVL_NOT_FOUND;
    }

    unhandled = pending;
    report.raw_first = NVSWITCH_LINK_RD32_LS10(device, link, NVLTLC, _NVLTLC_RX_SYS, _ERR_FIRST_0);

    bit = DRF_NUM(_NVLTLC_RX_SYS, _ERR_STATUS_0, _NCISOC_PARITY_ERR, 1);
    if (nvswitch_test_flags(pending, bit))
    {
        NVSWITCH_REPORT_FATAL(_HW_NVLTLC_RX_SYS_NCISOC_PARITY_ERR, "NCISOC Parity Error", NV_FALSE);
        nvswitch_clear_flags(&unhandled, bit);
    }

    bit = DRF_NUM(_NVLTLC_RX_SYS, _ERR_STATUS_0, _HDR_RAM_ECC_DBE_ERR, 1);
    if (nvswitch_test_flags(pending, bit))
    {
        NVSWITCH_REPORT_FATAL(_HW_NVLTLC_RX_SYS_HDR_RAM_ECC_DBE_ERR, "HDR RAM ECC DBE Error", NV_FALSE);
        nvswitch_clear_flags(&unhandled, bit);
        {
            // TODO 3014908 log these in the NVL object until we have ECC object support
            error_event.error = INFOROM_NVLINK_TLC_RX_HDR_RAM_ECC_DBE_FATAL;
            nvswitch_inforom_nvlink_log_error_event(device, &error_event);
        }
    }

    bit = DRF_NUM(_NVLTLC_RX_SYS, _ERR_STATUS_0, _HDR_RAM_ECC_LIMIT_ERR, 1);
    if (nvswitch_test_flags(pending, bit))
    {
        NVSWITCH_REPORT_FATAL(_HW_NVLTLC_RX_SYS_HDR_RAM_ECC_LIMIT_ERR, "HDR RAM ECC Limit Error", NV_FALSE);
        nvswitch_clear_flags(&unhandled, bit);
    }

    bit = DRF_NUM(_NVLTLC_RX_SYS, _ERR_STATUS_0, _DAT0_RAM_ECC_DBE_ERR, 1);
    if (nvswitch_test_flags(pending, bit))
    {
        NVSWITCH_REPORT_FATAL(_HW_NVLTLC_RX_SYS_DAT0_RAM_ECC_DBE_ERR, "DAT0 RAM ECC DBE Error", NV_FALSE);
        nvswitch_clear_flags(&unhandled, bit);
        {
            // TODO 3014908 log these in the NVL object until we have ECC object support
            error_event.error = INFOROM_NVLINK_TLC_RX_DAT0_RAM_ECC_DBE_FATAL;
            nvswitch_inforom_nvlink_log_error_event(device, &error_event);
        }
    }

    bit = DRF_NUM(_NVLTLC_RX_SYS, _ERR_STATUS_0, _DAT0_RAM_ECC_LIMIT_ERR, 1);
    if (nvswitch_test_flags(pending, bit))
    {
        NVSWITCH_REPORT_FATAL(_HW_NVLTLC_RX_SYS_DAT0_RAM_ECC_LIMIT_ERR, "DAT0 RAM ECC Limit Error", NV_FALSE);
        nvswitch_clear_flags(&unhandled, bit);
    }

    bit = DRF_NUM(_NVLTLC_RX_SYS, _ERR_STATUS_0, _DAT1_RAM_ECC_DBE_ERR, 1);
    if (nvswitch_test_flags(pending, bit))
    {
        NVSWITCH_REPORT_FATAL(_HW_NVLTLC_RX_SYS_DAT1_RAM_ECC_DBE_ERR, "DAT1 RAM ECC DBE Error", NV_FALSE);
        nvswitch_clear_flags(&unhandled, bit);
        {
            // TODO 3014908 log these in the NVL object until we have ECC object support
            error_event.error = INFOROM_NVLINK_TLC_RX_DAT1_RAM_ECC_DBE_FATAL;
            nvswitch_inforom_nvlink_log_error_event(device, &error_event);
        }
    }

    bit = DRF_NUM(_NVLTLC_RX_SYS, _ERR_STATUS_0, _DAT1_RAM_ECC_LIMIT_ERR, 1);
    if (nvswitch_test_flags(pending, bit))
    {
        NVSWITCH_REPORT_FATAL(_HW_NVLTLC_RX_SYS_DAT1_RAM_ECC_LIMIT_ERR, "DAT1 RAM ECC Limit Error", NV_FALSE);
        nvswitch_clear_flags(&unhandled, bit);
    }

    // Clear the handled bits of the FIRST register.
    if (report.raw_first & report.mask)
    {
        NVSWITCH_LINK_WR32_LS10(device, link, NVLTLC, _NVLTLC_RX_SYS, _ERR_FIRST_0,
            report.raw_first & report.mask);
    }

    NVSWITCH_UNHANDLED_CHECK(device, unhandled);

    // Disable interrupts that have occurred after fatal error.
    if (device->link[link].fatal_error_occurred)
    {
        NVSWITCH_LINK_WR32_LS10(device, link, NVLTLC, _NVLTLC_RX_SYS, _ERR_FATAL_REPORT_EN_0,
                report.raw_enable & ~pending);
    }

    // Acknowledge the serviced interrupts (write-1-to-clear status).
    NVSWITCH_LINK_WR32_LS10(device, link, NVLTLC, _NVLTLC_RX_SYS, _ERR_STATUS_0, pending);

    if (unhandled != 0)
    {
        NVSWITCH_PRINT(device, WARN,
                "%s: Unhandled NVLTLC_RX_SYS interrupts, link: %d pending: 0x%x enabled: 0x%x.\n",
                 __FUNCTION__, link, pending, report.raw_enable);
        return -NVL_MORE_PROCESSING_REQUIRED;
    }

    return NVL_SUCCESS;
}
4673
//
// Service fatal NVLTLC TX_LNK (group 0) interrupts for the given link:
// TX DL credit parity and per-RAM header/data ECC DBE errors. The DL credit
// parity and RSP1 DAT DBE errors are also logged to the InfoROM NVLink
// error object.
//
// Returns NVL_SUCCESS when all pending interrupts were handled,
// -NVL_NOT_FOUND when none were pending, or -NVL_MORE_PROCESSING_REQUIRED
// when unrecognized bits remain.
//
static NvlStatus
_nvswitch_service_nvltlc_tx_lnk_fatal_0_ls10
(
    nvswitch_device *device,
    NvU32 nvlipt_instance,
    NvU32 link
)
{
    NvU32 pending, bit, unhandled;
    NVSWITCH_INTERRUPT_LOG_TYPE report = { 0 };
    INFOROM_NVLINK_ERROR_EVENT error_event = { 0 };

    report.raw_pending = NVSWITCH_LINK_RD32_LS10(device, link, NVLTLC, _NVLTLC_TX_LNK, _ERR_STATUS_0);
    report.raw_enable = NVSWITCH_LINK_RD32_LS10(device, link, NVLTLC, _NVLTLC_TX_LNK, _ERR_FATAL_REPORT_EN_0);
    report.mask = report.raw_enable;
    pending = report.raw_pending & report.mask;

    // Pre-fill the InfoROM event with the location of this link.
    error_event.nvliptInstance = (NvU8) nvlipt_instance;
    error_event.localLinkIdx = (NvU8) NVSWITCH_NVLIPT_GET_LOCAL_LINK_ID_LS10(link);

    if (pending == 0)
    {
        return -NVL_NOT_FOUND;
    }

    unhandled = pending;
    report.raw_first = NVSWITCH_LINK_RD32_LS10(device, link, NVLTLC, _NVLTLC_TX_LNK, _ERR_FIRST_0);

    bit = DRF_NUM(_NVLTLC_TX_LNK, _ERR_STATUS_0, _TXDLCREDITPARITYERR, 1);
    if (nvswitch_test_flags(pending, bit))
    {
        NVSWITCH_REPORT_FATAL(_HW_NVLTLC_TXDLCREDITPARITYERR, "TX DL Credit Parity Error", NV_FALSE);
        nvswitch_clear_flags(&unhandled, bit);
        {
            error_event.error = INFOROM_NVLINK_TLC_TX_DL_CREDIT_PARITY_ERR_FATAL;
            nvswitch_inforom_nvlink_log_error_event(device, &error_event);
        }
    }

    bit = DRF_NUM(_NVLTLC_TX_LNK, _ERR_STATUS_0, _CREQ_RAM_HDR_ECC_DBE_ERR, 1);
    if (nvswitch_test_flags(pending, bit))
    {
        NVSWITCH_REPORT_FATAL(_HW_NVLTLC_TX_LNK_CREQ_RAM_HDR_ECC_DBE_ERR, "CREQ RAM HDR ECC DBE Error", NV_FALSE);
        nvswitch_clear_flags(&unhandled, bit);
    }

    bit = DRF_NUM(_NVLTLC_TX_LNK, _ERR_STATUS_0, _RSP_RAM_HDR_ECC_DBE_ERR, 1);
    if (nvswitch_test_flags(pending, bit))
    {
        NVSWITCH_REPORT_FATAL(_HW_NVLTLC_TX_LNK_RSP_RAM_HDR_ECC_DBE_ERR, "Response RAM HDR ECC DBE Error", NV_FALSE);
        nvswitch_clear_flags(&unhandled, bit);
    }

    bit = DRF_NUM(_NVLTLC_TX_LNK, _ERR_STATUS_0, _COM_RAM_HDR_ECC_DBE_ERR, 1);
    if (nvswitch_test_flags(pending, bit))
    {
        NVSWITCH_REPORT_FATAL(_HW_NVLTLC_TX_LNK_COM_RAM_HDR_ECC_DBE_ERR, "COM RAM HDR ECC DBE Error", NV_FALSE);
        nvswitch_clear_flags(&unhandled, bit);
    }

    bit = DRF_NUM(_NVLTLC_TX_LNK, _ERR_STATUS_0, _RSP1_RAM_HDR_ECC_DBE_ERR, 1);
    if (nvswitch_test_flags(pending, bit))
    {
        NVSWITCH_REPORT_FATAL(_HW_NVLTLC_TX_LNK_RSP1_RAM_HDR_ECC_DBE_ERR, "RSP1 RAM HDR ECC DBE Error", NV_FALSE);
        nvswitch_clear_flags(&unhandled, bit);
    }

    bit = DRF_NUM(_NVLTLC_TX_LNK, _ERR_STATUS_0, _RSP1_RAM_DAT_ECC_DBE_ERR, 1);
    if (nvswitch_test_flags(pending, bit))
    {
        NVSWITCH_REPORT_FATAL(_HW_NVLTLC_TX_LNK_RSP1_RAM_DAT_ECC_DBE_ERR, "RSP1 RAM DAT ECC DBE Error", NV_FALSE);
        nvswitch_clear_flags(&unhandled, bit);
        {
            // TODO 3014908 log these in the NVL object until we have ECC object support
            error_event.error = INFOROM_NVLINK_TLC_TX_RSP1_DAT_RAM_ECC_DBE_FATAL;
            nvswitch_inforom_nvlink_log_error_event(device, &error_event);
        }
    }

    NVSWITCH_UNHANDLED_CHECK(device, unhandled);

    // Disable interrupts that have occurred after fatal error.
    if (device->link[link].fatal_error_occurred)
    {
        NVSWITCH_LINK_WR32_LS10(device, link, NVLTLC, _NVLTLC_TX_LNK, _ERR_FATAL_REPORT_EN_0,
                report.raw_enable & ~pending);
    }

    // Clear the handled bits of the FIRST register, then acknowledge the
    // serviced interrupts (write-1-to-clear status).
    if (report.raw_first & report.mask)
    {
        NVSWITCH_LINK_WR32_LS10(device, link, NVLTLC, _NVLTLC_TX_LNK, _ERR_FIRST_0,
                report.raw_first & report.mask);
    }
    NVSWITCH_LINK_WR32_LS10(device, link, NVLTLC, _NVLTLC_TX_LNK, _ERR_STATUS_0, pending);

    if (unhandled != 0)
    {
        NVSWITCH_PRINT(device, WARN,
                "%s: Unhandled NVLTLC_TX_LNK _0 interrupts, link: %d pending: 0x%x enabled: 0x%x.\n",
                 __FUNCTION__, link, pending, report.raw_enable);
        return -NVL_MORE_PROCESSING_REQUIRED;
    }

    return NVL_SUCCESS;
}
4779
//
// Service fatal interrupts reported in NVLTLC RX link _ERR_STATUS_0 for the
// given link.
//
// Each asserted bit that is enabled in _ERR_FATAL_REPORT_EN_0 is reported as
// a fatal error and logged as an InfoROM NVLink error event.  After handling,
// the still-pending enables are masked off (if a fatal error was recorded for
// the link), _ERR_FIRST_0 and _ERR_STATUS_0 are cleared.
//
// Returns:
//   -NVL_NOT_FOUND                 - no enabled interrupt pending
//   -NVL_MORE_PROCESSING_REQUIRED  - an unrecognized bit was pending
//   NVL_SUCCESS                    - all pending bits were handled
//
static NvlStatus
_nvswitch_service_nvltlc_rx_lnk_fatal_0_ls10
(
    nvswitch_device *device,
    NvU32 nvlipt_instance,
    NvU32 link
)
{
    NvU32 pending, bit, unhandled;
    NVSWITCH_INTERRUPT_LOG_TYPE report = { 0 };
    INFOROM_NVLINK_ERROR_EVENT error_event = { 0 };

    // Service everything that is both asserted and enabled.
    report.raw_pending = NVSWITCH_LINK_RD32_LS10(device, link, NVLTLC, _NVLTLC_RX_LNK, _ERR_STATUS_0);
    report.raw_enable = NVSWITCH_LINK_RD32_LS10(device, link, NVLTLC, _NVLTLC_RX_LNK, _ERR_FATAL_REPORT_EN_0);
    report.mask = report.raw_enable;
    pending = report.raw_pending & report.mask;

    error_event.nvliptInstance = (NvU8) nvlipt_instance;
    error_event.localLinkIdx = (NvU8) NVSWITCH_NVLIPT_GET_LOCAL_LINK_ID_LS10(link);

    if (pending == 0)
    {
        return -NVL_NOT_FOUND;
    }

    unhandled = pending;
    report.raw_first = NVSWITCH_LINK_RD32_LS10(device, link, NVLTLC, _NVLTLC_RX_LNK, _ERR_FIRST_0);

    bit = DRF_NUM(_NVLTLC_RX_LNK, _ERR_STATUS_0, _RXDLHDRPARITYERR, 1);
    if (nvswitch_test_flags(pending, bit))
    {
        NVSWITCH_REPORT_FATAL(_HW_NVLTLC_RXDLHDRPARITYERR, "RX DL HDR Parity Error", NV_FALSE);
        nvswitch_clear_flags(&unhandled, bit);
        {
            error_event.error = INFOROM_NVLINK_TLC_RX_DL_HDR_PARITY_ERR_FATAL;
            nvswitch_inforom_nvlink_log_error_event(device, &error_event);
        }
    }

    bit = DRF_NUM(_NVLTLC_RX_LNK, _ERR_STATUS_0, _RXDLDATAPARITYERR, 1);
    if (nvswitch_test_flags(pending, bit))
    {
        NVSWITCH_REPORT_FATAL(_HW_NVLTLC_RXDLDATAPARITYERR, "RX DL Data Parity Error", NV_FALSE);
        nvswitch_clear_flags(&unhandled, bit);
        {
            error_event.error = INFOROM_NVLINK_TLC_RX_DL_DATA_PARITY_ERR_FATAL;
            nvswitch_inforom_nvlink_log_error_event(device, &error_event);
        }
    }

    bit = DRF_NUM(_NVLTLC_RX_LNK, _ERR_STATUS_0, _RXDLCTRLPARITYERR, 1);
    if (nvswitch_test_flags(pending, bit))
    {
        NVSWITCH_REPORT_FATAL(_HW_NVLTLC_RXDLCTRLPARITYERR, "RX DL Ctrl Parity Error", NV_FALSE);
        nvswitch_clear_flags(&unhandled, bit);
        {
            error_event.error = INFOROM_NVLINK_TLC_RX_DL_CTRL_PARITY_ERR_FATAL;
            nvswitch_inforom_nvlink_log_error_event(device, &error_event);
        }
    }

    bit = DRF_NUM(_NVLTLC_RX_LNK, _ERR_STATUS_0, _RXPKTLENERR, 1);
    if (nvswitch_test_flags(pending, bit))
    {
        NVSWITCH_REPORT_FATAL(_HW_NVLTLC_RXPKTLENERR, "RX Packet Length Error", NV_FALSE);
        nvswitch_clear_flags(&unhandled, bit);
        {
            error_event.error = INFOROM_NVLINK_TLC_RX_PKTLEN_ERR_FATAL;
            nvswitch_inforom_nvlink_log_error_event(device, &error_event);
        }
    }

    bit = DRF_NUM(_NVLTLC_RX_LNK, _ERR_STATUS_0, _RSVCACHEATTRPROBEREQERR, 1);
    if (nvswitch_test_flags(pending, bit))
    {
        NVSWITCH_REPORT_FATAL(_HW_NVLTLC_RSVCACHEATTRPROBEREQERR, "RSV Packet Status Error", NV_FALSE);
        nvswitch_clear_flags(&unhandled, bit);
        {
            error_event.error = INFOROM_NVLINK_TLC_RX_RSVD_CACHE_ATTR_PROBE_REQ_ERR_FATAL;
            nvswitch_inforom_nvlink_log_error_event(device, &error_event);
        }
    }

    bit = DRF_NUM(_NVLTLC_RX_LNK, _ERR_STATUS_0, _RSVCACHEATTRPROBERSPERR, 1);
    if (nvswitch_test_flags(pending, bit))
    {
        NVSWITCH_REPORT_FATAL(_HW_NVLTLC_RSVCACHEATTRPROBERSPERR, "RSV CacheAttr Probe Rsp Error", NV_FALSE);
        nvswitch_clear_flags(&unhandled, bit);
        {
            error_event.error = INFOROM_NVLINK_TLC_RX_RSVD_CACHE_ATTR_PROBE_RSP_ERR_FATAL;
            nvswitch_inforom_nvlink_log_error_event(device, &error_event);
        }
    }

    bit = DRF_NUM(_NVLTLC_RX_LNK, _ERR_STATUS_0, _DATLENGTRMWREQMAXERR, 1);
    if (nvswitch_test_flags(pending, bit))
    {
        NVSWITCH_REPORT_FATAL(_HW_NVLTLC_DATLENGTRMWREQMAXERR, "Data Length RMW Req Max Error", NV_FALSE);
        nvswitch_clear_flags(&unhandled, bit);
        {
            error_event.error = INFOROM_NVLINK_TLC_RX_DATLEN_GT_RMW_REQ_MAX_ERR_FATAL;
            nvswitch_inforom_nvlink_log_error_event(device, &error_event);
        }
    }

    bit = DRF_NUM(_NVLTLC_RX_LNK, _ERR_STATUS_0, _DATLENLTATRRSPMINERR, 1);
    if (nvswitch_test_flags(pending, bit))
    {
        NVSWITCH_REPORT_FATAL(_HW_NVLTLC_DATLENLTATRRSPMINERR, "Data Len Lt ATR RSP Min Error", NV_FALSE);
        nvswitch_clear_flags(&unhandled, bit);
        {
            error_event.error = INFOROM_NVLINK_TLC_RX_DATLEN_LT_ATR_RSP_MIN_ERR_FATAL;
            nvswitch_inforom_nvlink_log_error_event(device, &error_event);
        }
    }

    bit = DRF_NUM(_NVLTLC_RX_LNK, _ERR_STATUS_0, _INVALIDCACHEATTRPOERR, 1);
    if (nvswitch_test_flags(pending, bit))
    {
        NVSWITCH_REPORT_FATAL(_HW_NVLTLC_INVALIDCACHEATTRPOERR, "Invalid Cache Attr PO Error", NV_FALSE);
        nvswitch_clear_flags(&unhandled, bit);
        {
            error_event.error = INFOROM_NVLINK_TLC_RX_INVALID_PO_FOR_CACHE_ATTR_FATAL;
            nvswitch_inforom_nvlink_log_error_event(device, &error_event);
        }
    }

    // NOTE(review): the two RXRSPSTATUS cases below are serviced in the fatal
    // path but log *_NONFATAL InfoROM codes — confirm this mapping is
    // intentional.
    bit = DRF_NUM(_NVLTLC_RX_LNK, _ERR_STATUS_0, _RXRSPSTATUS_HW_ERR, 1);
    if (nvswitch_test_flags(pending, bit))
    {
        NVSWITCH_REPORT_FATAL(_HW_NVLTLC_RX_LNK_RXRSPSTATUS_HW_ERR, "RX Rsp Status HW Error", NV_FALSE);
        nvswitch_clear_flags(&unhandled, bit);
        {
            error_event.error = INFOROM_NVLINK_TLC_RX_RSP_STATUS_HW_ERR_NONFATAL;
            nvswitch_inforom_nvlink_log_error_event(device, &error_event);
        }
    }

    bit = DRF_NUM(_NVLTLC_RX_LNK, _ERR_STATUS_0, _RXRSPSTATUS_UR_ERR, 1);
    if (nvswitch_test_flags(pending, bit))
    {
        NVSWITCH_REPORT_FATAL(_HW_NVLTLC_RX_LNK_RXRSPSTATUS_UR_ERR, "RX Rsp Status UR Error", NV_FALSE);
        nvswitch_clear_flags(&unhandled, bit);
        {
            error_event.error = INFOROM_NVLINK_TLC_RX_RSP_STATUS_UR_ERR_NONFATAL;
            nvswitch_inforom_nvlink_log_error_event(device, &error_event);
        }
    }

    bit = DRF_NUM(_NVLTLC_RX_LNK, _ERR_STATUS_0, _INVALID_COLLAPSED_RESPONSE_ERR, 1);
    if (nvswitch_test_flags(pending, bit))
    {
        NVSWITCH_REPORT_FATAL(_HW_NVLTLC_RX_LNK_INVALID_COLLAPSED_RESPONSE_ERR, "Invalid Collapsed Response Error", NV_FALSE);
        nvswitch_clear_flags(&unhandled, bit);
        {
            error_event.error = INFOROM_NVLINK_TLC_RX_INVALID_COLLAPSED_RESPONSE_FATAL;
            nvswitch_inforom_nvlink_log_error_event(device, &error_event);
        }
    }

    NVSWITCH_UNHANDLED_CHECK(device, unhandled);

    // Disable interrupts that have occurred after fatal error.
    if (device->link[link].fatal_error_occurred)
    {
        NVSWITCH_LINK_WR32_LS10(device, link, NVLTLC, _NVLTLC_RX_LNK, _ERR_FATAL_REPORT_EN_0,
            report.raw_enable & ~pending);
    }

    // Clear the first-error latch, then the serviced status bits (W1C).
    if (report.raw_first & report.mask)
    {
        NVSWITCH_LINK_WR32_LS10(device, link, NVLTLC, _NVLTLC_RX_LNK, _ERR_FIRST_0,
            report.raw_first & report.mask);
    }
    NVSWITCH_LINK_WR32_LS10(device, link, NVLTLC, _NVLTLC_RX_LNK, _ERR_STATUS_0, pending);

    if (unhandled != 0)
    {
        NVSWITCH_PRINT(device, WARN,
            "%s: Unhandled NVLTLC_RX_LNK _0 interrupts, link: %d pending: 0x%x enabled: 0x%x.\n",
            __FUNCTION__, link, pending, report.raw_enable);
        return -NVL_MORE_PROCESSING_REQUIRED;
    }

    return NVL_SUCCESS;
}
4966
//
// Service fatal interrupts reported in NVLTLC RX link _ERR_STATUS_1 for the
// given link.
//
// Like the _0 handler, but additionally reads _ERR_REPORT_INJECT_1: when an
// error was injected (for testing), the InfoROM event logging is skipped so
// injected errors do not pollute the persistent error log.
//
// Returns:
//   -NVL_NOT_FOUND                 - no enabled interrupt pending
//   -NVL_MORE_PROCESSING_REQUIRED  - an unrecognized bit was pending
//   NVL_SUCCESS                    - all pending bits were handled
//
static NvlStatus
_nvswitch_service_nvltlc_rx_lnk_fatal_1_ls10
(
    nvswitch_device *device,
    NvU32 nvlipt_instance,
    NvU32 link
)
{
    NvU32 pending, bit, unhandled;
    NVSWITCH_INTERRUPT_LOG_TYPE report = { 0 };
    INFOROM_NVLINK_ERROR_EVENT error_event = { 0 };
    NvU32 injected;

    report.raw_pending = NVSWITCH_LINK_RD32_LS10(device, link, NVLTLC, _NVLTLC_RX_LNK, _ERR_STATUS_1);
    report.raw_enable = NVSWITCH_LINK_RD32_LS10(device, link, NVLTLC, _NVLTLC_RX_LNK, _ERR_FATAL_REPORT_EN_1);
    report.mask = report.raw_enable;
    pending = report.raw_pending & report.mask;

    error_event.nvliptInstance = (NvU8) nvlipt_instance;
    error_event.localLinkIdx = (NvU8) NVSWITCH_NVLIPT_GET_LOCAL_LINK_ID_LS10(link);

    if (pending == 0)
    {
        return -NVL_NOT_FOUND;
    }

    unhandled = pending;
    report.raw_first = NVSWITCH_LINK_RD32_LS10(device, link, NVLTLC, _NVLTLC_RX_LNK, _ERR_FIRST_1);
    injected = NVSWITCH_LINK_RD32_LS10(device, link, NVLTLC, _NVLTLC_RX_LNK, _ERR_REPORT_INJECT_1);

    bit = DRF_NUM(_NVLTLC_RX_LNK, _ERR_STATUS_1, _RXHDROVFERR, 1);
    if (nvswitch_test_flags(pending, bit))
    {
        NVSWITCH_REPORT_FATAL(_HW_NVLTLC_RXHDROVFERR, "RX HDR OVF Error", NV_FALSE);
        nvswitch_clear_flags(&unhandled, bit);

        // Only log to InfoROM when the error was NOT injected.
        if (FLD_TEST_DRF_NUM(_NVLTLC, _RX_LNK_ERR_REPORT_INJECT_1, _RXHDROVFERR, 0x0, injected))
        {
            error_event.error = INFOROM_NVLINK_TLC_RX_HDR_OVERFLOW_FATAL;
            nvswitch_inforom_nvlink_log_error_event(device, &error_event);
        }
    }

    bit = DRF_NUM(_NVLTLC_RX_LNK, _ERR_STATUS_1, _RXDATAOVFERR, 1);
    if (nvswitch_test_flags(pending, bit))
    {
        NVSWITCH_REPORT_FATAL(_HW_NVLTLC_RXDATAOVFERR, "RX Data OVF Error", NV_FALSE);
        nvswitch_clear_flags(&unhandled, bit);

        if (FLD_TEST_DRF_NUM(_NVLTLC, _RX_LNK_ERR_REPORT_INJECT_1, _RXDATAOVFERR, 0x0, injected))
        {
            error_event.error = INFOROM_NVLINK_TLC_RX_DATA_OVERFLOW_FATAL;
            nvswitch_inforom_nvlink_log_error_event(device, &error_event);
        }
    }

    bit = DRF_NUM(_NVLTLC_RX_LNK, _ERR_STATUS_1, _STOMPDETERR, 1);
    if (nvswitch_test_flags(pending, bit))
    {
        NVSWITCH_REPORT_FATAL(_HW_NVLTLC_STOMPDETERR, "Stomp Det Error", NV_FALSE);
        nvswitch_clear_flags(&unhandled, bit);

        if (FLD_TEST_DRF_NUM(_NVLTLC, _RX_LNK_ERR_REPORT_INJECT_1, _STOMPDETERR, 0x0, injected))
        {
            error_event.error = INFOROM_NVLINK_TLC_RX_STOMP_DETECTED_FATAL;
            nvswitch_inforom_nvlink_log_error_event(device, &error_event);
        }
    }

    // NOTE(review): RXPOISONERR is reported but intentionally(?) not logged
    // as an InfoROM event — confirm.
    bit = DRF_NUM(_NVLTLC_RX_LNK, _ERR_STATUS_1, _RXPOISONERR, 1);
    if (nvswitch_test_flags(pending, bit))
    {
        NVSWITCH_REPORT_FATAL(_HW_NVLTLC_RXPOISONERR, "RX Poison Error", NV_FALSE);
        nvswitch_clear_flags(&unhandled, bit);
    }

    NVSWITCH_UNHANDLED_CHECK(device, unhandled);

    // Disable interrupts that have occurred after fatal error.
    if (device->link[link].fatal_error_occurred)
    {
        NVSWITCH_LINK_WR32_LS10(device, link, NVLTLC, _NVLTLC_RX_LNK, _ERR_FATAL_REPORT_EN_1,
            report.raw_enable & ~pending);
    }

    // Clear the first-error latch, then the serviced status bits (W1C).
    if (report.raw_first & report.mask)
    {
        NVSWITCH_LINK_WR32_LS10(device, link, NVLTLC, _NVLTLC_RX_LNK, _ERR_FIRST_1,
            report.raw_first & report.mask);
    }
    NVSWITCH_LINK_WR32_LS10(device, link, NVLTLC, _NVLTLC_RX_LNK, _ERR_STATUS_1, pending);

    if (unhandled != 0)
    {
        NVSWITCH_PRINT(device, WARN,
            "%s: Unhandled NVLTLC_RX_LNK _1 interrupts, link: %d pending: 0x%x enabled: 0x%x.\n",
            __FUNCTION__, link, pending, report.raw_enable);
        return -NVL_MORE_PROCESSING_REQUIRED;
    }

    return NVL_SUCCESS;
}
5069
//
// Top-level dispatcher for fatal NVLTLC interrupts on one NVLIPT instance.
//
// For every enabled link in intrLinkMask that belongs to this NVLIPT
// instance, runs each of the per-domain NVLTLC fatal handlers (TX/RX, sys and
// link-level).  Links that are in reset or whose NCISOC clock is gated are
// skipped since their NVLTLC registers are inaccessible.
//
// Returns NVL_SUCCESS if at least one handler serviced something;
// -NVL_MORE_PROCESSING_REQUIRED if nothing was serviced; -NVL_BAD_ARGS if the
// mask resolves to no enabled links on this instance.
//
NvlStatus
_nvswitch_service_nvltlc_fatal_ls10
(
    nvswitch_device *device,
    NvU32 nvlipt_instance,
    NvU64 intrLinkMask
)
{
    NvU64 enabledLinkMask, localLinkMask, localIntrLinkMask;
    NvU32 i;
    nvlink_link *link;
    NvlStatus status = -NVL_MORE_PROCESSING_REQUIRED;

    //
    // The passed in interruptLinkMask should contain a link that is part of the
    // given nvlipt instance
    //
    enabledLinkMask = nvswitch_get_enabled_link_mask(device);
    localLinkMask = NVSWITCH_NVLIPT_GET_LOCAL_LINK_MASK64_LS10(nvlipt_instance);
    localIntrLinkMask = localLinkMask & intrLinkMask & enabledLinkMask;

    if (localIntrLinkMask == 0)
    {
        NVSWITCH_PRINT(device, ERROR, "%s: Bad link mask provided for link interrupt servicing!\n", __FUNCTION__);
        NVSWITCH_ASSERT(0);
        return -NVL_BAD_ARGS;
    }

    FOR_EACH_INDEX_IN_MASK(64, i, localIntrLinkMask)
    {
        link = nvswitch_get_link(device, i);
        if (link == NULL)
        {
            // An interrupt on an invalid link should never occur
            NVSWITCH_ASSERT(link != NULL);
            continue;
        }

        if (NVSWITCH_GET_LINK_ENG_INST(device, i, NVLIPT) != nvlipt_instance)
        {
            NVSWITCH_ASSERT(0);
            break;
        }

        //
        // If link is in reset or NCISOC clock is off then
        // don't need to check the link for NVLTLC errors
        // as the IP's registers are off
        //
        if (nvswitch_is_link_in_reset(device, link) ||
            !nvswitch_are_link_clocks_on_ls10(device, link,NVSWITCH_PER_LINK_CLOCK_SET(NCISOCCLK)))
        {
            continue;
        }

        // Run every sub-handler; any single success makes the overall call
        // report success (the others may legitimately find nothing pending).
        if (_nvswitch_service_nvltlc_tx_sys_fatal_ls10(device, nvlipt_instance, i) == NVL_SUCCESS)
        {
            status = NVL_SUCCESS;
        }

        if (_nvswitch_service_nvltlc_rx_sys_fatal_ls10(device, nvlipt_instance, i) == NVL_SUCCESS)
        {
            status = NVL_SUCCESS;
        }

        if (_nvswitch_service_nvltlc_tx_lnk_fatal_0_ls10(device, nvlipt_instance, i) == NVL_SUCCESS)
        {
            status = NVL_SUCCESS;
        }

        if (_nvswitch_service_nvltlc_rx_lnk_fatal_0_ls10(device, nvlipt_instance, i) == NVL_SUCCESS)
        {
            status = NVL_SUCCESS;
        }

        if (_nvswitch_service_nvltlc_rx_lnk_fatal_1_ls10(device, nvlipt_instance, i) == NVL_SUCCESS)
        {
            status = NVL_SUCCESS;
        }
    }
    FOR_EACH_INDEX_IN_MASK_END;

    return status;
}
5154
//
// Service fatal interrupts common to a whole NVLIPT instance (not tied to a
// single link).  Currently only _CLKCTL_ILLEGAL_REQUEST is handled; the mask
// restricts servicing to that one bit.
//
// Returns -NVL_NOT_FOUND when nothing is pending,
// -NVL_MORE_PROCESSING_REQUIRED if an unrecognized bit was pending, and
// NVL_SUCCESS otherwise.
//
static NvlStatus
_nvswitch_service_nvlipt_common_fatal_ls10
(
    nvswitch_device *device,
    NvU32 instance
)
{
    NVSWITCH_INTERRUPT_LOG_TYPE report = { 0 };
    NvU32 pending, bit, contain, unhandled;
    NvU32 link, local_link_idx;
    INFOROM_NVLINK_ERROR_EVENT error_event = { 0 };

    report.raw_pending = NVSWITCH_ENG_RD32(device, NVLIPT, , instance, _NVLIPT_COMMON, _ERR_STATUS_0);
    report.raw_enable = NVSWITCH_ENG_RD32(device, NVLIPT, , instance, _NVLIPT_COMMON, _ERR_FATAL_REPORT_EN_0);
    // Only CLKCTL_ILLEGAL_REQUEST is serviced here.
    report.mask = report.raw_enable & (DRF_NUM(_NVLIPT_COMMON, _ERR_STATUS_0, _CLKCTL_ILLEGAL_REQUEST, 1));

    pending = report.raw_pending & report.mask;
    if (pending == 0)
    {
        return -NVL_NOT_FOUND;
    }

    error_event.nvliptInstance = (NvU8) instance;

    unhandled = pending;
    report.raw_first = NVSWITCH_ENG_RD32(device, NVLIPT, , instance, _NVLIPT_COMMON, _ERR_FIRST_0);
    // "contain" is consumed implicitly by the NVSWITCH_REPORT_CONTAIN macro.
    contain = NVSWITCH_ENG_RD32(device, NVLIPT, , instance, _NVLIPT_COMMON, _ERR_CONTAIN_EN_0);

    bit = DRF_NUM(_NVLIPT_COMMON, _ERR_STATUS_0, _CLKCTL_ILLEGAL_REQUEST, 1);
    if (nvswitch_test_flags(pending, bit))
    {
        // The error is instance-wide: report containment against every valid
        // link served by this NVLIPT instance.
        for (local_link_idx = 0; local_link_idx < NVSWITCH_LINKS_PER_NVLIPT_LS10; local_link_idx++)
        {
            link = (instance * NVSWITCH_LINKS_PER_NVLIPT_LS10) + local_link_idx;
            if (nvswitch_is_link_valid(device, link))
            {
                NVSWITCH_REPORT_CONTAIN(_HW_NVLIPT_CLKCTL_ILLEGAL_REQUEST, "CLKCTL_ILLEGAL_REQUEST", NV_FALSE);
            }
        }

        nvswitch_clear_flags(&unhandled, bit);
        {
            error_event.error = INFOROM_NVLINK_NVLIPT_CLKCTL_ILLEGAL_REQUEST_FATAL;
            nvswitch_inforom_nvlink_log_error_event(device, &error_event);
        }
    }

    NVSWITCH_UNHANDLED_CHECK(device, unhandled);

    // Disable interrupts that have occurred after fatal error.
    // One write suffices for the shared instance register, hence the break.
    for (local_link_idx = 0; local_link_idx < NVSWITCH_LINKS_PER_NVLIPT_LS10; local_link_idx++)
    {
        link = (instance * NVSWITCH_LINKS_PER_NVLIPT_LS10) + local_link_idx;
        if (nvswitch_is_link_valid(device, link) &&
            (device->link[link].fatal_error_occurred))
        {
            NVSWITCH_ENG_WR32(device, NVLIPT, , instance, _NVLIPT_COMMON, _ERR_FATAL_REPORT_EN_0,
                report.raw_enable & ~pending);
            break;
        }
    }

    // clear the interrupts
    if (report.raw_first & report.mask)
    {
        NVSWITCH_ENG_WR32(device, NVLIPT, , instance, _NVLIPT_COMMON, _ERR_FIRST_0,
            report.raw_first & report.mask);
    }
    NVSWITCH_ENG_WR32(device, NVLIPT, , instance, _NVLIPT_COMMON, _ERR_STATUS_0, pending);

    if (unhandled != 0)
    {
        NVSWITCH_PRINT(device, WARN,
            "%s: Unhandled NVLIPT_COMMON FATAL interrupts, pending: 0x%x enabled: 0x%x.\n",
            __FUNCTION__, pending, report.raw_enable);
        return -NVL_MORE_PROCESSING_REQUIRED;
    }

    return NVL_SUCCESS;
}
5235
5236 static NvlStatus
_nvswitch_service_nxbar_tile_ls10(nvswitch_device * device,NvU32 tile)5237 _nvswitch_service_nxbar_tile_ls10
5238 (
5239 nvswitch_device *device,
5240 NvU32 tile
5241 )
5242 {
5243 ls10_device *chip_device = NVSWITCH_GET_CHIP_DEVICE_LS10(device);
5244 NvU32 pending, bit, unhandled;
5245 NVSWITCH_INTERRUPT_LOG_TYPE report = { 0 };
5246 NvU32 link = tile;
5247
5248 report.raw_pending = NVSWITCH_TILE_RD32(device, tile, _NXBAR_TILE, _ERR_STATUS);
5249 report.raw_enable = NVSWITCH_TILE_RD32(device, tile, _NXBAR_TILE, _ERR_FATAL_INTR_EN);
5250 report.mask = chip_device->intr_mask.tile.fatal;
5251 pending = report.raw_pending & report.mask;
5252
5253 if (pending == 0)
5254 {
5255 return -NVL_NOT_FOUND;
5256 }
5257
5258 unhandled = pending;
5259 report.raw_first = NVSWITCH_TILE_RD32(device, tile, _NXBAR_TILE, _ERR_FIRST);
5260
5261 bit = DRF_NUM(_NXBAR_TILE, _ERR_STATUS, _INGRESS_BUFFER_OVERFLOW, 1);
5262 if (nvswitch_test_flags(pending, bit))
5263 {
5264 NVSWITCH_REPORT_FATAL(_HW_NXBAR_TILE_INGRESS_BUFFER_OVERFLOW, "ingress SRC-VC buffer overflow", NV_TRUE);
5265 nvswitch_clear_flags(&unhandled, bit);
5266 }
5267
5268 bit = DRF_NUM(_NXBAR_TILE, _ERR_STATUS, _INGRESS_BUFFER_UNDERFLOW, 1);
5269 if (nvswitch_test_flags(pending, bit))
5270 {
5271 NVSWITCH_REPORT_FATAL(_HW_NXBAR_TILE_INGRESS_BUFFER_UNDERFLOW, "ingress SRC-VC buffer underflow", NV_TRUE);
5272 nvswitch_clear_flags(&unhandled, bit);
5273 }
5274
5275 bit = DRF_NUM(_NXBAR_TILE, _ERR_STATUS, _EGRESS_CREDIT_OVERFLOW, 1);
5276 if (nvswitch_test_flags(pending, bit))
5277 {
5278 NVSWITCH_REPORT_FATAL(_HW_NXBAR_TILE_EGRESS_CREDIT_OVERFLOW, "egress DST-VC credit overflow", NV_TRUE);
5279 nvswitch_clear_flags(&unhandled, bit);
5280 }
5281
5282 bit = DRF_NUM(_NXBAR_TILE, _ERR_STATUS, _EGRESS_CREDIT_UNDERFLOW, 1);
5283 if (nvswitch_test_flags(pending, bit))
5284 {
5285 NVSWITCH_REPORT_FATAL(_HW_NXBAR_TILE_EGRESS_CREDIT_UNDERFLOW, "egress DST-VC credit underflow", NV_TRUE);
5286 nvswitch_clear_flags(&unhandled, bit);
5287 }
5288
5289 bit = DRF_NUM(_NXBAR_TILE, _ERR_STATUS, _INGRESS_NON_BURSTY_PKT, 1);
5290 if (nvswitch_test_flags(pending, bit))
5291 {
5292 NVSWITCH_REPORT_FATAL(_HW_NXBAR_TILE_INGRESS_NON_BURSTY_PKT, "ingress packet burst error", NV_TRUE);
5293 nvswitch_clear_flags(&unhandled, bit);
5294 }
5295
5296 bit = DRF_NUM(_NXBAR_TILE, _ERR_STATUS, _INGRESS_NON_STICKY_PKT, 1);
5297 if (nvswitch_test_flags(pending, bit))
5298 {
5299 NVSWITCH_REPORT_FATAL(_HW_NXBAR_TILE_INGRESS_NON_STICKY_PKT, "ingress packet sticky error", NV_TRUE);
5300 nvswitch_clear_flags(&unhandled, bit);
5301 }
5302
5303 bit = DRF_NUM(_NXBAR_TILE, _ERR_STATUS, _INGRESS_BURST_GT_9_DATA_VC, 1);
5304 if (nvswitch_test_flags(pending, bit))
5305 {
5306 NVSWITCH_REPORT_FATAL(_HW_NXBAR_TILE_INGRESS_BURST_GT_9_DATA_VC, "possible bubbles at ingress", NV_TRUE);
5307 nvswitch_clear_flags(&unhandled, bit);
5308 }
5309
5310 bit = DRF_NUM(_NXBAR_TILE, _ERR_STATUS, _INGRESS_PKT_INVALID_DST, 1);
5311 if (nvswitch_test_flags(pending, bit))
5312 {
5313 NVSWITCH_REPORT_FATAL(_HW_NXBAR_TILE_INGRESS_PKT_INVALID_DST, "ingress packet invalid dst error", NV_TRUE);
5314 nvswitch_clear_flags(&unhandled, bit);
5315 }
5316
5317 bit = DRF_NUM(_NXBAR_TILE, _ERR_STATUS, _INGRESS_PKT_PARITY_ERROR, 1);
5318 if (nvswitch_test_flags(pending, bit))
5319 {
5320 NVSWITCH_REPORT_FATAL(_HW_NXBAR_TILE_INGRESS_PKT_PARITY_ERROR, "ingress packet parity error", NV_TRUE);
5321 nvswitch_clear_flags(&unhandled, bit);
5322 }
5323
5324 if (report.raw_first & report.mask)
5325 {
5326 NVSWITCH_TILE_WR32(device, tile, _NXBAR_TILE, _ERR_FIRST,
5327 report.raw_first & report.mask);
5328 }
5329
5330 NVSWITCH_UNHANDLED_CHECK(device, unhandled);
5331
5332 // Disable interrupts that have occurred after fatal error.
5333 // This helps prevent an interrupt storm if HW keeps triggering unnecessary stream of interrupts.
5334 NVSWITCH_TILE_WR32(device, tile, _NXBAR_TILE, _ERR_FATAL_INTR_EN,
5335 report.raw_enable & ~pending);
5336
5337 NVSWITCH_TILE_WR32(device, link, _NXBAR_TILE, _ERR_STATUS, pending);
5338
5339 if (unhandled != 0)
5340 {
5341 return -NVL_MORE_PROCESSING_REQUIRED;
5342 }
5343
5344 return NVL_SUCCESS;
5345 }
5346
5347 static NvlStatus
_nvswitch_service_nxbar_tileout_ls10(nvswitch_device * device,NvU32 tileout)5348 _nvswitch_service_nxbar_tileout_ls10
5349 (
5350 nvswitch_device *device,
5351 NvU32 tileout
5352 )
5353 {
5354 ls10_device *chip_device = NVSWITCH_GET_CHIP_DEVICE_LS10(device);
5355 NvU32 pending, bit, unhandled;
5356 NVSWITCH_INTERRUPT_LOG_TYPE report = { 0 };
5357 NvU32 link = tileout;
5358
5359 report.raw_pending = NVSWITCH_TILEOUT_RD32(device, tileout, _NXBAR_TILEOUT, _ERR_STATUS);
5360 report.raw_enable = NVSWITCH_TILEOUT_RD32(device, tileout, _NXBAR_TILEOUT, _ERR_FATAL_INTR_EN);
5361 report.mask = chip_device->intr_mask.tileout.fatal;
5362 pending = report.raw_pending & report.mask;
5363
5364 if (pending == 0)
5365 {
5366 return -NVL_NOT_FOUND;
5367 }
5368
5369 unhandled = pending;
5370 report.raw_first = NVSWITCH_TILEOUT_RD32(device, tileout, _NXBAR_TILEOUT, _ERR_FIRST);
5371
5372 bit = DRF_NUM(_NXBAR_TILEOUT, _ERR_STATUS, _INGRESS_BUFFER_OVERFLOW, 1);
5373 if (nvswitch_test_flags(pending, bit))
5374 {
5375 NVSWITCH_REPORT_FATAL(_HW_NXBAR_TILEOUT_INGRESS_BUFFER_OVERFLOW, "ingress SRC-VC buffer overflow", NV_TRUE);
5376 nvswitch_clear_flags(&unhandled, bit);
5377 }
5378
5379 bit = DRF_NUM(_NXBAR_TILEOUT, _ERR_STATUS, _INGRESS_BUFFER_UNDERFLOW, 1);
5380 if (nvswitch_test_flags(pending, bit))
5381 {
5382 NVSWITCH_REPORT_FATAL(_HW_NXBAR_TILEOUT_INGRESS_BUFFER_UNDERFLOW, "ingress SRC-VC buffer underflow", NV_TRUE);
5383 nvswitch_clear_flags(&unhandled, bit);
5384 }
5385
5386 bit = DRF_NUM(_NXBAR_TILEOUT, _ERR_STATUS, _EGRESS_CREDIT_OVERFLOW, 1);
5387 if (nvswitch_test_flags(pending, bit))
5388 {
5389 NVSWITCH_REPORT_FATAL(_HW_NXBAR_TILEOUT_EGRESS_CREDIT_OVERFLOW, "egress DST-VC credit overflow", NV_TRUE);
5390 nvswitch_clear_flags(&unhandled, bit);
5391 }
5392
5393 bit = DRF_NUM(_NXBAR_TILEOUT, _ERR_STATUS, _EGRESS_CREDIT_UNDERFLOW, 1);
5394 if (nvswitch_test_flags(pending, bit))
5395 {
5396 NVSWITCH_REPORT_FATAL(_HW_NXBAR_TILEOUT_EGRESS_CREDIT_UNDERFLOW, "egress DST-VC credit underflow", NV_TRUE);
5397 nvswitch_clear_flags(&unhandled, bit);
5398 }
5399
5400 bit = DRF_NUM(_NXBAR_TILEOUT, _ERR_STATUS, _INGRESS_NON_BURSTY_PKT, 1);
5401 if (nvswitch_test_flags(pending, bit))
5402 {
5403 NVSWITCH_REPORT_FATAL(_HW_NXBAR_TILEOUT_INGRESS_NON_BURSTY_PKT, "ingress packet burst error", NV_TRUE);
5404 nvswitch_clear_flags(&unhandled, bit);
5405 }
5406
5407 bit = DRF_NUM(_NXBAR_TILEOUT, _ERR_STATUS, _INGRESS_NON_STICKY_PKT, 1);
5408 if (nvswitch_test_flags(pending, bit))
5409 {
5410 NVSWITCH_REPORT_FATAL(_HW_NXBAR_TILEOUT_INGRESS_NON_STICKY_PKT, "ingress packet sticky error", NV_TRUE);
5411 nvswitch_clear_flags(&unhandled, bit);
5412 }
5413
5414 bit = DRF_NUM(_NXBAR_TILEOUT, _ERR_STATUS, _INGRESS_BURST_GT_9_DATA_VC, 1);
5415 if (nvswitch_test_flags(pending, bit))
5416 {
5417 NVSWITCH_REPORT_FATAL(_HW_NXBAR_TILEOUT_INGRESS_BURST_GT_9_DATA_VC, "possible bubbles at ingress", NV_TRUE);
5418 nvswitch_clear_flags(&unhandled, bit);
5419 }
5420
5421 bit = DRF_NUM(_NXBAR_TILEOUT, _ERR_STATUS, _EGRESS_CDT_PARITY_ERROR, 1);
5422 if (nvswitch_test_flags(pending, bit))
5423 {
5424 NVSWITCH_REPORT_FATAL(_HW_NXBAR_TILEOUT_EGRESS_CDT_PARITY_ERROR, "ingress credit parity error", NV_TRUE);
5425 nvswitch_clear_flags(&unhandled, bit);
5426 }
5427
5428 if (report.raw_first & report.mask)
5429 {
5430 NVSWITCH_TILEOUT_WR32(device, tileout, _NXBAR_TILEOUT, _ERR_FIRST,
5431 report.raw_first & report.mask);
5432 }
5433
5434 NVSWITCH_UNHANDLED_CHECK(device, unhandled);
5435
5436 // Disable interrupts that have occurred after fatal error.
5437 // This helps prevent an interrupt storm if HW keeps triggering unnecessary stream of interrupts.
5438 NVSWITCH_TILEOUT_WR32(device, tileout, _NXBAR_TILEOUT, _ERR_FATAL_INTR_EN,
5439 report.raw_enable & ~pending);
5440
5441 NVSWITCH_TILEOUT_WR32(device, tileout, _NXBAR_TILEOUT, _ERR_STATUS, pending);
5442
5443 if (unhandled != 0)
5444 {
5445 return -NVL_MORE_PROCESSING_REQUIRED;
5446 }
5447
5448 return NVL_SUCCESS;
5449 }
5450
//
// Dispatch fatal NXBAR interrupts for one NXBAR (TC) instance.
//
// Reads the per-TC error status and fans out to the TILE and TILEOUT
// sub-handlers for every sub-unit whose bit is set.  A bit is considered
// handled only when its sub-handler returns NVL_SUCCESS.
//
// Returns -NVL_NOT_FOUND when nothing is pending,
// -NVL_MORE_PROCESSING_REQUIRED if any pending bit remained unhandled, and
// NVL_SUCCESS otherwise.
//
static NvlStatus
_nvswitch_service_nxbar_fatal_ls10
(
    nvswitch_device *device,
    NvU32 nxbar
)
{
    NvU32 pending, bit, unhandled;
    NvU32 tile_idx;
    NvU32 tile, tileout;

    pending = NVSWITCH_ENG_RD32(device, NXBAR, , nxbar, _NXBAR, _TCP_ERROR_STATUS);
    if (pending == 0)
    {
        return -NVL_NOT_FOUND;
    }

    unhandled = pending;

    for (tile = 0; tile < NUM_NXBAR_TILES_PER_TC_LS10; tile++)
    {
        // _TILE0 is the LSB of the per-tile bit range; shift selects tile N.
        bit = DRF_NUM(_NXBAR, _TCP_ERROR_STATUS, _TILE0, 1) << tile;
        if (nvswitch_test_flags(pending, bit))
        {
            tile_idx = TILE_INDEX_LS10(device, nxbar, tile);
            if (NVSWITCH_ENG_VALID_LS10(device, TILE, tile_idx))
            {
                if (_nvswitch_service_nxbar_tile_ls10(device, tile_idx) == NVL_SUCCESS)
                {
                    nvswitch_clear_flags(&unhandled, bit);
                }
            }
        }
    }

    for (tileout = 0; tileout < NUM_NXBAR_TILEOUTS_PER_TC_LS10; tileout++)
    {
        bit = DRF_NUM(_NXBAR, _TCP_ERROR_STATUS, _TILEOUT0, 1) << tileout;
        if (nvswitch_test_flags(pending, bit))
        {
            // NOTE(review): TILE_INDEX_LS10 (not a TILEOUT-specific macro) is
            // used to compute the tileout index — confirm the index spaces
            // are identical.
            tile_idx = TILE_INDEX_LS10(device, nxbar, tileout);
            if (NVSWITCH_ENG_VALID_LS10(device, TILEOUT, tile_idx))
            {
                if (_nvswitch_service_nxbar_tileout_ls10(device, tile_idx) == NVL_SUCCESS)
                {
                    nvswitch_clear_flags(&unhandled, bit);
                }
            }
        }
    }

    // TODO: Perform hot_reset to recover NXBAR

    NVSWITCH_UNHANDLED_CHECK(device, unhandled);


    if (unhandled != 0)
    {
        return -NVL_MORE_PROCESSING_REQUIRED;
    }

    return NVL_SUCCESS;
}
5514
5515 static void
_nvswitch_emit_link_errors_nvldl_fatal_link_ls10(nvswitch_device * device,NvU32 nvlipt_instance,NvU32 link)5516 _nvswitch_emit_link_errors_nvldl_fatal_link_ls10
5517 (
5518 nvswitch_device *device,
5519 NvU32 nvlipt_instance,
5520 NvU32 link
5521 )
5522 {
5523 ls10_device *chip_device = NVSWITCH_GET_CHIP_DEVICE_LS10(device);
5524 NVSWITCH_INTERRUPT_LOG_TYPE report = { 0 };
5525 NvU32 pending, bit;
5526 INFOROM_NVLINK_ERROR_EVENT error_event;
5527
5528 // Only enabled link errors are deffered
5529 pending = chip_device->deferredLinkErrors[link].data.fatalIntrMask.dl;
5530 report.raw_pending = pending;
5531 report.raw_enable = pending;
5532 report.mask = report.raw_enable;
5533
5534 error_event.nvliptInstance = (NvU8) nvlipt_instance;
5535 error_event.localLinkIdx = (NvU8) NVSWITCH_NVLIPT_GET_LOCAL_LINK_ID_LS10(link);
5536
5537 bit = DRF_NUM(_NVLDL_TOP, _INTR, _LTSSM_FAULT_UP, 1);
5538 if (nvswitch_test_flags(pending, bit))
5539 {
5540 NVSWITCH_REPORT_FATAL(_HW_DLPL_LTSSM_FAULT_UP, "LTSSM Fault Up", NV_FALSE);
5541 error_event.error = INFOROM_NVLINK_DL_LTSSM_FAULT_UP_FATAL;
5542 nvswitch_inforom_nvlink_log_error_event(device, &error_event);
5543 }
5544
5545 bit = DRF_NUM(_NVLDL_TOP, _INTR, _LTSSM_FAULT_DOWN, 1);
5546 if (nvswitch_test_flags(pending, bit))
5547 {
5548 NVSWITCH_REPORT_FATAL(_HW_DLPL_LTSSM_FAULT_DOWN, "LTSSM Fault Down", NV_FALSE);
5549 }
5550 }
5551
5552 static void
_nvswitch_dump_minion_ali_debug_registers_ls10(nvswitch_device * device,NvU32 link)5553 _nvswitch_dump_minion_ali_debug_registers_ls10
5554 (
5555 nvswitch_device *device,
5556 NvU32 link
5557 )
5558 {
5559 NVSWITCH_MINION_ALI_DEBUG_REGISTERS params;
5560 nvlink_link *nvlink = nvswitch_get_link(device, link);
5561
5562 if ((nvlink != NULL) &&
5563 (nvswitch_minion_get_ali_debug_registers_ls10(device, nvlink, ¶ms) == NVL_SUCCESS))
5564 {
5565 NVSWITCH_PRINT(device, ERROR,
5566 "%s: Minion error on link #%d!:\n"
5567 "Minion DLSTAT MN00 = 0x%x\n"
5568 "Minion DLSTAT UC01 = 0x%x\n"
5569 "Minion DLSTAT UC01 = 0x%x\n",
5570 __FUNCTION__, link,
5571 params.dlstatMn00, params.dlstatUc01, params.dlstatLinkIntr);
5572 }
5573 }
5574
//
// Emit a previously deferred fatal MINION link interrupt for the given link,
// then disable the corresponding MINION stall interrupt (fatal errors are
// one-shot) and dump the MINION ALI debug registers.
//
// No-op when no MINION link interrupt was cached as pending.
//
static void
_nvswitch_emit_link_errors_minion_fatal_ls10
(
    nvswitch_device *device,
    NvU32 nvlipt_instance,
    NvU32 link
)
{
    ls10_device *chip_device = NVSWITCH_GET_CHIP_DEVICE_LS10(device);
    NVSWITCH_INTERRUPT_LOG_TYPE report = { 0 };
    NvU32 regData;
    NvU32 enabledLinks;
    NvU32 localLinkIdx = NVSWITCH_NVLIPT_GET_LOCAL_LINK_ID_LS10(link);
    // Per-MINION interrupt-enable bit for this link.
    NvU32 bit = BIT(localLinkIdx);

    if (!chip_device->deferredLinkErrors[link].data.fatalIntrMask.minionLinkIntr.bPending)
    {
        return;
    }

    // Grab the cached interrupt data
    regData = chip_device->deferredLinkErrors[link].data.fatalIntrMask.minionLinkIntr.regData;

    // get all possible interrupting links associated with this minion
    report.raw_enable = link;
    report.raw_pending = report.raw_enable;
    report.mask = report.raw_enable;
    report.data[0] = regData;

    // Decode the MINION interrupt code from the cached register value.
    switch(DRF_VAL(_MINION, _NVLINK_LINK_INTR, _CODE, regData))
    {
        case NV_MINION_NVLINK_LINK_INTR_CODE_NA:
            NVSWITCH_REPORT_FATAL(_HW_MINION_FATAL_LINK_INTR, "Minion Link NA interrupt", NV_FALSE);
            break;
        case NV_MINION_NVLINK_LINK_INTR_CODE_DLCMDFAULT:
            NVSWITCH_REPORT_FATAL(_HW_MINION_FATAL_LINK_INTR, "Minion Link DLCMDFAULT interrupt", NV_FALSE);
            break;
        case NV_MINION_NVLINK_LINK_INTR_CODE_NOINIT:
            NVSWITCH_REPORT_FATAL(_HW_MINION_FATAL_LINK_INTR, "Minion Link NOINIT interrupt", NV_FALSE);
            break;
        case NV_MINION_NVLINK_LINK_INTR_CODE_LOCAL_CONFIG_ERR:
            NVSWITCH_REPORT_FATAL(_HW_MINION_FATAL_LINK_INTR, "Minion Link Local-Config-Error interrupt", NV_FALSE);
            break;
        case NV_MINION_NVLINK_LINK_INTR_CODE_NEGOTIATION_CONFIG_ERR:
            NVSWITCH_REPORT_FATAL(_HW_MINION_FATAL_LINK_INTR, "Minion Link Negotiation Config Err Interrupt", NV_FALSE);
            break;
        case NV_MINION_NVLINK_LINK_INTR_CODE_BADINIT:
            NVSWITCH_REPORT_FATAL(_HW_MINION_FATAL_LINK_INTR, "Minion Link BADINIT interrupt", NV_FALSE);
            break;
        case NV_MINION_NVLINK_LINK_INTR_CODE_PMFAIL:
            NVSWITCH_REPORT_FATAL(_HW_MINION_FATAL_LINK_INTR, "Minion Link PMFAIL interrupt", NV_FALSE);
            break;
        default:
            NVSWITCH_REPORT_FATAL(_HW_MINION_FATAL_LINK_INTR, "Minion Interrupt code unknown", NV_FALSE);
    }

    // Fatal error was hit so disable the interrupt
    regData = NVSWITCH_MINION_RD32_LS10(device, nvlipt_instance, _MINION, _MINION_INTR_STALL_EN);
    enabledLinks = DRF_VAL(_MINION, _MINION_INTR_STALL_EN, _LINK, regData);
    enabledLinks &= ~bit;
    regData = DRF_NUM(_MINION, _MINION_INTR_STALL_EN, _LINK, enabledLinks);
    NVSWITCH_MINION_LINK_WR32_LS10(device, link, _MINION, _MINION_INTR_STALL_EN, regData);

    _nvswitch_dump_minion_ali_debug_registers_ls10(device, link);
}
5640
//
// Emit a previously deferred non-fatal MINION link interrupt for the given
// link, then dump the MINION ALI debug registers.  Unlike the fatal variant,
// the MINION stall interrupt is left enabled.
//
// No-op when no MINION link interrupt was cached as pending.
//
static void
_nvswitch_emit_link_errors_minion_nonfatal_ls10
(
    nvswitch_device *device,
    NvU32 nvlipt_instance,
    NvU32 link
)
{
    ls10_device *chip_device = NVSWITCH_GET_CHIP_DEVICE_LS10(device);
    NVSWITCH_INTERRUPT_LOG_TYPE report = { 0 };
    NvU32 regData;
    NvU32 localLinkIdx = NVSWITCH_NVLIPT_GET_LOCAL_LINK_ID_LS10(link);
    NvU32 bit = BIT(localLinkIdx);

    if (!chip_device->deferredLinkErrors[link].data.nonFatalIntrMask.minionLinkIntr.bPending)
    {
        return;
    }

    // read in the enabled minion interrupts on this minion
    // NOTE(review): this read's result is immediately overwritten by the
    // cached value below — dead read unless the register access itself has a
    // required side effect; confirm.
    regData = NVSWITCH_MINION_RD32_LS10(device, nvlipt_instance, _MINION, _MINION_INTR_STALL_EN);

    // Grab the cached interrupt data
    regData = chip_device->deferredLinkErrors[link].data.nonFatalIntrMask.minionLinkIntr.regData;

    // get all possible interrupting links associated with this minion
    report.raw_enable = link;
    report.raw_pending = report.raw_enable;
    report.mask = report.raw_enable;
    report.data[0] = regData;

    // Decode the MINION interrupt code from the cached register value.
    // DLCMDFAULT is escalated to a fatal report even in this non-fatal path.
    switch(DRF_VAL(_MINION, _NVLINK_LINK_INTR, _CODE, regData))
    {
        case NV_MINION_NVLINK_LINK_INTR_CODE_DLREQ:
            NVSWITCH_REPORT_NONFATAL(_HW_MINION_NONFATAL, "Minion Link DLREQ interrupt");
            break;
        case NV_MINION_NVLINK_LINK_INTR_CODE_PMDISABLED:
            NVSWITCH_REPORT_NONFATAL(_HW_MINION_NONFATAL, "Minion Link PMDISABLED interrupt");
            break;
        case NV_MINION_NVLINK_LINK_INTR_CODE_DLCMDFAULT:
            NVSWITCH_REPORT_FATAL(_HW_MINION_FATAL_LINK_INTR, "Minion Link DLCMDFAULT interrupt", NV_FALSE);
            break;
        case NV_MINION_NVLINK_LINK_INTR_CODE_TLREQ:
            NVSWITCH_REPORT_NONFATAL(_HW_MINION_NONFATAL, "Minion Link TLREQ interrupt");
            break;
    }

    _nvswitch_dump_minion_ali_debug_registers_ls10(device, link);
}
5690
//
// Emit NVLDL (DL-layer) nonfatal errors that were cached ("deferred") for
// this link rather than reported when the interrupt originally fired.
//
static void
_nvswitch_emit_link_errors_nvldl_nonfatal_link_ls10
(
    nvswitch_device *device,
    NvU32 link
)
{
    ls10_device *chip_device = NVSWITCH_GET_CHIP_DEVICE_LS10(device);
    NVSWITCH_INTERRUPT_LOG_TYPE report = { 0 };
    NvU32 pending, bit, reg;

    // Only enabled link errors are deferred, so the cached mask serves as
    // both the pending and enable sets for reporting purposes.
    pending = chip_device->deferredLinkErrors[link].data.nonFatalIntrMask.dl;
    report.raw_pending = pending;
    report.raw_enable = pending;
    report.mask = report.raw_enable;

    bit = DRF_NUM(_NVLDL_TOP, _INTR, _RX_SHORT_ERROR_RATE, 1);
    if (nvswitch_test_flags(pending, bit))
    {
        // Disable further interrupts
        nvlink_link *nvlink = nvswitch_get_link(device, link);
        if (nvlink == NULL)
        {
            // If we get here, it is a bug. Disable interrupt and assert.
            reg = NVSWITCH_LINK_RD32_LS10(device, link, NVLDL, _NVLDL_TOP, _INTR_NONSTALL_EN);
            reg = FLD_SET_DRF_NUM(_NVLDL_TOP, _INTR_NONSTALL_EN, _RX_SHORT_ERROR_RATE, 0, reg);
            NVSWITCH_LINK_WR32_LS10(device, link, NVLDL, _NVLDL_TOP, _INTR_NONSTALL_EN, reg);
            NVSWITCH_ASSERT(nvlink != NULL);
        }
        else
        {
            // Latch that the error-rate threshold interrupt fired, then mask
            // it via the common configuration helper before reporting.
            nvlink->errorThreshold.bInterruptTrigerred = NV_TRUE;
            nvswitch_configure_error_rate_threshold_interrupt_ls10(nvlink, NV_FALSE);
            NVSWITCH_REPORT_NONFATAL(_HW_DLPL_RX_SHORT_ERROR_RATE, "RX Short Error Rate");
        }
    }

    bit = DRF_NUM(_NVLDL_TOP, _INTR, _RX_CRC_COUNTER, 1);
    if (nvswitch_test_flags(pending, bit))
    {
        NVSWITCH_REPORT_NONFATAL(_HW_DLPL_RX_CRC_COUNTER, "RX CRC Error Rate");
    }
}
5735
//
// Emit deferred NVLTLC RX link nonfatal (_ERR_STATUS_1) errors for this link
// from the cached interrupt state.
//
static void
_nvswitch_emit_link_errors_nvltlc_rx_lnk_nonfatal_1_ls10
(
    nvswitch_device *device,
    NvU32 nvlipt_instance,
    NvU32 link
)
{
    ls10_device *chip_device = NVSWITCH_GET_CHIP_DEVICE_LS10(device);
    NVSWITCH_INTERRUPT_LOG_TYPE report = { 0 };
    NvU32 pending, bit;
    INFOROM_NVLINK_ERROR_EVENT error_event;
    NvU32 injected;

    // Only enabled link errors are deferred; the cached mask doubles as the
    // enable mask for reporting.
    pending = chip_device->deferredLinkErrors[link].data.nonFatalIntrMask.tlcRx1;
    injected = chip_device->deferredLinkErrors[link].data.nonFatalIntrMask.tlcRx1Injected;
    report.raw_pending = pending;
    report.raw_enable = pending;
    report.mask = report.raw_enable;

    error_event.nvliptInstance = (NvU8) nvlipt_instance;
    error_event.localLinkIdx = (NvU8) NVSWITCH_NVLIPT_GET_LOCAL_LINK_ID_LS10(link);


    bit = DRF_NUM(_NVLTLC_RX_LNK, _ERR_STATUS_1, _HEARTBEAT_TIMEOUT_ERR, 1);
    if (nvswitch_test_flags(pending, bit))
    {
        NVSWITCH_REPORT_NONFATAL(_HW_NVLTLC_RX_LNK_AN1_HEARTBEAT_TIMEOUT_ERR, "AN1 Heartbeat Timeout Error");

        // Log to InfoROM only when the error was not SW-injected (the cached
        // injection register bit reads 0).
        if (FLD_TEST_DRF_NUM(_NVLTLC_RX_LNK, _ERR_REPORT_INJECT_1, _HEARTBEAT_TIMEOUT_ERR, 0x0, injected))
        {
            error_event.error = INFOROM_NVLINK_TLC_RX_AN1_HEARTBEAT_TIMEOUT_NONFATAL;
            nvswitch_inforom_nvlink_log_error_event(device, &error_event);
        }
    }
}
5773
//
// Emit deferred NVLIPT link nonfatal (_ERR_STATUS_0) errors for this link
// from the cached interrupt state.
//
static void
_nvswitch_emit_link_errors_nvlipt_lnk_nonfatal_ls10
(
    nvswitch_device *device,
    NvU32 nvlipt_instance,
    NvU32 link
)
{
    ls10_device *chip_device = NVSWITCH_GET_CHIP_DEVICE_LS10(device);
    NVSWITCH_INTERRUPT_LOG_TYPE report = { 0 };
    NvU32 pending, bit;
    INFOROM_NVLINK_ERROR_EVENT error_event;

    // Only enabled link errors are deferred; the cached mask doubles as the
    // enable mask for reporting.
    pending = chip_device->deferredLinkErrors[link].data.nonFatalIntrMask.liptLnk;
    report.raw_pending = pending;
    report.raw_enable = pending;
    report.mask = report.raw_enable;

    error_event.nvliptInstance = (NvU8) nvlipt_instance;
    error_event.localLinkIdx = (NvU8) NVSWITCH_NVLIPT_GET_LOCAL_LINK_ID_LS10(link);

    bit = DRF_NUM(_NVLIPT_LNK, _ERR_STATUS_0, _FAILEDMINIONREQUEST, 1);
    if (nvswitch_test_flags(pending, bit))
    {
        NVSWITCH_REPORT_NONFATAL(_HW_NVLIPT_LNK_FAILEDMINIONREQUEST, "_FAILEDMINIONREQUEST");

        // Log the failed MINION request to the InfoROM as well.
        {
            error_event.error = INFOROM_NVLINK_NVLIPT_FAILED_MINION_REQUEST_NONFATAL;
            nvswitch_inforom_nvlink_log_error_event(device, &error_event);
        }
    }
}
5807
//
// Emit every class of deferred link error (NVLDL fatal/nonfatal, NVLTLC,
// NVLIPT, MINION fatal/nonfatal) cached for this link. Called once it is
// known the errors should no longer be held back (e.g. the link did not
// retrain away the fault within the deferral window).
//
static void
_nvswitch_emit_deferred_link_errors_ls10
(
    nvswitch_device *device,
    NvU32 nvlipt_instance,
    NvU32 link
)
{
    _nvswitch_emit_link_errors_nvldl_fatal_link_ls10(device, nvlipt_instance, link);
    _nvswitch_emit_link_errors_nvldl_nonfatal_link_ls10(device, link);
    _nvswitch_emit_link_errors_nvltlc_rx_lnk_nonfatal_1_ls10(device, nvlipt_instance, link);
    _nvswitch_emit_link_errors_nvlipt_lnk_nonfatal_ls10(device, nvlipt_instance, link);
    _nvswitch_emit_link_errors_minion_fatal_ls10(device, nvlipt_instance, link);
    _nvswitch_emit_link_errors_minion_nonfatal_ls10(device, nvlipt_instance, link);

}
5824
5825 static void
_nvswitch_clear_deferred_link_errors_ls10(nvswitch_device * device,NvU32 link)5826 _nvswitch_clear_deferred_link_errors_ls10
5827 (
5828 nvswitch_device *device,
5829 NvU32 link
5830 )
5831 {
5832 ls10_device *chip_device = NVSWITCH_GET_CHIP_DEVICE_LS10(device);
5833 NVLINK_LINK_ERROR_REPORTING_DATA *pLinkErrorsData;
5834
5835 pLinkErrorsData = &chip_device->deferredLinkErrors[link].data;
5836
5837 nvswitch_os_memset(pLinkErrorsData, 0, sizeof(NVLINK_LINK_ERROR_REPORTING_DATA));
5838 }
5839
//
// One-shot task callback: decide whether the deferred errors for a link
// should now be emitted, or whether the check should be re-deferred.
// Frees the heap-allocated fn_args before any early return.
//
static void
_nvswitch_deferred_link_state_check_ls10
(
    nvswitch_device *device,
    void *fn_args
)
{
    NVSWITCH_DEFERRED_ERROR_REPORTING_ARGS *pErrorReportParams =
        (NVSWITCH_DEFERRED_ERROR_REPORTING_ARGS*)fn_args;
    NvU32 nvlipt_instance = pErrorReportParams->nvlipt_instance;
    NvU32 link = pErrorReportParams->link;
    ls10_device *chip_device;
    NvU64 lastLinkUpTime;
    NvU64 lastRetrainTime;
    NvU64 current_time = nvswitch_os_get_platform_time();
    NvBool bRedeferLinkStateCheck;

    chip_device = NVSWITCH_GET_CHIP_DEVICE_LS10(device);
    lastLinkUpTime = chip_device->deferredLinkErrors[link].state.lastLinkUpTime;
    lastRetrainTime = chip_device->deferredLinkErrors[link].state.lastRetrainTime;
    // Sanity Check
    NVSWITCH_ASSERT(nvswitch_is_link_valid(device, link));

    // The args were heap-allocated by the task creator; free them now so no
    // early-return path below leaks them.
    nvswitch_os_free(pErrorReportParams);
    pErrorReportParams = NULL;
    // This callback has fired; allow a new one to be scheduled.
    chip_device->deferredLinkErrors[link].state.bLinkStateCallBackEnabled = NV_FALSE;
    bRedeferLinkStateCheck = NV_FALSE;

    // Ask CCI if link state check should be further deferred
    if (cciIsLinkManaged(device, link) && !cciReportLinkErrors(device, link))
    {
        bRedeferLinkStateCheck = NV_TRUE;
    }

    // Link came up after last retrain
    // NOTE(review): this early return also drops a CCI-requested redefer;
    // presumably intentional since the link is healthy again — confirm.
    if (lastLinkUpTime >= lastRetrainTime)
    {
        return;
    }

    //
    // If the last time this link was up was before the last
    // reset_and_drain execution and not enough time has past since the last
    // retrain then schedule another callback.
    //
    if (lastLinkUpTime < lastRetrainTime)
    {
        if ((current_time - lastRetrainTime) < NVSWITCH_DEFERRED_LINK_STATE_CHECK_INTERVAL_NS)
        {
            bRedeferLinkStateCheck = NV_TRUE;
        }
    }

    if (bRedeferLinkStateCheck)
    {
        nvswitch_create_deferred_link_state_check_task_ls10(device, nvlipt_instance, link);
        return;
    }

    //
    // Otherwise, the link hasn't retrained within the timeout so emit the
    // deferred errors.
    //
    _nvswitch_emit_deferred_link_errors_ls10(device, nvlipt_instance, link);
    _nvswitch_clear_deferred_link_errors_ls10(device, link);
}
5906
5907 void
nvswitch_create_deferred_link_state_check_task_ls10(nvswitch_device * device,NvU32 nvlipt_instance,NvU32 link)5908 nvswitch_create_deferred_link_state_check_task_ls10
5909 (
5910 nvswitch_device *device,
5911 NvU32 nvlipt_instance,
5912 NvU32 link
5913 )
5914 {
5915 ls10_device *chip_device = NVSWITCH_GET_CHIP_DEVICE_LS10(device);
5916 NVSWITCH_DEFERRED_ERROR_REPORTING_ARGS *pErrorReportParams;
5917 NvlStatus status;
5918
5919 if (chip_device->deferredLinkErrors[link].state.bLinkStateCallBackEnabled)
5920 {
5921 return;
5922 }
5923
5924 status = NVL_ERR_GENERIC;
5925 pErrorReportParams = nvswitch_os_malloc(sizeof(NVSWITCH_DEFERRED_ERROR_REPORTING_ARGS));
5926 if(pErrorReportParams != NULL)
5927 {
5928 pErrorReportParams->nvlipt_instance = nvlipt_instance;
5929 pErrorReportParams->link = link;
5930
5931 status = nvswitch_task_create_args(device, (void*)pErrorReportParams,
5932 &_nvswitch_deferred_link_state_check_ls10,
5933 NVSWITCH_DEFERRED_LINK_STATE_CHECK_INTERVAL_NS,
5934 NVSWITCH_TASK_TYPE_FLAGS_RUN_ONCE |
5935 NVSWITCH_TASK_TYPE_FLAGS_VOID_PTR_ARGS);
5936 }
5937
5938 if (status == NVL_SUCCESS)
5939 {
5940 chip_device->deferredLinkErrors[link].state.bLinkStateCallBackEnabled = NV_TRUE;
5941 }
5942 else
5943 {
5944 NVSWITCH_PRINT(device, ERROR,
5945 "%s: Failed to allocate memory. Cannot defer link state check.\n",
5946 __FUNCTION__);
5947 _nvswitch_emit_deferred_link_errors_ls10(device, nvlipt_instance, link);
5948 _nvswitch_clear_deferred_link_errors_ls10(device, link);
5949 nvswitch_os_free(pErrorReportParams);
5950 }
5951 }
5952
//
// One-shot task callback: emit deferred link errors unless a link fault
// (LTSSM FAULT_UP/FAULT_DOWN) was observed, in which case reset_and_drain
// already ran and a separate link state check task was scheduled.
// Frees the heap-allocated fn_args on every path.
//
static void
_nvswitch_deferred_link_errors_check_ls10
(
    nvswitch_device *device,
    void *fn_args
)
{
    NVSWITCH_DEFERRED_ERROR_REPORTING_ARGS *pErrorReportParams =
        (NVSWITCH_DEFERRED_ERROR_REPORTING_ARGS*)fn_args;
    NvU32 nvlipt_instance = pErrorReportParams->nvlipt_instance;
    NvU32 link = pErrorReportParams->link;
    ls10_device *chip_device;
    NvU32 pending;

    // Free the task args first so the early returns below cannot leak them.
    nvswitch_os_free(pErrorReportParams);
    pErrorReportParams = NULL;

    chip_device = NVSWITCH_GET_CHIP_DEVICE_LS10(device);
    // This callback has fired; allow a new one to be scheduled.
    chip_device->deferredLinkErrors[link].state.bLinkErrorsCallBackEnabled = NV_FALSE;

    pending = chip_device->deferredLinkErrors[link].data.fatalIntrMask.dl;

    // A link fault was observed which means we also did the retrain and
    // scheduled a state check task. We can exit.
    if (FLD_TEST_DRF_NUM(_NVLDL_TOP, _INTR, _LTSSM_FAULT_UP, 1U, pending))
        return;

    if (FLD_TEST_DRF_NUM(_NVLDL_TOP, _INTR, _LTSSM_FAULT_DOWN, 1U, pending))
        return;

    //
    // No link fault, emit the deferred errors.
    // It is assumed that this callback runs long before a link could have been
    // retrained and hit errors again.
    //
    _nvswitch_emit_deferred_link_errors_ls10(device, nvlipt_instance, link);
    _nvswitch_clear_deferred_link_errors_ls10(device, link);
}
5991
//
// Schedule a one-shot deferred link errors check task for the given link.
// No-op if a callback is already pending. On scheduling failure (allocation
// or task creation), the deferred errors are emitted and cleared immediately
// so no error information is lost.
//
static void
_nvswitch_create_deferred_link_errors_task_ls10
(
    nvswitch_device *device,
    NvU32 nvlipt_instance,
    NvU32 link
)
{
    ls10_device *chip_device = NVSWITCH_GET_CHIP_DEVICE_LS10(device);
    NVSWITCH_DEFERRED_ERROR_REPORTING_ARGS *pErrorReportParams;
    NvlStatus status;

    // An error check callback is already scheduled for this link.
    if (chip_device->deferredLinkErrors[link].state.bLinkErrorsCallBackEnabled)
    {
        return;
    }

    status = NVL_ERR_GENERIC;
    pErrorReportParams = nvswitch_os_malloc(sizeof(NVSWITCH_DEFERRED_ERROR_REPORTING_ARGS));
    if(pErrorReportParams != NULL)
    {
        pErrorReportParams->nvlipt_instance = nvlipt_instance;
        pErrorReportParams->link = link;

        // On success the callback owns and frees pErrorReportParams.
        status = nvswitch_task_create_args(device, (void*)pErrorReportParams,
                                           &_nvswitch_deferred_link_errors_check_ls10,
                                           NVSWITCH_DEFERRED_FAULT_UP_CHECK_INTERVAL_NS,
                                           NVSWITCH_TASK_TYPE_FLAGS_RUN_ONCE |
                                           NVSWITCH_TASK_TYPE_FLAGS_VOID_PTR_ARGS);
    }

    if (status == NVL_SUCCESS)
    {
        chip_device->deferredLinkErrors[link].state.bLinkErrorsCallBackEnabled = NV_TRUE;
    }
    else
    {
        // status may reflect either a failed allocation or task creation.
        NVSWITCH_PRINT(device, ERROR,
                       "%s: Failed to create task. Cannot defer link error check.\n",
                       __FUNCTION__);
        _nvswitch_emit_deferred_link_errors_ls10(device, nvlipt_instance, link);
        _nvswitch_clear_deferred_link_errors_ls10(device, link);
        // free(NULL) is a no-op, so this is safe on the malloc-failure path.
        nvswitch_os_free(pErrorReportParams);
    }
}
6037
//
// Service pending NVLDL nonfatal interrupts for a single link.
// RX_SHORT_ERROR_RATE and RX_CRC_COUNTER are cached and deferred rather than
// reported inline; other bits are reported immediately.
//
// Returns NVL_SUCCESS, -NVL_NOT_FOUND if nothing was pending, or
// -NVL_MORE_PROCESSING_REQUIRED if unhandled bits remain.
//
static NvlStatus
_nvswitch_service_nvldl_nonfatal_link_ls10
(
    nvswitch_device *device,
    NvU32 nvlipt_instance,
    NvU32 link
)
{
    ls10_device *chip_device = NVSWITCH_GET_CHIP_DEVICE_LS10(device);
    NvU32 pending, bit, unhandled;
    NVSWITCH_INTERRUPT_LOG_TYPE report = { 0 };

    report.raw_pending = NVSWITCH_LINK_RD32(device, link, NVLDL, _NVLDL_TOP, _INTR);
    report.raw_enable = NVSWITCH_LINK_RD32(device, link, NVLDL, _NVLDL_TOP, _INTR_NONSTALL_EN);
    report.mask = report.raw_enable;
    pending = report.raw_pending & report.mask;

    if (pending == 0)
    {
        return -NVL_NOT_FOUND;
    }

    unhandled = pending;

    bit = DRF_NUM(_NVLDL_TOP, _INTR, _TX_REPLAY, 1);
    if (nvswitch_test_flags(pending, bit))
    {
        NVSWITCH_REPORT_NONFATAL(_HW_DLPL_TX_REPLAY, "TX Replay Error");
        nvswitch_clear_flags(&unhandled, bit);
    }

    bit = DRF_NUM(_NVLDL_TOP, _INTR, _TX_RECOVERY_SHORT, 1);
    if (nvswitch_test_flags(pending, bit))
    {
        NVSWITCH_REPORT_NONFATAL(_HW_DLPL_TX_RECOVERY_SHORT, "TX Recovery Short");
        nvswitch_clear_flags(&unhandled, bit);
    }

    bit = DRF_NUM(_NVLDL_TOP, _INTR, _RX_SHORT_ERROR_RATE, 1);
    if (nvswitch_test_flags(pending, bit))
    {
        // Defer: cache the bit and schedule the deferred error check task.
        chip_device->deferredLinkErrors[link].data.nonFatalIntrMask.dl |= bit;
        _nvswitch_create_deferred_link_errors_task_ls10(device, nvlipt_instance, link);
        nvswitch_clear_flags(&unhandled, bit);
    }

    bit = DRF_NUM(_NVLDL_TOP, _INTR, _RX_LONG_ERROR_RATE, 1);
    if (nvswitch_test_flags(pending, bit))
    {
        NVSWITCH_REPORT_NONFATAL(_HW_DLPL_RX_LONG_ERROR_RATE, "RX Long Error Rate");
        nvswitch_clear_flags(&unhandled, bit);
    }

    bit = DRF_NUM(_NVLDL_TOP, _INTR, _RX_ILA_TRIGGER, 1);
    if (nvswitch_test_flags(pending, bit))
    {
        NVSWITCH_REPORT_NONFATAL(_HW_DLPL_RX_ILA_TRIGGER, "RX ILA Trigger");
        nvswitch_clear_flags(&unhandled, bit);
    }

    bit = DRF_NUM(_NVLDL_TOP, _INTR, _RX_CRC_COUNTER, 1);
    if (nvswitch_test_flags(pending, bit))
    {
        // Defer: cache the bit and schedule the deferred error check task.
        chip_device->deferredLinkErrors[link].data.nonFatalIntrMask.dl |= bit;
        _nvswitch_create_deferred_link_errors_task_ls10(device, nvlipt_instance, link);
        nvswitch_clear_flags(&unhandled, bit);

        //
        // Mask CRC counter after first occurrence - otherwise, this interrupt
        // will continue to fire once the CRC counter has hit the threshold
        // See Bug 3341528
        //
        report.raw_enable = report.raw_enable & (~bit);
        NVSWITCH_LINK_WR32(device, link, NVLDL, _NVLDL_TOP, _INTR_NONSTALL_EN,
            report.raw_enable);
    }

    NVSWITCH_UNHANDLED_CHECK(device, unhandled);

    // W1C-clear the serviced interrupt bits.
    NVSWITCH_LINK_WR32(device, link, NVLDL, _NVLDL_TOP, _INTR, pending);

    if (unhandled != 0)
    {
        NVSWITCH_PRINT(device, WARN,
                "%s: Unhandled NVLDL nonfatal interrupts, link: %d pending: 0x%x enabled: 0x%x.\n",
                 __FUNCTION__, link, pending, report.raw_enable);
        return -NVL_MORE_PROCESSING_REQUIRED;
    }

    return NVL_SUCCESS;
}
6130
//
// Service NVLDL nonfatal interrupts for all interrupting links of the given
// NVLIPT instance. Links in reset or without RX/TX clocks are skipped.
//
// NOTE(review): return_status starts at -NVL_NOT_FOUND and is only updated
// on failure, so an all-successful pass still returns -NVL_NOT_FOUND —
// presumably intentional aggregation semantics; confirm against callers.
//
static NvlStatus
_nvswitch_service_nvldl_nonfatal_ls10
(
    nvswitch_device *device,
    NvU32 nvlipt_instance,
    NvU64 intrLinkMask
)
{
    NvU64 localLinkMask, enabledLinkMask, localIntrLinkMask;
    NvU32 i;
    nvlink_link *link;
    NvlStatus status;
    NvlStatus return_status = -NVL_NOT_FOUND;
    NvU32 clocksMask = NVSWITCH_PER_LINK_CLOCK_SET(RXCLK) | NVSWITCH_PER_LINK_CLOCK_SET(TXCLK);

    //
    // The passed in interruptLinkMask should contain a link that is part of the
    // given nvlipt instance
    //
    localLinkMask = NVSWITCH_NVLIPT_GET_LOCAL_LINK_MASK64_LS10(nvlipt_instance);
    enabledLinkMask = nvswitch_get_enabled_link_mask(device);
    localIntrLinkMask = localLinkMask & intrLinkMask & enabledLinkMask;

    if (localIntrLinkMask == 0)
    {
        NVSWITCH_PRINT(device, ERROR, "%s: Bad link mask provided for link interrupt servicing!\n", __FUNCTION__);
        NVSWITCH_ASSERT(0);
        return -NVL_BAD_ARGS;
    }

    FOR_EACH_INDEX_IN_MASK(64, i, localIntrLinkMask)
    {
        link = nvswitch_get_link(device, i);
        if (link == NULL)
        {
            // An interrupt on an invalid link should never occur
            NVSWITCH_ASSERT(link != NULL);
            continue;
        }

        // Every link in the mask must belong to this NVLIPT instance.
        if (NVSWITCH_GET_LINK_ENG_INST(device, i, NVLIPT) != nvlipt_instance)
        {
            NVSWITCH_ASSERT(0);
            break;
        }

        // Skip links whose registers are not accessible (in reset / clocks off).
        if (nvswitch_is_link_in_reset(device, link) ||
            !nvswitch_are_link_clocks_on_ls10(device, link, clocksMask))
        {
            continue;
        }

        status = _nvswitch_service_nvldl_nonfatal_link_ls10(device, nvlipt_instance, i);
        if (status != NVL_SUCCESS)
        {
            return_status = status;
        }
    }
    FOR_EACH_INDEX_IN_MASK_END;

    return return_status;
}
6193
6194 static NvlStatus
_nvswitch_service_nvltlc_rx_lnk_nonfatal_0_ls10(nvswitch_device * device,NvU32 nvlipt_instance,NvU32 link)6195 _nvswitch_service_nvltlc_rx_lnk_nonfatal_0_ls10
6196 (
6197 nvswitch_device *device,
6198 NvU32 nvlipt_instance,
6199 NvU32 link
6200 )
6201 {
6202 NvU32 pending, bit, unhandled;
6203 NVSWITCH_INTERRUPT_LOG_TYPE report = { 0 };
6204 INFOROM_NVLINK_ERROR_EVENT error_event;
6205
6206 report.raw_pending = NVSWITCH_LINK_RD32(device, link, NVLTLC, _NVLTLC_RX_LNK, _ERR_STATUS_0);
6207 report.raw_enable = NVSWITCH_LINK_RD32(device, link, NVLTLC, _NVLTLC_RX_LNK, _ERR_NON_FATAL_REPORT_EN_0);
6208 report.mask = report.raw_enable;
6209
6210 error_event.nvliptInstance = (NvU8) nvlipt_instance;
6211 error_event.localLinkIdx = (NvU8) NVSWITCH_NVLIPT_GET_LOCAL_LINK_ID_LS10(link);
6212
6213 pending = report.raw_pending & report.mask;
6214 if (pending == 0)
6215 {
6216 return -NVL_NOT_FOUND;
6217 }
6218
6219 unhandled = pending;
6220 report.raw_first = NVSWITCH_LINK_RD32(device, link, NVLTLC, _NVLTLC_RX_LNK, _ERR_FIRST_0);
6221
6222 bit = DRF_NUM(_NVLTLC_RX_LNK, _ERR_STATUS_0, _RXRSPSTATUS_PRIV_ERR, 1);
6223 if (nvswitch_test_flags(pending, bit))
6224 {
6225 NVSWITCH_REPORT_NONFATAL(_HW_NVLTLC_RX_LNK_RXRSPSTATUS_PRIV_ERR, "RX Rsp Status PRIV Error");
6226 nvswitch_clear_flags(&unhandled, bit);
6227 {
6228 error_event.error = INFOROM_NVLINK_TLC_RX_RSP_STATUS_PRIV_ERR_NONFATAL;
6229 nvswitch_inforom_nvlink_log_error_event(device, &error_event);
6230 }
6231 }
6232
6233 NVSWITCH_UNHANDLED_CHECK(device, unhandled);
6234
6235 if (report.raw_first & report.mask)
6236 {
6237 NVSWITCH_LINK_WR32(device, link, NVLTLC, _NVLTLC_RX_LNK, _ERR_FIRST_0,
6238 report.raw_first & report.mask);
6239 }
6240 NVSWITCH_LINK_WR32(device, link, NVLTLC, _NVLTLC_RX_LNK, _ERR_STATUS_0, pending);
6241
6242 if (unhandled != 0)
6243 {
6244 NVSWITCH_PRINT(device, WARN,
6245 "%s: Unhandled NVLTLC_RX_LNK _0 interrupts, link: %d pending: 0x%x enabled: 0x%x.\n",
6246 __FUNCTION__, link, pending, report.raw_enable);
6247 return -NVL_MORE_PROCESSING_REQUIRED;
6248 }
6249
6250 return NVL_SUCCESS;
6251 }
6252
//
// Service pending NVLTLC TX link nonfatal (_ERR_STATUS_0) interrupts for a
// single link: RAM ECC DBE and ECC-limit errors. DBE errors for CREQ/COM are
// also logged to the InfoROM; ECC-limit errors are reported only.
//
// Returns NVL_SUCCESS, -NVL_NOT_FOUND if nothing was pending, or
// -NVL_MORE_PROCESSING_REQUIRED if unhandled bits remain.
//
static NvlStatus
_nvswitch_service_nvltlc_tx_lnk_nonfatal_0_ls10
(
    nvswitch_device *device,
    NvU32 nvlipt_instance,
    NvU32 link
)
{
    NvU32 pending, bit, unhandled;
    NVSWITCH_INTERRUPT_LOG_TYPE report = { 0 };
    INFOROM_NVLINK_ERROR_EVENT error_event;

    report.raw_pending = NVSWITCH_LINK_RD32(device, link, NVLTLC, _NVLTLC_TX_LNK, _ERR_STATUS_0);
    report.raw_enable = NVSWITCH_LINK_RD32(device, link, NVLTLC, _NVLTLC_TX_LNK, _ERR_NON_FATAL_REPORT_EN_0);
    report.mask = report.raw_enable;
    pending = report.raw_pending & report.mask;

    error_event.nvliptInstance = (NvU8) nvlipt_instance;
    error_event.localLinkIdx = (NvU8) NVSWITCH_NVLIPT_GET_LOCAL_LINK_ID_LS10(link);

    if (pending == 0)
    {
        return -NVL_NOT_FOUND;
    }

    unhandled = pending;
    report.raw_first = NVSWITCH_LINK_RD32(device, link, NVLTLC, _NVLTLC_TX_LNK, _ERR_FIRST_0);

    bit = DRF_NUM(_NVLTLC_TX_LNK, _ERR_STATUS_0, _CREQ_RAM_DAT_ECC_DBE_ERR, 1);
    if (nvswitch_test_flags(pending, bit))
    {
        NVSWITCH_REPORT_NONFATAL(_HW_NVLTLC_TX_LNK_CREQ_RAM_DAT_ECC_DBE_ERR, "CREQ RAM DAT ECC DBE Error");
        nvswitch_clear_flags(&unhandled, bit);
        {
            // TODO 3014908 log these in the NVL object until we have ECC object support
            error_event.error = INFOROM_NVLINK_TLC_TX_CREQ_DAT_RAM_ECC_DBE_NONFATAL;
            nvswitch_inforom_nvlink_log_error_event(device, &error_event);
        }
    }

    bit = DRF_NUM(_NVLTLC_TX_LNK, _ERR_STATUS_0, _CREQ_RAM_ECC_LIMIT_ERR, 1);
    if (nvswitch_test_flags(pending, bit))
    {
        NVSWITCH_REPORT_NONFATAL(_HW_NVLTLC_TX_LNK_CREQ_RAM_ECC_LIMIT_ERR, "CREQ RAM DAT ECC Limit Error");
        nvswitch_clear_flags(&unhandled, bit);
    }

    bit = DRF_NUM(_NVLTLC_TX_LNK, _ERR_STATUS_0, _RSP_RAM_DAT_ECC_DBE_ERR, 1);
    if (nvswitch_test_flags(pending, bit))
    {
        NVSWITCH_REPORT_NONFATAL(_HW_NVLTLC_TX_LNK_RSP_RAM_DAT_ECC_DBE_ERR, "Response RAM DAT ECC DBE Error");
        nvswitch_clear_flags(&unhandled, bit);
    }

    bit = DRF_NUM(_NVLTLC_TX_LNK, _ERR_STATUS_0, _RSP_RAM_ECC_LIMIT_ERR, 1);
    if (nvswitch_test_flags(pending, bit))
    {
        NVSWITCH_REPORT_NONFATAL(_HW_NVLTLC_TX_LNK_RSP_RAM_ECC_LIMIT_ERR, "Response RAM ECC Limit Error");
        nvswitch_clear_flags(&unhandled, bit);
    }

    bit = DRF_NUM(_NVLTLC_TX_LNK, _ERR_STATUS_0, _COM_RAM_DAT_ECC_DBE_ERR, 1);
    if (nvswitch_test_flags(pending, bit))
    {
        NVSWITCH_REPORT_NONFATAL(_HW_NVLTLC_TX_LNK_COM_RAM_DAT_ECC_DBE_ERR, "COM RAM DAT ECC DBE Error");
        nvswitch_clear_flags(&unhandled, bit);
        {
            // TODO 3014908 log these in the NVL object until we have ECC object support
            error_event.error = INFOROM_NVLINK_TLC_TX_COM_DAT_RAM_ECC_DBE_NONFATAL;
            nvswitch_inforom_nvlink_log_error_event(device, &error_event);
        }
    }

    bit = DRF_NUM(_NVLTLC_TX_LNK, _ERR_STATUS_0, _COM_RAM_ECC_LIMIT_ERR, 1);
    if (nvswitch_test_flags(pending, bit))
    {
        NVSWITCH_REPORT_NONFATAL(_HW_NVLTLC_TX_LNK_COM_RAM_ECC_LIMIT_ERR, "COM RAM ECC Limit Error");
        nvswitch_clear_flags(&unhandled, bit);
    }

    bit = DRF_NUM(_NVLTLC_TX_LNK, _ERR_STATUS_0, _RSP1_RAM_ECC_LIMIT_ERR, 1);
    if (nvswitch_test_flags(pending, bit))
    {
        NVSWITCH_REPORT_NONFATAL(_HW_NVLTLC_TX_LNK_RSP1_RAM_ECC_LIMIT_ERR, "RSP1 RAM ECC Limit Error");
        nvswitch_clear_flags(&unhandled, bit);
    }

    NVSWITCH_UNHANDLED_CHECK(device, unhandled);

    // Disable interrupts that have occurred after fatal error.
    if (device->link[link].fatal_error_occurred)
    {
        NVSWITCH_LINK_WR32(device, link, NVLTLC, _NVLTLC_TX_LNK, _ERR_NON_FATAL_REPORT_EN_0,
                report.raw_enable & ~pending);
    }

    // Clear the "first error" latch for the bits we serviced.
    if (report.raw_first & report.mask)
    {
        NVSWITCH_LINK_WR32(device, link, NVLTLC, _NVLTLC_TX_LNK, _ERR_FIRST_0,
                report.raw_first & report.mask);
    }
    // W1C-clear the serviced status bits.
    NVSWITCH_LINK_WR32(device, link, NVLTLC, _NVLTLC_TX_LNK, _ERR_STATUS_0, pending);

    if (unhandled != 0)
    {
        NVSWITCH_PRINT(device, WARN,
                "%s: Unhandled NVLTLC_TX_LNK _0 interrupts, link: %d pending: 0x%x enabled: 0x%x.\n",
                 __FUNCTION__, link, pending, report.raw_enable);
        return -NVL_MORE_PROCESSING_REQUIRED;
    }

    return NVL_SUCCESS;
}
6366
//
// Service pending NVLTLC RX link nonfatal (_ERR_STATUS_1) interrupts for a
// single link. Heartbeat timeout (HBTO) errors are cached and deferred, and
// non-injected HBTO is masked to avoid an interrupt storm during
// reset_and_drain (WAR for Bug 200627368).
//
// Returns NVL_SUCCESS, -NVL_NOT_FOUND if nothing was pending, or
// -NVL_MORE_PROCESSING_REQUIRED if unhandled bits remain.
//
static NvlStatus
_nvswitch_service_nvltlc_rx_lnk_nonfatal_1_ls10
(
    nvswitch_device *device,
    NvU32 nvlipt_instance,
    NvU32 link
)
{
    ls10_device *chip_device = NVSWITCH_GET_CHIP_DEVICE_LS10(device);
    NvU32 pending, bit, unhandled;
    NVSWITCH_INTERRUPT_LOG_TYPE report = { 0 };
    NvU32 injected;

    report.raw_pending = NVSWITCH_LINK_RD32(device, link, NVLTLC, _NVLTLC_RX_LNK, _ERR_STATUS_1);
    report.raw_enable = NVSWITCH_LINK_RD32(device, link, NVLTLC, _NVLTLC_RX_LNK, _ERR_NON_FATAL_REPORT_EN_1);
    report.mask = report.raw_enable;
    pending = report.raw_pending & report.mask;

    if (pending == 0)
    {
        return -NVL_NOT_FOUND;
    }

    unhandled = pending;
    report.raw_first = NVSWITCH_LINK_RD32(device, link, NVLTLC, _NVLTLC_RX_LNK, _ERR_FIRST_1);
    injected = NVSWITCH_LINK_RD32(device, link, NVLTLC, _NVLTLC_RX_LNK, _ERR_REPORT_INJECT_1);

    bit = DRF_NUM(_NVLTLC_RX_LNK, _ERR_STATUS_1, _HEARTBEAT_TIMEOUT_ERR, 1);
    if (nvswitch_test_flags(pending, bit))
    {
        // Defer: cache the bit plus the injection state, then schedule the
        // deferred error check task.
        chip_device->deferredLinkErrors[link].data.nonFatalIntrMask.tlcRx1 |= bit;
        chip_device->deferredLinkErrors[link].data.nonFatalIntrMask.tlcRx1Injected |= injected;
        _nvswitch_create_deferred_link_errors_task_ls10(device, nvlipt_instance, link);

        if (FLD_TEST_DRF_NUM(_NVLTLC_RX_LNK, _ERR_REPORT_INJECT_1, _HEARTBEAT_TIMEOUT_ERR, 0x0, injected))
        {
            //
            // WAR Bug 200627368: Mask off HBTO to avoid a storm
            // During the start of reset_and_drain, all links on the GPU
            // will go into contain, causing HBTO on other switch links connected
            // to that GPU. For the switch side, these interrupts are not fatal,
            // but until we get to reset_and_drain for this link, HBTO will continue
            // to fire repeatedly. After reset_and_drain, HBTO will be re-enabled
            // by MINION after links are trained.
            //
            report.raw_enable = report.raw_enable & (~bit);
            NVSWITCH_LINK_WR32(device, link, NVLTLC, _NVLTLC_RX_LNK, _ERR_NON_FATAL_REPORT_EN_1,
                report.raw_enable);
        }
        nvswitch_clear_flags(&unhandled, bit);
    }

    NVSWITCH_UNHANDLED_CHECK(device, unhandled);

    // Disable interrupts that have occurred after fatal error.
    if (device->link[link].fatal_error_occurred)
    {
        NVSWITCH_LINK_WR32(device, link, NVLTLC, _NVLTLC_RX_LNK, _ERR_NON_FATAL_REPORT_EN_1,
                report.raw_enable & (~pending));
    }

    // Clear the "first error" latch for the bits we serviced.
    if (report.raw_first & report.mask)
    {
        NVSWITCH_LINK_WR32(device, link, NVLTLC, _NVLTLC_RX_LNK, _ERR_FIRST_1,
                report.raw_first & report.mask);
    }
    // W1C-clear the serviced status bits.
    NVSWITCH_LINK_WR32(device, link, NVLTLC, _NVLTLC_RX_LNK, _ERR_STATUS_1, pending);

    if (unhandled != 0)
    {
        NVSWITCH_PRINT(device, WARN,
                "%s: Unhandled NVLTLC_RX_LNK _1 interrupts, link: %d pending: 0x%x enabled: 0x%x.\n",
                 __FUNCTION__, link, pending, report.raw_enable);
        return -NVL_MORE_PROCESSING_REQUIRED;
    }

    return NVL_SUCCESS;
}
6445
//
// Service pending NVLTLC TX link nonfatal (_ERR_STATUS_1) interrupts for a
// single link: AN1 timeouts on VC0..VC7, each reported and logged to the
// InfoROM. The eight branches cannot be folded into a loop because the
// register field and error-code names are formed by macro token pasting.
//
// Returns NVL_SUCCESS, -NVL_NOT_FOUND if nothing was pending, or
// -NVL_MORE_PROCESSING_REQUIRED if unhandled bits remain.
//
static NvlStatus
_nvswitch_service_nvltlc_tx_lnk_nonfatal_1_ls10
(
    nvswitch_device *device,
    NvU32 nvlipt_instance,
    NvU32 link
)
{
    NvU32 pending, bit, unhandled;
    NVSWITCH_INTERRUPT_LOG_TYPE report = { 0 };
    INFOROM_NVLINK_ERROR_EVENT error_event = { 0 };

    report.raw_pending = NVSWITCH_LINK_RD32(device, link, NVLTLC, _NVLTLC_TX_LNK, _ERR_STATUS_1);
    report.raw_enable = NVSWITCH_LINK_RD32(device, link, NVLTLC, _NVLTLC_TX_LNK, _ERR_NON_FATAL_REPORT_EN_1);
    report.mask = report.raw_enable;
    pending = report.raw_pending & report.mask;

    error_event.nvliptInstance = (NvU8) nvlipt_instance;
    error_event.localLinkIdx = (NvU8) NVSWITCH_NVLIPT_GET_LOCAL_LINK_ID_LS10(link);

    if (pending == 0)
    {
        return -NVL_NOT_FOUND;
    }

    unhandled = pending;
    report.raw_first = NVSWITCH_LINK_RD32(device, link, NVLTLC, _NVLTLC_TX_LNK, _ERR_FIRST_1);

    bit = DRF_NUM(_NVLTLC_TX_LNK, _ERR_STATUS_1, _TIMEOUT_VC0, 1);
    if (nvswitch_test_flags(pending, bit))
    {
        NVSWITCH_REPORT_NONFATAL(_HW_NVLTLC_TX_LNK_AN1_TIMEOUT_VC0, "AN1 Timeout VC0");
        nvswitch_clear_flags(&unhandled, bit);
        {
            error_event.error = INFOROM_NVLINK_TLC_TX_AN1_TIMEOUT_VC0_NONFATAL;
            nvswitch_inforom_nvlink_log_error_event(device, &error_event);
        }
    }

    bit = DRF_NUM(_NVLTLC_TX_LNK, _ERR_STATUS_1, _TIMEOUT_VC1, 1);
    if (nvswitch_test_flags(pending, bit))
    {
        NVSWITCH_REPORT_NONFATAL(_HW_NVLTLC_TX_LNK_AN1_TIMEOUT_VC1, "AN1 Timeout VC1");
        nvswitch_clear_flags(&unhandled, bit);
        {
            error_event.error = INFOROM_NVLINK_TLC_TX_AN1_TIMEOUT_VC1_NONFATAL;
            nvswitch_inforom_nvlink_log_error_event(device, &error_event);
        }
    }

    bit = DRF_NUM(_NVLTLC_TX_LNK, _ERR_STATUS_1, _TIMEOUT_VC2, 1);
    if (nvswitch_test_flags(pending, bit))
    {
        NVSWITCH_REPORT_NONFATAL(_HW_NVLTLC_TX_LNK_AN1_TIMEOUT_VC2, "AN1 Timeout VC2");
        nvswitch_clear_flags(&unhandled, bit);
        {
            error_event.error = INFOROM_NVLINK_TLC_TX_AN1_TIMEOUT_VC2_NONFATAL;
            nvswitch_inforom_nvlink_log_error_event(device, &error_event);
        }
    }

    bit = DRF_NUM(_NVLTLC_TX_LNK, _ERR_STATUS_1, _TIMEOUT_VC3, 1);
    if (nvswitch_test_flags(pending, bit))
    {
        NVSWITCH_REPORT_NONFATAL(_HW_NVLTLC_TX_LNK_AN1_TIMEOUT_VC3, "AN1 Timeout VC3");
        nvswitch_clear_flags(&unhandled, bit);
        {
            error_event.error = INFOROM_NVLINK_TLC_TX_AN1_TIMEOUT_VC3_NONFATAL;
            nvswitch_inforom_nvlink_log_error_event(device, &error_event);
        }
    }

    bit = DRF_NUM(_NVLTLC_TX_LNK, _ERR_STATUS_1, _TIMEOUT_VC4, 1);
    if (nvswitch_test_flags(pending, bit))
    {
        NVSWITCH_REPORT_NONFATAL(_HW_NVLTLC_TX_LNK_AN1_TIMEOUT_VC4, "AN1 Timeout VC4");
        nvswitch_clear_flags(&unhandled, bit);
        {
            error_event.error = INFOROM_NVLINK_TLC_TX_AN1_TIMEOUT_VC4_NONFATAL;
            nvswitch_inforom_nvlink_log_error_event(device, &error_event);
        }
    }

    bit = DRF_NUM(_NVLTLC_TX_LNK, _ERR_STATUS_1, _TIMEOUT_VC5, 1);
    if (nvswitch_test_flags(pending, bit))
    {
        NVSWITCH_REPORT_NONFATAL(_HW_NVLTLC_TX_LNK_AN1_TIMEOUT_VC5, "AN1 Timeout VC5");
        nvswitch_clear_flags(&unhandled, bit);
        {
            error_event.error = INFOROM_NVLINK_TLC_TX_AN1_TIMEOUT_VC5_NONFATAL;
            nvswitch_inforom_nvlink_log_error_event(device, &error_event);
        }
    }

    bit = DRF_NUM(_NVLTLC_TX_LNK, _ERR_STATUS_1, _TIMEOUT_VC6, 1);
    if (nvswitch_test_flags(pending, bit))
    {
        NVSWITCH_REPORT_NONFATAL(_HW_NVLTLC_TX_LNK_AN1_TIMEOUT_VC6, "AN1 Timeout VC6");
        nvswitch_clear_flags(&unhandled, bit);
        {
            error_event.error = INFOROM_NVLINK_TLC_TX_AN1_TIMEOUT_VC6_NONFATAL;
            nvswitch_inforom_nvlink_log_error_event(device, &error_event);
        }
    }

    bit = DRF_NUM(_NVLTLC_TX_LNK, _ERR_STATUS_1, _TIMEOUT_VC7, 1);
    if (nvswitch_test_flags(pending, bit))
    {
        NVSWITCH_REPORT_NONFATAL(_HW_NVLTLC_TX_LNK_AN1_TIMEOUT_VC7, "AN1 Timeout VC7");
        nvswitch_clear_flags(&unhandled, bit);
        {
            error_event.error = INFOROM_NVLINK_TLC_TX_AN1_TIMEOUT_VC7_NONFATAL;
            nvswitch_inforom_nvlink_log_error_event(device, &error_event);
        }
    }

    NVSWITCH_UNHANDLED_CHECK(device, unhandled);

    // Disable interrupts that have occurred after fatal error.
    if (device->link[link].fatal_error_occurred)
    {
        NVSWITCH_LINK_WR32(device, link, NVLTLC, _NVLTLC_TX_LNK, _ERR_NON_FATAL_REPORT_EN_1,
                report.raw_enable & ~pending);
    }

    // Clear the "first error" latch for the bits we serviced.
    if (report.raw_first & report.mask)
    {
        NVSWITCH_LINK_WR32(device, link, NVLTLC, _NVLTLC_TX_LNK, _ERR_FIRST_1,
                report.raw_first & report.mask);
    }
    // W1C-clear the serviced status bits.
    NVSWITCH_LINK_WR32(device, link, NVLTLC, _NVLTLC_TX_LNK, _ERR_STATUS_1, pending);

    if (unhandled != 0)
    {
        NVSWITCH_PRINT(device, WARN,
                "%s: Unhandled NVLTLC_TX_LNK _1 interrupts, link: %d pending: 0x%x enabled: 0x%x.\n",
                 __FUNCTION__, link, pending, report.raw_enable);
        return -NVL_MORE_PROCESSING_REQUIRED;
    }

    return NVL_SUCCESS;
}
6588
//
// Service non-fatal NVLTLC interrupts for every interrupting link of the
// given NVLIPT instance.
//
// intrLinkMask is a device-global link mask; it is reduced to the links
// that are local to this NVLIPT instance and currently enabled. Links in
// reset or with the NCISOC clock gated are skipped because the NVLTLC
// registers are inaccessible in those states.
//
// @param[in] device          nvswitch device
// @param[in] nvlipt_instance NVLIPT instance whose links are serviced
// @param[in] intrLinkMask    global mask of links with pending interrupts
//
// @return NVL_SUCCESS if every serviced per-link handler succeeded;
//         -NVL_BAD_ARGS if the reduced link mask is empty;
//         otherwise the status of the last failing per-link handler.
//
static NvlStatus
_nvswitch_service_nvltlc_nonfatal_ls10
(
    nvswitch_device *device,
    NvU32 nvlipt_instance,
    NvU64 intrLinkMask
)
{
    NvU64 localLinkMask, enabledLinkMask, localIntrLinkMask;
    NvU32 i;
    nvlink_link *link;
    NvlStatus status;
    NvlStatus return_status = NVL_SUCCESS;

    localLinkMask = NVSWITCH_NVLIPT_GET_LOCAL_LINK_MASK64_LS10(nvlipt_instance);
    enabledLinkMask = nvswitch_get_enabled_link_mask(device);
    localIntrLinkMask = localLinkMask & intrLinkMask & enabledLinkMask;

    if (localIntrLinkMask == 0)
    {
        NVSWITCH_PRINT(device, ERROR, "%s: Bad link mask provided for link interrupt servicing!\n", __FUNCTION__);
        NVSWITCH_ASSERT(0);
        return -NVL_BAD_ARGS;
    }

    FOR_EACH_INDEX_IN_MASK(64, i, localIntrLinkMask)
    {
        link = nvswitch_get_link(device, i);
        if (link == NULL)
        {
            // An interrupt on an invalid link should never occur
            NVSWITCH_ASSERT(link != NULL);
            continue;
        }

        // Every link in the reduced mask must belong to this NVLIPT instance.
        if (NVSWITCH_GET_LINK_ENG_INST(device, i, NVLIPT) != nvlipt_instance)
        {
            NVSWITCH_ASSERT(0);
            break;
        }

        //
        // If link is in reset or NCISOC clock is off then
        // don't need to check the link for NVLTLC errors
        // as the IP's registers are off
        //
        if (nvswitch_is_link_in_reset(device, link) ||
            !nvswitch_are_link_clocks_on_ls10(device, link, NVSWITCH_PER_LINK_CLOCK_SET(NCISOCCLK)))
        {
            continue;
        }

        // Service RX/TX non-fatal status registers 0 and 1. Remember the
        // last failure but keep servicing the remaining handlers and links.
        status = _nvswitch_service_nvltlc_rx_lnk_nonfatal_0_ls10(device, nvlipt_instance, i);
        if (status != NVL_SUCCESS)
        {
            return_status = status;
        }

        status = _nvswitch_service_nvltlc_tx_lnk_nonfatal_0_ls10(device, nvlipt_instance, i);
        if (status != NVL_SUCCESS)
        {
            return_status = status;
        }

        status = _nvswitch_service_nvltlc_rx_lnk_nonfatal_1_ls10(device, nvlipt_instance, i);
        if (status != NVL_SUCCESS)
        {
            return_status = status;
        }

        status = _nvswitch_service_nvltlc_tx_lnk_nonfatal_1_ls10(device, nvlipt_instance, i);
        if (status != NVL_SUCCESS)
        {
            return_status = status;
        }
    }
    FOR_EACH_INDEX_IN_MASK_END;

    return return_status;
}
6669
6670 static NvlStatus
_nvswitch_service_nvlipt_lnk_status_ls10(nvswitch_device * device,NvU32 nvlipt_instance,NvU32 link_id)6671 _nvswitch_service_nvlipt_lnk_status_ls10
6672 (
6673 nvswitch_device *device,
6674 NvU32 nvlipt_instance,
6675 NvU32 link_id
6676 )
6677 {
6678 NvU32 pending, enabled, unhandled, bit;
6679 NvU64 mode;
6680 nvlink_link *link;
6681 ls10_device *chip_device;
6682
6683 link = nvswitch_get_link(device, link_id);
6684 chip_device = NVSWITCH_GET_CHIP_DEVICE_LS10(device);
6685
6686 if (link == NULL)
6687 {
6688 return -NVL_BAD_ARGS;
6689 }
6690
6691 pending = NVSWITCH_LINK_RD32(device, link_id, NVLIPT_LNK, _NVLIPT_LNK, _INTR_STATUS);
6692 enabled = NVSWITCH_LINK_RD32(device, link_id, NVLIPT_LNK, _NVLIPT_LNK, _INTR_INT1_EN);
6693 pending &= enabled;
6694 unhandled = pending;
6695
6696 bit = DRF_NUM(_NVLIPT_LNK, _INTR_STATUS, _LINKSTATEREQUESTREADYSET, 1);
6697 if (nvswitch_test_flags(pending, bit))
6698 {
6699 link = nvswitch_get_link(device, link_id);
6700 if (link == NULL)
6701 {
6702 // If we get here, it's a bug. Assert, then let callers detect unhandled IRQ.
6703 NVSWITCH_ASSERT(link != NULL);
6704 }
6705
6706 nvswitch_clear_flags(&unhandled, bit);
6707 if(nvswitch_corelib_get_dl_link_mode_ls10(link, &mode) != NVL_SUCCESS)
6708 {
6709 NVSWITCH_PRINT(device, ERROR, "%s: nvlipt_lnk_status: Failed to check link mode! LinkId %d\n",
6710 __FUNCTION__, link_id);
6711 }
6712 else if(mode == NVLINK_LINKSTATE_HS)
6713 {
6714 NVSWITCH_PRINT(device, INFO, "%s: nvlipt_lnk_status: Link is up!. LinkId %d\n",
6715 __FUNCTION__, link_id);
6716
6717 //
6718 // When a link comes up ensure that we finish off the post-training tasks:
6719 // -- enabling per-link DL interrupts
6720 // -- releasing buffer_ready on the link
6721 //
6722 nvswitch_corelib_training_complete_ls10(link);
6723 nvswitch_init_buffer_ready(device, link, NV_TRUE);
6724 link->bRxDetected = NV_TRUE;
6725
6726 //
6727 // Clear out any cached interrupts for the link and update the last link up timestamp
6728 //
6729 _nvswitch_clear_deferred_link_errors_ls10(device, link_id);
6730 chip_device->deferredLinkErrors[link_id].state.lastLinkUpTime = nvswitch_os_get_platform_time();
6731
6732 // Reset NV_NPORT_SCRATCH_WARM_PORT_RESET_REQUIRED to 0x0
6733 NVSWITCH_LINK_WR32(device, link_id, NPORT, _NPORT, _SCRATCH_WARM, 0);
6734 }
6735 else if (mode == NVLINK_LINKSTATE_FAULT)
6736 {
6737 //
6738 // If we are here then a previous state transition caused
6739 // the link to FAULT as there is no TL Link state requests
6740 // that explicitly transitions a link to fault. If that is the
6741 // case, set the DL interrupts so any errors can be handled
6742 //
6743 nvswitch_set_dlpl_interrupts_ls10(link);
6744 }
6745 }
6746
6747 NVSWITCH_UNHANDLED_CHECK(device, unhandled);
6748 NVSWITCH_LINK_WR32(device, link_id, NVLIPT_LNK, _NVLIPT_LNK, _INTR_STATUS, pending);
6749
6750 if (unhandled != 0)
6751 {
6752 NVSWITCH_PRINT(device, WARN,
6753 "%s: Unhandled NVLIPT_LNK STATUS interrupts, pending: 0x%x enabled: 0x%x.\n",
6754 __FUNCTION__, pending, enabled);
6755 return -NVL_MORE_PROCESSING_REQUIRED;
6756 }
6757
6758 return NVL_SUCCESS;
6759 }
6760
//
// Service non-fatal NVLIPT error interrupts (_ERR_STATUS_0) for one link.
//
// Each recognized error bit is reported and logged to the InfoROM NVLink
// error log; FAILEDMINIONREQUEST is deferred instead (per the HW note
// below it always trails a DL fault). First-error and status registers are
// written back at the end to acknowledge the serviced bits.
//
// @param[in] device          nvswitch device
// @param[in] nvlipt_instance NVLIPT instance (recorded in the error event)
// @param[in] link            global link id to service
//
// @return NVL_SUCCESS if all pending bits were handled;
//         -NVL_NOT_FOUND if no enabled error is pending;
//         -NVL_MORE_PROCESSING_REQUIRED if a pending bit was left unhandled.
//
static NvlStatus
_nvswitch_service_nvlipt_lnk_nonfatal_ls10
(
    nvswitch_device *device,
    NvU32 nvlipt_instance,
    NvU32 link
)
{
    ls10_device *chip_device = NVSWITCH_GET_CHIP_DEVICE_LS10(device);
    NVSWITCH_INTERRUPT_LOG_TYPE report = { 0 };
    NvU32 pending, bit, unhandled;
    INFOROM_NVLINK_ERROR_EVENT error_event = { 0 };

    report.raw_pending = NVSWITCH_LINK_RD32(device, link, NVLIPT_LNK, _NVLIPT_LNK, _ERR_STATUS_0);
    report.raw_enable = NVSWITCH_LINK_RD32(device, link, NVLIPT_LNK, _NVLIPT_LNK, _ERR_NON_FATAL_REPORT_EN_0);
    report.mask = report.raw_enable;

    error_event.nvliptInstance = (NvU8) nvlipt_instance;
    error_event.localLinkIdx = (NvU8) NVSWITCH_NVLIPT_GET_LOCAL_LINK_ID_LS10(link);

    pending = report.raw_pending & report.mask;
    if (pending == 0)
    {
        return -NVL_NOT_FOUND;
    }

    unhandled = pending;
    report.raw_first = NVSWITCH_LINK_RD32(device, link, NVLIPT_LNK, _NVLIPT_LNK, _ERR_FIRST_0);

    bit = DRF_NUM(_NVLIPT_LNK, _ERR_STATUS_0, _ILLEGALLINKSTATEREQUEST, 1);
    if (nvswitch_test_flags(pending, bit))
    {
        NVSWITCH_REPORT_NONFATAL(_HW_NVLIPT_LNK_ILLEGALLINKSTATEREQUEST, "_HW_NVLIPT_LNK_ILLEGALLINKSTATEREQUEST");
        nvswitch_clear_flags(&unhandled, bit);
        {
            error_event.error = INFOROM_NVLINK_NVLIPT_ILLEGAL_LINK_STATE_REQUEST_NONFATAL;
            nvswitch_inforom_nvlink_log_error_event(device, &error_event);
        }
    }

    bit = DRF_NUM(_NVLIPT_LNK, _ERR_STATUS_0, _FAILEDMINIONREQUEST, 1);
    if (nvswitch_test_flags(pending, bit))
    {
        //
        // based off of HW's assertion. FAILEDMINIONREQUEST always trails a DL fault. So no need to
        // do reset_and_drain here
        //
        chip_device->deferredLinkErrors[link].data.nonFatalIntrMask.liptLnk |= bit;
        _nvswitch_create_deferred_link_errors_task_ls10(device, nvlipt_instance, link);
        nvswitch_clear_flags(&unhandled, bit);
    }

    bit = DRF_NUM(_NVLIPT_LNK, _ERR_STATUS_0, _RESERVEDREQUESTVALUE, 1);
    if (nvswitch_test_flags(pending, bit))
    {
        NVSWITCH_REPORT_NONFATAL(_HW_NVLIPT_LNK_RESERVEDREQUESTVALUE, "_RESERVEDREQUESTVALUE");
        nvswitch_clear_flags(&unhandled, bit);
        {
            error_event.error = INFOROM_NVLINK_NVLIPT_RESERVED_REQUEST_VALUE_NONFATAL;
            nvswitch_inforom_nvlink_log_error_event(device, &error_event);
        }
    }

    bit = DRF_NUM(_NVLIPT_LNK, _ERR_STATUS_0, _LINKSTATEWRITEWHILEBUSY, 1);
    if (nvswitch_test_flags(pending, bit))
    {
        NVSWITCH_REPORT_NONFATAL(_HW_NVLIPT_LNK_LINKSTATEWRITEWHILEBUSY, "_LINKSTATEWRITEWHILEBUSY");
        nvswitch_clear_flags(&unhandled, bit);
        {
            error_event.error = INFOROM_NVLINK_NVLIPT_LINK_STATE_WRITE_WHILE_BUSY_NONFATAL;
            nvswitch_inforom_nvlink_log_error_event(device, &error_event);
        }
    }

    bit = DRF_NUM(_NVLIPT_LNK, _ERR_STATUS_0, _LINK_STATE_REQUEST_TIMEOUT, 1);
    if (nvswitch_test_flags(pending, bit))
    {
        NVSWITCH_REPORT_NONFATAL(_HW_NVLIPT_LNK_LINK_STATE_REQUEST_TIMEOUT, "_LINK_STATE_REQUEST_TIMEOUT");
        nvswitch_clear_flags(&unhandled, bit);
        {
            error_event.error = INFOROM_NVLINK_NVLIPT_LINK_STATE_REQUEST_TIMEOUT_NONFATAL;
            nvswitch_inforom_nvlink_log_error_event(device, &error_event);
        }
    }

    bit = DRF_NUM(_NVLIPT_LNK, _ERR_STATUS_0, _WRITE_TO_LOCKED_SYSTEM_REG_ERR, 1);
    if (nvswitch_test_flags(pending, bit))
    {
        NVSWITCH_REPORT_NONFATAL(_HW_NVLIPT_LNK_WRITE_TO_LOCKED_SYSTEM_REG_ERR, "_WRITE_TO_LOCKED_SYSTEM_REG_ERR");
        nvswitch_clear_flags(&unhandled, bit);
        {
            error_event.error = INFOROM_NVLINK_NVLIPT_WRITE_TO_LOCKED_SYSTEM_REG_NONFATAL;
            nvswitch_inforom_nvlink_log_error_event(device, &error_event);
        }
    }

    NVSWITCH_UNHANDLED_CHECK(device, unhandled);

    // Acknowledge the serviced first-error and status bits.
    if (report.raw_first & report.mask)
    {
        NVSWITCH_LINK_WR32(device, link, NVLIPT_LNK, _NVLIPT_LNK, _ERR_FIRST_0,
            report.raw_first & report.mask);
    }
    NVSWITCH_LINK_WR32(device, link, NVLIPT_LNK, _NVLIPT_LNK, _ERR_STATUS_0, pending);

    if (unhandled != 0)
    {
        NVSWITCH_PRINT(device, WARN,
                "%s: Unhandled NVLIPT_LNK NON_FATAL interrupts, pending: 0x%x enabled: 0x%x.\n",
                 __FUNCTION__, pending, report.raw_enable);
        return -NVL_MORE_PROCESSING_REQUIRED;
    }

    return NVL_SUCCESS;
}
6876
//
// Service non-fatal NVLIPT link interrupts for every link in intrLinkMask
// that belongs to the given NVLIPT instance.
//
// Two kinds of per-link interrupts are gathered in a first pass:
//   - _INTR_STATUS   : link state-change ("status") interrupts
//   - _ERR_STATUS_0  : non-fatal error interrupts
// Status-change interrupts are serviced before the error interrupts.
//
// @return NVL_SUCCESS, or -NVL_BAD_ARGS if the reduced link mask is empty,
//         or -NVL_MORE_PROCESSING_REQUIRED if any per-link error handler
//         failed with something other than "nothing pending".
//
static NvlStatus
_nvswitch_service_nvlipt_link_nonfatal_ls10
(
    nvswitch_device *device,
    NvU32 instance,
    NvU64 intrLinkMask
)
{
    NvU32 i, intrLink;
    NvU64 localLinkMask, enabledLinkMask, localIntrLinkMask;
    NvU64 interruptingLinks = 0;
    NvU64 lnkStatusChangeLinks = 0;
    NvlStatus status = NVL_SUCCESS;
    NvlStatus retStatus = NVL_SUCCESS;

    //
    // The passed in interruptLinkMask should contain a link that is part of the
    // given nvlipt instance
    //
    localLinkMask = NVSWITCH_NVLIPT_GET_LOCAL_LINK_MASK64_LS10(instance);
    enabledLinkMask = nvswitch_get_enabled_link_mask(device);
    localIntrLinkMask = localLinkMask & intrLinkMask & enabledLinkMask;

    if (localIntrLinkMask == 0)
    {
        NVSWITCH_PRINT(device, ERROR, "%s: Bad link mask provided for link interrupt servicing!\n", __FUNCTION__);
        NVSWITCH_ASSERT(0);
        return -NVL_BAD_ARGS;
    }


    // First pass: classify each link by which interrupt register is pending.
    FOR_EACH_INDEX_IN_MASK(64, i, localIntrLinkMask)
    {
        if (NVSWITCH_GET_LINK_ENG_INST(device, i, NVLIPT) != instance)
        {
            NVSWITCH_ASSERT(0);
            break;
        }

        intrLink = NVSWITCH_LINK_RD32(device, i, NVLIPT_LNK, _NVLIPT_LNK, _ERR_STATUS_0);

        if(intrLink)
        {
            interruptingLinks |= NVBIT64(i);
        }

       intrLink = NVSWITCH_LINK_RD32(device, i, NVLIPT_LNK, _NVLIPT_LNK, _INTR_STATUS);

        if(intrLink)
        {
            lnkStatusChangeLinks |= NVBIT64(i);
        }
    }
    FOR_EACH_INDEX_IN_MASK_END;


    // Second pass: service link status-change interrupts (failures are
    // logged but do not affect the return status).
    FOR_EACH_INDEX_IN_MASK(64, i, lnkStatusChangeLinks)
    {

        if(_nvswitch_service_nvlipt_lnk_status_ls10(device, instance, i) != NVL_SUCCESS)
        {
            NVSWITCH_PRINT(device, WARN, "%s: Could not process nvlipt link status interrupt. Continuing. LinkId %d\n",
                        __FUNCTION__, i);
        }
    }
    FOR_EACH_INDEX_IN_MASK_END;

    // Third pass: service non-fatal error interrupts.
    FOR_EACH_INDEX_IN_MASK(64, i, interruptingLinks)
    {

        status = _nvswitch_service_nvlipt_lnk_nonfatal_ls10(device, instance, i);
        if (status != NVL_SUCCESS && status != -NVL_NOT_FOUND)
        {
            retStatus = -NVL_MORE_PROCESSING_REQUIRED;
        }
    }
    FOR_EACH_INDEX_IN_MASK_END;

    return retStatus;
}
6957
6958
//
// Service non-link MINION interrupts for a single MINION instance.
//
// MINION link interrupts are masked out here; they are handled by the
// per-link MINION servicing path (nvswitch_service_minion_link).
//
// @param[in] device   nvswitch device
// @param[in] instance MINION instance to service
//
// @return NVL_SUCCESS if all pending interrupts were handled;
//         -NVL_NOT_FOUND if no enabled interrupt is pending;
//         -NVL_MORE_PROCESSING_REQUIRED if a pending bit was left unhandled.
//
NvlStatus
_nvswitch_service_minion_fatal_ls10
(
    nvswitch_device *device,
    NvU32 instance
)
{
    NvU32 pending, bit, unhandled, mask;

    pending = NVSWITCH_MINION_RD32_LS10(device, instance, _MINION, _MINION_INTR);
    mask = NVSWITCH_MINION_RD32_LS10(device, instance, _MINION, _MINION_INTR_STALL_EN);

    // Don't consider MINION Link interrupts in this handler
    mask &= ~(DRF_NUM(_MINION, _MINION_INTR_STALL_EN, _LINK, NV_MINION_MINION_INTR_STALL_EN_LINK_ENABLE_ALL));

    pending &= mask;

    if (pending == 0)
    {
        return -NVL_NOT_FOUND;
    }

    unhandled = pending;

    // Falcon stall interrupt: delegate to the falcon interrupt handler and
    // only mark the bit handled when that handler succeeds.
    bit = DRF_NUM(_MINION, _MINION_INTR, _FALCON_STALL, 0x1);
    if (nvswitch_test_flags(pending, bit))
    {
        if (nvswitch_minion_service_falcon_interrupts_ls10(device, instance) == NVL_SUCCESS)
        {
            nvswitch_clear_flags(&unhandled, bit);
        }
    }

    // NONFATAL/FATAL bits are logged and acknowledged (write-to-clear) here.
    bit = DRF_NUM(_MINION, _MINION_INTR, _NONFATAL, 0x1);
    if (nvswitch_test_flags(pending, bit))
    {
        NVSWITCH_PRINT(device, ERROR, "%s: servicing minion nonfatal interrupt\n",
            __FUNCTION__);
        NVSWITCH_MINION_WR32_LS10(device, instance, _MINION, _MINION_INTR, bit);
        nvswitch_clear_flags(&unhandled, bit);
    }

    bit = DRF_NUM(_MINION, _MINION_INTR, _FATAL, 0x1);
    if (nvswitch_test_flags(pending, bit))
    {
        NVSWITCH_PRINT(device, ERROR, "%s: servicing minion fatal interrupt\n",
            __FUNCTION__);
        NVSWITCH_MINION_WR32_LS10(device, instance, _MINION, _MINION_INTR, bit);
        nvswitch_clear_flags(&unhandled, bit);
    }

    NVSWITCH_UNHANDLED_CHECK(device, unhandled);

    if (unhandled != 0)
    {
        return -NVL_MORE_PROCESSING_REQUIRED;
    }

    return NVL_SUCCESS;
}
7019
7020 static NvlStatus
_nvswitch_service_nvlw_nonfatal_ls10(nvswitch_device * device,NvU32 instance)7021 _nvswitch_service_nvlw_nonfatal_ls10
7022 (
7023 nvswitch_device *device,
7024 NvU32 instance
7025 )
7026 {
7027 NvlStatus status[3];
7028 NvU32 reg;
7029 NvU64 intrLinkMask = 0;
7030
7031 reg = NVSWITCH_ENG_RD32_LS10(device, NVLW, instance, _NVLW, _TOP_INTR_1_STATUS);
7032 intrLinkMask = DRF_VAL(_NVLW, _TOP_INTR_1_STATUS, _LINK, reg);
7033
7034 //
7035 // Shift the mask of interrupting links from the local to the
7036 // NVLW instance to a global mask
7037 //
7038 intrLinkMask = intrLinkMask << (NVSWITCH_LINKS_PER_NVLW_LS10*instance);
7039
7040 // If there is no pending link interrupts then there is nothing to service
7041 if (intrLinkMask == 0)
7042 {
7043 return NVL_SUCCESS;
7044 }
7045
7046 status[0] = _nvswitch_service_nvlipt_link_nonfatal_ls10(device, instance, intrLinkMask);
7047 status[1] = _nvswitch_service_nvldl_nonfatal_ls10(device, instance, intrLinkMask);
7048 status[2] = _nvswitch_service_nvltlc_nonfatal_ls10(device, instance, intrLinkMask);
7049
7050 if ((status[0] != NVL_SUCCESS) && (status[0] != -NVL_NOT_FOUND) &&
7051 (status[1] != NVL_SUCCESS) && (status[1] != -NVL_NOT_FOUND) &&
7052 (status[2] != NVL_SUCCESS) && (status[2] != -NVL_NOT_FOUND))
7053 {
7054 return -NVL_MORE_PROCESSING_REQUIRED;
7055 }
7056
7057 return NVL_SUCCESS;
7058 }
7059
7060 static NvlStatus
_nvswitch_service_soe_fatal_ls10(nvswitch_device * device)7061 _nvswitch_service_soe_fatal_ls10
7062 (
7063 nvswitch_device *device
7064 )
7065 {
7066 // We only support 1 SOE as of LS10.
7067 if (soeService_HAL(device, (PSOE)device->pSoe) != NVL_SUCCESS)
7068 {
7069 return -NVL_MORE_PROCESSING_REQUIRED;
7070 }
7071
7072 return NVL_SUCCESS;
7073 }
7074
//
// Service fatal NVLIPT error interrupts (_ERR_STATUS_0) for one link.
//
// Each recognized fatal bit is reported and logged to the InfoROM NVLink
// error log. If the link has already recorded a fatal error, subsequent
// reporting of the serviced bits is disabled. First-error and status
// registers are written back at the end to acknowledge the serviced bits.
//
// @param[in] device          nvswitch device
// @param[in] nvlipt_instance NVLIPT instance (recorded in the error event)
// @param[in] link            global link id to service
//
// @return NVL_SUCCESS if all pending bits were handled;
//         -NVL_NOT_FOUND if no enabled error is pending;
//         -NVL_MORE_PROCESSING_REQUIRED if a pending bit was left unhandled.
//
static NvlStatus
_nvswitch_service_nvlipt_lnk_fatal_ls10
(
    nvswitch_device *device,
    NvU32 nvlipt_instance,
    NvU32 link
)
{
    NVSWITCH_INTERRUPT_LOG_TYPE report = { 0 };
    NvU32 pending, bit, unhandled;
    INFOROM_NVLINK_ERROR_EVENT error_event = { 0 };

    report.raw_pending = NVSWITCH_LINK_RD32(device, link, NVLIPT_LNK, _NVLIPT_LNK, _ERR_STATUS_0);
    report.raw_enable = NVSWITCH_LINK_RD32(device, link, NVLIPT_LNK, _NVLIPT_LNK, _ERR_FATAL_REPORT_EN_0);
    report.mask = report.raw_enable;

    pending = report.raw_pending & report.mask;
    if (pending == 0)
    {
        return -NVL_NOT_FOUND;
    }

    error_event.nvliptInstance = (NvU8) nvlipt_instance;
    error_event.localLinkIdx = (NvU8) NVSWITCH_NVLIPT_GET_LOCAL_LINK_ID_LS10(link);

    unhandled = pending;
    report.raw_first = NVSWITCH_LINK_RD32(device, link, NVLIPT_LNK, _NVLIPT_LNK, _ERR_FIRST_0);

    bit = DRF_NUM(_NVLIPT_LNK, _ERR_STATUS_0, _SLEEPWHILEACTIVELINK, 1);
    if (nvswitch_test_flags(pending, bit))
    {
        NVSWITCH_REPORT_FATAL(_HW_NVLIPT_LNK_SLEEPWHILEACTIVELINK, "No non-empty link is detected", NV_FALSE);
        nvswitch_clear_flags(&unhandled, bit);
        {
            error_event.error = INFOROM_NVLINK_NVLIPT_SLEEP_WHILE_ACTIVE_LINK_FATAL;
            nvswitch_inforom_nvlink_log_error_event(device, &error_event);
        }
    }

    bit = DRF_NUM(_NVLIPT_LNK, _ERR_STATUS_0, _RSTSEQ_PHYCTL_TIMEOUT, 1);
    if (nvswitch_test_flags(pending, bit))
    {
        NVSWITCH_REPORT_FATAL(_HW_NVLIPT_LNK_RSTSEQ_PHYCTL_TIMEOUT, "Reset sequencer timed out waiting for a handshake from PHYCTL", NV_FALSE);
        nvswitch_clear_flags(&unhandled, bit);
        {
            error_event.error = INFOROM_NVLINK_NVLIPT_RSTSEQ_PHYCTL_TIMEOUT_FATAL;
            nvswitch_inforom_nvlink_log_error_event(device, &error_event);
        }
    }

    bit = DRF_NUM(_NVLIPT_LNK, _ERR_STATUS_0, _RSTSEQ_CLKCTL_TIMEOUT, 1);
    if (nvswitch_test_flags(pending, bit))
    {
        NVSWITCH_REPORT_FATAL(_HW_NVLIPT_LNK_RSTSEQ_CLKCTL_TIMEOUT, "Reset sequencer timed out waiting for a handshake from CLKCTL", NV_FALSE);
        nvswitch_clear_flags(&unhandled, bit);
        {
            error_event.error = INFOROM_NVLINK_NVLIPT_RSTSEQ_CLKCTL_TIMEOUT_FATAL;
            nvswitch_inforom_nvlink_log_error_event(device, &error_event);
        }
    }

    NVSWITCH_UNHANDLED_CHECK(device, unhandled);

    // Disable interrupts that have occurred after fatal error.
    if (device->link[link].fatal_error_occurred)
    {
        NVSWITCH_LINK_WR32(device, link, NVLIPT_LNK, _NVLIPT_LNK, _ERR_FATAL_REPORT_EN_0,
            report.raw_enable & ~pending);
    }

    // clear interrupts
    if (report.raw_first & report.mask)
    {
        NVSWITCH_LINK_WR32(device, link, NVLIPT_LNK, _NVLIPT_LNK, _ERR_FIRST_0,
            report.raw_first & report.mask);
    }
    NVSWITCH_LINK_WR32(device, link, NVLIPT_LNK, _NVLIPT_LNK, _ERR_STATUS_0, pending);

    if (unhandled != 0)
    {
        NVSWITCH_PRINT(device, WARN,
                "%s: Unhandled NVLIPT_LNK FATAL interrupts, pending: 0x%x enabled: 0x%x.\n",
                 __FUNCTION__, pending, report.raw_enable);
        return -NVL_MORE_PROCESSING_REQUIRED;
    }

    return NVL_SUCCESS;
}
7163
//
// Service fatal NVLIPT link interrupts for every link in intrLinkMask that
// belongs to the given NVLIPT instance and has a pending _ERR_STATUS_0 bit.
//
// @return NVL_SUCCESS, or -NVL_BAD_ARGS if the reduced link mask is empty,
//         or -NVL_MORE_PROCESSING_REQUIRED if any per-link handler failed.
//
static NvlStatus
_nvswitch_service_nvlipt_link_fatal_ls10
(
    nvswitch_device *device,
    NvU32 instance,
    NvU64 intrLinkMask
)
{
    NvU32 i, intrLink;
    NvU64 localLinkMask, enabledLinkMask, localIntrLinkMask;
    NvlStatus status = NVL_SUCCESS;

    //
    // The passed in interruptLinkMask should contain a link that is part of the
    // given nvlipt instance
    //
    localLinkMask = NVSWITCH_NVLIPT_GET_LOCAL_LINK_MASK64_LS10(instance);
    enabledLinkMask = nvswitch_get_enabled_link_mask(device);
    localIntrLinkMask = localLinkMask & intrLinkMask & enabledLinkMask;

    if (localIntrLinkMask == 0)
    {
        NVSWITCH_PRINT(device, ERROR, "%s: Bad link mask provided for link interrupt servicing!\n", __FUNCTION__);
        NVSWITCH_ASSERT(0);
        return -NVL_BAD_ARGS;
    }

    // read in error status of current link
    FOR_EACH_INDEX_IN_MASK(64, i, localIntrLinkMask)
    {
        intrLink = NVSWITCH_LINK_RD32(device, i, NVLIPT_LNK, _NVLIPT_LNK, _ERR_STATUS_0);
        if (intrLink != 0)
        {
            if( _nvswitch_service_nvlipt_lnk_fatal_ls10(device, instance, i) != NVL_SUCCESS)
            {
                status = -NVL_MORE_PROCESSING_REQUIRED;
            }
        }
    }
    FOR_EACH_INDEX_IN_MASK_END;

    return status;
}
7207
//
// Service fatal interrupts for one NVLW instance.
//
// Always services the MINION (link and non-link) and NVLIPT-common fatal
// paths; additionally dispatches the per-link NVLDL/NVLTLC/NVLIPT fatal
// handlers when the NVLW top-level status shows pending link interrupts.
//
// @return NVL_SUCCESS if at least one handler succeeded or found nothing
//         pending; -NVL_MORE_PROCESSING_REQUIRED if all six failed.
//
static NvlStatus
_nvswitch_service_nvlw_fatal_ls10
(
    nvswitch_device *device,
    NvU32 instance
)
{
    NvlStatus status[6];
    NvU64 intrLinkMask = 0;
    NvU32 reg;

    reg = NVSWITCH_ENG_RD32_LS10(device, NVLW, instance, _NVLW, _TOP_INTR_0_STATUS);
    intrLinkMask = DRF_VAL(_NVLW, _TOP_INTR_0_STATUS, _LINK, reg);

    //
    // Shift the mask of interrupting links from the local to the
    // NVLW instance to a global mask
    //
    intrLinkMask = intrLinkMask << (NVSWITCH_LINKS_PER_NVLW_LS10*instance);

    status[0] = device->hal.nvswitch_service_minion_link(device, instance);
    status[1] = _nvswitch_service_minion_fatal_ls10(device, instance);
    status[2] = _nvswitch_service_nvlipt_common_fatal_ls10(device, instance);

    //
    // If there is a pending link interrupt on this nvlw instance then service
    // those interrupts in the handlers below. Otherwise, mark the status's
    // as success as there is nothing to service
    //
    if (intrLinkMask != 0)
    {
        status[3] = _nvswitch_service_nvldl_fatal_ls10(device, instance, intrLinkMask);
        status[4] = _nvswitch_service_nvltlc_fatal_ls10(device, instance, intrLinkMask);
        status[5] = _nvswitch_service_nvlipt_link_fatal_ls10(device, instance, intrLinkMask);
    }
    else
    {
        status[3] = NVL_SUCCESS;
        status[4] = NVL_SUCCESS;
        status[5] = NVL_SUCCESS;
    }

    // Fail only when every handler neither succeeded nor found nothing pending.
    if (status[0] != NVL_SUCCESS && status[0] != -NVL_NOT_FOUND &&
        status[1] != NVL_SUCCESS && status[1] != -NVL_NOT_FOUND &&
        status[2] != NVL_SUCCESS && status[2] != -NVL_NOT_FOUND &&
        status[3] != NVL_SUCCESS && status[3] != -NVL_NOT_FOUND &&
        status[4] != NVL_SUCCESS && status[4] != -NVL_NOT_FOUND &&
        status[5] != NVL_SUCCESS && status[5] != -NVL_NOT_FOUND)
    {
        return -NVL_MORE_PROCESSING_REQUIRED;
    }

    return NVL_SUCCESS;
}
7262
/*
 * @Brief : Enable top level HW interrupts.
 *
 * @Description : Sets the GIN CPU interrupt leaf enables for the NPG, NVLW,
 *                NXBAR and misc-unit trees, then enables all top-level CPU
 *                interrupts. Correctable leaves are written with 0 (left
 *                disabled). The UNITS leaf is first fully cleared and then
 *                re-enabled for the specific units listed below.
 *
 * @param[in] device operate on this device
 */
void
nvswitch_lib_enable_interrupts_ls10
(
    nvswitch_device *device
)
{
    NVSWITCH_ENG_WR32(device, GIN, , 0, _CTRL, _CPU_INTR_LEAF_EN_SET(NV_CTRL_CPU_INTR_NPG_FATAL_IDX), 0xFFFF);
    NVSWITCH_ENG_WR32(device, GIN, , 0, _CTRL, _CPU_INTR_LEAF_EN_SET(NV_CTRL_CPU_INTR_NPG_NON_FATAL_IDX), 0xFFFF);
    NVSWITCH_ENG_WR32(device, GIN, , 0, _CTRL, _CPU_INTR_LEAF_EN_SET(NV_CTRL_CPU_INTR_NPG_CORRECTABLE_IDX), 0);

    NVSWITCH_ENG_WR32(device, GIN, , 0, _CTRL, _CPU_INTR_LEAF_EN_SET(NV_CTRL_CPU_INTR_NVLW_FATAL_IDX), 0xFFFF);
    NVSWITCH_ENG_WR32(device, GIN, , 0, _CTRL, _CPU_INTR_LEAF_EN_SET(NV_CTRL_CPU_INTR_NVLW_NON_FATAL_IDX), 0xFFFF);
    NVSWITCH_ENG_WR32(device, GIN, , 0, _CTRL, _CPU_INTR_LEAF_EN_SET(NV_CTRL_CPU_INTR_NVLW_CORRECTABLE_IDX), 0);

    NVSWITCH_ENG_WR32(device, GIN, , 0, _CTRL, _CPU_INTR_LEAF_EN_SET(NV_CTRL_CPU_INTR_NXBAR_FATAL_IDX), 0x7);

    NVSWITCH_ENG_WR32(device, GIN, , 0, _CTRL, _CPU_INTR_LEAF_EN_CLEAR(NV_CTRL_CPU_INTR_UNITS_IDX), 0xFFFFFFFF);
    NVSWITCH_ENG_WR32(device, GIN, , 0, _CTRL, _CPU_INTR_LEAF_EN_SET(NV_CTRL_CPU_INTR_UNITS_IDX),
        DRF_NUM(_CTRL, _CPU_INTR_UNITS, _PMGR_HOST, 1) |
        DRF_NUM(_CTRL, _CPU_INTR_UNITS, _PTIMER, 1) |
        DRF_NUM(_CTRL, _CPU_INTR_UNITS, _PTIMER_ALARM, 1) |
        DRF_NUM(_CTRL, _CPU_INTR_UNITS, _SEC0_INTR0_0, 1) |
        DRF_NUM(_CTRL, _CPU_INTR_UNITS, _XTL_CPU, 1) |
        DRF_NUM(_CTRL, _CPU_INTR_UNITS, _XAL_EP, 1) |
        DRF_NUM(_CTRL, _CPU_INTR_UNITS, _PRIV_RING, 1));

    NVSWITCH_ENG_WR32(device, GIN, , 0, _CTRL, _CPU_INTR_TOP_EN_SET(0), 0xFFFFFFFF);
}
7298
/*
 * @Brief : Disable top level HW interrupts.
 *
 * @Description : Mirrors nvswitch_lib_enable_interrupts_ls10: clears the
 *                GIN CPU interrupt leaf enables for the NPG, NVLW, NXBAR
 *                and misc-unit trees, then disables all top-level CPU
 *                interrupts.
 *
 * @param[in] device operate on this device
 */
void
nvswitch_lib_disable_interrupts_ls10
(
    nvswitch_device *device
)
{
    NVSWITCH_ENG_WR32(device, GIN, , 0, _CTRL, _CPU_INTR_LEAF_EN_CLEAR(NV_CTRL_CPU_INTR_NPG_FATAL_IDX), 0xFFFF);
    NVSWITCH_ENG_WR32(device, GIN, , 0, _CTRL, _CPU_INTR_LEAF_EN_CLEAR(NV_CTRL_CPU_INTR_NPG_NON_FATAL_IDX), 0xFFFF);
    NVSWITCH_ENG_WR32(device, GIN, , 0, _CTRL, _CPU_INTR_LEAF_EN_CLEAR(NV_CTRL_CPU_INTR_NPG_CORRECTABLE_IDX), 0);

    NVSWITCH_ENG_WR32(device, GIN, , 0, _CTRL, _CPU_INTR_LEAF_EN_CLEAR(NV_CTRL_CPU_INTR_NVLW_FATAL_IDX), 0xFFFF);
    NVSWITCH_ENG_WR32(device, GIN, , 0, _CTRL, _CPU_INTR_LEAF_EN_CLEAR(NV_CTRL_CPU_INTR_NVLW_NON_FATAL_IDX), 0xFFFF);
    NVSWITCH_ENG_WR32(device, GIN, , 0, _CTRL, _CPU_INTR_LEAF_EN_CLEAR(NV_CTRL_CPU_INTR_NVLW_CORRECTABLE_IDX), 0);

    NVSWITCH_ENG_WR32(device, GIN, , 0, _CTRL, _CPU_INTR_LEAF_EN_CLEAR(NV_CTRL_CPU_INTR_NXBAR_FATAL_IDX), 0x7);

    NVSWITCH_ENG_WR32(device, GIN, , 0, _CTRL, _CPU_INTR_LEAF_EN_CLEAR(NV_CTRL_CPU_INTR_UNITS_IDX),
        DRF_NUM(_CTRL, _CPU_INTR_UNITS, _PMGR_HOST, 1) |
        DRF_NUM(_CTRL, _CPU_INTR_UNITS, _PTIMER, 1) |
        DRF_NUM(_CTRL, _CPU_INTR_UNITS, _PTIMER_ALARM, 1) |
        DRF_NUM(_CTRL, _CPU_INTR_UNITS, _SEC0_INTR0_0, 1) |
        DRF_NUM(_CTRL, _CPU_INTR_UNITS, _XTL_CPU, 1) |
        DRF_NUM(_CTRL, _CPU_INTR_UNITS, _XAL_EP, 1) |
        DRF_NUM(_CTRL, _CPU_INTR_UNITS, _PRIV_RING, 1));

    NVSWITCH_ENG_WR32(device, GIN, , 0, _CTRL, _CPU_INTR_TOP_EN_CLEAR(0), 0xFFFFFFFF);
}
7333
7334 //
7335 // Check if there are interrupts pending.
7336 //
7337 // On silicon/emulation we only use MSIs which are not shared, so this
7338 // function does not need to be called.
7339 //
7340 NvlStatus
nvswitch_lib_check_interrupts_ls10(nvswitch_device * device)7341 nvswitch_lib_check_interrupts_ls10
7342 (
7343 nvswitch_device *device
7344 )
7345 {
7346 NvlStatus retval = NVL_SUCCESS;
7347 NvU32 val;
7348
7349 val = NVSWITCH_ENG_RD32(device, GIN, , 0, _CTRL, _CPU_INTR_TOP(0));
7350 if (DRF_NUM(_CTRL, _CPU_INTR_TOP, _VALUE, val) != 0)
7351 {
7352 retval = -NVL_MORE_PROCESSING_REQUIRED;
7353 }
7354
7355 return retval;
7356 }
7357
//
// Re-trigger the CPR and NPG engine-to-GIN interrupt lines so that any
// still-pending interrupts are reported again after servicing.
// NOTE(review): retrigger indices 0/1 appear to map to the FATAL and
// NONFATAL trees respectively -- confirm against the IP manuals.
//
static void
_nvswitch_retrigger_engine_intr_ls10
(
    nvswitch_device *device
)
{

    // re-trigger engine to gin interrupts for CPR and NPG on the FATAL and NONFATAL trees
    NVSWITCH_BCAST_WR32_LS10(device, CPR, _CPR_SYS, _INTR_RETRIGGER(0), 1);
    NVSWITCH_BCAST_WR32_LS10(device, CPR, _CPR_SYS, _INTR_RETRIGGER(1), 1);

    NVSWITCH_BCAST_WR32_LS10(device, NPG, _NPG, _INTR_RETRIGGER(0), 1);
    NVSWITCH_BCAST_WR32_LS10(device, NPG, _NPG, _INTR_RETRIGGER(1), 1);
}
7372
7373 void
nvswitch_service_minion_all_links_ls10(nvswitch_device * device)7374 nvswitch_service_minion_all_links_ls10
7375 (
7376 nvswitch_device *device
7377 )
7378 {
7379 NvU32 val, i;
7380
7381 // Check NVLW
7382 val = NVSWITCH_ENG_RD32(device, GIN, , 0, _CTRL, _CPU_INTR_NVLW_FATAL);
7383 val = DRF_NUM(_CTRL, _CPU_INTR_NVLW_FATAL, _MASK, val);
7384 if (val != 0)
7385 {
7386 NVSWITCH_ENG_WR32(device, GIN, , 0, _CTRL,
7387 _CPU_INTR_LEAF(NV_CTRL_CPU_INTR_NVLW_FATAL_IDX), val);
7388
7389 for (i = 0; i < DRF_SIZE(NV_CTRL_CPU_INTR_NVLW_FATAL_MASK); i++)
7390 {
7391 if (val & NVBIT(i))
7392 (void)_nvswitch_service_nvlw_fatal_ls10(device, i);
7393 }
7394 }
7395 }
7396
7397 //
7398 // Service interrupt and re-enable interrupts. Interrupts should disabled when
7399 // this is called.
7400 //
7401 NvlStatus
nvswitch_lib_service_interrupts_ls10(nvswitch_device * device)7402 nvswitch_lib_service_interrupts_ls10
7403 (
7404 nvswitch_device *device
7405 )
7406 {
7407 NvlStatus status = NVL_SUCCESS;
7408 NvlStatus return_status = NVL_SUCCESS;
7409 NvU32 val;
7410 NvU32 i;
7411
7412 //
7413 // Interrupt handler steps:
7414 // 1. Read Leaf interrupt
7415 // 2. Clear leaf interrupt
7416 // 3. Run leaf specific interrupt handler
7417 //
7418 val = NVSWITCH_ENG_RD32(device, GIN, , 0, _CTRL, _CPU_INTR_NVLW_NON_FATAL);
7419 val = DRF_NUM(_CTRL, _CPU_INTR_NVLW_NON_FATAL, _MASK, val);
7420 if (val != 0)
7421 {
7422 NVSWITCH_PRINT(device, INFO, "%s: NVLW NON_FATAL interrupts pending = 0x%x\n",
7423 __FUNCTION__, val);
7424 NVSWITCH_ENG_WR32(device, GIN, , 0, _CTRL, _CPU_INTR_LEAF(NV_CTRL_CPU_INTR_NVLW_NON_FATAL_IDX), val);
7425 for (i = 0; i < DRF_SIZE(NV_CTRL_CPU_INTR_NVLW_NON_FATAL_MASK); i++)
7426 {
7427 if (val & NVBIT(i))
7428 {
7429 status = _nvswitch_service_nvlw_nonfatal_ls10(device, i);
7430 if (status != NVL_SUCCESS)
7431 {
7432 NVSWITCH_PRINT(device, INFO, "%s: NVLW[%d] NON_FATAL interrupt handling status = %d\n",
7433 __FUNCTION__, i, status);
7434 return_status = status;
7435 }
7436 }
7437 }
7438 }
7439
7440 val = NVSWITCH_ENG_RD32(device, GIN, , 0, _CTRL, _CPU_INTR_NVLW_FATAL);
7441 val = DRF_NUM(_CTRL, _CPU_INTR_NVLW_FATAL, _MASK, val);
7442 if (val != 0)
7443 {
7444 NVSWITCH_PRINT(device, INFO, "%s: NVLW FATAL interrupts pending = 0x%x\n",
7445 __FUNCTION__, val);
7446
7447 NVSWITCH_ENG_WR32(device, GIN, , 0, _CTRL, _CPU_INTR_LEAF(NV_CTRL_CPU_INTR_NVLW_FATAL_IDX), val);
7448
7449 for (i = 0; i < DRF_SIZE(NV_CTRL_CPU_INTR_NVLW_FATAL_MASK); i++)
7450 {
7451 if (val & NVBIT(i))
7452 {
7453 status = _nvswitch_service_nvlw_fatal_ls10(device, i);
7454 if (status != NVL_SUCCESS)
7455 {
7456 NVSWITCH_PRINT(device, INFO, "%s: NVLW[%d] FATAL interrupt handling status = %d\n",
7457 __FUNCTION__, i, status);
7458 return_status = status;
7459 }
7460 }
7461 }
7462 }
7463
7464 val = NVSWITCH_ENG_RD32(device, GIN, , 0, _CTRL, _CPU_INTR_NVLW_CORRECTABLE);
7465 val = DRF_NUM(_CTRL, _CPU_INTR_NVLW_CORRECTABLE, _MASK, val);
7466 if (val != 0)
7467 {
7468 NVSWITCH_PRINT(device, ERROR, "%s: NVLW CORRECTABLE interrupts pending = 0x%x\n",
7469 __FUNCTION__, val);
7470 return_status = -NVL_MORE_PROCESSING_REQUIRED;
7471 }
7472
7473 // Check NPG
7474 val = NVSWITCH_ENG_RD32(device, GIN, , 0, _CTRL, _CPU_INTR_NPG_FATAL);
7475 val = DRF_NUM(_CTRL, _CPU_INTR_NPG_FATAL, _MASK, val);
7476 if (val != 0)
7477 {
7478 NVSWITCH_PRINT(device, INFO, "%s: NPG FATAL interrupts pending = 0x%x\n",
7479 __FUNCTION__, val);
7480 NVSWITCH_ENG_WR32(device, GIN, , 0, _CTRL, _CPU_INTR_LEAF(NV_CTRL_CPU_INTR_NPG_FATAL_IDX), val);
7481 for (i = 0; i < DRF_SIZE(NV_CTRL_CPU_INTR_NPG_FATAL_MASK); i++)
7482 {
7483 if (val & NVBIT(i))
7484 {
7485 status = _nvswitch_service_npg_fatal_ls10(device, i);
7486 if (status != NVL_SUCCESS)
7487 {
7488 NVSWITCH_PRINT(device, INFO, "%s: NPG[%d] FATAL interrupt handling status = %d\n",
7489 __FUNCTION__, i, status);
7490 return_status = status;
7491 }
7492 }
7493 }
7494 }
7495
7496 val = NVSWITCH_ENG_RD32(device, GIN, , 0, _CTRL, _CPU_INTR_NPG_NON_FATAL);
7497 val = DRF_NUM(_CTRL, _CPU_INTR_NPG_NON_FATAL, _MASK, val);
7498 if (val != 0)
7499 {
7500 NVSWITCH_PRINT(device, INFO, "%s: NPG NON_FATAL interrupts pending = 0x%x\n",
7501 __FUNCTION__, val);
7502 NVSWITCH_ENG_WR32(device, GIN, , 0, _CTRL, _CPU_INTR_LEAF(NV_CTRL_CPU_INTR_NPG_NON_FATAL_IDX), val);
7503 for (i = 0; i < DRF_SIZE(NV_CTRL_CPU_INTR_NPG_NON_FATAL_MASK); i++)
7504 {
7505 if (val & NVBIT(i))
7506 {
7507 status = _nvswitch_service_npg_nonfatal_ls10(device, i);
7508 if (status != NVL_SUCCESS)
7509 {
7510 NVSWITCH_PRINT(device, INFO, "%s: NPG[%d] NON_FATAL interrupt handling status = %d\n",
7511 __FUNCTION__, i, status);
7512 return_status = status;
7513 }
7514 }
7515 }
7516 }
7517
7518 val = NVSWITCH_ENG_RD32(device, GIN, , 0, _CTRL, _CPU_INTR_NPG_CORRECTABLE);
7519 val = DRF_NUM(_CTRL, _CPU_INTR_NPG_CORRECTABLE, _MASK, val);
7520 if (val != 0)
7521 {
7522 NVSWITCH_PRINT(device, ERROR, "%s: NPG CORRECTABLE interrupts pending = 0x%x\n",
7523 __FUNCTION__, val);
7524 return_status = -NVL_MORE_PROCESSING_REQUIRED;
7525 }
7526
7527 // Check NXBAR
7528 val = NVSWITCH_ENG_RD32(device, GIN, , 0, _CTRL, _CPU_INTR_NXBAR_FATAL);
7529 val = DRF_NUM(_CTRL, _CPU_INTR_NXBAR_FATAL, _MASK, val);
7530 if (val != 0)
7531 {
7532 NVSWITCH_PRINT(device, INFO, "%s: NXBAR FATAL interrupts pending = 0x%x\n",
7533 __FUNCTION__, val);
7534 NVSWITCH_ENG_WR32(device, GIN, , 0, _CTRL, _CPU_INTR_LEAF(NV_CTRL_CPU_INTR_NXBAR_FATAL_IDX), val);
7535 for (i = 0; i < DRF_SIZE(NV_CTRL_CPU_INTR_NXBAR_FATAL_MASK); i++)
7536 {
7537 if (val & NVBIT(i))
7538 {
7539 status = _nvswitch_service_nxbar_fatal_ls10(device, i);
7540 if (status != NVL_SUCCESS)
7541 {
7542 NVSWITCH_PRINT(device, INFO, "%s: NXBAR[%d] FATAL interrupt handling status = %d\n",
7543 __FUNCTION__, i, status);
7544 return_status = status;
7545 }
7546 }
7547 }
7548 }
7549
7550 // Check UNITS
7551 val = NVSWITCH_ENG_RD32(device, GIN, , 0, _CTRL, _CPU_INTR_UNITS);
7552 if (val != 0)
7553 {
7554 NVSWITCH_PRINT(device, MMIO, "%s: UNIT interrupts pending = 0x%x\n",
7555 __FUNCTION__, val);
7556
7557 NVSWITCH_ENG_WR32(device, GIN, , 0, _CTRL, _CPU_INTR_LEAF(NV_CTRL_CPU_INTR_UNITS_IDX), val);
7558 if (FLD_TEST_DRF_NUM(_CTRL, _CPU_INTR_UNITS, _PMGR_HOST, 1, val))
7559 {
7560 NVSWITCH_PRINT(device, ERROR, "%s: _PMGR_HOST interrupt pending\n",
7561 __FUNCTION__);
7562 return_status = -NVL_MORE_PROCESSING_REQUIRED;
7563 }
7564 if (FLD_TEST_DRF_NUM(_CTRL, _CPU_INTR_UNITS, _PTIMER, 1, val))
7565 {
7566 NVSWITCH_PRINT(device, ERROR, "%s: _PTIMER interrupt pending\n",
7567 __FUNCTION__);
7568 return_status = -NVL_MORE_PROCESSING_REQUIRED;
7569 }
7570 if (FLD_TEST_DRF_NUM(_CTRL, _CPU_INTR_UNITS, _PTIMER_ALARM, 1, val))
7571 {
7572 NVSWITCH_PRINT(device, ERROR, "%s: _PTIMER_ALARM interrupt pending\n",
7573 __FUNCTION__);
7574 return_status = -NVL_MORE_PROCESSING_REQUIRED;
7575 }
7576 if (FLD_TEST_DRF_NUM(_CTRL, _CPU_INTR_UNITS, _XTL_CPU, 1, val))
7577 {
7578 NVSWITCH_PRINT(device, ERROR, "%s: _XTL_CPU interrupt pending\n",
7579 __FUNCTION__);
7580 return_status = -NVL_MORE_PROCESSING_REQUIRED;
7581 }
7582 if (FLD_TEST_DRF_NUM(_CTRL, _CPU_INTR_UNITS, _XAL_EP, 1, val))
7583 {
7584 NVSWITCH_PRINT(device, ERROR, "%s: _XAL_EP interrupt pending\n",
7585 __FUNCTION__);
7586 return_status = -NVL_MORE_PROCESSING_REQUIRED;
7587 }
7588 if (FLD_TEST_DRF_NUM(_CTRL, _CPU_INTR_UNITS, _PRIV_RING, 1, val))
7589 {
7590 status = _nvswitch_service_priv_ring_ls10(device);
7591 if (status != NVL_SUCCESS)
7592 {
7593 NVSWITCH_PRINT(device, ERROR, "%s: Problem handling PRI errors\n",
7594 __FUNCTION__);
7595 return_status = status;
7596 }
7597 }
7598 if (!IS_RTLSIM(device) && !IS_FMODEL(device))
7599 {
7600 if (FLD_TEST_DRF_NUM(_CTRL, _CPU_INTR_UNITS, _SEC0_INTR0_0, 1, val))
7601 {
7602 status = _nvswitch_service_soe_fatal_ls10(device);
7603 if (status != NVL_SUCCESS)
7604 {
7605 NVSWITCH_PRINT(device, ERROR, "%s: Problem servicing SOE",
7606 __FUNCTION__);
7607 return_status = status;
7608 }
7609 }
7610 }
7611 }
7612
7613 // step 4 -- retrigger engine interrupts
7614 _nvswitch_retrigger_engine_intr_ls10(device);
7615
7616 // step 5 -- retrigger top level GIN interrupts
7617 NVSWITCH_ENG_WR32(device, GIN, , 0, _CTRL, _CPU_INTR_TOP_EN_CLEAR(0), 0xFFFFFFFF);
7618 NVSWITCH_ENG_WR32(device, GIN, , 0, _CTRL, _CPU_INTR_TOP_EN_SET(0), 0xFFFFFFFF);
7619
7620 return return_status;
7621 }
7622
7623 /*
7624 * Initialize interrupt tree HW for all units.
7625 *
7626 * Init and servicing both depend on bits matching across STATUS/MASK
7627 * and IErr STATUS/LOG/REPORT/CONTAIN registers.
7628 */
7629 void
nvswitch_initialize_interrupt_tree_ls10(nvswitch_device * device)7630 nvswitch_initialize_interrupt_tree_ls10
7631 (
7632 nvswitch_device *device
7633 )
7634 {
7635 NvU64 link_mask = nvswitch_get_enabled_link_mask(device);
7636 NvU32 i, val;
7637
7638 // NPG/NPORT
7639 _nvswitch_initialize_nport_interrupts_ls10(device);
7640
7641 // NXBAR
7642 _nvswitch_initialize_nxbar_interrupts_ls10(device);
7643
7644 FOR_EACH_INDEX_IN_MASK(64, i, link_mask)
7645 {
7646 val = NVSWITCH_LINK_RD32(device, i,
7647 NVLW, _NVLW, _LINK_INTR_0_MASK(i));
7648 val = FLD_SET_DRF(_NVLW, _LINK_INTR_0_MASK, _FATAL, _ENABLE, val);
7649 val = FLD_SET_DRF(_NVLW, _LINK_INTR_0_MASK, _NONFATAL, _ENABLE, val);
7650 val = FLD_SET_DRF(_NVLW, _LINK_INTR_0_MASK, _CORRECTABLE, _ENABLE, val);
7651 val = FLD_SET_DRF(_NVLW, _LINK_INTR_0_MASK, _INTR0, _ENABLE, val);
7652 val = FLD_SET_DRF(_NVLW, _LINK_INTR_0_MASK, _INTR1, _ENABLE, val);
7653 NVSWITCH_LINK_WR32(device, i, NVLW, _NVLW, _LINK_INTR_0_MASK(i), val);
7654 }
7655 FOR_EACH_INDEX_IN_MASK_END;
7656
7657 FOR_EACH_INDEX_IN_MASK(64, i, link_mask)
7658 {
7659 val = NVSWITCH_LINK_RD32(device, i,
7660 NVLW, _NVLW, _LINK_INTR_1_MASK(i));
7661 val = FLD_SET_DRF(_NVLW, _LINK_INTR_1_MASK, _FATAL, _ENABLE, val);
7662 val = FLD_SET_DRF(_NVLW, _LINK_INTR_1_MASK, _NONFATAL, _ENABLE, val);
7663 val = FLD_SET_DRF(_NVLW, _LINK_INTR_1_MASK, _CORRECTABLE, _ENABLE, val);
7664 val = FLD_SET_DRF(_NVLW, _LINK_INTR_1_MASK, _INTR0, _ENABLE, val);
7665 val = FLD_SET_DRF(_NVLW, _LINK_INTR_1_MASK, _INTR1, _ENABLE, val);
7666 NVSWITCH_LINK_WR32(device, i, NVLW, _NVLW, _LINK_INTR_1_MASK(i), val);
7667 }
7668 FOR_EACH_INDEX_IN_MASK_END;
7669
7670 // NVLIPT
7671 _nvswitch_initialize_nvlipt_interrupts_ls10(device);
7672 }
7673
//
// Service Nvswitch NVLDL Fatal interrupts
//
// Reads the per-link NVLDL interrupt and stall-enable registers, reports and
// (where supported) logs to the InfoROM each pending fatal condition. The
// LTSSM_FAULT_UP/DOWN interrupts are handled last: they record a PORT_DOWN
// event, defer detailed error handling to a background task, disable link
// interrupts to avoid a storm, and may trigger the reset-and-drain flow.
//
// @param[in] device           nvswitch device
// @param[in] nvlipt_instance  NVLIPT instance owning this link (used for
//                             InfoROM logging and deferred task creation)
// @param[in] link             global link index
//
// @return NVL_SUCCESS                     all pending interrupts handled
//         -NVL_NOT_FOUND                  no enabled interrupt pending
//         -NVL_MORE_PROCESSING_REQUIRED   some pending bit was not recognized
//
NvlStatus
nvswitch_service_nvldl_fatal_link_ls10
(
    nvswitch_device *device,
    NvU32 nvlipt_instance,
    NvU32 link
)
{
    ls10_device *chip_device = NVSWITCH_GET_CHIP_DEVICE_LS10(device);
    NvU32 pending, bit, unhandled;
    // DL interrupt bits whose handling is deferred to the link-state-check task
    NvU32 dlDeferredIntrLinkMask = 0;
    // Set when an LTSSM fault requires the reset-and-drain flow for this link
    NvBool bRequireResetAndDrain = NV_FALSE;

    NVSWITCH_INTERRUPT_LOG_TYPE report = { 0 };
    INFOROM_NVLINK_ERROR_EVENT error_event;

    // Only interrupts that are both pending and stall-enabled are serviced
    report.raw_pending = NVSWITCH_LINK_RD32(device, link, NVLDL, _NVLDL_TOP, _INTR);
    report.raw_enable = NVSWITCH_LINK_RD32(device, link, NVLDL, _NVLDL_TOP, _INTR_STALL_EN);
    report.mask = report.raw_enable;
    pending = report.raw_pending & report.mask;

    error_event.nvliptInstance = (NvU8) nvlipt_instance;
    error_event.localLinkIdx = (NvU8) NVSWITCH_NVLIPT_GET_LOCAL_LINK_ID_LS10(link);

    if (pending == 0)
    {
        return -NVL_NOT_FOUND;
    }

    // Each recognized bit is cleared from 'unhandled' as it is serviced
    unhandled = pending;

    bit = DRF_NUM(_NVLDL_TOP, _INTR, _TX_FAULT_RAM, 1);
    if (nvswitch_test_flags(pending, bit))
    {
        NVSWITCH_REPORT_FATAL(_HW_DLPL_TX_FAULT_RAM, "TX Fault Ram", NV_FALSE);
        nvswitch_clear_flags(&unhandled, bit);
        error_event.error = INFOROM_NVLINK_DL_TX_FAULT_RAM_FATAL;
        nvswitch_inforom_nvlink_log_error_event(device, &error_event);
    }

    bit = DRF_NUM(_NVLDL_TOP, _INTR, _TX_FAULT_INTERFACE, 1);
    if (nvswitch_test_flags(pending, bit))
    {
        NVSWITCH_REPORT_FATAL(_HW_DLPL_TX_FAULT_INTERFACE, "TX Fault Interface", NV_FALSE);
        nvswitch_clear_flags(&unhandled, bit);
        error_event.error = INFOROM_NVLINK_DL_TX_FAULT_INTERFACE_FATAL;
        nvswitch_inforom_nvlink_log_error_event(device, &error_event);
    }

    bit = DRF_NUM(_NVLDL_TOP, _INTR, _TX_FAULT_SUBLINK_CHANGE, 1);
    if (nvswitch_test_flags(pending, bit))
    {
        NVSWITCH_REPORT_FATAL(_HW_DLPL_TX_FAULT_SUBLINK_CHANGE, "TX Fault Sublink Change", NV_FALSE);
        nvswitch_clear_flags(&unhandled, bit);
        error_event.error = INFOROM_NVLINK_DL_TX_FAULT_SUBLINK_CHANGE_FATAL;
        nvswitch_inforom_nvlink_log_error_event(device, &error_event);
    }

    bit = DRF_NUM(_NVLDL_TOP, _INTR, _RX_FAULT_SUBLINK_CHANGE, 1);
    if (nvswitch_test_flags(pending, bit))
    {
        NVSWITCH_REPORT_FATAL(_HW_DLPL_RX_FAULT_SUBLINK_CHANGE, "RX Fault Sublink Change", NV_FALSE);
        nvswitch_clear_flags(&unhandled, bit);
        error_event.error = INFOROM_NVLINK_DL_RX_FAULT_SUBLINK_CHANGE_FATAL;
        nvswitch_inforom_nvlink_log_error_event(device, &error_event);
    }

    bit = DRF_NUM(_NVLDL_TOP, _INTR, _RX_FAULT_DL_PROTOCOL, 1);
    if (nvswitch_test_flags(pending, bit))
    {
        NVSWITCH_REPORT_FATAL(_HW_DLPL_RX_FAULT_DL_PROTOCOL, "RX Fault DL Protocol", NV_FALSE);
        nvswitch_clear_flags(&unhandled, bit);
        error_event.error = INFOROM_NVLINK_DL_RX_FAULT_DL_PROTOCOL_FATAL;
        nvswitch_inforom_nvlink_log_error_event(device, &error_event);
    }

    bit = DRF_NUM(_NVLDL_TOP, _INTR, _LTSSM_PROTOCOL, 1);
    if (nvswitch_test_flags(pending, bit))
    {
        NVSWITCH_REPORT_FATAL(_HW_DLPL_LTSSM_PROTOCOL, "LTSSM Protocol Error", NV_FALSE);
        nvswitch_clear_flags(&unhandled, bit);

        // TODO 2827793 this should be logged to the InfoROM as fatal
    }

    bit = DRF_NUM(_NVLDL_TOP, _INTR, _PHY_A, 1);
    if (nvswitch_test_flags(pending, bit))
    {
        NVSWITCH_REPORT_FATAL(_HW_DLPL_PHY_A, "PHY_A Error", NV_FALSE);
        nvswitch_clear_flags(&unhandled, bit);
        error_event.error = INFOROM_NVLINK_DL_PHY_A_FATAL;
        nvswitch_inforom_nvlink_log_error_event(device, &error_event);
    }

    bit = DRF_NUM(_NVLDL_TOP, _INTR, _TX_PL_ERROR, 1);
    if (nvswitch_test_flags(pending, bit))
    {
        NVSWITCH_REPORT_FATAL(_HW_DLPL_TX_PL_ERROR, "TX_PL Error", NV_FALSE);
        nvswitch_clear_flags(&unhandled, bit);
        error_event.error = INFOROM_NVLINK_DL_TX_PL_ERROR_FATAL;
        nvswitch_inforom_nvlink_log_error_event(device, &error_event);
    }

    bit = DRF_NUM(_NVLDL_TOP, _INTR, _RX_PL_ERROR, 1);
    if (nvswitch_test_flags(pending, bit))
    {
        NVSWITCH_REPORT_FATAL(_HW_DLPL_RX_PL_ERROR, "RX_PL Error", NV_FALSE);
        nvswitch_clear_flags(&unhandled, bit);
        error_event.error = INFOROM_NVLINK_DL_RX_PL_ERROR_FATAL;
        nvswitch_inforom_nvlink_log_error_event(device, &error_event);
    }

    //
    // Note: LTSSM_FAULT_{UP/DOWN} must be the last interrupt serviced in the NVLDL
    // Fatal tree. The last step of handling this interrupt is going into the
    // reset_and_drain flow for the given link which will shutdown and reset
    // the link. The reset portion will also wipe away any link state including
    // pending DL interrupts. In order to log all error before wiping that state,
    // service all other interrupts before this one
    //
    bit = DRF_NUM(_NVLDL_TOP, _INTR, _LTSSM_FAULT_DOWN, 1);
    if (nvswitch_test_flags(pending, bit))
    {
        // Record the port-down event and notify registered clients
        nvswitch_record_port_event(device, &(device->log_PORT_EVENTS), link, NVSWITCH_PORT_EVENT_TYPE_DOWN);
        if (nvswitch_lib_notify_client_events(device,
            NVSWITCH_DEVICE_EVENT_PORT_DOWN) != NVL_SUCCESS)
        {
            NVSWITCH_PRINT(device, ERROR, "%s: Failed to notify PORT_DOWN event\n",
                         __FUNCTION__);
        }
        // Detailed error handling is deferred to a background task
        dlDeferredIntrLinkMask |= bit;

        //
        // Disable LTSSM FAULT DOWN, NPG, and NVLW interrupts to avoid interrupt storm. The interrupts
        // will be re-enabled in reset and drain
        //
        report.raw_enable = FLD_SET_DRF(_NVLDL_TOP, _INTR_STALL_EN, _LTSSM_FAULT_DOWN, _DISABLE, report.raw_enable);
        NVSWITCH_LINK_WR32(device, link, NVLDL, _NVLDL_TOP, _INTR_STALL_EN, report.raw_enable);
        nvswitch_link_disable_interrupts_ls10(device, link);

        if (device->bModeContinuousALI)
        {
            //
            // Since reset and drain will reset the link, including clearing
            // pending interrupts, skip the clear write below. There are cases
            // where link clocks will not be on after reset and drain so there
            // maybe PRI errors on writing to the register
            //
            // CCI will perform reset and drain
            if (!cciIsLinkManaged(device, link))
            {
                bRequireResetAndDrain = NV_TRUE;
            }
        }
        nvswitch_clear_flags(&unhandled, bit);
    }

    bit = DRF_NUM(_NVLDL_TOP, _INTR, _LTSSM_FAULT_UP, 1);
    if (nvswitch_test_flags(pending, bit))
    {
        // Record the port-down event and notify registered clients
        nvswitch_record_port_event(device, &(device->log_PORT_EVENTS), link, NVSWITCH_PORT_EVENT_TYPE_DOWN);
        if (nvswitch_lib_notify_client_events(device,
            NVSWITCH_DEVICE_EVENT_PORT_DOWN) != NVL_SUCCESS)
        {
            NVSWITCH_PRINT(device, ERROR, "%s: Failed to notify PORT_DOWN event\n",
                         __FUNCTION__);
        }
        // Detailed error handling is deferred to a background task
        dlDeferredIntrLinkMask |= bit;

        //
        // Disable LTSSM FAULT UP, NPG, and NVLW link interrupts to avoid interrupt storm. The interrupts
        // will be re-enabled in reset and drain
        //
        report.raw_enable = FLD_SET_DRF(_NVLDL_TOP, _INTR_STALL_EN, _LTSSM_FAULT_UP, _DISABLE, report.raw_enable);
        NVSWITCH_LINK_WR32(device, link, NVLDL, _NVLDL_TOP, _INTR_STALL_EN, report.raw_enable);
        nvswitch_link_disable_interrupts_ls10(device, link);

        if (device->bModeContinuousALI)
        {
            //
            // Since reset and drain will reset the link, including clearing
            // pending interrupts, skip the clear write below. There are cases
            // where link clocks will not be on after reset and drain so there
            // maybe PRI errors on writing to the register
            //
            // CCI will perform reset and drain
            if (!cciIsLinkManaged(device, link))
            {
                bRequireResetAndDrain = NV_TRUE;
            }
        }
        nvswitch_clear_flags(&unhandled, bit);
    }

    // Reset and drain shuts down and resets the link, wiping pending DL state
    if (bRequireResetAndDrain)
    {
        device->hal.nvswitch_reset_and_drain_links(device, NVBIT64(link), NV_FALSE);
    }

    // Hand any deferred DL fatal bits to the link-state-check task
    chip_device->deferredLinkErrors[link].data.fatalIntrMask.dl |= dlDeferredIntrLinkMask;
    if (dlDeferredIntrLinkMask)
    {
        nvswitch_create_deferred_link_state_check_task_ls10(device, nvlipt_instance, link);
    }

    NVSWITCH_UNHANDLED_CHECK(device, unhandled);

    // Disable interrupts that have occurred after fatal error.
    if (device->link[link].fatal_error_occurred)
    {
        NVSWITCH_LINK_WR32(device, link, NVLDL, _NVLDL_TOP, _INTR_STALL_EN,
                report.raw_enable & ~pending);
    }

    // Skip the clear when reset and drain ran (see comment above: link clocks
    // may be off afterwards, and the reset already cleared pending interrupts)
    if (!bRequireResetAndDrain)
    {
        NVSWITCH_LINK_WR32(device, link, NVLDL, _NVLDL_TOP, _INTR, pending);
    }

    if (unhandled != 0)
    {
        NVSWITCH_PRINT(device, WARN,
                "%s: Unhandled NVLDL fatal interrupts, link: %d pending: 0x%x enabled: 0x%x.\n",
                 __FUNCTION__, link, pending, report.raw_enable);
        return -NVL_MORE_PROCESSING_REQUIRED;
    }

    return NVL_SUCCESS;
}
7906
7907 NvlStatus
nvswitch_service_minion_link_ls10(nvswitch_device * device,NvU32 instance)7908 nvswitch_service_minion_link_ls10
7909 (
7910 nvswitch_device *device,
7911 NvU32 instance
7912 )
7913 {
7914 NVSWITCH_INTERRUPT_LOG_TYPE report = { 0 };
7915 NvU32 pending, unhandled, minionIntr, linkIntr, reg, enabledLinks, bit;
7916 NvU32 localLinkIdx, link;
7917 MINION_LINK_INTR minionLinkIntr = { 0 };
7918 ls10_device *chip_device = NVSWITCH_GET_CHIP_DEVICE_LS10(device);
7919
7920 //
7921 // _MINION_MINION_INTR shows all interrupts currently at the host on this minion
7922 // Note: _MINIO_MINION_INTR is not used to clear link specific interrupts
7923 //
7924 minionIntr = NVSWITCH_MINION_RD32_LS10(device, instance, _MINION, _MINION_INTR);
7925
7926 // get all possible interrupting links associated with this minion
7927 report.raw_pending = DRF_VAL(_MINION, _MINION_INTR, _LINK, minionIntr);
7928
7929 // read in the enaled minion interrupts on this minion
7930 reg = NVSWITCH_MINION_RD32_LS10(device, instance, _MINION, _MINION_INTR_STALL_EN);
7931
7932 // get the links with enabled interrupts on this minion
7933 enabledLinks = DRF_VAL(_MINION, _MINION_INTR_STALL_EN, _LINK, reg);
7934
7935 report.raw_enable = enabledLinks;
7936 report.mask = report.raw_enable;
7937
7938 // pending bit field contains interrupting links after being filtered
7939 pending = report.raw_pending & report.mask;
7940
7941 if (pending == 0)
7942 {
7943 return -NVL_NOT_FOUND;
7944 }
7945
7946 unhandled = pending;
7947
7948 minionLinkIntr.bPending = NV_TRUE;
7949
7950 FOR_EACH_INDEX_IN_MASK(32, localLinkIdx, pending)
7951 {
7952 link = (instance * NVSWITCH_LINKS_PER_NVLIPT_LS10) + localLinkIdx;
7953 bit = NVBIT(localLinkIdx);
7954
7955 // read in the interrupt register for the given link
7956 linkIntr = NVSWITCH_MINION_LINK_RD32_LS10(device, link, _MINION, _NVLINK_LINK_INTR(localLinkIdx));
7957 minionLinkIntr.regData = linkIntr;
7958
7959 // _STATE must be set for _CODE to be valid
7960 if (!DRF_VAL(_MINION, _NVLINK_LINK_INTR, _STATE, linkIntr))
7961 {
7962 continue;
7963 }
7964
7965 NVSWITCH_PRINT(device, INFO,
7966 "%s: link[%d] {%d, %d} linkIntr = 0x%x\n",
7967 __FUNCTION__, link, instance, localLinkIdx, linkIntr);
7968
7969 //
7970 // _MINION_INTR_LINK is a read-only register field for the host
7971 // Host must write 1 to _NVLINK_LINK_INTR_STATE to clear the interrupt on the link
7972 //
7973 reg = DRF_NUM(_MINION, _NVLINK_LINK_INTR, _STATE, 1);
7974 NVSWITCH_MINION_WR32_LS10(device, instance, _MINION, _NVLINK_LINK_INTR(localLinkIdx), reg);
7975
7976 report.data[0] = linkIntr;
7977
7978 switch(DRF_VAL(_MINION, _NVLINK_LINK_INTR, _CODE, linkIntr))
7979 {
7980 case NV_MINION_NVLINK_LINK_INTR_CODE_NA:
7981 case NV_MINION_NVLINK_LINK_INTR_CODE_DLCMDFAULT:
7982 case NV_MINION_NVLINK_LINK_INTR_CODE_LOCAL_CONFIG_ERR:
7983 case NV_MINION_NVLINK_LINK_INTR_CODE_NEGOTIATION_CONFIG_ERR:
7984 case NV_MINION_NVLINK_LINK_INTR_CODE_BADINIT:
7985 case NV_MINION_NVLINK_LINK_INTR_CODE_PMFAIL:
7986 case NV_MINION_NVLINK_LINK_INTR_CODE_NOINIT:
7987 chip_device->deferredLinkErrors[link].data.fatalIntrMask.minionLinkIntr =
7988 minionLinkIntr;
7989 _nvswitch_create_deferred_link_errors_task_ls10(device, instance, link);
7990 break;
7991 case NV_MINION_NVLINK_LINK_INTR_CODE_SWREQ:
7992 NVSWITCH_PRINT(device, INFO,
7993 "%s: Received MINION Link SW Generate interrupt on MINION %d : link %d.\n",
7994 __FUNCTION__, instance, link);
7995 break;
7996 case NV_MINION_NVLINK_LINK_INTR_CODE_DLREQ:
7997 case NV_MINION_NVLINK_LINK_INTR_CODE_PMDISABLED:
7998 case NV_MINION_NVLINK_LINK_INTR_CODE_TLREQ:
7999 chip_device->deferredLinkErrors[link].data.nonFatalIntrMask.minionLinkIntr =
8000 minionLinkIntr;
8001 _nvswitch_create_deferred_link_errors_task_ls10(device, instance, link);
8002 case NV_MINION_NVLINK_LINK_INTR_CODE_NOTIFY:
8003 NVSWITCH_PRINT(device, INFO,
8004 "%s: Received MINION NOTIFY interrupt on MINION %d : link %d.\n",
8005 __FUNCTION__, instance, link);
8006 break;
8007 case NV_MINION_NVLINK_LINK_INTR_CODE_INBAND_BUFFER_AVAILABLE:
8008 {
8009 NVSWITCH_PRINT(device, INFO,
8010 "Received INBAND_BUFFER_AVAILABLE interrupt on MINION %d,\n", instance);
8011 nvswitch_minion_receive_inband_data_ls10(device, link);
8012 break;
8013 }
8014 default:
8015 NVSWITCH_REPORT_FATAL(_HW_MINION_FATAL_LINK_INTR, "Minion Interrupt code unknown", NV_FALSE);
8016 }
8017 nvswitch_clear_flags(&unhandled, bit);
8018 }
8019 FOR_EACH_INDEX_IN_MASK_END;
8020
8021 NVSWITCH_UNHANDLED_CHECK(device, unhandled);
8022
8023 if (unhandled != 0)
8024 {
8025 return -NVL_MORE_PROCESSING_REQUIRED;
8026 }
8027
8028 return NVL_SUCCESS;
8029 }
8030