/*
 * SPDX-FileCopyrightText: Copyright (c) 2020-2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 * SPDX-License-Identifier: MIT
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
22 */ 23 24 #include "common_nvswitch.h" 25 #include "intr_nvswitch.h" 26 #include "regkey_nvswitch.h" 27 28 #include "ls10/ls10.h" 29 #include "ls10/minion_ls10.h" 30 31 #include "nvswitch/ls10/dev_ctrl_ip.h" 32 #include "nvswitch/ls10/dev_pri_masterstation_ip.h" 33 #include "nvswitch/ls10/dev_pri_hub_sys_ip.h" 34 #include "nvswitch/ls10/dev_pri_hub_sysb_ip.h" 35 #include "nvswitch/ls10/dev_pri_hub_prt_ip.h" 36 37 #include "nvswitch/ls10/dev_npg_ip.h" 38 #include "nvswitch/ls10/dev_nport_ip.h" 39 #include "nvswitch/ls10/dev_route_ip.h" 40 #include "nvswitch/ls10/dev_ingress_ip.h" 41 #include "nvswitch/ls10/dev_sourcetrack_ip.h" 42 #include "nvswitch/ls10/dev_egress_ip.h" 43 #include "nvswitch/ls10/dev_tstate_ip.h" 44 #include "nvswitch/ls10/dev_multicasttstate_ip.h" 45 #include "nvswitch/ls10/dev_reductiontstate_ip.h" 46 47 #include "nvswitch/ls10/dev_nvlw_ip.h" 48 #include "nvswitch/ls10/dev_minion_ip.h" 49 #include "nvswitch/ls10/dev_minion_ip_addendum.h" 50 #include "nvswitch/ls10/dev_cpr_ip.h" 51 #include "nvswitch/ls10/dev_nvlipt_ip.h" 52 #include "nvswitch/ls10/dev_nvlipt_lnk_ip.h" 53 #include "nvswitch/ls10/dev_nvltlc_ip.h" 54 #include "nvswitch/ls10/dev_nvldl_ip.h" 55 56 #include "nvswitch/ls10/dev_nxbar_tcp_global_ip.h" 57 #include "nvswitch/ls10/dev_nxbar_tile_ip.h" 58 #include "nvswitch/ls10/dev_nxbar_tileout_ip.h" 59 60 #include "nvswitch/ls10/dev_ctrl_ip_addendum.h" 61 62 static void 63 _nvswitch_construct_ecc_error_event_ls10 64 ( 65 INFOROM_NVS_ECC_ERROR_EVENT *err_event, 66 NvU32 sxid, 67 NvU32 linkId, 68 NvBool bAddressValid, 69 NvU32 address, 70 NvBool bUncErr, 71 NvU32 errorCount 72 ) 73 { 74 err_event->sxid = sxid; 75 err_event->linkId = linkId; 76 err_event->bAddressValid = bAddressValid; 77 err_event->address = address; 78 err_event->bUncErr = bUncErr; 79 err_event->errorCount = errorCount; 80 } 81 82 static void 83 _nvswitch_initialize_minion_interrupts 84 ( 85 nvswitch_device *device, 86 NvU32 instance 87 ) 88 { 89 NvU32 intrEn, 
localDiscoveredLinks, globalLink, i; 90 localDiscoveredLinks = 0; 91 92 // Tree 1 (non-stall) is disabled until there is a need 93 NVSWITCH_MINION_WR32_LS10(device, instance, _MINION, _MINION_INTR_NONSTALL_EN, 0); 94 95 // Tree 0 (stall) is where we route _all_ MINION interrupts for now 96 intrEn = DRF_DEF(_MINION, _MINION_INTR_STALL_EN, _FATAL, _ENABLE) | 97 DRF_DEF(_MINION, _MINION_INTR_STALL_EN, _NONFATAL, _ENABLE) | 98 DRF_DEF(_MINION, _MINION_INTR_STALL_EN, _FALCON_STALL, _ENABLE) | 99 DRF_DEF(_MINION, _MINION_INTR_STALL_EN, _FALCON_NOSTALL, _DISABLE); 100 101 for (i = 0; i < NVSWITCH_LINKS_PER_MINION_LS10; ++i) 102 { 103 // get the global link number of the link we are iterating over 104 globalLink = (instance * NVSWITCH_LINKS_PER_MINION_LS10) + i; 105 106 // the link is valid place bit in link mask 107 if (device->link[globalLink].valid) 108 { 109 localDiscoveredLinks |= NVBIT(i); 110 } 111 } 112 113 intrEn = FLD_SET_DRF_NUM(_MINION, _MINION_INTR_STALL_EN, _LINK, 114 localDiscoveredLinks, intrEn); 115 116 { 117 // Disable interrupts only if explicitly requested to. Default to enable. 
118 if (device->regkeys.minion_intr != NV_SWITCH_REGKEY_MINION_INTERRUPTS_DISABLE) 119 { 120 NVSWITCH_MINION_WR32_LS10(device, instance, _MINION, _MINION_INTR_STALL_EN, intrEn); 121 } 122 } 123 } 124 125 static void 126 _nvswitch_initialize_nvlipt_interrupts_ls10 127 ( 128 nvswitch_device *device 129 ) 130 { 131 NvU32 i; 132 NvU32 regval = 0; 133 134 // 135 // NVLipt interrupt routing (NVLIPT_COMMON, NVLIPT_LNK, NVLDL, NVLTLC) 136 // will be initialized by MINION NVLPROD flow 137 // 138 // We must enable interrupts at the top levels in NVLW, NVLIPT_COMMON, 139 // NVLIPT_LNK and MINION 140 // 141 142 // NVLW 143 regval = DRF_NUM(_NVLW_COMMON, _INTR_0_MASK, _FATAL, 0x1) | 144 DRF_NUM(_NVLW_COMMON, _INTR_0_MASK, _NONFATAL, 0x0) | 145 DRF_NUM(_NVLW_COMMON, _INTR_0_MASK, _CORRECTABLE, 0x0) | 146 DRF_NUM(_NVLW_COMMON, _INTR_0_MASK, _INTR0, 0x1) | 147 DRF_NUM(_NVLW_COMMON, _INTR_0_MASK, _INTR1, 0x0); 148 NVSWITCH_BCAST_WR32_LS10(device, NVLW, _NVLW_COMMON, _INTR_0_MASK, regval); 149 150 regval = DRF_NUM(_NVLW_COMMON, _INTR_1_MASK, _FATAL, 0x0) | 151 DRF_NUM(_NVLW_COMMON, _INTR_1_MASK, _NONFATAL, 0x1) | 152 DRF_NUM(_NVLW_COMMON, _INTR_1_MASK, _CORRECTABLE, 0x1) | 153 DRF_NUM(_NVLW_COMMON, _INTR_1_MASK, _INTR0, 0x0) | 154 DRF_NUM(_NVLW_COMMON, _INTR_1_MASK, _INTR1, 0x1); 155 NVSWITCH_BCAST_WR32_LS10(device, NVLW, _NVLW_COMMON, _INTR_1_MASK, regval); 156 157 regval = DRF_NUM(_NVLW_COMMON, _INTR_2_MASK, _FATAL, 0x0) | 158 DRF_NUM(_NVLW_COMMON, _INTR_2_MASK, _NONFATAL, 0x0) | 159 DRF_NUM(_NVLW_COMMON, _INTR_2_MASK, _CORRECTABLE, 0x0) | 160 DRF_NUM(_NVLW_COMMON, _INTR_2_MASK, _INTR0, 0x0) | 161 DRF_NUM(_NVLW_COMMON, _INTR_2_MASK, _INTR1, 0x0); 162 NVSWITCH_BCAST_WR32_LS10(device, NVLW, _NVLW_COMMON, _INTR_2_MASK, regval); 163 164 // NVLW link 165 for (i = 0; i < NV_NVLW_LINK_INTR_0_MASK__SIZE_1; i++) 166 { 167 regval = DRF_NUM(_NVLW_LINK, _INTR_0_MASK, _FATAL, 0x1) | 168 DRF_NUM(_NVLW_LINK, _INTR_0_MASK, _NONFATAL, 0x0) | 169 DRF_NUM(_NVLW_LINK, _INTR_0_MASK, _CORRECTABLE, 0x0) 
| 170 DRF_NUM(_NVLW_LINK, _INTR_0_MASK, _INTR0, 0x1) | 171 DRF_NUM(_NVLW_LINK, _INTR_0_MASK, _INTR1, 0x0); 172 NVSWITCH_BCAST_WR32_LS10(device, NVLW, _NVLW_LINK, _INTR_0_MASK(i), regval); 173 174 regval = DRF_NUM(_NVLW_LINK, _INTR_1_MASK, _FATAL, 0x0) | 175 DRF_NUM(_NVLW_LINK, _INTR_1_MASK, _NONFATAL, 0x1) | 176 DRF_NUM(_NVLW_LINK, _INTR_1_MASK, _CORRECTABLE, 0x1) | 177 DRF_NUM(_NVLW_LINK, _INTR_1_MASK, _INTR0, 0x0) | 178 DRF_NUM(_NVLW_LINK, _INTR_1_MASK, _INTR1, 0x1); 179 NVSWITCH_BCAST_WR32_LS10(device, NVLW, _NVLW_LINK, _INTR_1_MASK(i), regval); 180 181 regval = DRF_NUM(_NVLW_LINK, _INTR_2_MASK, _FATAL, 0x0) | 182 DRF_NUM(_NVLW_LINK, _INTR_2_MASK, _NONFATAL, 0x0) | 183 DRF_NUM(_NVLW_LINK, _INTR_2_MASK, _CORRECTABLE, 0x0) | 184 DRF_NUM(_NVLW_LINK, _INTR_2_MASK, _INTR0, 0x0) | 185 DRF_NUM(_NVLW_LINK, _INTR_2_MASK, _INTR1, 0x0); 186 NVSWITCH_BCAST_WR32_LS10(device, NVLW, _NVLW_LINK, _INTR_2_MASK(i), regval); 187 } 188 189 // NVLIPT_COMMON 190 regval = DRF_NUM(_NVLIPT_COMMON, _INTR_CONTROL_COMMON, _INT0_EN, 0x1) | 191 DRF_NUM(_NVLIPT_COMMON, _INTR_CONTROL_COMMON, _INT1_EN, 0x1); 192 193 NVSWITCH_BCAST_WR32_LS10(device, NVLIPT, _NVLIPT_COMMON, _INTR_CONTROL_COMMON, regval); 194 195 // NVLIPT_LNK 196 regval = DRF_NUM(_NVLIPT_LNK, _INTR_CONTROL_LINK, _INT0_EN, 0x1) | 197 DRF_NUM(_NVLIPT_LNK, _INTR_CONTROL_LINK, _INT1_EN, 0x1); 198 NVSWITCH_BCAST_WR32_LS10(device, NVLIPT_LNK, _NVLIPT_LNK, _INTR_CONTROL_LINK, regval); 199 200 // NVLIPT_LNK_INTR_1 201 regval = DRF_NUM(_NVLIPT_LNK, _INTR_INT1_EN, _LINKSTATEREQUESTREADYSET, 0x1); 202 NVSWITCH_BCAST_WR32_LS10(device, NVLIPT_LNK, _NVLIPT_LNK, _INTR_INT1_EN, regval); 203 204 // MINION 205 for (i = 0; i < NUM_MINION_ENGINE_LS10; ++i) 206 { 207 if (!NVSWITCH_ENG_VALID_LS10(device, MINION, i)) 208 { 209 continue; 210 } 211 212 _nvswitch_initialize_minion_interrupts(device,i); 213 } 214 215 // CPR 216 217 regval = NVSWITCH_ENG_RD32(device, CPR, _BCAST, 0, _CPR_SYS, _ERR_LOG_EN_0); 218 regval = FLD_SET_DRF(_CPR_SYS, _ERR_LOG_EN_0, 
_ENGINE_RESET_ERR, __PROD, regval); 219 NVSWITCH_ENG_WR32(device, CPR, _BCAST, 0, _CPR_SYS, _ERR_LOG_EN_0, regval); 220 221 regval = DRF_DEF(_CPR_SYS, _NVLW_INTR_0_MASK, _CPR_INTR, _ENABLE) | 222 DRF_DEF(_CPR_SYS, _NVLW_INTR_0_MASK, _INTR0, _ENABLE); 223 NVSWITCH_ENG_WR32(device, CPR, _BCAST, 0, _CPR_SYS, _NVLW_INTR_0_MASK, regval); 224 225 regval = DRF_DEF(_CPR_SYS, _NVLW_INTR_1_MASK, _CPR_INTR, _DISABLE) | 226 DRF_DEF(_CPR_SYS, _NVLW_INTR_1_MASK, _INTR1, _ENABLE); 227 NVSWITCH_ENG_WR32(device, CPR, _BCAST, 0, _CPR_SYS, _NVLW_INTR_1_MASK, regval); 228 229 regval = DRF_DEF(_CPR_SYS, _NVLW_INTR_2_MASK, _CPR_INTR, _DISABLE) | 230 DRF_DEF(_CPR_SYS, _NVLW_INTR_2_MASK, _INTR2, _ENABLE); 231 NVSWITCH_ENG_WR32(device, CPR, _BCAST, 0, _CPR_SYS, _NVLW_INTR_2_MASK, regval); 232 } 233 234 static void 235 _nvswitch_initialize_route_interrupts 236 ( 237 nvswitch_device *device 238 ) 239 { 240 ls10_device *chip_device = NVSWITCH_GET_CHIP_DEVICE_LS10(device); 241 242 chip_device->intr_mask.route.fatal = 243 DRF_DEF(_ROUTE, _ERR_FATAL_REPORT_EN_0, _ROUTEBUFERR, _ENABLE) | 244 DRF_DEF(_ROUTE, _ERR_FATAL_REPORT_EN_0, _GLT_ECC_DBE_ERR, _ENABLE) | 245 DRF_DEF(_ROUTE, _ERR_FATAL_REPORT_EN_0, _PDCTRLPARERR, _ENABLE) | 246 DRF_DEF(_ROUTE, _ERR_FATAL_REPORT_EN_0, _NVS_ECC_DBE_ERR, _ENABLE) | 247 DRF_DEF(_ROUTE, _ERR_FATAL_REPORT_EN_0, _CDTPARERR, _ENABLE) | 248 DRF_DEF(_ROUTE, _ERR_FATAL_REPORT_EN_0, _MCRID_ECC_DBE_ERR, _ENABLE) | 249 DRF_DEF(_ROUTE, _ERR_FATAL_REPORT_EN_0, _EXTMCRID_ECC_DBE_ERR, _ENABLE) | 250 DRF_DEF(_ROUTE, _ERR_FATAL_REPORT_EN_0, _RAM_ECC_DBE_ERR, _ENABLE); 251 252 chip_device->intr_mask.route.nonfatal = 253 DRF_DEF(_ROUTE, _ERR_NON_FATAL_REPORT_EN_0, _NOPORTDEFINEDERR, _ENABLE) | 254 DRF_DEF(_ROUTE, _ERR_NON_FATAL_REPORT_EN_0, _INVALIDROUTEPOLICYERR, _ENABLE) | 255 DRF_DEF(_ROUTE, _ERR_NON_FATAL_REPORT_EN_0, _GLT_ECC_LIMIT_ERR, _ENABLE) | 256 DRF_DEF(_ROUTE, _ERR_NON_FATAL_REPORT_EN_0, _NVS_ECC_LIMIT_ERR, _ENABLE) | 257 DRF_DEF(_ROUTE, _ERR_NON_FATAL_REPORT_EN_0, 
_MCRID_ECC_LIMIT_ERR, _ENABLE) | 258 DRF_DEF(_ROUTE, _ERR_NON_FATAL_REPORT_EN_0, _EXTMCRID_ECC_LIMIT_ERR, _ENABLE) | 259 DRF_DEF(_ROUTE, _ERR_NON_FATAL_REPORT_EN_0, _RAM_ECC_LIMIT_ERR, _ENABLE) | 260 DRF_DEF(_ROUTE, _ERR_NON_FATAL_REPORT_EN_0, _INVALID_MCRID_ERR, _ENABLE); 261 // NOTE: _MC_TRIGGER_ERR is debug-use only 262 } 263 264 static void 265 _nvswitch_initialize_ingress_interrupts 266 ( 267 nvswitch_device *device 268 ) 269 { 270 ls10_device *chip_device = NVSWITCH_GET_CHIP_DEVICE_LS10(device); 271 272 chip_device->intr_mask.ingress[0].fatal = 273 DRF_DEF(_INGRESS, _ERR_FATAL_REPORT_EN_0, _CMDDECODEERR, _ENABLE) | 274 DRF_DEF(_INGRESS, _ERR_FATAL_REPORT_EN_0, _EXTAREMAPTAB_ECC_DBE_ERR, _ENABLE) | 275 DRF_DEF(_INGRESS, _ERR_FATAL_REPORT_EN_0, _NCISOC_HDR_ECC_DBE_ERR, _ENABLE) | 276 DRF_DEF(_INGRESS, _ERR_FATAL_REPORT_EN_0, _INVALIDVCSET, _ENABLE) | 277 DRF_DEF(_INGRESS, _ERR_FATAL_REPORT_EN_0, _REMAPTAB_ECC_DBE_ERR, _ENABLE) | 278 DRF_DEF(_INGRESS, _ERR_FATAL_REPORT_EN_0, _RIDTAB_ECC_DBE_ERR, _ENABLE) | 279 DRF_DEF(_INGRESS, _ERR_FATAL_REPORT_EN_0, _RLANTAB_ECC_DBE_ERR, _ENABLE) | 280 DRF_DEF(_INGRESS, _ERR_FATAL_REPORT_EN_0, _NCISOC_PARITY_ERR, _ENABLE) | 281 DRF_DEF(_INGRESS, _ERR_FATAL_REPORT_EN_0, _EXTBREMAPTAB_ECC_DBE_ERR, _ENABLE) | 282 DRF_DEF(_INGRESS, _ERR_FATAL_REPORT_EN_0, _MCREMAPTAB_ECC_DBE_ERR, _ENABLE); 283 284 chip_device->intr_mask.ingress[0].nonfatal = 285 DRF_DEF(_INGRESS, _ERR_NON_FATAL_REPORT_EN_0, _REQCONTEXTMISMATCHERR, _ENABLE) | 286 DRF_DEF(_INGRESS, _ERR_NON_FATAL_REPORT_EN_0, _ACLFAIL, _ENABLE) | 287 DRF_DEF(_INGRESS, _ERR_NON_FATAL_REPORT_EN_0, _NCISOC_HDR_ECC_LIMIT_ERR, _ENABLE) | 288 DRF_DEF(_INGRESS, _ERR_NON_FATAL_REPORT_EN_0, _ADDRBOUNDSERR, _ENABLE) | 289 DRF_DEF(_INGRESS, _ERR_NON_FATAL_REPORT_EN_0, _RIDTABCFGERR, _ENABLE) | 290 DRF_DEF(_INGRESS, _ERR_NON_FATAL_REPORT_EN_0, _RLANTABCFGERR, _ENABLE) | 291 DRF_DEF(_INGRESS, _ERR_NON_FATAL_REPORT_EN_0, _REMAPTAB_ECC_LIMIT_ERR, _ENABLE) | 292 DRF_DEF(_INGRESS, 
_ERR_NON_FATAL_REPORT_EN_0, _RIDTAB_ECC_LIMIT_ERR, _ENABLE) | 293 DRF_DEF(_INGRESS, _ERR_NON_FATAL_REPORT_EN_0, _RLANTAB_ECC_LIMIT_ERR, _ENABLE) | 294 DRF_DEF(_INGRESS, _ERR_NON_FATAL_REPORT_EN_0, _ADDRTYPEERR, _ENABLE) | 295 DRF_DEF(_INGRESS, _ERR_NON_FATAL_REPORT_EN_0, _EXTAREMAPTAB_INDEX_ERR, _ENABLE) | 296 DRF_DEF(_INGRESS, _ERR_NON_FATAL_REPORT_EN_0, _EXTBREMAPTAB_INDEX_ERR, _ENABLE) | 297 DRF_DEF(_INGRESS, _ERR_NON_FATAL_REPORT_EN_0, _MCREMAPTAB_INDEX_ERR, _ENABLE) | 298 DRF_DEF(_INGRESS, _ERR_NON_FATAL_REPORT_EN_0, _EXTAREMAPTAB_REQCONTEXTMISMATCHERR, _ENABLE) | 299 DRF_DEF(_INGRESS, _ERR_NON_FATAL_REPORT_EN_0, _EXTBREMAPTAB_REQCONTEXTMISMATCHERR, _ENABLE) | 300 DRF_DEF(_INGRESS, _ERR_NON_FATAL_REPORT_EN_0, _MCREMAPTAB_REQCONTEXTMISMATCHERR, _ENABLE) | 301 DRF_DEF(_INGRESS, _ERR_NON_FATAL_REPORT_EN_0, _EXTAREMAPTAB_ACLFAIL, _ENABLE) | 302 DRF_DEF(_INGRESS, _ERR_NON_FATAL_REPORT_EN_0, _EXTBREMAPTAB_ACLFAIL, _ENABLE) | 303 DRF_DEF(_INGRESS, _ERR_NON_FATAL_REPORT_EN_0, _MCREMAPTAB_ACLFAIL, _ENABLE) | 304 DRF_DEF(_INGRESS, _ERR_NON_FATAL_REPORT_EN_0, _EXTAREMAPTAB_ADDRBOUNDSERR, _ENABLE) | 305 DRF_DEF(_INGRESS, _ERR_NON_FATAL_REPORT_EN_0, _EXTBREMAPTAB_ADDRBOUNDSERR, _ENABLE) | 306 DRF_DEF(_INGRESS, _ERR_NON_FATAL_REPORT_EN_0, _MCREMAPTAB_ADDRBOUNDSERR, _ENABLE); 307 308 chip_device->intr_mask.ingress[1].fatal = 0; 309 310 chip_device->intr_mask.ingress[1].nonfatal = 311 DRF_DEF(_INGRESS, _ERR_NON_FATAL_REPORT_EN_1, _EXTAREMAPTAB_ECC_LIMIT_ERR, _ENABLE) | 312 DRF_DEF(_INGRESS, _ERR_NON_FATAL_REPORT_EN_1, _EXTBREMAPTAB_ECC_LIMIT_ERR, _ENABLE) | 313 DRF_DEF(_INGRESS, _ERR_NON_FATAL_REPORT_EN_1, _MCREMAPTAB_ECC_LIMIT_ERR, _ENABLE) | 314 DRF_DEF(_INGRESS, _ERR_NON_FATAL_REPORT_EN_1, _MCCMDTOUCADDRERR, _ENABLE) | 315 DRF_DEF(_INGRESS, _ERR_NON_FATAL_REPORT_EN_1, _READMCREFLECTMEMERR, _ENABLE) | 316 DRF_DEF(_INGRESS, _ERR_NON_FATAL_REPORT_EN_1, _EXTAREMAPTAB_ADDRTYPEERR, _ENABLE) | 317 DRF_DEF(_INGRESS, _ERR_NON_FATAL_REPORT_EN_1, _EXTBREMAPTAB_ADDRTYPEERR, _ENABLE) | 
318 DRF_DEF(_INGRESS, _ERR_NON_FATAL_REPORT_EN_1, _MCREMAPTAB_ADDRTYPEERR, _ENABLE); 319 } 320 321 static void 322 _nvswitch_initialize_egress_interrupts 323 ( 324 nvswitch_device *device 325 ) 326 { 327 ls10_device *chip_device = NVSWITCH_GET_CHIP_DEVICE_LS10(device); 328 329 chip_device->intr_mask.egress[0].fatal = 330 DRF_DEF(_EGRESS, _ERR_FATAL_REPORT_EN_0, _EGRESSBUFERR, _ENABLE) | 331 DRF_DEF(_EGRESS, _ERR_FATAL_REPORT_EN_0, _PKTROUTEERR, _ENABLE) | 332 DRF_DEF(_EGRESS, _ERR_FATAL_REPORT_EN_0, _SEQIDERR, _ENABLE) | 333 DRF_DEF(_EGRESS, _ERR_FATAL_REPORT_EN_0, _NXBAR_HDR_ECC_DBE_ERR, _ENABLE) | 334 DRF_DEF(_EGRESS, _ERR_FATAL_REPORT_EN_0, _RAM_OUT_HDR_ECC_DBE_ERR, _ENABLE) | 335 DRF_DEF(_EGRESS, _ERR_FATAL_REPORT_EN_0, _NCISOCCREDITOVFL, _ENABLE) | 336 DRF_DEF(_EGRESS, _ERR_FATAL_REPORT_EN_0, _REQTGTIDMISMATCHERR, _ENABLE) | 337 DRF_DEF(_EGRESS, _ERR_FATAL_REPORT_EN_0, _RSPREQIDMISMATCHERR, _ENABLE) | 338 DRF_DEF(_EGRESS, _ERR_FATAL_REPORT_EN_0, _NXBAR_HDR_PARITY_ERR, _ENABLE) | 339 DRF_DEF(_EGRESS, _ERR_FATAL_REPORT_EN_0, _NCISOC_CREDIT_PARITY_ERR, _ENABLE) | 340 DRF_DEF(_EGRESS, _ERR_FATAL_REPORT_EN_0, _NXBAR_FLITTYPE_MISMATCH_ERR, _ENABLE) | 341 DRF_DEF(_EGRESS, _ERR_FATAL_REPORT_EN_0, _CREDIT_TIME_OUT_ERR, _ENABLE) | 342 DRF_DEF(_EGRESS, _ERR_FATAL_REPORT_EN_0, _INVALIDVCSET_ERR, _ENABLE) | 343 DRF_DEF(_EGRESS, _ERR_FATAL_REPORT_EN_0, _NXBAR_SIDEBAND_PD_PARITY_ERR, _ENABLE) | 344 DRF_DEF(_EGRESS, _ERR_FATAL_REPORT_EN_0, _URRSPERR, _ENABLE) | 345 DRF_DEF(_EGRESS, _ERR_FATAL_REPORT_EN_0, _HWRSPERR, _ENABLE); 346 347 chip_device->intr_mask.egress[0].nonfatal = 348 DRF_DEF(_EGRESS, _ERR_NON_FATAL_REPORT_EN_0, _NXBAR_HDR_ECC_LIMIT_ERR, _ENABLE) | 349 DRF_DEF(_EGRESS, _ERR_NON_FATAL_REPORT_EN_0, _RAM_OUT_HDR_ECC_LIMIT_ERR, _ENABLE) | 350 DRF_DEF(_EGRESS, _ERR_NON_FATAL_REPORT_EN_0, _PRIVRSPERR, _ENABLE) | 351 DRF_DEF(_EGRESS, _ERR_NON_FATAL_REPORT_EN_0, _RFU, _DISABLE); 352 353 chip_device->intr_mask.egress[1].fatal = 354 355 DRF_DEF(_EGRESS, 
_ERR_FATAL_REPORT_EN_1, _MCRSPCTRLSTORE_ECC_DBE_ERR, _ENABLE) | 356 DRF_DEF(_EGRESS, _ERR_FATAL_REPORT_EN_1, _RBCTRLSTORE_ECC_DBE_ERR, _ENABLE) | 357 DRF_DEF(_EGRESS, _ERR_FATAL_REPORT_EN_1, _MCREDSGT_ECC_DBE_ERR, _ENABLE) | 358 DRF_DEF(_EGRESS, _ERR_FATAL_REPORT_EN_1, _MCRSP_RAM_HDR_ECC_DBE_ERR, _ENABLE); 359 360 chip_device->intr_mask.egress[1].nonfatal = 361 DRF_DEF(_EGRESS, _ERR_NON_FATAL_REPORT_EN_1, _NXBAR_REDUCTION_HDR_ECC_LIMIT_ERR, _ENABLE) | 362 DRF_DEF(_EGRESS, _ERR_NON_FATAL_REPORT_EN_1, _MCRSPCTRLSTORE_ECC_LIMIT_ERR, _ENABLE) | 363 DRF_DEF(_EGRESS, _ERR_NON_FATAL_REPORT_EN_1, _RBCTRLSTORE_ECC_LIMIT_ERR, _ENABLE) | 364 DRF_DEF(_EGRESS, _ERR_NON_FATAL_REPORT_EN_1, _MCREDSGT_ECC_LIMIT_ERR, _ENABLE) | 365 DRF_DEF(_EGRESS, _ERR_NON_FATAL_REPORT_EN_1, _MCREDBUF_ECC_LIMIT_ERR, _ENABLE) | 366 DRF_DEF(_EGRESS, _ERR_NON_FATAL_REPORT_EN_1, _MCRSP_RAM_HDR_ECC_LIMIT_ERR, _ENABLE) | 367 DRF_DEF(_EGRESS, _ERR_NON_FATAL_REPORT_EN_1, _NXBAR_REDUCTION_HDR_ECC_DBE_ERR, _ENABLE) | 368 DRF_DEF(_EGRESS, _ERR_NON_FATAL_REPORT_EN_1, _NXBAR_REDUCTION_HDR_PARITY_ERR, _ENABLE) | 369 DRF_DEF(_EGRESS, _ERR_NON_FATAL_REPORT_EN_1, _NXBAR_REDUCTION_FLITTYPE_MISMATCH_ERR, _ENABLE) | 370 DRF_DEF(_EGRESS, _ERR_NON_FATAL_REPORT_EN_1, _MCREDBUF_ECC_DBE_ERR, _ENABLE) | 371 DRF_DEF(_EGRESS, _ERR_NON_FATAL_REPORT_EN_1, _MCRSP_CNT_ERR, _ENABLE) | 372 DRF_DEF(_EGRESS, _ERR_NON_FATAL_REPORT_EN_1, _RBRSP_CNT_ERR, _ENABLE); 373 } 374 375 static void 376 _nvswitch_initialize_tstate_interrupts 377 ( 378 nvswitch_device *device 379 ) 380 { 381 ls10_device *chip_device = NVSWITCH_GET_CHIP_DEVICE_LS10(device); 382 383 chip_device->intr_mask.tstate.fatal = 384 DRF_DEF(_TSTATE, _ERR_FATAL_REPORT_EN_0, _TAGPOOLBUFERR, _ENABLE) | 385 DRF_DEF(_TSTATE, _ERR_FATAL_REPORT_EN_0, _TAGPOOL_ECC_DBE_ERR, _ENABLE) | 386 DRF_DEF(_TSTATE, _ERR_FATAL_REPORT_EN_0, _CRUMBSTOREBUFERR, _ENABLE) | 387 DRF_DEF(_TSTATE, _ERR_FATAL_REPORT_EN_0, _CRUMBSTORE_ECC_DBE_ERR, _ENABLE) | 388 DRF_DEF(_TSTATE, _ERR_FATAL_REPORT_EN_0, 
_ATO_ERR, _ENABLE) | 389 DRF_DEF(_TSTATE, _ERR_FATAL_REPORT_EN_0, _CAMRSP_ERR, _ENABLE); 390 391 chip_device->intr_mask.tstate.nonfatal = 392 DRF_DEF(_TSTATE, _ERR_NON_FATAL_REPORT_EN_0, _TAGPOOL_ECC_LIMIT_ERR, _ENABLE) | 393 DRF_DEF(_TSTATE, _ERR_NON_FATAL_REPORT_EN_0, _CRUMBSTORE_ECC_LIMIT_ERR, _ENABLE); 394 } 395 396 static void 397 _nvswitch_initialize_sourcetrack_interrupts 398 ( 399 nvswitch_device *device 400 ) 401 { 402 ls10_device *chip_device = NVSWITCH_GET_CHIP_DEVICE_LS10(device); 403 404 chip_device->intr_mask.sourcetrack.fatal = 405 DRF_DEF(_SOURCETRACK, _ERR_FATAL_REPORT_EN_0, _CREQ_TCEN0_CRUMBSTORE_ECC_DBE_ERR, _ENABLE) | 406 DRF_DEF(_SOURCETRACK, _ERR_FATAL_REPORT_EN_0, _DUP_CREQ_TCEN0_TAG_ERR, _ENABLE) | 407 DRF_DEF(_SOURCETRACK, _ERR_FATAL_REPORT_EN_0, _INVALID_TCEN0_RSP_ERR, _ENABLE) | 408 DRF_DEF(_SOURCETRACK, _ERR_FATAL_REPORT_EN_0, _INVALID_TCEN1_RSP_ERR, _ENABLE) | 409 DRF_DEF(_SOURCETRACK, _ERR_FATAL_REPORT_EN_0, _SOURCETRACK_TIME_OUT_ERR, _ENABLE); 410 411 chip_device->intr_mask.sourcetrack.nonfatal = 412 DRF_DEF(_SOURCETRACK, _ERR_NON_FATAL_REPORT_EN_0, _CREQ_TCEN0_CRUMBSTORE_ECC_LIMIT_ERR, _ENABLE); 413 } 414 415 static void 416 _nvswitch_initialize_multicast_tstate_interrupts 417 ( 418 nvswitch_device *device 419 ) 420 { 421 ls10_device *chip_device = NVSWITCH_GET_CHIP_DEVICE_LS10(device); 422 423 chip_device->intr_mask.mc_tstate.fatal = 424 DRF_DEF(_MULTICASTTSTATE, _ERR_FATAL_REPORT_EN_0, _TAGPOOL_ECC_DBE_ERR, _ENABLE) | 425 DRF_DEF(_MULTICASTTSTATE, _ERR_FATAL_REPORT_EN_0, _CRUMBSTORE_BUF_OVERWRITE_ERR, _ENABLE) | 426 DRF_DEF(_MULTICASTTSTATE, _ERR_FATAL_REPORT_EN_0, _CRUMBSTORE_ECC_DBE_ERR, _ENABLE); 427 428 chip_device->intr_mask.mc_tstate.nonfatal = 429 DRF_DEF(_MULTICASTTSTATE, _ERR_NON_FATAL_REPORT_EN_0, _TAGPOOL_ECC_LIMIT_ERR, _ENABLE) | 430 DRF_DEF(_MULTICASTTSTATE, _ERR_NON_FATAL_REPORT_EN_0, _CRUMBSTORE_ECC_LIMIT_ERR, _ENABLE) | 431 DRF_DEF(_MULTICASTTSTATE, _ERR_NON_FATAL_REPORT_EN_0, _CRUMBSTORE_MCTO_ERR, _ENABLE); 432 } 
433 434 static void 435 _nvswitch_initialize_reduction_tstate_interrupts 436 ( 437 nvswitch_device *device 438 ) 439 { 440 ls10_device *chip_device = NVSWITCH_GET_CHIP_DEVICE_LS10(device); 441 442 chip_device->intr_mask.red_tstate.fatal = 443 DRF_DEF(_REDUCTIONTSTATE, _ERR_FATAL_REPORT_EN_0, _TAGPOOL_ECC_DBE_ERR, _ENABLE) | 444 DRF_DEF(_REDUCTIONTSTATE, _ERR_FATAL_REPORT_EN_0, _CRUMBSTORE_BUF_OVERWRITE_ERR, _ENABLE) | 445 DRF_DEF(_REDUCTIONTSTATE, _ERR_FATAL_REPORT_EN_0, _CRUMBSTORE_ECC_DBE_ERR, _ENABLE); 446 447 chip_device->intr_mask.red_tstate.nonfatal = 448 DRF_DEF(_REDUCTIONTSTATE, _ERR_NON_FATAL_REPORT_EN_0, _TAGPOOL_ECC_LIMIT_ERR, _ENABLE) | 449 DRF_DEF(_REDUCTIONTSTATE, _ERR_NON_FATAL_REPORT_EN_0, _CRUMBSTORE_ECC_LIMIT_ERR, _ENABLE) | 450 DRF_DEF(_REDUCTIONTSTATE, _ERR_NON_FATAL_REPORT_EN_0, _CRUMBSTORE_RTO_ERR, _ENABLE); 451 } 452 453 void 454 _nvswitch_initialize_nport_interrupts_ls10 455 ( 456 nvswitch_device *device 457 ) 458 { 459 // Moving this L2 register access to SOE. Refer bug #3747687 460 #if 0 461 NvU32 val; 462 463 val = 464 DRF_NUM(_NPORT, _ERR_CONTROL_COMMON_NPORT, _CORRECTABLEENABLE, 1) | 465 DRF_NUM(_NPORT, _ERR_CONTROL_COMMON_NPORT, _FATALENABLE, 1) | 466 DRF_NUM(_NPORT, _ERR_CONTROL_COMMON_NPORT, _NONFATALENABLE, 1); 467 NVSWITCH_NPORT_BCAST_WR32_LS10(device, _NPORT, _ERR_CONTROL_COMMON_NPORT, val); 468 #endif // 0 469 470 _nvswitch_initialize_route_interrupts(device); 471 _nvswitch_initialize_ingress_interrupts(device); 472 _nvswitch_initialize_egress_interrupts(device); 473 _nvswitch_initialize_tstate_interrupts(device); 474 _nvswitch_initialize_sourcetrack_interrupts(device); 475 _nvswitch_initialize_multicast_tstate_interrupts(device); 476 _nvswitch_initialize_reduction_tstate_interrupts(device); 477 } 478 479 void 480 _nvswitch_initialize_nxbar_interrupts_ls10 481 ( 482 nvswitch_device *device 483 ) 484 { 485 ls10_device *chip_device = NVSWITCH_GET_CHIP_DEVICE_LS10(device); 486 NvU32 report_fatal; 487 488 report_fatal = 489 
DRF_NUM(_NXBAR_TILE, _ERR_FATAL_INTR_EN, _INGRESS_BUFFER_OVERFLOW, 1) | 490 DRF_NUM(_NXBAR_TILE, _ERR_FATAL_INTR_EN, _INGRESS_BUFFER_UNDERFLOW, 1) | 491 DRF_NUM(_NXBAR_TILE, _ERR_FATAL_INTR_EN, _EGRESS_CREDIT_OVERFLOW, 1) | 492 DRF_NUM(_NXBAR_TILE, _ERR_FATAL_INTR_EN, _EGRESS_CREDIT_UNDERFLOW, 1) | 493 DRF_NUM(_NXBAR_TILE, _ERR_FATAL_INTR_EN, _INGRESS_NON_BURSTY_PKT, 1) | 494 DRF_NUM(_NXBAR_TILE, _ERR_FATAL_INTR_EN, _INGRESS_NON_STICKY_PKT, 1) | 495 DRF_NUM(_NXBAR_TILE, _ERR_FATAL_INTR_EN, _INGRESS_BURST_GT_9_DATA_VC, 1) | 496 DRF_NUM(_NXBAR_TILE, _ERR_FATAL_INTR_EN, _INGRESS_PKT_INVALID_DST, 1) | 497 DRF_NUM(_NXBAR_TILE, _ERR_FATAL_INTR_EN, _INGRESS_PKT_PARITY_ERROR, 1) | 498 DRF_NUM(_NXBAR_TILE, _ERR_FATAL_INTR_EN, _INGRESS_SIDEBAND_PARITY_ERROR, 1) | 499 DRF_NUM(_NXBAR_TILE, _ERR_FATAL_INTR_EN, _INGRESS_REDUCTION_PKT_ERROR, 1); 500 501 // Moving this L2 register access to SOE. Refer bug #3747687 502 #if 0 503 NVSWITCH_BCAST_WR32_LS10(device, NXBAR, _NXBAR_TILE, _ERR_FATAL_INTR_EN, report_fatal); 504 #endif // 0 505 506 chip_device->intr_mask.tile.fatal = report_fatal; 507 chip_device->intr_mask.tile.nonfatal = 0; 508 509 report_fatal = 510 DRF_NUM(_NXBAR_TILEOUT, _ERR_FATAL_INTR_EN, _INGRESS_BUFFER_OVERFLOW, 1) | 511 DRF_NUM(_NXBAR_TILEOUT, _ERR_FATAL_INTR_EN, _INGRESS_BUFFER_UNDERFLOW, 1) | 512 DRF_NUM(_NXBAR_TILEOUT, _ERR_FATAL_INTR_EN, _EGRESS_CREDIT_OVERFLOW, 1) | 513 DRF_NUM(_NXBAR_TILEOUT, _ERR_FATAL_INTR_EN, _EGRESS_CREDIT_UNDERFLOW, 1) | 514 DRF_NUM(_NXBAR_TILEOUT, _ERR_FATAL_INTR_EN, _INGRESS_NON_BURSTY_PKT, 1) | 515 DRF_NUM(_NXBAR_TILEOUT, _ERR_FATAL_INTR_EN, _INGRESS_NON_STICKY_PKT, 1) | 516 DRF_NUM(_NXBAR_TILEOUT, _ERR_FATAL_INTR_EN, _INGRESS_BURST_GT_9_DATA_VC, 1) | 517 DRF_NUM(_NXBAR_TILEOUT, _ERR_FATAL_INTR_EN, _EGRESS_CDT_PARITY_ERROR, 1); 518 519 // Moving this L2 register access to SOE. 
Refer bug #3747687 520 #if 0 521 NVSWITCH_BCAST_WR32_LS10(device, NXBAR, _NXBAR_TILEOUT, _ERR_FATAL_INTR_EN, report_fatal); 522 #endif // 0 523 524 chip_device->intr_mask.tileout.fatal = report_fatal; 525 chip_device->intr_mask.tileout.nonfatal = 0; 526 } 527 528 /* 529 * @brief Service MINION Falcon interrupts on the requested interrupt tree 530 * Falcon Interrupts are a little unique in how they are handled:#include <assert.h> 531 * IRQSTAT is used to read in interrupt status from FALCON 532 * IRQMASK is used to read in mask of interrupts 533 * IRQDEST is used to read in enabled interrupts that are routed to the HOST 534 * 535 * IRQSTAT & IRQMASK gives the pending interrupting on this minion 536 * 537 * @param[in] device MINION on this device 538 * @param[in] instance MINION instance 539 * 540 */ 541 NvlStatus 542 nvswitch_minion_service_falcon_interrupts_ls10 543 ( 544 nvswitch_device *device, 545 NvU32 instance 546 ) 547 { 548 ls10_device *chip_device = NVSWITCH_GET_CHIP_DEVICE_LS10(device); 549 NVSWITCH_INTERRUPT_LOG_TYPE report = { 0 }; 550 NvU32 pending, bit, unhandled, intr, link; 551 552 link = instance * NVSWITCH_LINKS_PER_MINION_LS10; 553 report.raw_pending = NVSWITCH_MINION_RD32_LS10(device, instance, _CMINION, _FALCON_IRQSTAT); 554 report.raw_enable = chip_device->intr_minion_dest; 555 report.mask = NVSWITCH_MINION_RD32_LS10(device, instance, _CMINION, _FALCON_IRQMASK); 556 557 pending = report.raw_pending & report.mask; 558 559 if (pending == 0) 560 { 561 return -NVL_NOT_FOUND; 562 } 563 564 unhandled = pending; 565 566 bit = DRF_NUM(_CMINION_FALCON, _IRQSTAT, _WDTMR, 1); 567 if (nvswitch_test_flags(pending, bit)) 568 { 569 NVSWITCH_REPORT_FATAL(_HW_MINION_WATCHDOG, "MINION Watchdog timer ran out", NV_TRUE); 570 nvswitch_clear_flags(&unhandled, bit); 571 } 572 573 bit = DRF_NUM(_CMINION_FALCON, _IRQSTAT, _HALT, 1); 574 if (nvswitch_test_flags(pending, bit)) 575 { 576 NVSWITCH_REPORT_FATAL(_HW_MINION_HALT, "MINION HALT", NV_TRUE); 577 
nvswitch_clear_flags(&unhandled, bit); 578 } 579 580 bit = DRF_NUM(_CMINION_FALCON, _IRQSTAT, _EXTERR, 1); 581 if (nvswitch_test_flags(pending, bit)) 582 { 583 NVSWITCH_REPORT_FATAL(_HW_MINION_EXTERR, "MINION EXTERR", NV_TRUE); 584 nvswitch_clear_flags(&unhandled, bit); 585 } 586 587 bit = DRF_NUM(_CMINION_FALCON, _IRQSTAT, _SWGEN0, 1); 588 if (nvswitch_test_flags(pending, bit)) 589 { 590 NVSWITCH_PRINT(device, INFO, 591 "%s: Received MINION Falcon SWGEN0 interrupt on MINION %d.\n", 592 __FUNCTION__, instance); 593 nvswitch_clear_flags(&unhandled, bit); 594 } 595 596 bit = DRF_NUM(_CMINION_FALCON, _IRQSTAT, _SWGEN1, 1); 597 if (nvswitch_test_flags(pending, bit)) 598 { 599 NVSWITCH_PRINT(device, INFO, 600 "%s: Received MINION Falcon SWGEN1 interrupt on MINION %d.\n", 601 __FUNCTION__, instance); 602 nvswitch_clear_flags(&unhandled, bit); 603 } 604 605 NVSWITCH_UNHANDLED_CHECK(device, unhandled); 606 607 if (device->link[link].fatal_error_occurred) 608 { 609 intr = NVSWITCH_MINION_RD32_LS10(device, instance, _MINION, _MINION_INTR_STALL_EN); 610 intr = FLD_SET_DRF(_MINION, _MINION_INTR_STALL_EN, _FATAL, _DISABLE, intr); 611 intr = FLD_SET_DRF(_MINION, _MINION_INTR_STALL_EN, _FALCON_STALL, _DISABLE, intr); 612 intr = FLD_SET_DRF(_MINION, _MINION_INTR_STALL_EN, _FATAL, _DISABLE, intr); 613 intr = FLD_SET_DRF(_MINION, _MINION_INTR_STALL_EN, _NONFATAL, _DISABLE, intr); 614 NVSWITCH_MINION_WR32_LS10(device, instance, _MINION, _MINION_INTR_STALL_EN, intr); 615 } 616 617 // Write to IRQSCLR to clear status of interrupt 618 NVSWITCH_MINION_WR32_LS10(device, instance, _CMINION, _FALCON_IRQSCLR, pending); 619 620 if (unhandled != 0) 621 { 622 return -NVL_MORE_PROCESSING_REQUIRED; 623 } 624 625 return NVL_SUCCESS; 626 } 627 628 /* 629 * @Brief : Send priv ring command and wait for completion 630 * 631 * @Description : 632 * 633 * @param[in] device a reference to the device to initialize 634 * @param[in] cmd encoded priv ring command 635 */ 636 static NvlStatus 637 
_nvswitch_ring_master_cmd_ls10 638 ( 639 nvswitch_device *device, 640 NvU32 cmd 641 ) 642 { 643 NvU32 value; 644 NVSWITCH_TIMEOUT timeout; 645 NvBool keepPolling; 646 647 NVSWITCH_ENG_WR32(device, PRI_MASTER_RS, , 0, _PPRIV_MASTER, _RING_COMMAND, cmd); 648 649 nvswitch_timeout_create(NVSWITCH_INTERVAL_5MSEC_IN_NS, &timeout); 650 do 651 { 652 keepPolling = (nvswitch_timeout_check(&timeout)) ? NV_FALSE : NV_TRUE; 653 654 value = NVSWITCH_ENG_RD32(device, PRI_MASTER_RS, , 0, _PPRIV_MASTER, _RING_COMMAND); 655 if (FLD_TEST_DRF(_PPRIV_MASTER, _RING_COMMAND, _CMD, _NO_CMD, value)) 656 { 657 break; 658 } 659 660 nvswitch_os_sleep(1); 661 } 662 while (keepPolling); 663 664 if (!FLD_TEST_DRF(_PPRIV_MASTER, _RING_COMMAND, _CMD, _NO_CMD, value)) 665 { 666 NVSWITCH_PRINT(device, ERROR, 667 "%s: Timeout waiting for RING_COMMAND == NO_CMD (cmd=0x%x).\n", 668 __FUNCTION__, cmd); 669 return -NVL_INITIALIZATION_TOTAL_FAILURE; 670 } 671 672 return NVL_SUCCESS; 673 } 674 675 static NvlStatus 676 _nvswitch_service_priv_ring_ls10 677 ( 678 nvswitch_device *device 679 ) 680 { 681 NvU32 pending, i; 682 NVSWITCH_PRI_ERROR_LOG_TYPE pri_error; 683 NvlStatus status = NVL_SUCCESS; 684 685 pending = NVSWITCH_ENG_RD32(device, PRI_MASTER_RS, , 0, _PPRIV_MASTER, _RING_INTERRUPT_STATUS0); 686 if (pending == 0) 687 { 688 return -NVL_NOT_FOUND; 689 } 690 691 // 692 // SYS 693 // 694 695 if (FLD_TEST_DRF_NUM(_PPRIV_MASTER, _RING_INTERRUPT_STATUS0, 696 _GBL_WRITE_ERROR_SYS, 1, pending)) 697 { 698 pri_error.addr = NVSWITCH_ENG_RD32(device, SYS_PRI_HUB, , 0, _PPRIV_SYS, _PRIV_ERROR_ADR); 699 pri_error.data = NVSWITCH_ENG_RD32(device, SYS_PRI_HUB, , 0, _PPRIV_SYS, _PRIV_ERROR_WRDAT); 700 pri_error.info = NVSWITCH_ENG_RD32(device, SYS_PRI_HUB, , 0, _PPRIV_SYS, _PRIV_ERROR_INFO); 701 pri_error.code = NVSWITCH_ENG_RD32(device, SYS_PRI_HUB, , 0, _PPRIV_SYS, _PRIV_ERROR_CODE); 702 703 NVSWITCH_REPORT_PRI_ERROR_NONFATAL(_HW_HOST_PRIV_ERROR, "PRI WRITE SYS error", NVSWITCH_PPRIV_WRITE_SYS, 0, pri_error); 704 
705 NVSWITCH_PRINT(device, ERROR, 706 "SYS PRI write error addr: 0x%08x data: 0x%08x info: 0x%08x code: 0x%08x\n", 707 pri_error.addr, pri_error.data, 708 pri_error.info, pri_error.code); 709 710 pending = FLD_SET_DRF_NUM(_PPRIV_MASTER, _RING_INTERRUPT_STATUS0, 711 _GBL_WRITE_ERROR_SYS, 0, pending); 712 } 713 714 // 715 // SYSB 716 // 717 718 if (FLD_TEST_DRF_NUM(_PPRIV_MASTER, _RING_INTERRUPT_STATUS0, 719 _GBL_WRITE_ERROR_SYSB, 1, pending)) 720 { 721 pri_error.addr = NVSWITCH_ENG_RD32(device, SYSB_PRI_HUB, , 0, _PPRIV_SYS, _PRIV_ERROR_ADR); 722 pri_error.data = NVSWITCH_ENG_RD32(device, SYSB_PRI_HUB, , 0, _PPRIV_SYS, _PRIV_ERROR_WRDAT); 723 pri_error.info = NVSWITCH_ENG_RD32(device, SYSB_PRI_HUB, , 0, _PPRIV_SYS, _PRIV_ERROR_INFO); 724 pri_error.code = NVSWITCH_ENG_RD32(device, SYSB_PRI_HUB, , 0, _PPRIV_SYS, _PRIV_ERROR_CODE); 725 726 NVSWITCH_REPORT_PRI_ERROR_NONFATAL(_HW_HOST_PRIV_ERROR, "PRI WRITE SYSB error", NVSWITCH_PPRIV_WRITE_SYS, 1, pri_error); 727 728 NVSWITCH_PRINT(device, ERROR, 729 "SYSB PRI write error addr: 0x%08x data: 0x%08x info: 0x%08x code: 0x%08x\n", 730 pri_error.addr, pri_error.data, 731 pri_error.info, pri_error.code); 732 733 pending = FLD_SET_DRF_NUM(_PPRIV_MASTER, _RING_INTERRUPT_STATUS0, 734 _GBL_WRITE_ERROR_SYSB, 0, pending); 735 } 736 737 // 738 // per-PRT 739 // 740 741 for (i = 0; i < NUM_PRT_PRI_HUB_ENGINE_LS10; i++) 742 { 743 if (DRF_VAL(_PPRIV_MASTER, _RING_INTERRUPT_STATUS0, 744 _GBL_WRITE_ERROR_FBP, pending) & NVBIT(i)) 745 { 746 pri_error.addr = NVSWITCH_ENG_RD32(device, PRT_PRI_HUB, , i, _PPRIV_PRT, _PRIV_ERROR_ADR); 747 pri_error.data = NVSWITCH_ENG_RD32(device, PRT_PRI_HUB, , i, _PPRIV_PRT, _PRIV_ERROR_WRDAT); 748 pri_error.info = NVSWITCH_ENG_RD32(device, PRT_PRI_HUB, , i, _PPRIV_PRT, _PRIV_ERROR_INFO); 749 pri_error.code = NVSWITCH_ENG_RD32(device, PRT_PRI_HUB, , i, _PPRIV_PRT, _PRIV_ERROR_CODE); 750 751 NVSWITCH_REPORT_PRI_ERROR_NONFATAL(_HW_HOST_PRIV_ERROR, "PRI WRITE PRT error", NVSWITCH_PPRIV_WRITE_PRT, i, pri_error); 
752 753 NVSWITCH_PRINT(device, ERROR, 754 "PRT%d PRI write error addr: 0x%08x data: 0x%08x info: 0x%08x code: 0x%08x\n", 755 i, pri_error.addr, pri_error.data, pri_error.info, pri_error.code); 756 757 pending &= ~DRF_NUM(_PPRIV_MASTER, _RING_INTERRUPT_STATUS0, 758 _GBL_WRITE_ERROR_FBP, NVBIT(i)); 759 } 760 } 761 762 if (pending != 0) 763 { 764 NVSWITCH_PRINT_SXID(device, NVSWITCH_ERR_HW_HOST_PRIV_ERROR, 765 "Fatal, Unexpected PRI error\n"); 766 NVSWITCH_LOG_FATAL_DATA(device, _HW, _HW_HOST_PRIV_ERROR, 2, 0, NV_FALSE, &pending); 767 768 NVSWITCH_PRINT(device, ERROR, 769 "Unexpected PRI error 0x%08x\n", pending); 770 return -NVL_MORE_PROCESSING_REQUIRED; 771 } 772 773 // acknowledge the interrupt to the ringmaster 774 status = _nvswitch_ring_master_cmd_ls10(device, 775 DRF_DEF(_PPRIV_MASTER, _RING_COMMAND, _CMD, _ACK_INTERRUPT)); 776 if (status != NVL_SUCCESS) 777 { 778 NVSWITCH_PRINT(device, ERROR, "Timeout ACK'ing PRI error\n"); 779 // 780 // Don't return error code -- there is nothing kernel SW can do about it if ACK failed. 781 // Likely it is PLM protected and SOE needs to handle it. 
782 // 783 } 784 785 return NVL_SUCCESS; 786 } 787 788 static NvlStatus 789 _nvswitch_collect_nport_error_info_ls10 790 ( 791 nvswitch_device *device, 792 NvU32 link, 793 NVSWITCH_RAW_ERROR_LOG_TYPE *data, 794 NvU32 *idx, 795 NvU32 register_start, 796 NvU32 register_end 797 ) 798 { 799 NvU32 register_block_size; 800 NvU32 i = *idx; 801 802 if ((register_start > register_end) || 803 (register_start % sizeof(NvU32) != 0) || 804 (register_end % sizeof(NvU32) != 0)) 805 { 806 return -NVL_BAD_ARGS; 807 } 808 809 register_block_size = (register_end - register_start)/sizeof(NvU32) + 1; 810 if ((i + register_block_size > NVSWITCH_RAW_ERROR_LOG_DATA_SIZE) || 811 (register_block_size > NVSWITCH_RAW_ERROR_LOG_DATA_SIZE)) 812 { 813 return -NVL_BAD_ARGS; 814 } 815 816 do 817 { 818 data->data[i] = NVSWITCH_ENG_OFF_RD32(device, NPORT, , link, register_start); 819 register_start += sizeof(NvU32); 820 i++; 821 822 } 823 while (register_start <= register_end); 824 825 *idx = i; 826 return NVL_SUCCESS; 827 } 828 829 static void 830 _nvswitch_collect_error_info_ls10 831 ( 832 nvswitch_device *device, 833 NvU32 link, 834 NvU32 collect_flags, // NVSWITCH_RAW_ERROR_LOG_DATA_FLAG_* 835 NVSWITCH_RAW_ERROR_LOG_TYPE *data 836 ) 837 { 838 NvU32 val; 839 NvU32 i = 0; 840 NvlStatus status = NVL_SUCCESS; 841 842 // 843 // The requested data 'collect_flags' is captured, if valid. 844 // if the error log buffer fills, then the currently captured data block 845 // could be truncated and subsequent blocks will be skipped. 846 // The 'flags' field in the log structure describes which blocks are 847 // actually captured. 848 // Captured blocks are packed, in order. 
    //

    data->flags = 0;

    // ROUTE: error timestamp
    if (collect_flags & NVSWITCH_RAW_ERROR_LOG_DATA_FLAG_ROUTE_TIME)
    {
        status = _nvswitch_collect_nport_error_info_ls10(device, link, data, &i,
                     NV_ROUTE_ERR_TIMESTAMP_LOG,
                     NV_ROUTE_ERR_TIMESTAMP_LOG);
        if (status == NVL_SUCCESS)
        {
            data->flags |= NVSWITCH_RAW_ERROR_LOG_DATA_FLAG_ROUTE_TIME;
            NVSWITCH_PRINT(device, INFO,
                "ROUTE: TIMESTAMP: 0x%08x\n", data->data[i-1]);
        }
    }

    // ROUTE: misc/header logs are captured only while HW flags the header
    // log registers as valid.
    val = NVSWITCH_ENG_RD32(device, NPORT, , link, _ROUTE, _ERR_HEADER_LOG_VALID);
    if (FLD_TEST_DRF_NUM(_ROUTE, _ERR_HEADER_LOG_VALID, _HEADERVALID0, 1, val))
    {
        if (collect_flags & NVSWITCH_RAW_ERROR_LOG_DATA_FLAG_ROUTE_MISC)
        {
            status = _nvswitch_collect_nport_error_info_ls10(device, link, data, &i,
                         NV_ROUTE_ERR_MISC_LOG_0,
                         NV_ROUTE_ERR_MISC_LOG_0);
            if (status == NVL_SUCCESS)
            {
                data->flags |= NVSWITCH_RAW_ERROR_LOG_DATA_FLAG_ROUTE_MISC;
                NVSWITCH_PRINT(device, INFO,
                    "ROUTE: MISC: 0x%08x\n", data->data[i-1]);
            }
        }

        if (collect_flags & NVSWITCH_RAW_ERROR_LOG_DATA_FLAG_ROUTE_HDR)
        {
            status = _nvswitch_collect_nport_error_info_ls10(device, link, data, &i,
                         NV_ROUTE_ERR_HEADER_LOG_4,
                         NV_ROUTE_ERR_HEADER_LOG_10);
            if (status == NVL_SUCCESS)
            {
                data->flags |= NVSWITCH_RAW_ERROR_LOG_DATA_FLAG_ROUTE_HDR;
                // NOTE(review): HEADER_LOG_4..10 is 7 words but 8 values are
                // printed; data->data[i-8] is the word captured just before
                // the header block (MISC log when requested) -- confirm intended.
                NVSWITCH_PRINT(device, INFO,
                    "ROUTE: HEADER: 0x%08x, 0x%08x, 0x%08x, 0x%08x, 0x%08x, 0x%08x, 0x%08x, 0x%08x,\n",
                    data->data[i-8], data->data[i-7], data->data[i-6], data->data[i-5],
                    data->data[i-4], data->data[i-3], data->data[i-2], data->data[i-1]);
            }
        }
    }

    // INGRESS
    if (collect_flags & NVSWITCH_RAW_ERROR_LOG_DATA_FLAG_INGRESS_TIME)
    {
        status = _nvswitch_collect_nport_error_info_ls10(device, link, data, &i,
                     NV_INGRESS_ERR_TIMESTAMP_LOG,
                     NV_INGRESS_ERR_TIMESTAMP_LOG);
        if (status == NVL_SUCCESS)
        {
            data->flags |= NVSWITCH_RAW_ERROR_LOG_DATA_FLAG_INGRESS_TIME;
            NVSWITCH_PRINT(device, INFO,
                "INGRESS: TIMESTAMP: 0x%08x\n", data->data[i-1]);
        }
    }

    val = NVSWITCH_ENG_RD32(device, NPORT, , link, _INGRESS, _ERR_HEADER_LOG_VALID);
    if (FLD_TEST_DRF_NUM(_INGRESS, _ERR_HEADER_LOG_VALID, _HEADERVALID0, 1, val))
    {
        if (collect_flags & NVSWITCH_RAW_ERROR_LOG_DATA_FLAG_INGRESS_MISC)
        {
            status = _nvswitch_collect_nport_error_info_ls10(device, link, data, &i,
                         NV_INGRESS_ERR_MISC_LOG_0,
                         NV_INGRESS_ERR_MISC_LOG_0);
            if (status == NVL_SUCCESS)
            {
                data->flags |= NVSWITCH_RAW_ERROR_LOG_DATA_FLAG_INGRESS_MISC;
                NVSWITCH_PRINT(device, INFO,
                    "INGRESS: MISC: 0x%08x\n", data->data[i-1]);
            }
        }

        if (collect_flags & NVSWITCH_RAW_ERROR_LOG_DATA_FLAG_INGRESS_HDR)
        {
            status = _nvswitch_collect_nport_error_info_ls10(device, link, data, &i,
                         NV_INGRESS_ERR_HEADER_LOG_4,
                         NV_INGRESS_ERR_HEADER_LOG_9);
            if (status == NVL_SUCCESS)
            {
                data->flags |= NVSWITCH_RAW_ERROR_LOG_DATA_FLAG_INGRESS_HDR;
                // NOTE(review): HEADER_LOG_4..9 is 6 words but 7 values are
                // printed; data->data[i-7] is the preceding captured word --
                // confirm intended.
                NVSWITCH_PRINT(device, INFO,
                    "INGRESS: HEADER: 0x%08x, 0x%08x, 0x%08x, 0x%08x, 0x%08x, 0x%08x, 0x%08x,\n",
                    data->data[i-7], data->data[i-6], data->data[i-5], data->data[i-4],
                    data->data[i-3], data->data[i-2], data->data[i-1]);
            }
        }
    }

    // EGRESS
    if (collect_flags & NVSWITCH_RAW_ERROR_LOG_DATA_FLAG_EGRESS_TIME)
    {
        status = _nvswitch_collect_nport_error_info_ls10(device, link, data, &i,
                     NV_EGRESS_ERR_TIMESTAMP_LOG,
                     NV_EGRESS_ERR_TIMESTAMP_LOG);
        if (status == NVL_SUCCESS)
        {
            data->flags |= NVSWITCH_RAW_ERROR_LOG_DATA_FLAG_EGRESS_TIME;
            NVSWITCH_PRINT(device, INFO,
                "EGRESS: TIMESTAMP: 0x%08x\n", data->data[i-1]);
        }
    }

    val = NVSWITCH_ENG_RD32(device, NPORT, , link, _EGRESS, _ERR_HEADER_LOG_VALID);
    if (FLD_TEST_DRF_NUM(_EGRESS, _ERR_HEADER_LOG_VALID, _HEADERVALID0, 1, val))
    {
        if (collect_flags & NVSWITCH_RAW_ERROR_LOG_DATA_FLAG_EGRESS_MISC)
        {
            status = _nvswitch_collect_nport_error_info_ls10(device, link, data, &i,
                         NV_EGRESS_ERR_MISC_LOG_0,
                         NV_EGRESS_ERR_MISC_LOG_0);
            if (status == NVL_SUCCESS)
            {
                data->flags |= NVSWITCH_RAW_ERROR_LOG_DATA_FLAG_EGRESS_MISC;
                NVSWITCH_PRINT(device, INFO,
                    "EGRESS: MISC: 0x%08x\n", data->data[i-1]);
            }
        }

        if (collect_flags & NVSWITCH_RAW_ERROR_LOG_DATA_FLAG_EGRESS_HDR)
        {
            status = _nvswitch_collect_nport_error_info_ls10(device, link, data, &i,
                         NV_EGRESS_ERR_HEADER_LOG_4,
                         NV_EGRESS_ERR_HEADER_LOG_10);
            if (status == NVL_SUCCESS)
            {
                data->flags |= NVSWITCH_RAW_ERROR_LOG_DATA_FLAG_EGRESS_HDR;
                NVSWITCH_PRINT(device, INFO,
                    "EGRESS: HEADER: 0x%08x, 0x%08x, 0x%08x, 0x%08x, 0x%08x, 0x%08x, 0x%08x\n",
                    data->data[i-7], data->data[i-6], data->data[i-5], data->data[i-4],
                    data->data[i-3], data->data[i-2], data->data[i-1]);
            }
        }
    }

    // EGRESS multicast replay logs
    if (collect_flags & NVSWITCH_RAW_ERROR_LOG_DATA_FLAG_EGRESS_MC_TIME)
    {
        status = _nvswitch_collect_nport_error_info_ls10(device, link, data, &i,
                     NV_EGRESS_MC_ERR_TIMESTAMP_LOG,
                     NV_EGRESS_MC_ERR_TIMESTAMP_LOG);
        if (status == NVL_SUCCESS)
        {
            data->flags |= NVSWITCH_RAW_ERROR_LOG_DATA_FLAG_EGRESS_MC_TIME;
            NVSWITCH_PRINT(device, INFO,
                "EGRESS: TIME MC: 0x%08x\n", data->data[i-1]);
        }
    }

    val = NVSWITCH_ENG_RD32(device, NPORT, , link, _EGRESS, _MC_ERR_HEADER_LOG_VALID);
    if (FLD_TEST_DRF_NUM(_EGRESS, _MC_ERR_HEADER_LOG_VALID, _HEADERVALID0, 1, val))
    {
        if (collect_flags & NVSWITCH_RAW_ERROR_LOG_DATA_FLAG_EGRESS_MC_MISC)
        {
            status = _nvswitch_collect_nport_error_info_ls10(device, link, data, &i,
                         NV_EGRESS_MC_ERR_MISC_LOG_0,
                         NV_EGRESS_MC_ERR_MISC_LOG_0);
            if (status == NVL_SUCCESS)
            {
                data->flags |= NVSWITCH_RAW_ERROR_LOG_DATA_FLAG_EGRESS_MC_MISC;
                NVSWITCH_PRINT(device, INFO,
                    "EGRESS: MISC MC: 0x%08x\n", data->data[i-1]);
            }
        }

        if (collect_flags &
            NVSWITCH_RAW_ERROR_LOG_DATA_FLAG_EGRESS_MC_HDR)
        {
            status = _nvswitch_collect_nport_error_info_ls10(device, link, data, &i,
                         NV_EGRESS_MC_ERR_HEADER_LOG_4,
                         NV_EGRESS_MC_ERR_HEADER_LOG_10);
            if (status == NVL_SUCCESS)
            {
                data->flags |= NVSWITCH_RAW_ERROR_LOG_DATA_FLAG_EGRESS_MC_HDR;
                NVSWITCH_PRINT(device, INFO,
                    "EGRESS MC: HEADER: 0x%08x, 0x%08x, 0x%08x, 0x%08x, 0x%08x, 0x%08x, 0x%08x\n",
                    data->data[i-7], data->data[i-6], data->data[i-5], data->data[i-4],
                    data->data[i-3], data->data[i-2], data->data[i-1]);
            }
        }
    }

    // Multicast TSTATE timestamp
    if (collect_flags & NVSWITCH_RAW_ERROR_LOG_DATA_FLAG_MC_TIME)
    {
        status = _nvswitch_collect_nport_error_info_ls10(device, link, data, &i,
                     NV_MULTICASTTSTATE_ERR_TIMESTAMP_LOG,
                     NV_MULTICASTTSTATE_ERR_TIMESTAMP_LOG);
        if (status == NVL_SUCCESS)
        {
            data->flags |= NVSWITCH_RAW_ERROR_LOG_DATA_FLAG_MC_TIME;
            NVSWITCH_PRINT(device, INFO,
                "MC TSTATE MC: 0x%08x\n",
                data->data[i-1]);
        }
    }

    // Reduction TSTATE timestamp
    if (collect_flags & NVSWITCH_RAW_ERROR_LOG_DATA_FLAG_RED_TIME)
    {
        status = _nvswitch_collect_nport_error_info_ls10(device, link, data, &i,
                     NV_REDUCTIONTSTATE_ERR_TIMESTAMP_LOG,
                     NV_REDUCTIONTSTATE_ERR_TIMESTAMP_LOG);
        if (status == NVL_SUCCESS)
        {
            data->flags |= NVSWITCH_RAW_ERROR_LOG_DATA_FLAG_RED_TIME;
            NVSWITCH_PRINT(device, INFO,
                "MC TSTATE RED: 0x%08x\n",
                data->data[i-1]);
        }
    }

    // Zero-fill the remainder of the log buffer.
    while (i < NVSWITCH_RAW_ERROR_LOG_DATA_SIZE)
    {
        data->data[i++] = 0;
    }
}

//
// Service fatal ROUTE interrupts on NPORT 'link': report each pending,
// enabled error, log ECC events to the InfoROM, and ack the handled
// status bits.  Returns -NVL_NOT_FOUND when nothing is pending.
//
static NvlStatus
_nvswitch_service_route_fatal_ls10
(
    nvswitch_device *device,
    NvU32 link
)
{
    ls10_device *chip_device = NVSWITCH_GET_CHIP_DEVICE_LS10(device);
    NVSWITCH_INTERRUPT_LOG_TYPE report = { 0 };
    NvU32 pending, bit, contain, unhandled;
    NVSWITCH_RAW_ERROR_LOG_TYPE data = {0, { 0 }};
    INFOROM_NVS_ECC_ERROR_EVENT err_event = {0};

    // Qualify raw status with both the HW report-enable mask and the SW
    // route fatal interrupt mask.
    report.raw_pending = NVSWITCH_ENG_RD32(device, NPORT, , link, _ROUTE, _ERR_STATUS_0);
    report.raw_enable = NVSWITCH_ENG_RD32(device, NPORT, , link, _ROUTE, _ERR_FATAL_REPORT_EN_0);
    report.mask = report.raw_enable & chip_device->intr_mask.route.fatal;
    pending = report.raw_pending & report.mask;

    if (pending == 0)
    {
        // No enabled ROUTE fatal interrupt is asserted.
        return -NVL_NOT_FOUND;
    }

    unhandled = pending;

    report.raw_first = NVSWITCH_ENG_RD32(device, NPORT, , link, _ROUTE, _ERR_FIRST_0);
    contain = NVSWITCH_ENG_RD32(device, NPORT, , link, _ROUTE, _ERR_CONTAIN_EN_0);

    // Route buffer over/underflow
    bit = DRF_NUM(_ROUTE, _ERR_STATUS_0, _ROUTEBUFERR, 1);
    if (nvswitch_test_flags(pending, bit))
    {
        _nvswitch_collect_error_info_ls10(device, link,
            NVSWITCH_RAW_ERROR_LOG_DATA_FLAG_ROUTE_TIME,
            &data);
        NVSWITCH_REPORT_CONTAIN(_HW_NPORT_ROUTE_ROUTEBUFERR, "route buffer over/underflow", NV_FALSE);
        NVSWITCH_REPORT_CONTAIN_DATA(_HW_NPORT_ROUTE_ROUTEBUFERR, data);
        nvswitch_clear_flags(&unhandled, bit);
    }

    // GLT (ganged-link table) double-bit ECC error; capture the error
    // address when HW reports it valid.
    bit = DRF_NUM(_ROUTE, _ERR_STATUS_0, _GLT_ECC_DBE_ERR, 1);
    if (nvswitch_test_flags(pending, bit))
    {
        NvBool bAddressValid = NV_FALSE;
        NvU32 address = 0;
        NvU32 addressValid = NVSWITCH_ENG_RD32(device, NPORT, , link, _ROUTE,
                _ERR_GLT_ECC_ERROR_ADDRESS_VALID);

        if (FLD_TEST_DRF(_ROUTE_ERR_GLT, _ECC_ERROR_ADDRESS_VALID, _VALID, _VALID,
                         addressValid))
        {
            address = NVSWITCH_ENG_RD32(device, NPORT, , link, _ROUTE,
                                        _ERR_GLT_ECC_ERROR_ADDRESS);
            bAddressValid = NV_TRUE;
        }

        _nvswitch_collect_error_info_ls10(device, link,
            NVSWITCH_RAW_ERROR_LOG_DATA_FLAG_ROUTE_TIME |
            NVSWITCH_RAW_ERROR_LOG_DATA_FLAG_ROUTE_MISC |
            NVSWITCH_RAW_ERROR_LOG_DATA_FLAG_ROUTE_HDR,
            &data);
        NVSWITCH_REPORT_CONTAIN(_HW_NPORT_ROUTE_GLT_ECC_DBE_ERR, "route GLT DBE", NV_FALSE);
        NVSWITCH_REPORT_CONTAIN_DATA(_HW_NPORT_ROUTE_GLT_ECC_DBE_ERR, data);
        nvswitch_clear_flags(&unhandled, bit);

        _nvswitch_construct_ecc_error_event_ls10(&err_event,
            NVSWITCH_ERR_HW_NPORT_ROUTE_GLT_ECC_DBE_ERR, link, bAddressValid,
            address, NV_TRUE, 1);

        nvswitch_inforom_ecc_log_err_event(device, &err_event);
    }

    // Pipeline-data control parity error
    bit = DRF_NUM(_ROUTE, _ERR_STATUS_0, _PDCTRLPARERR, 1);
    if (nvswitch_test_flags(pending, bit))
    {
        _nvswitch_collect_error_info_ls10(device, link,
            NVSWITCH_RAW_ERROR_LOG_DATA_FLAG_ROUTE_TIME |
            NVSWITCH_RAW_ERROR_LOG_DATA_FLAG_ROUTE_MISC |
            NVSWITCH_RAW_ERROR_LOG_DATA_FLAG_ROUTE_HDR,
            &data);
        NVSWITCH_REPORT_CONTAIN(_HW_NPORT_ROUTE_PDCTRLPARERR, "route parity", NV_FALSE);
        NVSWITCH_REPORT_CONTAIN_DATA(_HW_NPORT_ROUTE_PDCTRLPARERR, data);
        nvswitch_clear_flags(&unhandled, bit);
    }

    // Incoming NVS double-bit ECC error
    bit = DRF_NUM(_ROUTE, _ERR_STATUS_0, _NVS_ECC_DBE_ERR, 1);
    if (nvswitch_test_flags(pending, bit))
    {
        _nvswitch_collect_error_info_ls10(device, link,
            NVSWITCH_RAW_ERROR_LOG_DATA_FLAG_ROUTE_TIME |
            NVSWITCH_RAW_ERROR_LOG_DATA_FLAG_ROUTE_MISC |
            NVSWITCH_RAW_ERROR_LOG_DATA_FLAG_ROUTE_HDR,
            &data);
        NVSWITCH_REPORT_CONTAIN(_HW_NPORT_ROUTE_NVS_ECC_DBE_ERR, "route incoming DBE", NV_FALSE);
        NVSWITCH_REPORT_CONTAIN_DATA(_HW_NPORT_ROUTE_NVS_ECC_DBE_ERR, data);
        nvswitch_clear_flags(&unhandled, bit);

        _nvswitch_construct_ecc_error_event_ls10(&err_event,
            NVSWITCH_ERR_HW_NPORT_ROUTE_NVS_ECC_DBE_ERR, link, NV_FALSE, 0,
            NV_TRUE, 1);

        nvswitch_inforom_ecc_log_err_event(device, &err_event);

        // Clear associated LIMIT_ERR interrupt
        if (report.raw_pending & DRF_NUM(_ROUTE, _ERR_STATUS_0, _NVS_ECC_LIMIT_ERR, 1))
        {
            NVSWITCH_ENG_WR32(device, NPORT, , link, _ROUTE, _ERR_STATUS_0,
                DRF_NUM(_ROUTE, _ERR_STATUS_0, _NVS_ECC_LIMIT_ERR, 1));
        }
    }

    // Credit tracking parity error
    bit = DRF_NUM(_ROUTE, _ERR_STATUS_0, _CDTPARERR, 1);
    if (nvswitch_test_flags(pending, bit))
    {
        _nvswitch_collect_error_info_ls10(device, link,
            NVSWITCH_RAW_ERROR_LOG_DATA_FLAG_ROUTE_TIME,
            &data);
        NVSWITCH_REPORT_CONTAIN(_HW_NPORT_ROUTE_CDTPARERR, "route credit parity", NV_FALSE);
        NVSWITCH_REPORT_CONTAIN_DATA(_HW_NPORT_ROUTE_CDTPARERR, data);
        nvswitch_clear_flags(&unhandled, bit);

        _nvswitch_construct_ecc_error_event_ls10(&err_event,
            NVSWITCH_ERR_HW_NPORT_ROUTE_CDTPARERR, link, NV_FALSE, 0,
            NV_TRUE, 1);

        nvswitch_inforom_ecc_log_err_event(device, &err_event);
    }

    // Multicast RID table double-bit ECC error
    bit = DRF_NUM(_ROUTE, _ERR_STATUS_0, _MCRID_ECC_DBE_ERR, 1);
    if (nvswitch_test_flags(pending, bit))
    {
        _nvswitch_collect_error_info_ls10(device, link,
            NVSWITCH_RAW_ERROR_LOG_DATA_FLAG_ROUTE_TIME |
            NVSWITCH_RAW_ERROR_LOG_DATA_FLAG_ROUTE_MISC |
            NVSWITCH_RAW_ERROR_LOG_DATA_FLAG_ROUTE_HDR,
            &data);
        NVSWITCH_REPORT_CONTAIN(_HW_NPORT_ROUTE_MCRID_ECC_DBE_ERR, "MC route ECC", NV_FALSE);
        NVSWITCH_REPORT_CONTAIN_DATA(_HW_NPORT_ROUTE_MCRID_ECC_DBE_ERR, data);
        nvswitch_clear_flags(&unhandled, bit);

        _nvswitch_construct_ecc_error_event_ls10(&err_event,
            NVSWITCH_ERR_HW_NPORT_ROUTE_MCRID_ECC_DBE_ERR, link, NV_FALSE, 0,
            NV_TRUE, 1);

        nvswitch_inforom_ecc_log_err_event(device, &err_event);
    }

    // Extended multicast RID table double-bit ECC error
    bit = DRF_NUM(_ROUTE, _ERR_STATUS_0, _EXTMCRID_ECC_DBE_ERR, 1);
    if (nvswitch_test_flags(pending, bit))
    {
        _nvswitch_collect_error_info_ls10(device, link,
            NVSWITCH_RAW_ERROR_LOG_DATA_FLAG_ROUTE_TIME |
            NVSWITCH_RAW_ERROR_LOG_DATA_FLAG_ROUTE_MISC |
            NVSWITCH_RAW_ERROR_LOG_DATA_FLAG_ROUTE_HDR,
            &data);
        NVSWITCH_REPORT_CONTAIN(_HW_NPORT_ROUTE_EXTMCRID_ECC_DBE_ERR, "Extd MC route ECC", NV_FALSE);
        NVSWITCH_REPORT_CONTAIN_DATA(_HW_NPORT_ROUTE_EXTMCRID_ECC_DBE_ERR, data);
        nvswitch_clear_flags(&unhandled, bit);

        _nvswitch_construct_ecc_error_event_ls10(&err_event,
            NVSWITCH_ERR_HW_NPORT_ROUTE_EXTMCRID_ECC_DBE_ERR, link,
            NV_FALSE, 0,
            NV_TRUE, 1);

        nvswitch_inforom_ecc_log_err_event(device, &err_event);
    }

    // Route RAM double-bit ECC error
    bit = DRF_NUM(_ROUTE, _ERR_STATUS_0, _RAM_ECC_DBE_ERR, 1);
    if (nvswitch_test_flags(pending, bit))
    {
        _nvswitch_collect_error_info_ls10(device, link,
            NVSWITCH_RAW_ERROR_LOG_DATA_FLAG_ROUTE_TIME |
            NVSWITCH_RAW_ERROR_LOG_DATA_FLAG_ROUTE_MISC |
            NVSWITCH_RAW_ERROR_LOG_DATA_FLAG_ROUTE_HDR,
            &data);
        NVSWITCH_REPORT_CONTAIN(_HW_NPORT_ROUTE_RAM_ECC_DBE_ERR, "route RAM ECC", NV_FALSE);
        NVSWITCH_REPORT_CONTAIN_DATA(_HW_NPORT_ROUTE_RAM_ECC_DBE_ERR, data);
        nvswitch_clear_flags(&unhandled, bit);

        _nvswitch_construct_ecc_error_event_ls10(&err_event,
            NVSWITCH_ERR_HW_NPORT_ROUTE_RAM_ECC_DBE_ERR, link, NV_FALSE, 0,
            NV_TRUE, 1);

        nvswitch_inforom_ecc_log_err_event(device, &err_event);
    }

    NVSWITCH_UNHANDLED_CHECK(device, unhandled);

    // Disable interrupts that have occurred after fatal error.
    // This helps prevent an interrupt storm if HW keeps triggering unnecessary stream of interrupts.
    if (device->link[link].fatal_error_occurred)
    {
        // pending is a subset of raw_enable (see pending computation above),
        // so XOR here clears exactly the just-handled enable bits.
        NVSWITCH_ENG_WR32(device, NPORT, , link, _ROUTE, _ERR_FATAL_REPORT_EN_0,
            report.raw_enable ^ pending);
    }

    // Re-arm the FIRST tracker for the bits we are servicing.
    if (report.raw_first & report.mask)
    {
        NVSWITCH_ENG_WR32(device, NPORT, , link, _ROUTE, _ERR_FIRST_0,
            report.raw_first & report.mask);
    }
    // Ack (W1C) the serviced status bits.
    NVSWITCH_ENG_WR32(device, NPORT, , link, _ROUTE, _ERR_STATUS_0, pending);

    if (unhandled != 0)
    {
        return -NVL_MORE_PROCESSING_REQUIRED;
    }

    return NVL_SUCCESS;
}

//
// Service non-fatal ROUTE interrupts on NPORT 'link': report each pending,
// enabled error, log ECC limit events to the InfoROM, and ack the handled
// status bits.  Returns -NVL_NOT_FOUND when nothing is pending.
//
static NvlStatus
_nvswitch_service_route_nonfatal_ls10
(
    nvswitch_device *device,
    NvU32 link
)
{
    ls10_device *chip_device = NVSWITCH_GET_CHIP_DEVICE_LS10(device);
    NVSWITCH_INTERRUPT_LOG_TYPE report = { 0 };
    NvU32 pending, bit, unhandled;
    NVSWITCH_RAW_ERROR_LOG_TYPE data = {0, { 0 }};
    INFOROM_NVS_ECC_ERROR_EVENT err_event = {0};

    // Qualify raw status with both the HW report-enable mask and the SW
    // route nonfatal interrupt mask.
    report.raw_pending = NVSWITCH_ENG_RD32(device, NPORT, , link, _ROUTE, _ERR_STATUS_0);
    report.raw_enable = NVSWITCH_ENG_RD32(device, NPORT, , link, _ROUTE, _ERR_NON_FATAL_REPORT_EN_0);
    report.mask = report.raw_enable & chip_device->intr_mask.route.nonfatal;
    pending = report.raw_pending & report.mask;

    if (pending == 0)
    {
        return -NVL_NOT_FOUND;
    }

    unhandled = pending;
    report.raw_first = NVSWITCH_ENG_RD32(device, NPORT, , link, _ROUTE, _ERR_FIRST_0);

    // Packet hit a route entry with no port defined
    bit = DRF_NUM(_ROUTE, _ERR_STATUS_0, _NOPORTDEFINEDERR, 1);
    if (nvswitch_test_flags(pending, bit))
    {
        _nvswitch_collect_error_info_ls10(device, link,
            NVSWITCH_RAW_ERROR_LOG_DATA_FLAG_ROUTE_TIME |
            NVSWITCH_RAW_ERROR_LOG_DATA_FLAG_ROUTE_MISC |
            NVSWITCH_RAW_ERROR_LOG_DATA_FLAG_ROUTE_HDR,
            &data);
        NVSWITCH_REPORT_NONFATAL(_HW_NPORT_ROUTE_NOPORTDEFINEDERR, "route undefined route");
        NVSWITCH_REPORT_DATA(_HW_NPORT_ROUTE_NOPORTDEFINEDERR, data);
        nvswitch_clear_flags(&unhandled, bit);
    }

    // Invalid route policy in the route table entry
    bit = DRF_NUM(_ROUTE, _ERR_STATUS_0, _INVALIDROUTEPOLICYERR, 1);
    if (nvswitch_test_flags(pending, bit))
    {
        _nvswitch_collect_error_info_ls10(device, link,
            NVSWITCH_RAW_ERROR_LOG_DATA_FLAG_ROUTE_TIME |
            NVSWITCH_RAW_ERROR_LOG_DATA_FLAG_ROUTE_MISC |
            NVSWITCH_RAW_ERROR_LOG_DATA_FLAG_ROUTE_HDR,
            &data);
        NVSWITCH_REPORT_NONFATAL(_HW_NPORT_ROUTE_INVALIDROUTEPOLICYERR, "route invalid policy");
        NVSWITCH_REPORT_DATA(_HW_NPORT_ROUTE_INVALIDROUTEPOLICYERR, data);
        nvswitch_clear_flags(&unhandled, bit);
    }

    // Incoming NVS single-bit ECC error count reached its limit
    bit = DRF_NUM(_ROUTE, _ERR_STATUS_0, _NVS_ECC_LIMIT_ERR, 1);
    if (nvswitch_test_flags(pending, bit))
    {
        // Ignore LIMIT error if DBE is pending
        if (!(nvswitch_test_flags(report.raw_pending,
                DRF_NUM(_ROUTE, _ERR_STATUS_0, _NVS_ECC_DBE_ERR, 1))))
        {
            report.data[0] = NVSWITCH_ENG_RD32(device, NPORT, , link, _ROUTE, _ERR_NVS_ECC_ERROR_COUNTER);
            _nvswitch_collect_error_info_ls10(device, link,
                NVSWITCH_RAW_ERROR_LOG_DATA_FLAG_ROUTE_TIME,
                &data);
            NVSWITCH_REPORT_NONFATAL(_HW_NPORT_ROUTE_NVS_ECC_LIMIT_ERR, "route incoming ECC limit");
            NVSWITCH_REPORT_DATA(_HW_NPORT_ROUTE_NVS_ECC_LIMIT_ERR, data);

            _nvswitch_construct_ecc_error_event_ls10(&err_event,
                NVSWITCH_ERR_HW_NPORT_ROUTE_NVS_ECC_LIMIT_ERR, link, NV_FALSE, 0,
                NV_FALSE, 1);

            nvswitch_inforom_ecc_log_err_event(device, &err_event);
        }

        nvswitch_clear_flags(&unhandled, bit);
    }

    // GLT single-bit ECC error count reached its limit
    bit = DRF_NUM(_ROUTE, _ERR_STATUS_0, _GLT_ECC_LIMIT_ERR, 1);
    if (nvswitch_test_flags(pending, bit))
    {
        // Ignore LIMIT error if DBE is pending
        if (!(nvswitch_test_flags(report.raw_pending,
                DRF_NUM(_ROUTE, _ERR_STATUS_0, _GLT_ECC_DBE_ERR, 1))))
        {
            report.data[0] = NVSWITCH_ENG_RD32(device, NPORT, , link, _ROUTE, _ERR_GLT_ECC_ERROR_COUNTER);
            _nvswitch_collect_error_info_ls10(device, link,
                NVSWITCH_RAW_ERROR_LOG_DATA_FLAG_ROUTE_TIME,
&data); 1366 NVSWITCH_REPORT_NONFATAL(_HW_NPORT_ROUTE_NVS_ECC_LIMIT_ERR, "GLT ECC limit"); 1367 NVSWITCH_REPORT_DATA(_HW_NPORT_ROUTE_GLT_ECC_LIMIT_ERR, data); 1368 1369 _nvswitch_construct_ecc_error_event_ls10(&err_event, 1370 NVSWITCH_ERR_HW_NPORT_ROUTE_GLT_ECC_LIMIT_ERR, link, NV_FALSE, 0, 1371 NV_FALSE, 1); 1372 1373 nvswitch_inforom_ecc_log_err_event(device, &err_event); 1374 } 1375 1376 nvswitch_clear_flags(&unhandled, bit); 1377 } 1378 1379 bit = DRF_NUM(_ROUTE, _ERR_STATUS_0, _MCRID_ECC_LIMIT_ERR, 1); 1380 if (nvswitch_test_flags(pending, bit)) 1381 { 1382 // Ignore LIMIT error if DBE is pending 1383 if (!(nvswitch_test_flags(report.raw_pending, 1384 DRF_NUM(_ROUTE, _ERR_STATUS_0, _MCRID_ECC_DBE_ERR, 1)))) 1385 { 1386 report.data[0] = NVSWITCH_ENG_RD32(device, NPORT, , link, _ROUTE, _ERR_MCRID_ECC_ERROR_COUNTER); 1387 _nvswitch_collect_error_info_ls10(device, link, 1388 NVSWITCH_RAW_ERROR_LOG_DATA_FLAG_ROUTE_TIME, 1389 &data); 1390 NVSWITCH_REPORT_NONFATAL(_HW_NPORT_ROUTE_NVS_ECC_LIMIT_ERR, "MCRID ECC limit"); 1391 NVSWITCH_REPORT_DATA(_HW_NPORT_ROUTE_MCRID_ECC_LIMIT_ERR, data); 1392 1393 _nvswitch_construct_ecc_error_event_ls10(&err_event, 1394 NVSWITCH_ERR_HW_NPORT_ROUTE_MCRID_ECC_LIMIT_ERR, link, NV_FALSE, 0, 1395 NV_FALSE, 1); 1396 1397 nvswitch_inforom_ecc_log_err_event(device, &err_event); 1398 } 1399 1400 nvswitch_clear_flags(&unhandled, bit); 1401 } 1402 1403 bit = DRF_NUM(_ROUTE, _ERR_STATUS_0, _EXTMCRID_ECC_LIMIT_ERR, 1); 1404 if (nvswitch_test_flags(pending, bit)) 1405 { 1406 // Ignore LIMIT error if DBE is pending 1407 if (!(nvswitch_test_flags(report.raw_pending, 1408 DRF_NUM(_ROUTE, _ERR_STATUS_0, _EXTMCRID_ECC_DBE_ERR, 1)))) 1409 { 1410 report.data[0] = NVSWITCH_ENG_RD32(device, NPORT, , link, _ROUTE, _ERR_EXTMCRID_ECC_ERROR_COUNTER); 1411 _nvswitch_collect_error_info_ls10(device, link, 1412 NVSWITCH_RAW_ERROR_LOG_DATA_FLAG_ROUTE_TIME, 1413 &data); 1414 NVSWITCH_REPORT_NONFATAL(_HW_NPORT_ROUTE_NVS_ECC_LIMIT_ERR, "EXTMCRID ECC limit"); 1415 
            NVSWITCH_REPORT_DATA(_HW_NPORT_ROUTE_EXTMCRID_ECC_LIMIT_ERR, data);

            _nvswitch_construct_ecc_error_event_ls10(&err_event,
                NVSWITCH_ERR_HW_NPORT_ROUTE_EXTMCRID_ECC_LIMIT_ERR, link, NV_FALSE, 0,
                NV_FALSE, 1);

            nvswitch_inforom_ecc_log_err_event(device, &err_event);
        }

        nvswitch_clear_flags(&unhandled, bit);
    }

    // Route RAM single-bit ECC error count reached its limit
    bit = DRF_NUM(_ROUTE, _ERR_STATUS_0, _RAM_ECC_LIMIT_ERR, 1);
    if (nvswitch_test_flags(pending, bit))
    {
        // Ignore LIMIT error if DBE is pending
        if (!(nvswitch_test_flags(report.raw_pending,
                DRF_NUM(_ROUTE, _ERR_STATUS_0, _RAM_ECC_DBE_ERR, 1))))
        {
            report.data[0] = NVSWITCH_ENG_RD32(device, NPORT, , link, _ROUTE, _ERR_RAM_ECC_ERROR_COUNTER);
            _nvswitch_collect_error_info_ls10(device, link,
                NVSWITCH_RAW_ERROR_LOG_DATA_FLAG_ROUTE_TIME,
                &data);
            NVSWITCH_REPORT_NONFATAL(_HW_NPORT_ROUTE_RAM_ECC_LIMIT_ERR, "RAM ECC limit");
            NVSWITCH_REPORT_DATA(_HW_NPORT_ROUTE_RAM_ECC_LIMIT_ERR, data);

            _nvswitch_construct_ecc_error_event_ls10(&err_event,
                NVSWITCH_ERR_HW_NPORT_ROUTE_RAM_ECC_LIMIT_ERR, link, NV_FALSE, 0,
                NV_FALSE, 1);

            nvswitch_inforom_ecc_log_err_event(device, &err_event);
        }

        nvswitch_clear_flags(&unhandled, bit);
    }

    // Packet carried a multicast RID with no valid entry
    bit = DRF_NUM(_ROUTE, _ERR_STATUS_0, _INVALID_MCRID_ERR, 1);
    if (nvswitch_test_flags(pending, bit))
    {
        _nvswitch_collect_error_info_ls10(device, link,
            NVSWITCH_RAW_ERROR_LOG_DATA_FLAG_ROUTE_TIME,
            &data);
        NVSWITCH_REPORT_NONFATAL(_HW_NPORT_ROUTE_INVALID_MCRID_ERR, "invalid MC route");
        NVSWITCH_REPORT_DATA(_HW_NPORT_ROUTE_INVALID_MCRID_ERR, data);
        nvswitch_clear_flags(&unhandled, bit);
    }

    NVSWITCH_UNHANDLED_CHECK(device, unhandled);

    // Disable interrupts that have occurred after fatal error.
    // This helps prevent an interrupt storm if HW keeps triggering unnecessary stream of interrupts.
    if (device->link[link].fatal_error_occurred)
    {
        // pending is a subset of raw_enable, so XOR clears the handled bits.
        NVSWITCH_ENG_WR32(device, NPORT, , link, _ROUTE, _ERR_NON_FATAL_REPORT_EN_0,
            report.raw_enable ^ pending);
    }

    // Re-arm the FIRST tracker for the bits we are servicing.
    if (report.raw_first & report.mask)
    {
        NVSWITCH_ENG_WR32(device, NPORT, , link, _ROUTE, _ERR_FIRST_0,
            report.raw_first & report.mask);
    }

    // Ack (W1C) the serviced status bits.
    NVSWITCH_ENG_WR32(device, NPORT, , link, _ROUTE, _ERR_STATUS_0, pending);

    //
    // Note, when traffic is flowing, if we reset ERR_COUNT before ERR_STATUS
    // register, we won't see an interrupt again until counter wraps around.
    // In that case, we will miss writing back many ECC victim entries. Hence,
    // always clear _ERR_COUNT only after _ERR_STATUS register is cleared!
    //
    NVSWITCH_ENG_WR32(device, NPORT, , link, _ROUTE, _ERR_NVS_ECC_ERROR_COUNTER, 0x0);

    if (unhandled != 0)
    {
        return -NVL_MORE_PROCESSING_REQUIRED;
    }

    return NVL_SUCCESS;
}

//
// Ingress
//

//
// Service fatal INGRESS interrupts on NPORT 'link'.
//
static NvlStatus
_nvswitch_service_ingress_fatal_ls10
(
    nvswitch_device *device,
    NvU32 link
)
{
    ls10_device *chip_device = NVSWITCH_GET_CHIP_DEVICE_LS10(device);
    NVSWITCH_INTERRUPT_LOG_TYPE report = { 0 };
    NvU32 pending, bit, contain, unhandled;
    NVSWITCH_RAW_ERROR_LOG_TYPE data = {0, { 0 }};
    INFOROM_NVS_ECC_ERROR_EVENT err_event = {0};

    // Qualify raw status with both the HW report-enable mask and the SW
    // ingress fatal interrupt mask.
    report.raw_pending = NVSWITCH_ENG_RD32(device, NPORT, , link, _INGRESS, _ERR_STATUS_0);
    report.raw_enable = NVSWITCH_ENG_RD32(device, NPORT, , link, _INGRESS, _ERR_FATAL_REPORT_EN_0);
    report.mask = report.raw_enable & chip_device->intr_mask.ingress[0].fatal;
    pending = report.raw_pending & report.mask;

    if (pending == 0)
    {
        return -NVL_NOT_FOUND;
    }

    unhandled = pending;
    report.raw_first = NVSWITCH_ENG_RD32(device, NPORT, , link, _INGRESS, _ERR_FIRST_0);
    contain = NVSWITCH_ENG_RD32(device, NPORT, , link,
_INGRESS, _ERR_CONTAIN_EN_0); 1526 _nvswitch_collect_error_info_ls10(device, link, 1527 NVSWITCH_RAW_ERROR_LOG_DATA_FLAG_INGRESS_TIME | 1528 NVSWITCH_RAW_ERROR_LOG_DATA_FLAG_INGRESS_MISC | 1529 NVSWITCH_RAW_ERROR_LOG_DATA_FLAG_INGRESS_HDR, 1530 &data); 1531 1532 bit = DRF_NUM(_INGRESS, _ERR_STATUS_0, _CMDDECODEERR, 1); 1533 if (nvswitch_test_flags(pending, bit)) 1534 { 1535 NVSWITCH_REPORT_CONTAIN(_HW_NPORT_INGRESS_CMDDECODEERR, "ingress invalid command", NV_FALSE); 1536 NVSWITCH_REPORT_CONTAIN_DATA(_HW_NPORT_INGRESS_CMDDECODEERR, data); 1537 nvswitch_clear_flags(&unhandled, bit); 1538 } 1539 1540 bit = DRF_NUM(_INGRESS, _ERR_STATUS_0, _EXTAREMAPTAB_ECC_DBE_ERR, 1); 1541 if (nvswitch_test_flags(pending, bit)) 1542 { 1543 report.data[0] = NVSWITCH_ENG_RD32(device, NPORT, , link, _INGRESS, _ERR_EXTAREMAPTAB_ECC_ERROR_COUNTER); 1544 report.data[1] = NVSWITCH_ENG_RD32(device, NPORT, , link, _INGRESS, _ERR_EXTAREMAPTAB_ECC_ERROR_ADDRESS); 1545 report.data[2] = NVSWITCH_ENG_RD32(device, NPORT, , link, _INGRESS, _ERR_EXTAREMAPTAB_ECC_ERROR_ADDRESS_VALID); 1546 NVSWITCH_REPORT_CONTAIN(_HW_NPORT_INGRESS_EXTAREMAPTAB_ECC_DBE_ERR, "ingress ExtA remap DBE", NV_FALSE); 1547 NVSWITCH_REPORT_CONTAIN_DATA(_HW_NPORT_INGRESS_EXTAREMAPTAB_ECC_DBE_ERR, data); 1548 nvswitch_clear_flags(&unhandled, bit); 1549 1550 _nvswitch_construct_ecc_error_event_ls10(&err_event, 1551 NVSWITCH_ERR_HW_NPORT_INGRESS_EXTAREMAPTAB_ECC_DBE_ERR, link, NV_FALSE, 0, 1552 NV_TRUE, 1); 1553 1554 nvswitch_inforom_ecc_log_err_event(device, &err_event); 1555 } 1556 1557 bit = DRF_NUM(_INGRESS, _ERR_STATUS_0, _NCISOC_HDR_ECC_DBE_ERR, 1); 1558 if (nvswitch_test_flags(pending, bit)) 1559 { 1560 report.data[0] = NVSWITCH_ENG_RD32(device, NPORT, , link, _INGRESS, _ERR_NCISOC_HDR_ECC_ERROR_COUNTER); 1561 NVSWITCH_REPORT_CONTAIN(_HW_NPORT_INGRESS_NCISOC_HDR_ECC_DBE_ERR, "ingress header DBE", NV_FALSE); 1562 NVSWITCH_REPORT_CONTAIN_DATA(_HW_NPORT_INGRESS_NCISOC_HDR_ECC_DBE_ERR, data); 1563 
nvswitch_clear_flags(&unhandled, bit); 1564 1565 _nvswitch_construct_ecc_error_event_ls10(&err_event, 1566 NVSWITCH_ERR_HW_NPORT_INGRESS_NCISOC_HDR_ECC_DBE_ERR, link, NV_FALSE, 0, 1567 NV_TRUE, 1); 1568 1569 nvswitch_inforom_ecc_log_err_event(device, &err_event); 1570 1571 // Clear associated LIMIT_ERR interrupt 1572 if (report.raw_pending & DRF_NUM(_INGRESS, _ERR_STATUS_0, _NCISOC_HDR_ECC_LIMIT_ERR, 1)) 1573 { 1574 NVSWITCH_ENG_WR32(device, NPORT, , link, _INGRESS, _ERR_STATUS_0, 1575 DRF_NUM(_INGRESS, _ERR_STATUS_0, _NCISOC_HDR_ECC_LIMIT_ERR, 1)); 1576 } 1577 } 1578 1579 bit = DRF_NUM(_INGRESS, _ERR_STATUS_0, _INVALIDVCSET, 1); 1580 if (nvswitch_test_flags(pending, bit)) 1581 { 1582 NVSWITCH_REPORT_CONTAIN(_HW_NPORT_INGRESS_INVALIDVCSET, "ingress invalid VCSet", NV_FALSE); 1583 NVSWITCH_REPORT_CONTAIN_DATA(_HW_NPORT_INGRESS_INVALIDVCSET, data); 1584 nvswitch_clear_flags(&unhandled, bit); 1585 } 1586 1587 bit = DRF_NUM(_INGRESS, _ERR_STATUS_0, _REMAPTAB_ECC_DBE_ERR, 1); 1588 if (nvswitch_test_flags(pending, bit)) 1589 { 1590 NvBool bAddressValid = NV_FALSE; 1591 NvU32 address = 0; 1592 NvU32 addressValid = NVSWITCH_ENG_RD32(device, NPORT, , link, _INGRESS, 1593 _ERR_REMAPTAB_ECC_ERROR_ADDRESS); 1594 1595 if (FLD_TEST_DRF(_INGRESS_ERR_REMAPTAB, _ECC_ERROR_ADDRESS_VALID, _VALID, _VALID, 1596 addressValid)) 1597 { 1598 address = NVSWITCH_ENG_RD32(device, NPORT, , link, _INGRESS, 1599 _ERR_REMAPTAB_ECC_ERROR_ADDRESS); 1600 bAddressValid = NV_TRUE; 1601 } 1602 1603 report.data[0] = NVSWITCH_ENG_RD32(device, NPORT, , link, _INGRESS, _ERR_REMAPTAB_ECC_ERROR_COUNTER); 1604 report.data[1] = NVSWITCH_ENG_RD32(device, NPORT, , link, _INGRESS, _ERR_REMAPTAB_ECC_ERROR_ADDRESS); 1605 report.data[2] = NVSWITCH_ENG_RD32(device, NPORT, , link, _INGRESS, _ERR_REMAPTAB_ECC_ERROR_ADDRESS_VALID); 1606 NVSWITCH_REPORT_CONTAIN(_HW_NPORT_INGRESS_REMAPTAB_ECC_DBE_ERR, "ingress Remap DBE", NV_FALSE); 1607 NVSWITCH_REPORT_CONTAIN_DATA(_HW_NPORT_INGRESS_REMAPTAB_ECC_DBE_ERR, data); 1608 
nvswitch_clear_flags(&unhandled, bit); 1609 1610 _nvswitch_construct_ecc_error_event_ls10(&err_event, 1611 NVSWITCH_ERR_HW_NPORT_INGRESS_REMAPTAB_ECC_DBE_ERR, link, bAddressValid, 1612 address, NV_TRUE, 1); 1613 1614 nvswitch_inforom_ecc_log_err_event(device, &err_event); 1615 } 1616 1617 bit = DRF_NUM(_INGRESS, _ERR_STATUS_0, _RIDTAB_ECC_DBE_ERR, 1); 1618 if (nvswitch_test_flags(pending, bit)) 1619 { 1620 NvBool bAddressValid = NV_FALSE; 1621 NvU32 address = 0; 1622 NvU32 addressValid = NVSWITCH_ENG_RD32(device, NPORT, , link, _INGRESS, 1623 _ERR_RIDTAB_ECC_ERROR_ADDRESS_VALID); 1624 1625 if (FLD_TEST_DRF(_INGRESS_ERR_RIDTAB, _ECC_ERROR_ADDRESS_VALID, _VALID, _VALID, 1626 addressValid)) 1627 { 1628 address = NVSWITCH_ENG_RD32(device, NPORT, , link, _INGRESS, 1629 _ERR_RIDTAB_ECC_ERROR_ADDRESS); 1630 bAddressValid = NV_TRUE; 1631 } 1632 1633 report.data[0] = NVSWITCH_ENG_RD32(device, NPORT, , link, _INGRESS, _ERR_RIDTAB_ECC_ERROR_COUNTER); 1634 report.data[1] = NVSWITCH_ENG_RD32(device, NPORT, , link, _INGRESS, _ERR_RIDTAB_ECC_ERROR_ADDRESS); 1635 report.data[2] = NVSWITCH_ENG_RD32(device, NPORT, , link, _INGRESS, _ERR_RIDTAB_ECC_ERROR_ADDRESS_VALID); 1636 NVSWITCH_REPORT_CONTAIN(_HW_NPORT_INGRESS_RIDTAB_ECC_DBE_ERR, "ingress RID DBE", NV_FALSE); 1637 NVSWITCH_REPORT_CONTAIN_DATA(_HW_NPORT_INGRESS_RIDTAB_ECC_DBE_ERR, data); 1638 nvswitch_clear_flags(&unhandled, bit); 1639 1640 _nvswitch_construct_ecc_error_event_ls10(&err_event, 1641 NVSWITCH_ERR_HW_NPORT_INGRESS_RIDTAB_ECC_DBE_ERR, link, bAddressValid, 1642 address, NV_TRUE, 1); 1643 1644 nvswitch_inforom_ecc_log_err_event(device, &err_event); 1645 } 1646 1647 bit = DRF_NUM(_INGRESS, _ERR_STATUS_0, _RLANTAB_ECC_DBE_ERR, 1); 1648 if (nvswitch_test_flags(pending, bit)) 1649 { 1650 NvBool bAddressValid = NV_FALSE; 1651 NvU32 address = 0; 1652 NvU32 addressValid = NVSWITCH_ENG_RD32(device, NPORT, , link, _INGRESS, 1653 _ERR_RLANTAB_ECC_ERROR_ADDRESS_VALID); 1654 1655 if (FLD_TEST_DRF(_INGRESS_ERR_RLANTAB, 
_ECC_ERROR_ADDRESS_VALID, _VALID, _VALID, 1656 addressValid)) 1657 { 1658 address = NVSWITCH_ENG_RD32(device, NPORT, , link, _INGRESS, 1659 _ERR_RLANTAB_ECC_ERROR_ADDRESS); 1660 bAddressValid = NV_TRUE; 1661 } 1662 1663 report.data[0] = NVSWITCH_ENG_RD32(device, NPORT, , link, _INGRESS, _ERR_RLANTAB_ECC_ERROR_COUNTER); 1664 report.data[1] = NVSWITCH_ENG_RD32(device, NPORT, , link, _INGRESS, _ERR_RLANTAB_ECC_ERROR_ADDRESS); 1665 report.data[2] = NVSWITCH_ENG_RD32(device, NPORT, , link, _INGRESS, _ERR_RLANTAB_ECC_ERROR_ADDRESS_VALID); 1666 NVSWITCH_REPORT_CONTAIN(_HW_NPORT_INGRESS_RLANTAB_ECC_DBE_ERR, "ingress RLAN DBE", NV_FALSE); 1667 NVSWITCH_REPORT_CONTAIN_DATA(_HW_NPORT_INGRESS_RLANTAB_ECC_DBE_ERR, data); 1668 nvswitch_clear_flags(&unhandled, bit); 1669 1670 _nvswitch_construct_ecc_error_event_ls10(&err_event, 1671 NVSWITCH_ERR_HW_NPORT_INGRESS_RLANTAB_ECC_DBE_ERR, link, bAddressValid, 1672 address, NV_TRUE, 1); 1673 1674 nvswitch_inforom_ecc_log_err_event(device, &err_event); 1675 } 1676 1677 bit = DRF_NUM(_INGRESS, _ERR_STATUS_0, _NCISOC_PARITY_ERR, 1); 1678 if (nvswitch_test_flags(pending, bit)) 1679 { 1680 NVSWITCH_REPORT_CONTAIN(_HW_NPORT_INGRESS_NCISOC_PARITY_ERR, "ingress control parity", NV_FALSE); 1681 NVSWITCH_REPORT_CONTAIN_DATA(_HW_NPORT_INGRESS_NCISOC_PARITY_ERR, data); 1682 nvswitch_clear_flags(&unhandled, bit); 1683 1684 _nvswitch_construct_ecc_error_event_ls10(&err_event, 1685 NVSWITCH_ERR_HW_NPORT_INGRESS_NCISOC_PARITY_ERR, link, NV_FALSE, 0, 1686 NV_TRUE, 1); 1687 1688 nvswitch_inforom_ecc_log_err_event(device, &err_event); 1689 } 1690 1691 bit = DRF_NUM(_INGRESS, _ERR_STATUS_0, _EXTBREMAPTAB_ECC_DBE_ERR, 1); 1692 if (nvswitch_test_flags(pending, bit)) 1693 { 1694 report.data[0] = NVSWITCH_ENG_RD32(device, NPORT, , link, _INGRESS, _ERR_EXTBREMAPTAB_ECC_ERROR_COUNTER); 1695 report.data[1] = NVSWITCH_ENG_RD32(device, NPORT, , link, _INGRESS, _ERR_EXTBREMAPTAB_ECC_ERROR_ADDRESS); 1696 report.data[2] = NVSWITCH_ENG_RD32(device, NPORT, , link, 
_INGRESS, _ERR_EXTBREMAPTAB_ECC_ERROR_ADDRESS_VALID); 1697 NVSWITCH_REPORT_CONTAIN(_HW_NPORT_INGRESS_EXTBREMAPTAB_ECC_DBE_ERR, "ingress ExtB remap DBE", NV_FALSE); 1698 NVSWITCH_REPORT_CONTAIN_DATA(_HW_NPORT_INGRESS_EXTBREMAPTAB_ECC_DBE_ERR, data); 1699 nvswitch_clear_flags(&unhandled, bit); 1700 1701 _nvswitch_construct_ecc_error_event_ls10(&err_event, 1702 NVSWITCH_ERR_HW_NPORT_INGRESS_EXTBREMAPTAB_ECC_DBE_ERR, link, NV_FALSE, 0, 1703 NV_TRUE, 1); 1704 1705 nvswitch_inforom_ecc_log_err_event(device, &err_event); 1706 } 1707 1708 bit = DRF_NUM(_INGRESS, _ERR_STATUS_0, _MCREMAPTAB_ECC_DBE_ERR, 1); 1709 if (nvswitch_test_flags(pending, bit)) 1710 { 1711 report.data[0] = NVSWITCH_ENG_RD32(device, NPORT, , link, _INGRESS, _ERR_MCREMAPTAB_ECC_ERROR_COUNTER); 1712 report.data[1] = NVSWITCH_ENG_RD32(device, NPORT, , link, _INGRESS, _ERR_MCREMAPTAB_ECC_ERROR_ADDRESS); 1713 report.data[2] = NVSWITCH_ENG_RD32(device, NPORT, , link, _INGRESS, _ERR_MCREMAPTAB_ECC_ERROR_ADDRESS_VALID); 1714 NVSWITCH_REPORT_CONTAIN(_HW_NPORT_INGRESS_MCREMAPTAB_ECC_DBE_ERR, "ingress MC remap DBE", NV_FALSE); 1715 NVSWITCH_REPORT_CONTAIN_DATA(_HW_NPORT_INGRESS_MCREMAPTAB_ECC_DBE_ERR, data); 1716 nvswitch_clear_flags(&unhandled, bit); 1717 1718 _nvswitch_construct_ecc_error_event_ls10(&err_event, 1719 NVSWITCH_ERR_HW_NPORT_INGRESS_MCREMAPTAB_ECC_DBE_ERR, link, NV_FALSE, 0, 1720 NV_TRUE, 1); 1721 1722 nvswitch_inforom_ecc_log_err_event(device, &err_event); 1723 } 1724 1725 NVSWITCH_UNHANDLED_CHECK(device, unhandled); 1726 1727 // Disable interrupts that have occurred after fatal error. 1728 // This helps prevent an interrupt storm if HW keeps triggering unnecessary stream of interrupts. 
    if (device->link[link].fatal_error_occurred)
    {
        // 'pending' is a subset of 'raw_enable' (it was masked with it above),
        // so XOR clears exactly the serviced bits from the enable mask.
        NVSWITCH_ENG_WR32(device, NPORT, , link, _INGRESS, _ERR_FATAL_REPORT_EN_0,
            report.raw_enable ^ pending);
    }

    if (report.raw_first & report.mask)
    {
        NVSWITCH_ENG_WR32(device, NPORT, , link, _INGRESS, _ERR_FIRST_0,
            report.raw_first & report.mask);
    }

    // Write-1-to-clear the serviced interrupt status bits.
    NVSWITCH_ENG_WR32(device, NPORT, , link, _INGRESS, _ERR_STATUS_0, pending);

    if (unhandled != 0)
    {
        return -NVL_MORE_PROCESSING_REQUIRED;
    }

    return NVL_SUCCESS;
}

//
// Service non-fatal INGRESS interrupts for NPORT 'link'.  Handles
// _ERR_STATUS_0 first, then falls through (via the goto label) to
// _ERR_STATUS_1.  ECC single-bit (LIMIT) errors are logged to the InfoROM
// unless the corresponding DBE is also pending; non-ECC errors are only
// reported.  Returns -NVL_NOT_FOUND when nothing is pending in either
// status register, -NVL_MORE_PROCESSING_REQUIRED when unexpected bits
// remain unhandled, NVL_SUCCESS otherwise.
//
static NvlStatus
_nvswitch_service_ingress_nonfatal_ls10
(
    nvswitch_device *device,
    NvU32 link
)
{
    ls10_device *chip_device = NVSWITCH_GET_CHIP_DEVICE_LS10(device);
    NVSWITCH_INTERRUPT_LOG_TYPE report = { 0 };
    NvU32 pending, bit, unhandled;
    NvU32 pending_0, pending_1;
    NvU32 raw_pending_0;
    NVSWITCH_RAW_ERROR_LOG_TYPE data = {0, { 0 }};
    INFOROM_NVS_ECC_ERROR_EVENT err_event = {0};
    NvlStatus status = NVL_SUCCESS;

    //
    // _ERR_STATUS_0
    //
    report.raw_pending = NVSWITCH_ENG_RD32(device, NPORT, , link, _INGRESS, _ERR_STATUS_0);
    report.raw_enable = NVSWITCH_ENG_RD32(device, NPORT, , link, _INGRESS, _ERR_NON_FATAL_REPORT_EN_0);
    report.mask = report.raw_enable & chip_device->intr_mask.ingress[0].nonfatal;

    // Saved so the _ERR_STATUS_1 LIMIT handlers below can check whether the
    // matching DBE bit is pending in _ERR_STATUS_0.
    raw_pending_0 = report.raw_pending;
    pending = (report.raw_pending & report.mask);
    pending_0 = pending;

    if (pending == 0)
    {
        goto _nvswitch_service_ingress_nonfatal_ls10_err_status_1;
    }

    unhandled = pending;
    report.raw_first = NVSWITCH_ENG_RD32(device, NPORT, , link, _INGRESS, _ERR_FIRST_0);
    _nvswitch_collect_error_info_ls10(device, link,
        NVSWITCH_RAW_ERROR_LOG_DATA_FLAG_INGRESS_TIME |
        NVSWITCH_RAW_ERROR_LOG_DATA_FLAG_INGRESS_MISC |
        NVSWITCH_RAW_ERROR_LOG_DATA_FLAG_INGRESS_HDR,
        &data);

    bit = DRF_NUM(_INGRESS, _ERR_STATUS_0, _REQCONTEXTMISMATCHERR, 1);
    if (nvswitch_test_flags(pending, bit))
    {
        NVSWITCH_REPORT_NONFATAL(_HW_NPORT_INGRESS_REQCONTEXTMISMATCHERR, "ingress request context mismatch");
        NVSWITCH_REPORT_DATA(_HW_NPORT_INGRESS_REQCONTEXTMISMATCHERR, data);
        nvswitch_clear_flags(&unhandled, bit);
    }

    bit = DRF_NUM(_INGRESS, _ERR_STATUS_0, _ACLFAIL, 1);
    if (nvswitch_test_flags(pending, bit))
    {
        NVSWITCH_REPORT_NONFATAL(_HW_NPORT_INGRESS_ACLFAIL, "ingress invalid ACL");
        NVSWITCH_REPORT_DATA(_HW_NPORT_INGRESS_ACLFAIL, data);
        nvswitch_clear_flags(&unhandled, bit);
    }

    bit = DRF_NUM(_INGRESS, _ERR_STATUS_0, _NCISOC_HDR_ECC_LIMIT_ERR, 1);
    if (nvswitch_test_flags(pending, bit))
    {
        // Ignore LIMIT error if DBE is pending
        if (!(nvswitch_test_flags(report.raw_pending,
                DRF_NUM(_INGRESS, _ERR_STATUS_0, _NCISOC_HDR_ECC_DBE_ERR, 1))))
        {
            report.data[0] = NVSWITCH_ENG_RD32(device, NPORT, , link, _INGRESS, _ERR_NCISOC_HDR_ECC_ERROR_COUNTER);
            NVSWITCH_REPORT_NONFATAL(_HW_NPORT_INGRESS_NCISOC_HDR_ECC_LIMIT_ERR, "ingress header ECC");
            NVSWITCH_REPORT_DATA(_HW_NPORT_INGRESS_NCISOC_HDR_ECC_LIMIT_ERR, data);

            _nvswitch_construct_ecc_error_event_ls10(&err_event,
                NVSWITCH_ERR_HW_NPORT_INGRESS_NCISOC_HDR_ECC_LIMIT_ERR, link, NV_FALSE, 0,
                NV_FALSE, 1);

            nvswitch_inforom_ecc_log_err_event(device, &err_event);
        }

        nvswitch_clear_flags(&unhandled, bit);
    }

    bit = DRF_NUM(_INGRESS, _ERR_STATUS_0, _ADDRBOUNDSERR, 1);
    if (nvswitch_test_flags(pending, bit))
    {
        NVSWITCH_REPORT_NONFATAL(_HW_NPORT_INGRESS_ADDRBOUNDSERR, "ingress address bounds");
        NVSWITCH_REPORT_DATA(_HW_NPORT_INGRESS_ADDRBOUNDSERR, data);
        nvswitch_clear_flags(&unhandled, bit);
    }

    bit = DRF_NUM(_INGRESS, _ERR_STATUS_0, _RIDTABCFGERR, 1);
    if (nvswitch_test_flags(pending, bit))
    {
        NVSWITCH_REPORT_NONFATAL(_HW_NPORT_INGRESS_RIDTABCFGERR, "ingress RID packet");
        NVSWITCH_REPORT_DATA(_HW_NPORT_INGRESS_RIDTABCFGERR, data);
        nvswitch_clear_flags(&unhandled, bit);
    }

    bit = DRF_NUM(_INGRESS, _ERR_STATUS_0, _RLANTABCFGERR, 1);
    if (nvswitch_test_flags(pending, bit))
    {
        NVSWITCH_REPORT_NONFATAL(_HW_NPORT_INGRESS_RLANTABCFGERR, "ingress RLAN packet");
        NVSWITCH_REPORT_DATA(_HW_NPORT_INGRESS_RLANTABCFGERR, data);
        nvswitch_clear_flags(&unhandled, bit);
    }


    bit = DRF_NUM(_INGRESS, _ERR_STATUS_0, _REMAPTAB_ECC_LIMIT_ERR, 1);
    if (nvswitch_test_flags(pending, bit))
    {
        // Ignore LIMIT error if DBE is pending
        if (!(nvswitch_test_flags(report.raw_pending,
                DRF_NUM(_INGRESS, _ERR_STATUS_0, _REMAPTAB_ECC_DBE_ERR, 1))))
        {
            report.data[0] = NVSWITCH_ENG_RD32(device, NPORT, , link, _INGRESS, _ERR_REMAPTAB_ECC_ERROR_COUNTER);
            NVSWITCH_REPORT_NONFATAL(_HW_NPORT_INGRESS_REMAPTAB_ECC_LIMIT_ERR, "ingress remap ECC");
            NVSWITCH_REPORT_DATA(_HW_NPORT_INGRESS_REMAPTAB_ECC_LIMIT_ERR, data);

            _nvswitch_construct_ecc_error_event_ls10(&err_event,
                NVSWITCH_ERR_HW_NPORT_INGRESS_REMAPTAB_ECC_LIMIT_ERR, link, NV_FALSE, 0,
                NV_FALSE, 1);

            nvswitch_inforom_ecc_log_err_event(device, &err_event);
        }

        nvswitch_clear_flags(&unhandled, bit);
    }

    bit = DRF_NUM(_INGRESS, _ERR_STATUS_0, _RIDTAB_ECC_LIMIT_ERR, 1);
    if (nvswitch_test_flags(pending, bit))
    {
        // Ignore LIMIT error if DBE is pending
        if (!(nvswitch_test_flags(report.raw_pending,
                DRF_NUM(_INGRESS, _ERR_STATUS_0, _RIDTAB_ECC_DBE_ERR, 1))))
        {
            report.data[0] = NVSWITCH_ENG_RD32(device, NPORT, , link, _INGRESS, _ERR_RIDTAB_ECC_ERROR_COUNTER);
            NVSWITCH_REPORT_NONFATAL(_HW_NPORT_INGRESS_RIDTAB_ECC_LIMIT_ERR, "ingress RID ECC");
            NVSWITCH_REPORT_DATA(_HW_NPORT_INGRESS_RIDTAB_ECC_LIMIT_ERR, data);

            _nvswitch_construct_ecc_error_event_ls10(&err_event,
                NVSWITCH_ERR_HW_NPORT_INGRESS_RIDTAB_ECC_LIMIT_ERR, link, NV_FALSE, 0,
                NV_FALSE, 1);

            nvswitch_inforom_ecc_log_err_event(device, &err_event);
        }

        nvswitch_clear_flags(&unhandled, bit);
    }

    bit = DRF_NUM(_INGRESS, _ERR_STATUS_0, _RLANTAB_ECC_LIMIT_ERR, 1);
    if (nvswitch_test_flags(pending, bit))
    {
        // Ignore LIMIT error if DBE is pending
        if (!(nvswitch_test_flags(report.raw_pending,
                DRF_NUM(_INGRESS, _ERR_STATUS_0, _RLANTAB_ECC_DBE_ERR, 1))))
        {
            report.data[0] = NVSWITCH_ENG_RD32(device, NPORT, , link, _INGRESS, _ERR_RLANTAB_ECC_ERROR_COUNTER);
            NVSWITCH_REPORT_NONFATAL(_HW_NPORT_INGRESS_RLANTAB_ECC_LIMIT_ERR, "ingress RLAN ECC");
            NVSWITCH_REPORT_DATA(_HW_NPORT_INGRESS_RLANTAB_ECC_LIMIT_ERR, data);

            _nvswitch_construct_ecc_error_event_ls10(&err_event,
                NVSWITCH_ERR_HW_NPORT_INGRESS_RLANTAB_ECC_LIMIT_ERR, link, NV_FALSE, 0,
                NV_FALSE, 1);

            nvswitch_inforom_ecc_log_err_event(device, &err_event);
        }

        nvswitch_clear_flags(&unhandled, bit);
    }


    bit = DRF_NUM(_INGRESS, _ERR_STATUS_0, _ADDRTYPEERR, 1);
    if (nvswitch_test_flags(pending, bit))
    {
        NVSWITCH_REPORT_NONFATAL(_HW_NPORT_INGRESS_ADDRTYPEERR, "ingress illegal address");
        NVSWITCH_REPORT_DATA(_HW_NPORT_INGRESS_ADDRTYPEERR, data);
        nvswitch_clear_flags(&unhandled, bit);
    }


    bit = DRF_NUM(_INGRESS, _ERR_STATUS_0, _EXTAREMAPTAB_INDEX_ERR, 1);
    if (nvswitch_test_flags(pending, bit))
    {
        NVSWITCH_REPORT_NONFATAL(_HW_NPORT_INGRESS_EXTAREMAPTAB_INDEX_ERR, "ingress ExtA remap index");
        NVSWITCH_REPORT_DATA(_HW_NPORT_INGRESS_EXTAREMAPTAB_INDEX_ERR, data);
        nvswitch_clear_flags(&unhandled, bit);
    }

    bit = DRF_NUM(_INGRESS, _ERR_STATUS_0, _EXTBREMAPTAB_INDEX_ERR, 1);
    if (nvswitch_test_flags(pending, bit))
    {
        NVSWITCH_REPORT_NONFATAL(_HW_NPORT_INGRESS_EXTBREMAPTAB_INDEX_ERR, "ingress ExtB remap index");
        NVSWITCH_REPORT_DATA(_HW_NPORT_INGRESS_EXTBREMAPTAB_INDEX_ERR, data);
        nvswitch_clear_flags(&unhandled, bit);
    }

    bit = DRF_NUM(_INGRESS, _ERR_STATUS_0, _MCREMAPTAB_INDEX_ERR, 1);
    if (nvswitch_test_flags(pending, bit))
    {
        NVSWITCH_REPORT_NONFATAL(_HW_NPORT_INGRESS_MCREMAPTAB_INDEX_ERR, "ingress MC remap index");
        NVSWITCH_REPORT_DATA(_HW_NPORT_INGRESS_MCREMAPTAB_INDEX_ERR, data);
        nvswitch_clear_flags(&unhandled, bit);
    }

    bit = DRF_NUM(_INGRESS, _ERR_STATUS_0, _EXTAREMAPTAB_REQCONTEXTMISMATCHERR, 1);
    if (nvswitch_test_flags(pending, bit))
    {
        NVSWITCH_REPORT_NONFATAL(_HW_NPORT_INGRESS_EXTAREMAPTAB_REQCONTEXTMISMATCHERR, "ingress ExtA request context mismatch");
        NVSWITCH_REPORT_DATA(_HW_NPORT_INGRESS_EXTAREMAPTAB_REQCONTEXTMISMATCHERR, data);
        nvswitch_clear_flags(&unhandled, bit);
    }

    bit = DRF_NUM(_INGRESS, _ERR_STATUS_0, _EXTBREMAPTAB_REQCONTEXTMISMATCHERR, 1);
    if (nvswitch_test_flags(pending, bit))
    {
        NVSWITCH_REPORT_NONFATAL(_HW_NPORT_INGRESS_EXTBREMAPTAB_REQCONTEXTMISMATCHERR, "ingress ExtB request context mismatch");
        NVSWITCH_REPORT_DATA(_HW_NPORT_INGRESS_EXTBREMAPTAB_REQCONTEXTMISMATCHERR, data);
        nvswitch_clear_flags(&unhandled, bit);
    }

    bit = DRF_NUM(_INGRESS, _ERR_STATUS_0, _MCREMAPTAB_REQCONTEXTMISMATCHERR, 1);
    if (nvswitch_test_flags(pending, bit))
    {
        NVSWITCH_REPORT_NONFATAL(_HW_NPORT_INGRESS_MCREMAPTAB_REQCONTEXTMISMATCHERR, "ingress MC request context mismatch");
        NVSWITCH_REPORT_DATA(_HW_NPORT_INGRESS_MCREMAPTAB_REQCONTEXTMISMATCHERR, data);
        nvswitch_clear_flags(&unhandled, bit);
    }

    bit = DRF_NUM(_INGRESS, _ERR_STATUS_0, _EXTAREMAPTAB_ACLFAIL, 1);
    if (nvswitch_test_flags(pending, bit))
    {
        NVSWITCH_REPORT_NONFATAL(_HW_NPORT_INGRESS_EXTAREMAPTAB_ACLFAIL, "ingress invalid ExtA ACL");
        NVSWITCH_REPORT_DATA(_HW_NPORT_INGRESS_EXTAREMAPTAB_ACLFAIL, data);
        nvswitch_clear_flags(&unhandled, bit);
    }

    bit = DRF_NUM(_INGRESS, _ERR_STATUS_0, _EXTBREMAPTAB_ACLFAIL, 1);
    if (nvswitch_test_flags(pending, bit))
    {
        NVSWITCH_REPORT_NONFATAL(_HW_NPORT_INGRESS_EXTBREMAPTAB_ACLFAIL, "ingress invalid ExtB ACL");
        NVSWITCH_REPORT_DATA(_HW_NPORT_INGRESS_EXTBREMAPTAB_ACLFAIL, data);
        nvswitch_clear_flags(&unhandled, bit);
    }

    bit = DRF_NUM(_INGRESS, _ERR_STATUS_0, _MCREMAPTAB_ACLFAIL, 1);
    if (nvswitch_test_flags(pending, bit))
    {
        NVSWITCH_REPORT_NONFATAL(_HW_NPORT_INGRESS_MCREMAPTAB_ACLFAIL, "ingress invalid MC ACL");
        NVSWITCH_REPORT_DATA(_HW_NPORT_INGRESS_MCREMAPTAB_ACLFAIL, data);
        nvswitch_clear_flags(&unhandled, bit);
    }

    bit = DRF_NUM(_INGRESS, _ERR_STATUS_0, _EXTAREMAPTAB_ADDRBOUNDSERR, 1);
    if (nvswitch_test_flags(pending, bit))
    {
        NVSWITCH_REPORT_NONFATAL(_HW_NPORT_INGRESS_EXTAREMAPTAB_ADDRBOUNDSERR, "ingress ExtA address bounds");
        NVSWITCH_REPORT_DATA(_HW_NPORT_INGRESS_EXTAREMAPTAB_ADDRBOUNDSERR, data);
        nvswitch_clear_flags(&unhandled, bit);
    }

    bit = DRF_NUM(_INGRESS, _ERR_STATUS_0, _EXTBREMAPTAB_ADDRBOUNDSERR, 1);
    if (nvswitch_test_flags(pending, bit))
    {
        NVSWITCH_REPORT_NONFATAL(_HW_NPORT_INGRESS_EXTBREMAPTAB_ADDRBOUNDSERR, "ingress ExtB address bounds");
        NVSWITCH_REPORT_DATA(_HW_NPORT_INGRESS_EXTBREMAPTAB_ADDRBOUNDSERR, data);
        nvswitch_clear_flags(&unhandled, bit);
    }

    bit = DRF_NUM(_INGRESS, _ERR_STATUS_0, _MCREMAPTAB_ADDRBOUNDSERR, 1);
    if (nvswitch_test_flags(pending, bit))
    {
        NVSWITCH_REPORT_NONFATAL(_HW_NPORT_INGRESS_MCREMAPTAB_ADDRBOUNDSERR, "ingress MC address bounds");
        NVSWITCH_REPORT_DATA(_HW_NPORT_INGRESS_MCREMAPTAB_ADDRBOUNDSERR, data);
        nvswitch_clear_flags(&unhandled, bit);
    }

    NVSWITCH_UNHANDLED_CHECK(device, unhandled);

    // Disable interrupts that have occurred after fatal error.
    // This helps prevent an interrupt storm if HW keeps triggering unnecessary stream of interrupts.
    if (device->link[link].fatal_error_occurred)
    {
        NVSWITCH_ENG_WR32(device, NPORT, , link, _INGRESS, _ERR_NON_FATAL_REPORT_EN_0,
            report.raw_enable ^ pending);
    }

    if (report.raw_first & report.mask)
    {
        NVSWITCH_ENG_WR32(device, NPORT, , link, _INGRESS, _ERR_FIRST_0,
            report.raw_first & report.mask);
    }

    // Note: _ERR_STATUS_0 is not cleared here; both status registers are
    // cleared together after the _ERR_STATUS_1 section below.
    if (unhandled != 0)
    {
        status = -NVL_MORE_PROCESSING_REQUIRED;
    }

_nvswitch_service_ingress_nonfatal_ls10_err_status_1:
    //
    // _ERR_STATUS_1
    //
    report.raw_pending = NVSWITCH_ENG_RD32(device, NPORT, , link, _INGRESS, _ERR_STATUS_1);
    report.raw_enable = NVSWITCH_ENG_RD32(device, NPORT, , link, _INGRESS, _ERR_NON_FATAL_REPORT_EN_1);
    report.mask = report.raw_enable & chip_device->intr_mask.ingress[1].nonfatal;

    pending = (report.raw_pending & report.mask);
    pending_1 = pending;

    if ((pending_0 == 0) && (pending_1 == 0))
    {
        return -NVL_NOT_FOUND;
    }

    unhandled = pending;
    report.raw_first = NVSWITCH_ENG_RD32(device, NPORT, , link, _INGRESS, _ERR_FIRST_1);

    bit = DRF_NUM(_INGRESS, _ERR_STATUS_1, _EXTAREMAPTAB_ECC_LIMIT_ERR, 1);
    if (nvswitch_test_flags(pending, bit))
    {
        // Ignore LIMIT error if DBE is pending
        // (the matching DBE bit lives in _ERR_STATUS_0, hence raw_pending_0)
        if (!(nvswitch_test_flags(raw_pending_0,
                DRF_NUM(_INGRESS, _ERR_STATUS_0, _EXTAREMAPTAB_ECC_DBE_ERR, 1))))
        {
            report.data[0] = NVSWITCH_ENG_RD32(device, NPORT, , link, _INGRESS, _ERR_EXTAREMAPTAB_ECC_ERROR_COUNTER);
            NVSWITCH_REPORT_NONFATAL(_HW_NPORT_INGRESS_EXTAREMAPTAB_ECC_LIMIT_ERR, "ingress ExtA remap ECC");
            NVSWITCH_REPORT_DATA(_HW_NPORT_INGRESS_EXTAREMAPTAB_ECC_LIMIT_ERR, data);

            _nvswitch_construct_ecc_error_event_ls10(&err_event,
                NVSWITCH_ERR_HW_NPORT_INGRESS_EXTAREMAPTAB_ECC_LIMIT_ERR, link, NV_FALSE, 0,
                NV_FALSE, 1);

            nvswitch_inforom_ecc_log_err_event(device, &err_event);
        }

        nvswitch_clear_flags(&unhandled, bit);
    }

    bit = DRF_NUM(_INGRESS, _ERR_STATUS_1, _EXTBREMAPTAB_ECC_LIMIT_ERR, 1);
    if (nvswitch_test_flags(pending, bit))
    {
        // Ignore LIMIT error if DBE is pending
        if (!(nvswitch_test_flags(raw_pending_0,
                DRF_NUM(_INGRESS, _ERR_STATUS_0, _EXTBREMAPTAB_ECC_DBE_ERR, 1))))
        {
            report.data[0] = NVSWITCH_ENG_RD32(device, NPORT, , link, _INGRESS, _ERR_EXTBREMAPTAB_ECC_ERROR_COUNTER);
            NVSWITCH_REPORT_NONFATAL(_HW_NPORT_INGRESS_EXTBREMAPTAB_ECC_LIMIT_ERR, "ingress ExtB remap ECC");
            NVSWITCH_REPORT_DATA(_HW_NPORT_INGRESS_EXTBREMAPTAB_ECC_LIMIT_ERR, data);

            _nvswitch_construct_ecc_error_event_ls10(&err_event,
                NVSWITCH_ERR_HW_NPORT_INGRESS_EXTBREMAPTAB_ECC_LIMIT_ERR, link, NV_FALSE, 0,
                NV_FALSE, 1);

            nvswitch_inforom_ecc_log_err_event(device, &err_event);
        }

        nvswitch_clear_flags(&unhandled, bit);
    }

    bit = DRF_NUM(_INGRESS, _ERR_STATUS_1, _MCREMAPTAB_ECC_LIMIT_ERR, 1);
    if (nvswitch_test_flags(pending, bit))
    {
        // Ignore LIMIT error if DBE is pending
        if (!(nvswitch_test_flags(raw_pending_0,
                DRF_NUM(_INGRESS, _ERR_STATUS_0, _MCREMAPTAB_ECC_DBE_ERR, 1))))
        {
            report.data[0] = NVSWITCH_ENG_RD32(device, NPORT, , link, _INGRESS, _ERR_MCREMAPTAB_ECC_ERROR_COUNTER);
            NVSWITCH_REPORT_NONFATAL(_HW_NPORT_INGRESS_MCREMAPTAB_ECC_LIMIT_ERR, "ingress MC remap ECC");
            NVSWITCH_REPORT_DATA(_HW_NPORT_INGRESS_MCREMAPTAB_ECC_LIMIT_ERR, data);

            _nvswitch_construct_ecc_error_event_ls10(&err_event,
                NVSWITCH_ERR_HW_NPORT_INGRESS_MCREMAPTAB_ECC_LIMIT_ERR, link, NV_FALSE, 0,
                NV_FALSE, 1);

            nvswitch_inforom_ecc_log_err_event(device, &err_event);
        }

        nvswitch_clear_flags(&unhandled, bit);
    }

    bit = DRF_NUM(_INGRESS, _ERR_STATUS_1, _MCCMDTOUCADDRERR, 1);
    if (nvswitch_test_flags(pending, bit))
    {
        NVSWITCH_REPORT_NONFATAL(_HW_NPORT_INGRESS_MCCMDTOUCADDRERR, "ingress MC command to uc");
        NVSWITCH_REPORT_DATA(_HW_NPORT_INGRESS_MCCMDTOUCADDRERR, data);
        nvswitch_clear_flags(&unhandled, bit);
    }

    bit = DRF_NUM(_INGRESS, _ERR_STATUS_1, _READMCREFLECTMEMERR, 1);
    if (nvswitch_test_flags(pending, bit))
    {
        NVSWITCH_REPORT_NONFATAL(_HW_NPORT_INGRESS_READMCREFLECTMEMERR, "ingress read reflective");
        NVSWITCH_REPORT_DATA(_HW_NPORT_INGRESS_READMCREFLECTMEMERR, data);
        nvswitch_clear_flags(&unhandled, bit);
    }

    bit = DRF_NUM(_INGRESS, _ERR_STATUS_1, _EXTAREMAPTAB_ADDRTYPEERR, 1);
    if (nvswitch_test_flags(pending, bit))
    {
        NVSWITCH_REPORT_NONFATAL(_HW_NPORT_INGRESS_EXTAREMAPTAB_ADDRTYPEERR, "ingress ExtA address type");
        NVSWITCH_REPORT_DATA(_HW_NPORT_INGRESS_EXTAREMAPTAB_ADDRTYPEERR, data);
        nvswitch_clear_flags(&unhandled, bit);
    }

    bit = DRF_NUM(_INGRESS, _ERR_STATUS_1, _EXTBREMAPTAB_ADDRTYPEERR, 1);
    if (nvswitch_test_flags(pending, bit))
    {
        NVSWITCH_REPORT_NONFATAL(_HW_NPORT_INGRESS_EXTBREMAPTAB_ADDRTYPEERR, "ingress ExtB address type");
        NVSWITCH_REPORT_DATA(_HW_NPORT_INGRESS_EXTBREMAPTAB_ADDRTYPEERR, data);
        nvswitch_clear_flags(&unhandled, bit);
    }

    bit = DRF_NUM(_INGRESS, _ERR_STATUS_1, _MCREMAPTAB_ADDRTYPEERR, 1);
    if (nvswitch_test_flags(pending, bit))
    {
        NVSWITCH_REPORT_NONFATAL(_HW_NPORT_INGRESS_MCREMAPTAB_ADDRTYPEERR, "ingress MC address type");
        NVSWITCH_REPORT_DATA(_HW_NPORT_INGRESS_MCREMAPTAB_ADDRTYPEERR, data);
        nvswitch_clear_flags(&unhandled, bit);
    }

    NVSWITCH_UNHANDLED_CHECK(device, unhandled);

    // Disable interrupts that have occurred after fatal error.
    // This helps prevent an interrupt storm if HW keeps triggering unnecessary stream of interrupts.
    if (device->link[link].fatal_error_occurred)
    {
        NVSWITCH_ENG_WR32(device, NPORT, , link, _INGRESS, _ERR_NON_FATAL_REPORT_EN_1,
            report.raw_enable ^ pending);
    }

    if (report.raw_first & report.mask)
    {
        NVSWITCH_ENG_WR32(device, NPORT, , link, _INGRESS, _ERR_FIRST_1,
            report.raw_first & report.mask);
    }

    // Write-1-to-clear both status registers' serviced bits.
    NVSWITCH_ENG_WR32(device, NPORT, , link, _INGRESS, _ERR_STATUS_0, pending_0);
    NVSWITCH_ENG_WR32(device, NPORT, , link, _INGRESS, _ERR_STATUS_1, pending_1);

    if (unhandled != 0)
    {
        status = -NVL_MORE_PROCESSING_REQUIRED;
    }

    return status;
}

//
// Tstate
//

//
// Service non-fatal TSTATE interrupts for NPORT 'link'.  Covers the ECC
// single-bit (LIMIT) threshold errors for the tag pool and crumbstore RAMs;
// each is logged to the InfoROM unless the matching DBE is also pending.
//
static NvlStatus
_nvswitch_service_tstate_nonfatal_ls10
(
    nvswitch_device *device,
    NvU32 link
)
{
    ls10_device *chip_device = NVSWITCH_GET_CHIP_DEVICE_LS10(device);
    NVSWITCH_INTERRUPT_LOG_TYPE report = { 0 };
    NvU32 pending, bit, unhandled;
    NVSWITCH_RAW_ERROR_LOG_TYPE data = {0, { 0 }};
    INFOROM_NVS_ECC_ERROR_EVENT err_event = {0};

    report.raw_pending = NVSWITCH_ENG_RD32(device, NPORT, , link, _TSTATE, _ERR_STATUS_0);
    report.raw_enable = NVSWITCH_ENG_RD32(device, NPORT, , link, _TSTATE, _ERR_NON_FATAL_REPORT_EN_0);
    report.mask = report.raw_enable & chip_device->intr_mask.tstate.nonfatal;
    report.data[0] = NVSWITCH_ENG_RD32(device, NPORT, , link, _TSTATE, _ERR_MISC_LOG_0);
    pending = report.raw_pending & report.mask;

    if (pending == 0)
    {
        return -NVL_NOT_FOUND;
    }

    unhandled = pending;
    report.raw_first = NVSWITCH_ENG_RD32(device, NPORT, , link, _TSTATE, _ERR_FIRST_0);

    bit = DRF_NUM(_TSTATE, _ERR_STATUS_0, _TAGPOOL_ECC_LIMIT_ERR, 1);
    if (nvswitch_test_flags(pending, bit))
    {
        // Ignore LIMIT error if DBE is pending
        if(!(nvswitch_test_flags(report.raw_pending,
            DRF_NUM(_TSTATE, _ERR_STATUS_0, _TAGPOOL_ECC_DBE_ERR, 1))))
        {
            NvBool
bAddressValid = NV_FALSE;
            NvU32 address = 0;
            NvU32 addressValid = NVSWITCH_ENG_RD32(device, NPORT, , link, _TSTATE,
                _ERR_TAGPOOL_ECC_ERROR_ADDRESS_VALID);

            // Capture the error address only if HW reports it as valid.
            if (FLD_TEST_DRF(_TSTATE_ERR_TAGPOOL, _ECC_ERROR_ADDRESS_VALID, _VALID, _VALID,
                addressValid))
            {
                address = NVSWITCH_ENG_RD32(device, NPORT, , link, _TSTATE,
                    _ERR_TAGPOOL_ECC_ERROR_ADDRESS);
                bAddressValid = NV_TRUE;
            }

            report.data[1] = NVSWITCH_ENG_RD32(device, NPORT, , link, _TSTATE, _ERR_TAGPOOL_ECC_ERROR_COUNTER);
            // Reset the HW error counter after sampling it.
            NVSWITCH_ENG_WR32(device, NPORT, , link, _TSTATE, _ERR_TAGPOOL_ECC_ERROR_COUNTER,
                DRF_DEF(_TSTATE, _ERR_TAGPOOL_ECC_ERROR_COUNTER, _ERROR_COUNT, _INIT));
            NVSWITCH_REPORT_NONFATAL(_HW_NPORT_TSTATE_TAGPOOL_ECC_LIMIT_ERR, "TS tag store single-bit threshold");
            _nvswitch_collect_error_info_ls10(device, link,
                NVSWITCH_RAW_ERROR_LOG_DATA_FLAG_EGRESS_TIME |
                NVSWITCH_RAW_ERROR_LOG_DATA_FLAG_EGRESS_MISC |
                NVSWITCH_RAW_ERROR_LOG_DATA_FLAG_EGRESS_HDR,
                &data);
            NVSWITCH_REPORT_DATA(_HW_NPORT_TSTATE_TAGPOOL_ECC_LIMIT_ERR, data);

            _nvswitch_construct_ecc_error_event_ls10(&err_event,
                NVSWITCH_ERR_HW_NPORT_TSTATE_TAGPOOL_ECC_LIMIT_ERR, link,
                bAddressValid, address, NV_FALSE, 1);

            nvswitch_inforom_ecc_log_err_event(device, &err_event);
        }

        nvswitch_clear_flags(&unhandled, bit);
    }

    bit = DRF_NUM(_TSTATE, _ERR_STATUS_0, _CRUMBSTORE_ECC_LIMIT_ERR, 1);
    if (nvswitch_test_flags(pending, bit))
    {
        // Ignore LIMIT error if DBE is pending
        if(!(nvswitch_test_flags(report.raw_pending,
            DRF_NUM(_TSTATE, _ERR_STATUS_0, _CRUMBSTORE_ECC_DBE_ERR, 1))))
        {
            NvBool bAddressValid = NV_FALSE;
            NvU32 address = 0;
            NvU32 addressValid = NVSWITCH_ENG_RD32(device, NPORT, , link, _TSTATE,
                _ERR_CRUMBSTORE_ECC_ERROR_ADDRESS_VALID);

            if (FLD_TEST_DRF(_TSTATE_ERR_CRUMBSTORE, _ECC_ERROR_ADDRESS_VALID, _VALID, _VALID,
                addressValid))
            {
                address = NVSWITCH_ENG_RD32(device, NPORT, , link, _TSTATE,
                    _ERR_CRUMBSTORE_ECC_ERROR_ADDRESS);
                bAddressValid = NV_TRUE;
            }

            report.data[1] = NVSWITCH_ENG_RD32(device, NPORT, , link, _TSTATE, _ERR_CRUMBSTORE_ECC_ERROR_COUNTER);
            // Reset the HW error counter after sampling it.
            NVSWITCH_ENG_WR32(device, NPORT, , link, _TSTATE, _ERR_CRUMBSTORE_ECC_ERROR_COUNTER,
                DRF_DEF(_TSTATE, _ERR_CRUMBSTORE_ECC_ERROR_COUNTER, _ERROR_COUNT, _INIT));
            NVSWITCH_REPORT_NONFATAL(_HW_NPORT_TSTATE_CRUMBSTORE_ECC_LIMIT_ERR, "TS crumbstore single-bit threshold");
            _nvswitch_collect_error_info_ls10(device, link,
                NVSWITCH_RAW_ERROR_LOG_DATA_FLAG_INGRESS_TIME |
                NVSWITCH_RAW_ERROR_LOG_DATA_FLAG_INGRESS_MISC |
                NVSWITCH_RAW_ERROR_LOG_DATA_FLAG_INGRESS_HDR,
                &data);
            NVSWITCH_REPORT_DATA(_HW_NPORT_TSTATE_CRUMBSTORE_ECC_LIMIT_ERR, data);

            _nvswitch_construct_ecc_error_event_ls10(&err_event,
                NVSWITCH_ERR_HW_NPORT_TSTATE_CRUMBSTORE_ECC_LIMIT_ERR, link,
                bAddressValid, address, NV_FALSE, 1);

            nvswitch_inforom_ecc_log_err_event(device, &err_event);
        }

        nvswitch_clear_flags(&unhandled, bit);
    }

    NVSWITCH_UNHANDLED_CHECK(device, unhandled);

    // Disable interrupts that have occurred after fatal error.
    // This helps prevent an interrupt storm if HW keeps triggering unnecessary stream of interrupts.
    if (device->link[link].fatal_error_occurred)
    {
        // 'pending' is a subset of 'raw_enable', so XOR clears the serviced bits.
        NVSWITCH_ENG_WR32(device, NPORT, , link, _TSTATE, _ERR_NON_FATAL_REPORT_EN_0,
            report.raw_enable ^ pending);
    }

    if (report.raw_first & report.mask)
    {
        NVSWITCH_ENG_WR32(device, NPORT, , link, _TSTATE, _ERR_FIRST_0,
            report.raw_first & report.mask);
    }

    // Write-1-to-clear the serviced interrupt status bits.
    NVSWITCH_ENG_WR32(device, NPORT, , link, _TSTATE, _ERR_STATUS_0, pending);

    if (unhandled != 0)
    {
        return -NVL_MORE_PROCESSING_REQUIRED;
    }

    return NVL_SUCCESS;
}

//
// Service fatal TSTATE interrupts for NPORT 'link'.  Fatal errors are
// reported via the CONTAIN macros; ECC double-bit errors are additionally
// logged to the InfoROM and their companion LIMIT interrupts cleared.
//
static NvlStatus
_nvswitch_service_tstate_fatal_ls10
(
    nvswitch_device *device,
    NvU32 link
)
{
    ls10_device *chip_device = NVSWITCH_GET_CHIP_DEVICE_LS10(device);
    NVSWITCH_INTERRUPT_LOG_TYPE report = { 0 };
    NvU32 pending, bit, contain, unhandled;
    NVSWITCH_RAW_ERROR_LOG_TYPE data = {0, { 0 }};
    INFOROM_NVS_ECC_ERROR_EVENT err_event = {0};

    report.raw_pending = NVSWITCH_ENG_RD32(device, NPORT, , link, _TSTATE, _ERR_STATUS_0);
    report.raw_enable = NVSWITCH_ENG_RD32(device, NPORT, , link, _TSTATE, _ERR_FATAL_REPORT_EN_0);
    report.mask = report.raw_enable & chip_device->intr_mask.tstate.fatal;
    report.data[0] = NVSWITCH_ENG_RD32(device, NPORT, , link, _TSTATE, _ERR_MISC_LOG_0);
    pending = report.raw_pending & report.mask;

    if (pending == 0)
    {
        return -NVL_NOT_FOUND;
    }

    unhandled = pending;
    report.raw_first = NVSWITCH_ENG_RD32(device, NPORT, , link, _TSTATE, _ERR_FIRST_0);
    // 'contain' is consumed by the NVSWITCH_REPORT_CONTAIN macros below.
    contain = NVSWITCH_ENG_RD32(device, NPORT, , link, _TSTATE, _ERR_CONTAIN_EN_0);

    bit = DRF_NUM(_TSTATE, _ERR_STATUS_0, _TAGPOOLBUFERR, 1);
    if (nvswitch_test_flags(pending, bit))
    {
        NVSWITCH_REPORT_CONTAIN(_HW_NPORT_TSTATE_TAGPOOLBUFERR, "TS pointer crossover", NV_FALSE);
        _nvswitch_collect_error_info_ls10(device, link,
            NVSWITCH_RAW_ERROR_LOG_DATA_FLAG_EGRESS_TIME |
NVSWITCH_RAW_ERROR_LOG_DATA_FLAG_EGRESS_MISC |
            NVSWITCH_RAW_ERROR_LOG_DATA_FLAG_EGRESS_HDR,
            &data);
        NVSWITCH_REPORT_CONTAIN_DATA(_HW_NPORT_TSTATE_TAGPOOLBUFERR, data);
        nvswitch_clear_flags(&unhandled, bit);
    }

    bit = DRF_NUM(_TSTATE, _ERR_STATUS_0, _TAGPOOL_ECC_DBE_ERR, 1);
    if (nvswitch_test_flags(pending, bit))
    {
        NvBool bAddressValid = NV_FALSE;
        NvU32 address = 0;
        NvU32 addressValid = NVSWITCH_ENG_RD32(device, NPORT, , link, _TSTATE,
            _ERR_TAGPOOL_ECC_ERROR_ADDRESS_VALID);

        // Capture the error address only if HW reports it as valid.
        if (FLD_TEST_DRF(_TSTATE_ERR_TAGPOOL, _ECC_ERROR_ADDRESS_VALID, _VALID, _VALID,
            addressValid))
        {
            address = NVSWITCH_ENG_RD32(device, NPORT, , link, _TSTATE,
                _ERR_TAGPOOL_ECC_ERROR_ADDRESS);
            bAddressValid = NV_TRUE;
        }

        report.data[1] = NVSWITCH_ENG_RD32(device, NPORT, , link, _TSTATE, _ERR_TAGPOOL_ECC_ERROR_COUNTER);
        // Reset the HW error counter after sampling it.
        NVSWITCH_ENG_WR32(device, NPORT, , link, _TSTATE, _ERR_TAGPOOL_ECC_ERROR_COUNTER,
            DRF_DEF(_TSTATE, _ERR_TAGPOOL_ECC_ERROR_COUNTER, _ERROR_COUNT, _INIT));
        NVSWITCH_REPORT_CONTAIN(_HW_NPORT_TSTATE_TAGPOOL_ECC_DBE_ERR, "TS tag store fatal ECC", NV_FALSE);
        _nvswitch_collect_error_info_ls10(device, link,
            NVSWITCH_RAW_ERROR_LOG_DATA_FLAG_EGRESS_TIME |
            NVSWITCH_RAW_ERROR_LOG_DATA_FLAG_EGRESS_MISC |
            NVSWITCH_RAW_ERROR_LOG_DATA_FLAG_EGRESS_HDR,
            &data);
        NVSWITCH_REPORT_CONTAIN_DATA(_HW_NPORT_TSTATE_TAGPOOL_ECC_DBE_ERR, data);
        nvswitch_clear_flags(&unhandled, bit);

        _nvswitch_construct_ecc_error_event_ls10(&err_event,
            NVSWITCH_ERR_HW_NPORT_TSTATE_TAGPOOL_ECC_DBE_ERR, link, bAddressValid,
            address, NV_TRUE, 1);

        nvswitch_inforom_ecc_log_err_event(device, &err_event);

        // Clear associated LIMIT_ERR interrupt
        if (report.raw_pending & DRF_NUM(_TSTATE, _ERR_STATUS_0, _TAGPOOL_ECC_LIMIT_ERR, 1))
        {
            NVSWITCH_ENG_WR32(device, NPORT, , link, _TSTATE, _ERR_STATUS_0,
                DRF_NUM(_TSTATE, _ERR_STATUS_0, _TAGPOOL_ECC_LIMIT_ERR, 1));
        }
    }

    bit = DRF_NUM(_TSTATE, _ERR_STATUS_0, _CRUMBSTOREBUFERR, 1);
    if (nvswitch_test_flags(pending, bit))
    {
        NVSWITCH_REPORT_CONTAIN(_HW_NPORT_TSTATE_CRUMBSTOREBUFERR, "TS crumbstore", NV_FALSE);
        _nvswitch_collect_error_info_ls10(device, link,
            NVSWITCH_RAW_ERROR_LOG_DATA_FLAG_EGRESS_TIME |
            NVSWITCH_RAW_ERROR_LOG_DATA_FLAG_EGRESS_MISC |
            NVSWITCH_RAW_ERROR_LOG_DATA_FLAG_EGRESS_HDR,
            &data);
        NVSWITCH_REPORT_CONTAIN_DATA(_HW_NPORT_TSTATE_CRUMBSTOREBUFERR, data);
        nvswitch_clear_flags(&unhandled, bit);
    }

    bit = DRF_NUM(_TSTATE, _ERR_STATUS_0, _CRUMBSTORE_ECC_DBE_ERR, 1);
    if (nvswitch_test_flags(pending, bit))
    {
        NvBool bAddressValid = NV_FALSE;
        NvU32 address = 0;
        NvU32 addressValid = NVSWITCH_ENG_RD32(device, NPORT, , link, _TSTATE,
            _ERR_CRUMBSTORE_ECC_ERROR_ADDRESS_VALID);

        if (FLD_TEST_DRF(_TSTATE_ERR_CRUMBSTORE, _ECC_ERROR_ADDRESS_VALID, _VALID, _VALID,
            addressValid))
        {
            address = NVSWITCH_ENG_RD32(device, NPORT, , link, _TSTATE,
                _ERR_CRUMBSTORE_ECC_ERROR_ADDRESS);
            bAddressValid = NV_TRUE;
        }

        report.data[1] = NVSWITCH_ENG_RD32(device, NPORT, , link, _TSTATE, _ERR_CRUMBSTORE_ECC_ERROR_COUNTER);
        // Reset the HW error counter after sampling it.
        NVSWITCH_ENG_WR32(device, NPORT, , link, _TSTATE, _ERR_CRUMBSTORE_ECC_ERROR_COUNTER,
            DRF_DEF(_TSTATE, _ERR_CRUMBSTORE_ECC_ERROR_COUNTER, _ERROR_COUNT, _INIT));
        NVSWITCH_REPORT_CONTAIN(_HW_NPORT_TSTATE_CRUMBSTORE_ECC_DBE_ERR, "TS crumbstore fatal ECC", NV_FALSE);
        _nvswitch_collect_error_info_ls10(device, link,
            NVSWITCH_RAW_ERROR_LOG_DATA_FLAG_INGRESS_TIME |
            NVSWITCH_RAW_ERROR_LOG_DATA_FLAG_INGRESS_MISC |
            NVSWITCH_RAW_ERROR_LOG_DATA_FLAG_INGRESS_HDR,
            &data);
        NVSWITCH_REPORT_CONTAIN_DATA(_HW_NPORT_TSTATE_CRUMBSTORE_ECC_DBE_ERR, data);
        nvswitch_clear_flags(&unhandled, bit);

        _nvswitch_construct_ecc_error_event_ls10(&err_event,
            NVSWITCH_ERR_HW_NPORT_TSTATE_CRUMBSTORE_ECC_DBE_ERR, link, bAddressValid,
            address, NV_TRUE, 1);

        nvswitch_inforom_ecc_log_err_event(device, &err_event);

        // Clear associated LIMIT_ERR interrupt
        if (report.raw_pending & DRF_NUM(_TSTATE, _ERR_STATUS_0, _CRUMBSTORE_ECC_LIMIT_ERR, 1))
        {
            NVSWITCH_ENG_WR32(device, NPORT, , link, _TSTATE, _ERR_STATUS_0,
                DRF_NUM(_TSTATE, _ERR_STATUS_0, _CRUMBSTORE_ECC_LIMIT_ERR, 1));
        }
    }

    bit = DRF_NUM(_TSTATE, _ERR_STATUS_0, _ATO_ERR, 1);
    if (nvswitch_test_flags(pending, bit))
    {
        // Only capture the debug register if ATO was the first error recorded.
        if (FLD_TEST_DRF_NUM(_TSTATE, _ERR_FIRST_0, _ATO_ERR, 1, report.raw_first))
        {
            report.data[1] = NVSWITCH_ENG_RD32(device, NPORT, , link, _TSTATE, _ERR_DEBUG);
        }
        NVSWITCH_REPORT_CONTAIN(_HW_NPORT_TSTATE_ATO_ERR, "TS ATO timeout", NV_FALSE);
        nvswitch_clear_flags(&unhandled, bit);
    }

    bit = DRF_NUM(_TSTATE, _ERR_STATUS_0, _CAMRSP_ERR, 1);
    if (nvswitch_test_flags(pending, bit))
    {
        NVSWITCH_REPORT_CONTAIN(_HW_NPORT_TSTATE_CAMRSP_ERR, "Rsp Tag value out of range", NV_FALSE);
        _nvswitch_collect_error_info_ls10(device, link,
            NVSWITCH_RAW_ERROR_LOG_DATA_FLAG_INGRESS_TIME |
            NVSWITCH_RAW_ERROR_LOG_DATA_FLAG_INGRESS_MISC |
            NVSWITCH_RAW_ERROR_LOG_DATA_FLAG_INGRESS_HDR,
            &data);
        NVSWITCH_REPORT_CONTAIN_DATA(_HW_NPORT_TSTATE_CAMRSP_ERR, data);
        nvswitch_clear_flags(&unhandled, bit);
    }

    NVSWITCH_UNHANDLED_CHECK(device, unhandled);

    // Disable interrupts that have occurred after fatal error.
    // This helps prevent an interrupt storm if HW keeps triggering unnecessary stream of interrupts.
    if (device->link[link].fatal_error_occurred)
    {
        // 'pending' is a subset of 'raw_enable', so XOR clears the serviced bits.
        NVSWITCH_ENG_WR32(device, NPORT, , link, _TSTATE, _ERR_FATAL_REPORT_EN_0,
            report.raw_enable ^ pending);
    }

    if (report.raw_first & report.mask)
    {
        NVSWITCH_ENG_WR32(device, NPORT, , link, _TSTATE, _ERR_FIRST_0,
            report.raw_first & report.mask);
    }

    // Write-1-to-clear the serviced interrupt status bits.
    NVSWITCH_ENG_WR32(device, NPORT, , link, _TSTATE, _ERR_STATUS_0, pending);

    if (unhandled != 0)
    {
        return -NVL_MORE_PROCESSING_REQUIRED;
    }

    return NVL_SUCCESS;
}

//
// Egress
//

//
// Service non-fatal EGRESS interrupts for NPORT 'link'.  Handles
// _ERR_STATUS_0 first, then falls through (via the goto label) to
// _ERR_STATUS_1.  (Continues beyond this view.)
//
static NvlStatus
_nvswitch_service_egress_nonfatal_ls10
(
    nvswitch_device *device,
    NvU32 link
)
{
    ls10_device *chip_device = NVSWITCH_GET_CHIP_DEVICE_LS10(device);
    NVSWITCH_INTERRUPT_LOG_TYPE report = { 0 };
    NvU32 pending, bit, unhandled;
    NvU32 pending_0, pending_1;
    NVSWITCH_RAW_ERROR_LOG_TYPE data = {0, { 0 }};
    INFOROM_NVS_ECC_ERROR_EVENT err_event = {0};
    NvlStatus status = NVL_SUCCESS;

    report.raw_pending = NVSWITCH_ENG_RD32(device, NPORT, , link, _EGRESS, _ERR_STATUS_0);
    report.raw_enable = NVSWITCH_ENG_RD32(device, NPORT, , link, _EGRESS, _ERR_NON_FATAL_REPORT_EN_0);
    report.mask = report.raw_enable & chip_device->intr_mask.egress[0].nonfatal;
    pending = report.raw_pending & report.mask;
    pending_0 = pending;

    if (pending == 0)
    {
        goto _nvswitch_service_egress_nonfatal_ls10_err_status_1;
    }

    unhandled = pending;
    report.raw_first = NVSWITCH_ENG_RD32(device, NPORT, , link, _EGRESS, _ERR_FIRST_0);
    _nvswitch_collect_error_info_ls10(device, link,
        NVSWITCH_RAW_ERROR_LOG_DATA_FLAG_EGRESS_TIME |
        NVSWITCH_RAW_ERROR_LOG_DATA_FLAG_EGRESS_MISC |
        NVSWITCH_RAW_ERROR_LOG_DATA_FLAG_EGRESS_HDR,
        &data);

    bit = DRF_NUM(_EGRESS, _ERR_STATUS_0, _NXBAR_HDR_ECC_LIMIT_ERR, 1);
    if (nvswitch_test_flags(pending, bit))
    {
        //
Ignore LIMIT error if DBE is pending 2561 if (!(nvswitch_test_flags(report.raw_pending, 2562 DRF_NUM(_EGRESS, _ERR_STATUS_0, _NXBAR_HDR_ECC_DBE_ERR, 1)))) 2563 { 2564 report.data[0] = NVSWITCH_ENG_RD32(device, NPORT, , link, _EGRESS, _ERR_NXBAR_ECC_ERROR_COUNTER); 2565 NVSWITCH_REPORT_NONFATAL(_HW_NPORT_EGRESS_NXBAR_HDR_ECC_LIMIT_ERR, "egress input ECC error limit"); 2566 NVSWITCH_REPORT_DATA(_HW_NPORT_EGRESS_NXBAR_HDR_ECC_LIMIT_ERR, data); 2567 2568 _nvswitch_construct_ecc_error_event_ls10(&err_event, 2569 NVSWITCH_ERR_HW_NPORT_EGRESS_NXBAR_HDR_ECC_LIMIT_ERR, link, NV_FALSE, 0, 2570 NV_FALSE, 1); 2571 2572 nvswitch_inforom_ecc_log_err_event(device, &err_event); 2573 } 2574 2575 nvswitch_clear_flags(&unhandled, bit); 2576 } 2577 2578 bit = DRF_NUM(_EGRESS, _ERR_STATUS_0, _RAM_OUT_HDR_ECC_LIMIT_ERR, 1); 2579 if (nvswitch_test_flags(pending, bit)) 2580 { 2581 // Ignore LIMIT error if DBE is pending 2582 if(!(nvswitch_test_flags(report.raw_pending, 2583 DRF_NUM(_EGRESS, _ERR_STATUS_0, _RAM_OUT_HDR_ECC_DBE_ERR, 1)))) 2584 { 2585 NvBool bAddressValid = NV_FALSE; 2586 NvU32 address = 0; 2587 NvU32 addressValid = NVSWITCH_ENG_RD32(device, NPORT, , link, _EGRESS, 2588 _ERR_RAM_OUT_ECC_ERROR_ADDRESS_VALID); 2589 2590 if (FLD_TEST_DRF(_EGRESS_ERR_RAM_OUT, _ECC_ERROR_ADDRESS_VALID, _VALID, _VALID, 2591 addressValid)) 2592 { 2593 address = NVSWITCH_ENG_RD32(device, NPORT, , link, _EGRESS, 2594 _ERR_RAM_OUT_ECC_ERROR_ADDRESS); 2595 bAddressValid = NV_TRUE; 2596 } 2597 2598 report.data[0] = NVSWITCH_ENG_RD32(device, NPORT, , link, _EGRESS, _ERR_RAM_OUT_ECC_ERROR_COUNTER); 2599 report.data[1] = NVSWITCH_ENG_RD32(device, NPORT, , link, _EGRESS, _ERR_RAM_OUT_ECC_ERROR_ADDRESS); 2600 NVSWITCH_REPORT_NONFATAL(_HW_NPORT_EGRESS_RAM_OUT_HDR_ECC_LIMIT_ERR, "egress output ECC error limit"); 2601 NVSWITCH_REPORT_DATA(_HW_NPORT_EGRESS_RAM_OUT_HDR_ECC_LIMIT_ERR, data); 2602 2603 _nvswitch_construct_ecc_error_event_ls10(&err_event, 2604 NVSWITCH_ERR_HW_NPORT_EGRESS_RAM_OUT_HDR_ECC_LIMIT_ERR, 
link, bAddressValid, address, 2605 NV_FALSE, 1); 2606 2607 nvswitch_inforom_ecc_log_err_event(device, &err_event); 2608 } 2609 2610 nvswitch_clear_flags(&unhandled, bit); 2611 } 2612 2613 bit = DRF_NUM(_EGRESS, _ERR_STATUS_0, _PRIVRSPERR, 1); 2614 if (nvswitch_test_flags(pending, bit)) 2615 { 2616 NVSWITCH_REPORT_NONFATAL(_HW_NPORT_EGRESS_PRIVRSPERR, "egress non-posted PRIV error"); 2617 NVSWITCH_REPORT_DATA(_HW_NPORT_EGRESS_PRIVRSPERR, data); 2618 nvswitch_clear_flags(&unhandled, bit); 2619 } 2620 2621 NVSWITCH_UNHANDLED_CHECK(device, unhandled); 2622 2623 // Disable interrupts that have occurred after fatal error. 2624 // This helps prevent an interrupt storm if HW keeps triggering unnecessary stream of interrupts. 2625 if (device->link[link].fatal_error_occurred) 2626 { 2627 NVSWITCH_ENG_WR32(device, NPORT, , link, _EGRESS, _ERR_NON_FATAL_REPORT_EN_0, 2628 report.raw_enable ^ pending); 2629 } 2630 2631 if (report.raw_first & report.mask) 2632 { 2633 NVSWITCH_ENG_WR32(device, NPORT, , link, _EGRESS, _ERR_FIRST_0, 2634 report.raw_first & report.mask); 2635 } 2636 2637 NVSWITCH_ENG_WR32(device, NPORT, , link, _EGRESS, _ERR_STATUS_0, pending); 2638 2639 // HACK: Clear all pending interrupts! 
2640 NVSWITCH_ENG_WR32(device, NPORT, , link, _EGRESS, _ERR_STATUS_0, 0xFFFFFFFF); 2641 2642 if (unhandled != 0) 2643 { 2644 status = -NVL_MORE_PROCESSING_REQUIRED; 2645 } 2646 2647 _nvswitch_service_egress_nonfatal_ls10_err_status_1: 2648 report.raw_pending = NVSWITCH_ENG_RD32(device, NPORT, , link, _EGRESS, _ERR_STATUS_1); 2649 report.raw_enable = NVSWITCH_ENG_RD32(device, NPORT, , link, _EGRESS, _ERR_NON_FATAL_REPORT_EN_1); 2650 report.mask = report.raw_enable & chip_device->intr_mask.egress[1].nonfatal; 2651 pending = report.raw_pending & report.mask; 2652 pending_1 = pending; 2653 2654 if ((pending_0 == 0) && (pending_1 == 0)) 2655 { 2656 return -NVL_NOT_FOUND; 2657 } 2658 2659 unhandled = pending; 2660 report.raw_first = NVSWITCH_ENG_RD32(device, NPORT, , link, _EGRESS, _ERR_FIRST_1); 2661 2662 bit = DRF_NUM(_EGRESS, _ERR_STATUS_1, _NXBAR_REDUCTION_HDR_ECC_LIMIT_ERR, 1); 2663 if (nvswitch_test_flags(pending, bit)) 2664 { 2665 // Ignore LIMIT error if DBE is pending 2666 if (!(nvswitch_test_flags(report.raw_pending, 2667 DRF_NUM(_EGRESS, _ERR_STATUS_1, _NXBAR_REDUCTION_HDR_ECC_DBE_ERR, 1)))) 2668 { 2669 NVSWITCH_REPORT_NONFATAL(_HW_NPORT_EGRESS_NXBAR_REDUCTION_HDR_ECC_LIMIT_ERR, "egress reduction header ECC error limit"); 2670 NVSWITCH_REPORT_DATA(_HW_NPORT_EGRESS_NXBAR_HDR_ECC_LIMIT_ERR, data); 2671 2672 _nvswitch_construct_ecc_error_event_ls10(&err_event, 2673 NVSWITCH_ERR_HW_NPORT_EGRESS_NXBAR_REDUCTION_HDR_ECC_LIMIT_ERR, link, NV_FALSE, 0, 2674 NV_FALSE, 1); 2675 2676 nvswitch_inforom_ecc_log_err_event(device, &err_event); 2677 } 2678 2679 nvswitch_clear_flags(&unhandled, bit); 2680 } 2681 2682 bit = DRF_NUM(_EGRESS, _ERR_STATUS_1, _MCRSPCTRLSTORE_ECC_LIMIT_ERR, 1); 2683 if (nvswitch_test_flags(pending, bit)) 2684 { 2685 // Ignore LIMIT error if DBE is pending 2686 if (!(nvswitch_test_flags(report.raw_pending, 2687 DRF_NUM(_EGRESS, _ERR_STATUS_1, _MCRSPCTRLSTORE_ECC_DBE_ERR, 1)))) 2688 { 2689 
NVSWITCH_REPORT_NONFATAL(_HW_NPORT_EGRESS_MCRSPCTRLSTORE_ECC_LIMIT_ERR, "egress MC response ECC error limit"); 2690 NVSWITCH_REPORT_DATA(_HW_NPORT_EGRESS_MCRSPCTRLSTORE_ECC_LIMIT_ERR, data); 2691 2692 _nvswitch_construct_ecc_error_event_ls10(&err_event, 2693 NVSWITCH_ERR_HW_NPORT_EGRESS_MCRSPCTRLSTORE_ECC_LIMIT_ERR, link, NV_FALSE, 0, 2694 NV_FALSE, 1); 2695 2696 nvswitch_inforom_ecc_log_err_event(device, &err_event); 2697 } 2698 2699 nvswitch_clear_flags(&unhandled, bit); 2700 } 2701 2702 bit = DRF_NUM(_EGRESS, _ERR_STATUS_1, _RBCTRLSTORE_ECC_LIMIT_ERR, 1); 2703 if (nvswitch_test_flags(pending, bit)) 2704 { 2705 // Ignore LIMIT error if DBE is pending 2706 if (!(nvswitch_test_flags(report.raw_pending, 2707 DRF_NUM(_EGRESS, _ERR_STATUS_1, _RBCTRLSTORE_ECC_DBE_ERR, 1)))) 2708 { 2709 NVSWITCH_REPORT_NONFATAL(_HW_NPORT_EGRESS_RBCTRLSTORE_ECC_LIMIT_ERR, "egress RB ECC error limit"); 2710 NVSWITCH_REPORT_DATA(_HW_NPORT_EGRESS_RBCTRLSTORE_ECC_LIMIT_ERR, data); 2711 2712 _nvswitch_construct_ecc_error_event_ls10(&err_event, 2713 NVSWITCH_ERR_HW_NPORT_EGRESS_RBCTRLSTORE_ECC_LIMIT_ERR, link, NV_FALSE, 0, 2714 NV_FALSE, 1); 2715 2716 nvswitch_inforom_ecc_log_err_event(device, &err_event); 2717 } 2718 2719 nvswitch_clear_flags(&unhandled, bit); 2720 } 2721 2722 bit = DRF_NUM(_EGRESS, _ERR_STATUS_1, _MCREDSGT_ECC_LIMIT_ERR, 1); 2723 if (nvswitch_test_flags(pending, bit)) 2724 { 2725 // Ignore LIMIT error if DBE is pending 2726 if (!(nvswitch_test_flags(report.raw_pending, 2727 DRF_NUM(_EGRESS, _ERR_STATUS_1, _MCREDSGT_ECC_DBE_ERR, 1)))) 2728 { 2729 NVSWITCH_REPORT_NONFATAL(_HW_NPORT_EGRESS_MCREDSGT_ECC_LIMIT_ERR, "egress RSG ECC error limit"); 2730 NVSWITCH_REPORT_DATA(_HW_NPORT_EGRESS_MCREDSGT_ECC_LIMIT_ERR, data); 2731 2732 _nvswitch_construct_ecc_error_event_ls10(&err_event, 2733 NVSWITCH_ERR_HW_NPORT_EGRESS_MCREDSGT_ECC_LIMIT_ERR, link, NV_FALSE, 0, 2734 NV_FALSE, 1); 2735 2736 nvswitch_inforom_ecc_log_err_event(device, &err_event); 2737 } 2738 2739 
nvswitch_clear_flags(&unhandled, bit); 2740 } 2741 2742 bit = DRF_NUM(_EGRESS, _ERR_STATUS_1, _MCREDBUF_ECC_LIMIT_ERR, 1); 2743 if (nvswitch_test_flags(pending, bit)) 2744 { 2745 // Ignore LIMIT error if DBE is pending 2746 if (!(nvswitch_test_flags(report.raw_pending, 2747 DRF_NUM(_EGRESS, _ERR_STATUS_1, _MCREDBUF_ECC_DBE_ERR, 1)))) 2748 { 2749 NVSWITCH_REPORT_NONFATAL(_HW_NPORT_EGRESS_MCREDBUF_ECC_LIMIT_ERR, "egress MCRB ECC error limit"); 2750 NVSWITCH_REPORT_DATA(_HW_NPORT_EGRESS_MCREDBUF_ECC_LIMIT_ERR, data); 2751 2752 _nvswitch_construct_ecc_error_event_ls10(&err_event, 2753 NVSWITCH_ERR_HW_NPORT_EGRESS_MCREDBUF_ECC_LIMIT_ERR, link, NV_FALSE, 0, 2754 NV_FALSE, 1); 2755 2756 nvswitch_inforom_ecc_log_err_event(device, &err_event); 2757 } 2758 2759 nvswitch_clear_flags(&unhandled, bit); 2760 } 2761 2762 bit = DRF_NUM(_EGRESS, _ERR_STATUS_1, _MCRSP_RAM_HDR_ECC_LIMIT_ERR, 1); 2763 if (nvswitch_test_flags(pending, bit)) 2764 { 2765 // Ignore LIMIT error if DBE is pending 2766 if (!(nvswitch_test_flags(report.raw_pending, 2767 DRF_NUM(_EGRESS, _ERR_STATUS_1, _MCRSP_RAM_HDR_ECC_DBE_ERR, 1)))) 2768 { 2769 NVSWITCH_REPORT_NONFATAL(_HW_NPORT_EGRESS_MCRSP_RAM_HDR_ECC_LIMIT_ERR, "egress MC header ECC error limit"); 2770 NVSWITCH_REPORT_DATA(_HW_NPORT_EGRESS_MCRSP_RAM_HDR_ECC_LIMIT_ERR, data); 2771 2772 _nvswitch_construct_ecc_error_event_ls10(&err_event, 2773 NVSWITCH_ERR_HW_NPORT_EGRESS_MCRSP_RAM_HDR_ECC_LIMIT_ERR, link, NV_FALSE, 0, 2774 NV_FALSE, 1); 2775 2776 nvswitch_inforom_ecc_log_err_event(device, &err_event); 2777 } 2778 2779 nvswitch_clear_flags(&unhandled, bit); 2780 } 2781 2782 bit = DRF_NUM(_EGRESS, _ERR_STATUS_1, _NXBAR_REDUCTION_HDR_ECC_DBE_ERR, 1); 2783 if (nvswitch_test_flags(pending, bit)) 2784 { 2785 NVSWITCH_REPORT_NONFATAL(_HW_NPORT_EGRESS_NXBAR_REDUCTION_HDR_ECC_DBE_ERR, "egress reduction header ECC DBE error"); 2786 NVSWITCH_REPORT_DATA(_HW_NPORT_EGRESS_NXBAR_REDUCTION_HDR_ECC_DBE_ERR, data); 2787 nvswitch_clear_flags(&unhandled, bit); 2788 2789 
_nvswitch_construct_ecc_error_event_ls10(&err_event, 2790 NVSWITCH_ERR_HW_NPORT_EGRESS_NXBAR_REDUCTION_HDR_ECC_DBE_ERR, link, NV_FALSE, 0, 2791 NV_TRUE, 1); 2792 2793 nvswitch_inforom_ecc_log_err_event(device, &err_event); 2794 2795 // Clear associated LIMIT_ERR interrupt 2796 if (report.raw_pending & DRF_NUM(_EGRESS, _ERR_STATUS_1, _NXBAR_REDUCTION_HDR_ECC_LIMIT_ERR, 1)) 2797 { 2798 NVSWITCH_ENG_WR32(device, NPORT, , link, _EGRESS, _ERR_STATUS_1, 2799 DRF_NUM(_EGRESS, _ERR_STATUS_1, _NXBAR_REDUCTION_HDR_ECC_LIMIT_ERR, 1)); 2800 } 2801 } 2802 2803 bit = DRF_NUM(_EGRESS, _ERR_STATUS_1, _NXBAR_REDUCTION_HDR_PARITY_ERR, 1); 2804 if (nvswitch_test_flags(pending, bit)) 2805 { 2806 NVSWITCH_REPORT_NONFATAL(_HW_NPORT_EGRESS_NXBAR_REDUCTION_HDR_PARITY_ERR, "egress reduction header parity error"); 2807 NVSWITCH_REPORT_DATA(_HW_NPORT_EGRESS_NXBAR_REDUCTION_HDR_PARITY_ERR, data); 2808 nvswitch_clear_flags(&unhandled, bit); 2809 } 2810 2811 bit = DRF_NUM(_EGRESS, _ERR_STATUS_1, _NXBAR_REDUCTION_FLITTYPE_MISMATCH_ERR, 1); 2812 if (nvswitch_test_flags(pending, bit)) 2813 { 2814 NVSWITCH_REPORT_NONFATAL(_HW_NPORT_EGRESS_NXBAR_REDUCTION_FLITTYPE_MISMATCH_ERR, "egress reduction flit mismatch error"); 2815 NVSWITCH_REPORT_DATA(_HW_NPORT_EGRESS_NXBAR_REDUCTION_FLITTYPE_MISMATCH_ERR, data); 2816 nvswitch_clear_flags(&unhandled, bit); 2817 } 2818 2819 bit = DRF_NUM(_EGRESS, _ERR_STATUS_1, _MCREDBUF_ECC_DBE_ERR, 1); 2820 if (nvswitch_test_flags(pending, bit)) 2821 { 2822 NVSWITCH_REPORT_NONFATAL(_HW_NPORT_EGRESS_MCREDBUF_ECC_DBE_ERR, "egress reduction buffer ECC DBE error"); 2823 NVSWITCH_REPORT_DATA(_HW_NPORT_EGRESS_MCREDBUF_ECC_DBE_ERR, data); 2824 nvswitch_clear_flags(&unhandled, bit); 2825 2826 _nvswitch_construct_ecc_error_event_ls10(&err_event, 2827 NVSWITCH_ERR_HW_NPORT_EGRESS_MCREDBUF_ECC_DBE_ERR, link, NV_FALSE, 0, 2828 NV_TRUE, 1); 2829 2830 nvswitch_inforom_ecc_log_err_event(device, &err_event); 2831 2832 // Clear associated LIMIT_ERR interrupt 2833 if (report.raw_pending & 
DRF_NUM(_EGRESS, _ERR_STATUS_1, _MCREDBUF_ECC_LIMIT_ERR, 1)) 2834 { 2835 NVSWITCH_ENG_WR32(device, NPORT, , link, _EGRESS, _ERR_STATUS_1, 2836 DRF_NUM(_EGRESS, _ERR_STATUS_1, _MCREDBUF_ECC_LIMIT_ERR, 1)); 2837 } 2838 } 2839 2840 bit = DRF_NUM(_EGRESS, _ERR_STATUS_1, _MCRSP_CNT_ERR, 1); 2841 if (nvswitch_test_flags(pending, bit)) 2842 { 2843 _nvswitch_collect_error_info_ls10(device, link, 2844 NVSWITCH_RAW_ERROR_LOG_DATA_FLAG_EGRESS_MC_TIME | 2845 NVSWITCH_RAW_ERROR_LOG_DATA_FLAG_EGRESS_MC_MISC | 2846 NVSWITCH_RAW_ERROR_LOG_DATA_FLAG_EGRESS_MC_HDR, 2847 &data); 2848 NVSWITCH_REPORT_NONFATAL(_HW_NPORT_EGRESS_MCRSP_CNT_ERR, "egress MC response count error"); 2849 NVSWITCH_REPORT_DATA(_HW_NPORT_EGRESS_MCRSP_CNT_ERR, data); 2850 nvswitch_clear_flags(&unhandled, bit); 2851 } 2852 2853 bit = DRF_NUM(_EGRESS, _ERR_STATUS_1, _RBRSP_CNT_ERR, 1); 2854 if (nvswitch_test_flags(pending, bit)) 2855 { 2856 _nvswitch_collect_error_info_ls10(device, link, 2857 NVSWITCH_RAW_ERROR_LOG_DATA_FLAG_EGRESS_MC_TIME | 2858 NVSWITCH_RAW_ERROR_LOG_DATA_FLAG_EGRESS_MC_MISC | 2859 NVSWITCH_RAW_ERROR_LOG_DATA_FLAG_EGRESS_MC_HDR, 2860 &data); 2861 NVSWITCH_REPORT_NONFATAL(_HW_NPORT_EGRESS_RBRSP_CNT_ERR, "egress reduction response count error"); 2862 NVSWITCH_REPORT_DATA(_HW_NPORT_EGRESS_RBRSP_CNT_ERR, data); 2863 nvswitch_clear_flags(&unhandled, bit); 2864 } 2865 2866 NVSWITCH_UNHANDLED_CHECK(device, unhandled); 2867 2868 // Disable interrupts that have occurred after fatal error. 2869 // This helps prevent an interrupt storm if HW keeps triggering unnecessary stream of interrupts. 
2870 if (device->link[link].fatal_error_occurred) 2871 { 2872 NVSWITCH_ENG_WR32(device, NPORT, , link, _EGRESS, _ERR_NON_FATAL_REPORT_EN_1, 2873 report.raw_enable ^ pending); 2874 } 2875 2876 if (report.raw_first & report.mask) 2877 { 2878 NVSWITCH_ENG_WR32(device, NPORT, , link, _EGRESS, _ERR_FIRST_1, 2879 report.raw_first & report.mask); 2880 } 2881 2882 NVSWITCH_ENG_WR32(device, NPORT, , link, _EGRESS, _ERR_STATUS_1, pending); 2883 2884 // Clear all pending interrupts! 2885 NVSWITCH_ENG_WR32(device, NPORT, , link, _EGRESS, _ERR_STATUS_1, 0xFFFFFFFF); 2886 2887 if (unhandled != 0) 2888 { 2889 status = -NVL_MORE_PROCESSING_REQUIRED; 2890 } 2891 2892 return status; 2893 } 2894 2895 static NvlStatus 2896 _nvswitch_service_egress_fatal_ls10 2897 ( 2898 nvswitch_device *device, 2899 NvU32 link 2900 ) 2901 { 2902 ls10_device *chip_device = NVSWITCH_GET_CHIP_DEVICE_LS10(device); 2903 NVSWITCH_INTERRUPT_LOG_TYPE report = { 0 }; 2904 NvU32 pending, bit, contain, unhandled; 2905 NvU32 pending_0, pending_1; 2906 NVSWITCH_RAW_ERROR_LOG_TYPE data = {0, { 0 }}; 2907 NVSWITCH_RAW_ERROR_LOG_TYPE credit_data = {0, { 0 }}; 2908 NVSWITCH_RAW_ERROR_LOG_TYPE buffer_data = {0, { 0 }}; 2909 INFOROM_NVS_ECC_ERROR_EVENT err_event = {0}; 2910 NvlStatus status = NVL_SUCCESS; 2911 2912 report.raw_pending = NVSWITCH_ENG_RD32(device, NPORT, , link, _EGRESS, _ERR_STATUS_0); 2913 report.raw_enable = NVSWITCH_ENG_RD32(device, NPORT, , link, _EGRESS, _ERR_FATAL_REPORT_EN_0); 2914 report.mask = report.raw_enable & chip_device->intr_mask.egress[0].fatal; 2915 pending = report.raw_pending & report.mask; 2916 pending_0 = pending; 2917 2918 if (pending == 0) 2919 { 2920 goto _nvswitch_service_egress_fatal_ls10_err_status_1; 2921 } 2922 2923 unhandled = pending; 2924 report.raw_first = NVSWITCH_ENG_RD32(device, NPORT, , link, _EGRESS, _ERR_FIRST_0); 2925 contain = NVSWITCH_ENG_RD32(device, NPORT, , link, _EGRESS, _ERR_CONTAIN_EN_0); 2926 _nvswitch_collect_error_info_ls10(device, link, 2927 
NVSWITCH_RAW_ERROR_LOG_DATA_FLAG_EGRESS_TIME | 2928 NVSWITCH_RAW_ERROR_LOG_DATA_FLAG_EGRESS_MISC | 2929 NVSWITCH_RAW_ERROR_LOG_DATA_FLAG_EGRESS_HDR, 2930 &data); 2931 2932 bit = DRF_NUM(_EGRESS, _ERR_STATUS_0, _EGRESSBUFERR, 1); 2933 if (nvswitch_test_flags(pending, bit)) 2934 { 2935 NVSWITCH_REPORT_CONTAIN(_HW_NPORT_EGRESS_EGRESSBUFERR, "egress crossbar overflow", NV_TRUE); 2936 NVSWITCH_REPORT_CONTAIN_DATA(_HW_NPORT_EGRESS_EGRESSBUFERR, data); 2937 2938 buffer_data.data[0] = NVSWITCH_ENG_RD32(device, NPORT, , link, _EGRESS, _BUFFER_POINTERS0); 2939 buffer_data.data[1] = NVSWITCH_ENG_RD32(device, NPORT, , link, _EGRESS, _BUFFER_POINTERS1); 2940 buffer_data.data[2] = NVSWITCH_ENG_RD32(device, NPORT, , link, _EGRESS, _BUFFER_POINTERS2); 2941 buffer_data.data[3] = NVSWITCH_ENG_RD32(device, NPORT, , link, _EGRESS, _BUFFER_POINTERS3); 2942 buffer_data.data[4] = NVSWITCH_ENG_RD32(device, NPORT, , link, _EGRESS, _BUFFER_POINTERS4); 2943 buffer_data.data[5] = NVSWITCH_ENG_RD32(device, NPORT, , link, _EGRESS, _BUFFER_POINTERS5); 2944 buffer_data.data[6] = NVSWITCH_ENG_RD32(device, NPORT, , link, _EGRESS, _BUFFER_POINTERS6); 2945 buffer_data.data[7] = NVSWITCH_ENG_RD32(device, NPORT, , link, _EGRESS, _BUFFER_POINTERS7); 2946 NVSWITCH_REPORT_CONTAIN_DATA(_HW_NPORT_EGRESS_EGRESSBUFERR, buffer_data); 2947 nvswitch_clear_flags(&unhandled, bit); 2948 } 2949 2950 bit = DRF_NUM(_EGRESS, _ERR_STATUS_0, _PKTROUTEERR, 1); 2951 if (nvswitch_test_flags(pending, bit)) 2952 { 2953 NVSWITCH_REPORT_CONTAIN(_HW_NPORT_EGRESS_PKTROUTEERR, "egress packet route", NV_TRUE); 2954 NVSWITCH_REPORT_CONTAIN_DATA(_HW_NPORT_EGRESS_PKTROUTEERR, data); 2955 nvswitch_clear_flags(&unhandled, bit); 2956 } 2957 2958 bit = DRF_NUM(_EGRESS, _ERR_STATUS_0, _SEQIDERR, 1); 2959 if (nvswitch_test_flags(pending, bit)) 2960 { 2961 NVSWITCH_REPORT_CONTAIN(_HW_NPORT_EGRESS_SEQIDERR, "egress sequence ID error", NV_TRUE); 2962 NVSWITCH_REPORT_CONTAIN_DATA(_HW_NPORT_EGRESS_SEQIDERR, data); 2963 
nvswitch_clear_flags(&unhandled, bit); 2964 } 2965 2966 bit = DRF_NUM(_EGRESS, _ERR_STATUS_0, _NXBAR_HDR_ECC_DBE_ERR, 1); 2967 if (nvswitch_test_flags(pending, bit)) 2968 { 2969 NVSWITCH_REPORT_CONTAIN(_HW_NPORT_EGRESS_NXBAR_HDR_ECC_DBE_ERR, "egress input ECC DBE error", NV_FALSE); 2970 NVSWITCH_REPORT_CONTAIN_DATA(_HW_NPORT_EGRESS_NXBAR_HDR_ECC_DBE_ERR, data); 2971 nvswitch_clear_flags(&unhandled, bit); 2972 2973 _nvswitch_construct_ecc_error_event_ls10(&err_event, 2974 NVSWITCH_ERR_HW_NPORT_EGRESS_NXBAR_HDR_ECC_DBE_ERR, link, NV_FALSE, 0, 2975 NV_TRUE, 1); 2976 2977 nvswitch_inforom_ecc_log_err_event(device, &err_event); 2978 2979 // Clear associated LIMIT_ERR interrupt 2980 if (report.raw_pending & DRF_NUM(_EGRESS, _ERR_STATUS_0, _NXBAR_HDR_ECC_LIMIT_ERR, 1)) 2981 { 2982 NVSWITCH_ENG_WR32(device, NPORT, , link, _EGRESS, _ERR_STATUS_0, 2983 DRF_NUM(_EGRESS, _ERR_STATUS_0, _NXBAR_HDR_ECC_LIMIT_ERR, 1)); 2984 } 2985 } 2986 2987 bit = DRF_NUM(_EGRESS, _ERR_STATUS_0, _RAM_OUT_HDR_ECC_DBE_ERR, 1); 2988 if (nvswitch_test_flags(pending, bit)) 2989 { 2990 NvBool bAddressValid = NV_FALSE; 2991 NvU32 address = 0; 2992 NvU32 addressValid = NVSWITCH_ENG_RD32(device, NPORT, , link, _EGRESS, 2993 _ERR_RAM_OUT_ECC_ERROR_ADDRESS_VALID); 2994 2995 if (FLD_TEST_DRF(_EGRESS_ERR_RAM_OUT, _ECC_ERROR_ADDRESS_VALID, _VALID, _VALID, 2996 addressValid)) 2997 { 2998 address = NVSWITCH_ENG_RD32(device, NPORT, , link, _EGRESS, 2999 _ERR_RAM_OUT_ECC_ERROR_ADDRESS); 3000 bAddressValid = NV_TRUE; 3001 } 3002 3003 NVSWITCH_REPORT_CONTAIN(_HW_NPORT_EGRESS_RAM_OUT_HDR_ECC_DBE_ERR, "egress output ECC DBE error", NV_FALSE); 3004 NVSWITCH_REPORT_CONTAIN_DATA(_HW_NPORT_EGRESS_RAM_OUT_HDR_ECC_DBE_ERR, data); 3005 nvswitch_clear_flags(&unhandled, bit); 3006 3007 _nvswitch_construct_ecc_error_event_ls10(&err_event, 3008 NVSWITCH_ERR_HW_NPORT_EGRESS_RAM_OUT_HDR_ECC_DBE_ERR, link, bAddressValid, 3009 address, NV_TRUE, 1); 3010 3011 nvswitch_inforom_ecc_log_err_event(device, &err_event); 3012 3013 // Clear 
associated LIMIT_ERR interrupt 3014 if (report.raw_pending & DRF_NUM(_EGRESS, _ERR_STATUS_0, _RAM_OUT_HDR_ECC_LIMIT_ERR, 1)) 3015 { 3016 NVSWITCH_ENG_WR32(device, NPORT, , link, _EGRESS, _ERR_STATUS_0, 3017 DRF_NUM(_EGRESS, _ERR_STATUS_0, _RAM_OUT_HDR_ECC_LIMIT_ERR, 1)); 3018 } 3019 } 3020 3021 bit = DRF_NUM(_EGRESS, _ERR_STATUS_0, _NCISOCCREDITOVFL, 1); 3022 if (nvswitch_test_flags(pending, bit)) 3023 { 3024 NVSWITCH_REPORT_CONTAIN(_HW_NPORT_EGRESS_NCISOCCREDITOVFL, "egress credit overflow", NV_FALSE); 3025 NVSWITCH_REPORT_CONTAIN_DATA(_HW_NPORT_EGRESS_NCISOCCREDITOVFL, data); 3026 3027 credit_data.data[0] = NVSWITCH_ENG_RD32(device, NPORT, , link, _EGRESS, _NCISOC_CREDIT0); 3028 credit_data.data[1] = NVSWITCH_ENG_RD32(device, NPORT, , link, _EGRESS, _NCISOC_CREDIT1); 3029 credit_data.data[2] = NVSWITCH_ENG_RD32(device, NPORT, , link, _EGRESS, _NCISOC_CREDIT2); 3030 credit_data.data[3] = NVSWITCH_ENG_RD32(device, NPORT, , link, _EGRESS, _NCISOC_CREDIT3); 3031 credit_data.data[4] = NVSWITCH_ENG_RD32(device, NPORT, , link, _EGRESS, _NCISOC_CREDIT4); 3032 credit_data.data[5] = NVSWITCH_ENG_RD32(device, NPORT, , link, _EGRESS, _NCISOC_CREDIT5); 3033 credit_data.data[6] = NVSWITCH_ENG_RD32(device, NPORT, , link, _EGRESS, _NCISOC_CREDIT6); 3034 credit_data.data[7] = NVSWITCH_ENG_RD32(device, NPORT, , link, _EGRESS, _NCISOC_CREDIT7); 3035 NVSWITCH_REPORT_CONTAIN_DATA(_HW_NPORT_EGRESS_NCISOCCREDITOVFL, credit_data); 3036 nvswitch_clear_flags(&unhandled, bit); 3037 } 3038 3039 bit = DRF_NUM(_EGRESS, _ERR_STATUS_0, _REQTGTIDMISMATCHERR, 1); 3040 if (nvswitch_test_flags(pending, bit)) 3041 { 3042 NVSWITCH_REPORT_CONTAIN(_HW_NPORT_EGRESS_REQTGTIDMISMATCHERR, "egress destination request ID error", NV_FALSE); 3043 NVSWITCH_REPORT_CONTAIN_DATA(_HW_NPORT_EGRESS_REQTGTIDMISMATCHERR, data); 3044 nvswitch_clear_flags(&unhandled, bit); 3045 } 3046 3047 bit = DRF_NUM(_EGRESS, _ERR_STATUS_0, _RSPREQIDMISMATCHERR, 1); 3048 if (nvswitch_test_flags(pending, bit)) 3049 { 3050 
NVSWITCH_REPORT_CONTAIN(_HW_NPORT_EGRESS_RSPREQIDMISMATCHERR, "egress destination response ID error", NV_FALSE); 3051 NVSWITCH_REPORT_CONTAIN_DATA(_HW_NPORT_EGRESS_RSPREQIDMISMATCHERR, data); 3052 nvswitch_clear_flags(&unhandled, bit); 3053 } 3054 3055 bit = DRF_NUM(_EGRESS, _ERR_STATUS_0, _URRSPERR, 1); 3056 if (nvswitch_test_flags(pending, bit)) 3057 { 3058 NVSWITCH_REPORT_CONTAIN(_HW_NPORT_EGRESS_DROPNPURRSPERR, "egress non-posted UR error", NV_FALSE); 3059 NVSWITCH_REPORT_CONTAIN_DATA(_HW_NPORT_EGRESS_DROPNPURRSPERR, data); 3060 nvswitch_clear_flags(&unhandled, bit); 3061 } 3062 3063 bit = DRF_NUM(_EGRESS, _ERR_STATUS_0, _HWRSPERR, 1); 3064 if (nvswitch_test_flags(pending, bit)) 3065 { 3066 NVSWITCH_REPORT_CONTAIN(_HW_NPORT_EGRESS_HWRSPERR, "egress non-posted HW error", NV_FALSE); 3067 NVSWITCH_REPORT_CONTAIN_DATA(_HW_NPORT_EGRESS_HWRSPERR, data); 3068 nvswitch_clear_flags(&unhandled, bit); 3069 } 3070 3071 bit = DRF_NUM(_EGRESS, _ERR_STATUS_0, _NXBAR_HDR_PARITY_ERR, 1); 3072 if (nvswitch_test_flags(pending, bit)) 3073 { 3074 NVSWITCH_REPORT_CONTAIN(_HW_NPORT_EGRESS_NXBAR_HDR_PARITY_ERR, "egress control parity error", NV_FALSE); 3075 NVSWITCH_REPORT_CONTAIN_DATA(_HW_NPORT_EGRESS_NXBAR_HDR_PARITY_ERR, data); 3076 nvswitch_clear_flags(&unhandled, bit); 3077 3078 _nvswitch_construct_ecc_error_event_ls10(&err_event, 3079 NVSWITCH_ERR_HW_NPORT_EGRESS_NXBAR_HDR_PARITY_ERR, link, NV_FALSE, 0, 3080 NV_TRUE, 1); 3081 3082 nvswitch_inforom_ecc_log_err_event(device, &err_event); 3083 } 3084 3085 bit = DRF_NUM(_EGRESS, _ERR_STATUS_0, _NCISOC_CREDIT_PARITY_ERR, 1); 3086 if (nvswitch_test_flags(pending, bit)) 3087 { 3088 NVSWITCH_REPORT_CONTAIN(_HW_NPORT_EGRESS_NCISOC_CREDIT_PARITY_ERR, "egress credit parity error", NV_FALSE); 3089 NVSWITCH_REPORT_CONTAIN_DATA(_HW_NPORT_EGRESS_NCISOC_CREDIT_PARITY_ERR, data); 3090 3091 credit_data.data[0] = NVSWITCH_ENG_RD32(device, NPORT, , link, _EGRESS, _NCISOC_CREDIT0); 3092 credit_data.data[1] = NVSWITCH_ENG_RD32(device, NPORT, , link, 
_EGRESS, _NCISOC_CREDIT1); 3093 credit_data.data[2] = NVSWITCH_ENG_RD32(device, NPORT, , link, _EGRESS, _NCISOC_CREDIT2); 3094 credit_data.data[3] = NVSWITCH_ENG_RD32(device, NPORT, , link, _EGRESS, _NCISOC_CREDIT3); 3095 credit_data.data[4] = NVSWITCH_ENG_RD32(device, NPORT, , link, _EGRESS, _NCISOC_CREDIT4); 3096 credit_data.data[5] = NVSWITCH_ENG_RD32(device, NPORT, , link, _EGRESS, _NCISOC_CREDIT5); 3097 credit_data.data[6] = NVSWITCH_ENG_RD32(device, NPORT, , link, _EGRESS, _NCISOC_CREDIT6); 3098 credit_data.data[7] = NVSWITCH_ENG_RD32(device, NPORT, , link, _EGRESS, _NCISOC_CREDIT7); 3099 NVSWITCH_REPORT_CONTAIN_DATA(_HW_NPORT_EGRESS_NCISOC_CREDIT_PARITY_ERR, credit_data); 3100 nvswitch_clear_flags(&unhandled, bit); 3101 3102 _nvswitch_construct_ecc_error_event_ls10(&err_event, 3103 NVSWITCH_ERR_HW_NPORT_EGRESS_NCISOC_CREDIT_PARITY_ERR, link, NV_FALSE, 0, 3104 NV_TRUE, 1); 3105 3106 nvswitch_inforom_ecc_log_err_event(device, &err_event); 3107 } 3108 3109 bit = DRF_NUM(_EGRESS, _ERR_STATUS_0, _NXBAR_FLITTYPE_MISMATCH_ERR, 1); 3110 if (nvswitch_test_flags(pending, bit)) 3111 { 3112 NVSWITCH_REPORT_CONTAIN(_HW_NPORT_EGRESS_NXBAR_FLITTYPE_MISMATCH_ERR, "egress flit type mismatch", NV_FALSE); 3113 NVSWITCH_REPORT_CONTAIN_DATA(_HW_NPORT_EGRESS_NXBAR_FLITTYPE_MISMATCH_ERR, data); 3114 nvswitch_clear_flags(&unhandled, bit); 3115 } 3116 3117 bit = DRF_NUM(_EGRESS, _ERR_STATUS_0, _CREDIT_TIME_OUT_ERR, 1); 3118 if (nvswitch_test_flags(pending, bit)) 3119 { 3120 NVSWITCH_REPORT_CONTAIN(_HW_NPORT_EGRESS_CREDIT_TIME_OUT_ERR, "egress credit timeout", NV_FALSE); 3121 NVSWITCH_REPORT_CONTAIN_DATA(_HW_NPORT_EGRESS_CREDIT_TIME_OUT_ERR, data); 3122 nvswitch_clear_flags(&unhandled, bit); 3123 } 3124 3125 bit = DRF_NUM(_EGRESS, _ERR_STATUS_0, _NXBAR_SIDEBAND_PD_PARITY_ERR, 1); 3126 if (nvswitch_test_flags(pending, bit)) 3127 { 3128 NVSWITCH_REPORT_CONTAIN(_HW_NPORT_EGRESS_NXBAR_SIDEBAND_PD_PARITY_ERR, "egress crossbar SB parity", NV_FALSE); 3129 
NVSWITCH_REPORT_CONTAIN_DATA(_HW_NPORT_EGRESS_NXBAR_SIDEBAND_PD_PARITY_ERR, data); 3130 nvswitch_clear_flags(&unhandled, bit); 3131 } 3132 3133 bit = DRF_NUM(_EGRESS, _ERR_STATUS_0, _INVALIDVCSET_ERR, 1); 3134 if (nvswitch_test_flags(pending, bit)) 3135 { 3136 NVSWITCH_REPORT_CONTAIN(_HW_NPORT_EGRESS_INVALIDVCSET_ERR, "egress invalid VC set", NV_FALSE); 3137 NVSWITCH_REPORT_CONTAIN_DATA(_HW_NPORT_EGRESS_INVALIDVCSET_ERR, data); 3138 nvswitch_clear_flags(&unhandled, bit); 3139 } 3140 3141 NVSWITCH_UNHANDLED_CHECK(device, unhandled); 3142 3143 // Disable interrupts that have occurred after fatal error. 3144 // This helps prevent an interrupt storm if HW keeps triggering unnecessary stream of interrupts. 3145 if (device->link[link].fatal_error_occurred) 3146 { 3147 NVSWITCH_ENG_WR32(device, NPORT, , link, _EGRESS, _ERR_FATAL_REPORT_EN_0, 3148 report.raw_enable ^ pending); 3149 } 3150 3151 if (report.raw_first & report.mask) 3152 { 3153 NVSWITCH_ENG_WR32(device, NPORT, , link, _EGRESS, _ERR_FIRST_0, 3154 report.raw_first & report.mask); 3155 } 3156 3157 NVSWITCH_ENG_WR32(device, NPORT, , link, _EGRESS, _ERR_STATUS_0, pending); 3158 3159 if (unhandled != 0) 3160 { 3161 status = -NVL_MORE_PROCESSING_REQUIRED; 3162 } 3163 3164 _nvswitch_service_egress_fatal_ls10_err_status_1: 3165 report.raw_pending = NVSWITCH_ENG_RD32(device, NPORT, , link, _EGRESS, _ERR_STATUS_1); 3166 report.raw_enable = NVSWITCH_ENG_RD32(device, NPORT, , link, _EGRESS, _ERR_FATAL_REPORT_EN_1); 3167 report.mask = report.raw_enable & chip_device->intr_mask.egress[1].fatal; 3168 pending = report.raw_pending & report.mask; 3169 pending_1 = pending; 3170 3171 if ((pending_0 == 0) && (pending_1 == 0)) 3172 { 3173 return -NVL_NOT_FOUND; 3174 } 3175 3176 unhandled = pending; 3177 report.raw_first = NVSWITCH_ENG_RD32(device, NPORT, , link, _EGRESS, _ERR_FIRST_1); 3178 contain = NVSWITCH_ENG_RD32(device, NPORT, , link, _EGRESS, _ERR_CONTAIN_EN_1); 3179 3180 bit = DRF_NUM(_EGRESS, _ERR_STATUS_1, 
_MCRSPCTRLSTORE_ECC_DBE_ERR, 1); 3181 if (nvswitch_test_flags(pending, bit)) 3182 { 3183 NVSWITCH_REPORT_CONTAIN(_HW_NPORT_EGRESS_MCRSPCTRLSTORE_ECC_DBE_ERR, "egress MC response ECC DBE error", NV_FALSE); 3184 NVSWITCH_REPORT_CONTAIN_DATA(_HW_NPORT_EGRESS_MCRSPCTRLSTORE_ECC_DBE_ERR, data); 3185 nvswitch_clear_flags(&unhandled, bit); 3186 3187 _nvswitch_construct_ecc_error_event_ls10(&err_event, 3188 NVSWITCH_ERR_HW_NPORT_EGRESS_MCRSPCTRLSTORE_ECC_DBE_ERR, link, NV_FALSE, 0, 3189 NV_TRUE, 1); 3190 3191 nvswitch_inforom_ecc_log_err_event(device, &err_event); 3192 3193 // Clear associated LIMIT_ERR interrupt 3194 if (report.raw_pending & DRF_NUM(_EGRESS, _ERR_STATUS_1, _MCRSPCTRLSTORE_ECC_LIMIT_ERR, 1)) 3195 { 3196 NVSWITCH_ENG_WR32(device, NPORT, , link, _EGRESS, _ERR_STATUS_1, 3197 DRF_NUM(_EGRESS, _ERR_STATUS_1, _MCRSPCTRLSTORE_ECC_LIMIT_ERR, 1)); 3198 } 3199 } 3200 3201 bit = DRF_NUM(_EGRESS, _ERR_STATUS_1, _RBCTRLSTORE_ECC_DBE_ERR, 1); 3202 if (nvswitch_test_flags(pending, bit)) 3203 { 3204 NVSWITCH_REPORT_CONTAIN(_HW_NPORT_EGRESS_RBCTRLSTORE_ECC_DBE_ERR, "egress reduction ECC DBE error", NV_FALSE); 3205 NVSWITCH_REPORT_CONTAIN_DATA(_HW_NPORT_EGRESS_RBCTRLSTORE_ECC_DBE_ERR, data); 3206 nvswitch_clear_flags(&unhandled, bit); 3207 3208 _nvswitch_construct_ecc_error_event_ls10(&err_event, 3209 NVSWITCH_ERR_HW_NPORT_EGRESS_RBCTRLSTORE_ECC_DBE_ERR, link, NV_FALSE, 0, 3210 NV_TRUE, 1); 3211 3212 nvswitch_inforom_ecc_log_err_event(device, &err_event); 3213 3214 // Clear associated LIMIT_ERR interrupt 3215 if (report.raw_pending & DRF_NUM(_EGRESS, _ERR_STATUS_1, _RBCTRLSTORE_ECC_LIMIT_ERR, 1)) 3216 { 3217 NVSWITCH_ENG_WR32(device, NPORT, , link, _EGRESS, _ERR_STATUS_1, 3218 DRF_NUM(_EGRESS, _ERR_STATUS_1, _RBCTRLSTORE_ECC_LIMIT_ERR, 1)); 3219 } 3220 } 3221 3222 bit = DRF_NUM(_EGRESS, _ERR_STATUS_1, _MCREDSGT_ECC_DBE_ERR, 1); 3223 if (nvswitch_test_flags(pending, bit)) 3224 { 3225 NVSWITCH_REPORT_CONTAIN(_HW_NPORT_EGRESS_MCREDSGT_ECC_DBE_ERR, "egress MC SG ECC DBE error", 
NV_FALSE); 3226 NVSWITCH_REPORT_CONTAIN_DATA(_HW_NPORT_EGRESS_MCREDSGT_ECC_DBE_ERR, data); 3227 nvswitch_clear_flags(&unhandled, bit); 3228 3229 _nvswitch_construct_ecc_error_event_ls10(&err_event, 3230 NVSWITCH_ERR_HW_NPORT_EGRESS_MCREDSGT_ECC_DBE_ERR, link, NV_FALSE, 0, 3231 NV_TRUE, 1); 3232 3233 nvswitch_inforom_ecc_log_err_event(device, &err_event); 3234 3235 // Clear associated LIMIT_ERR interrupt 3236 if (report.raw_pending & DRF_NUM(_EGRESS, _ERR_STATUS_1, _MCREDSGT_ECC_LIMIT_ERR, 1)) 3237 { 3238 NVSWITCH_ENG_WR32(device, NPORT, , link, _EGRESS, _ERR_STATUS_1, 3239 DRF_NUM(_EGRESS, _ERR_STATUS_1, _MCREDSGT_ECC_LIMIT_ERR, 1)); 3240 } 3241 } 3242 3243 bit = DRF_NUM(_EGRESS, _ERR_STATUS_1, _MCRSP_RAM_HDR_ECC_DBE_ERR, 1); 3244 if (nvswitch_test_flags(pending, bit)) 3245 { 3246 NVSWITCH_REPORT_CONTAIN(_HW_NPORT_EGRESS_MCRSP_RAM_HDR_ECC_DBE_ERR, "egress MC ram ECC DBE error", NV_FALSE); 3247 NVSWITCH_REPORT_CONTAIN_DATA(_HW_NPORT_EGRESS_MCRSP_RAM_HDR_ECC_DBE_ERR, data); 3248 nvswitch_clear_flags(&unhandled, bit); 3249 3250 _nvswitch_construct_ecc_error_event_ls10(&err_event, 3251 NVSWITCH_ERR_HW_NPORT_EGRESS_MCRSP_RAM_HDR_ECC_DBE_ERR, link, NV_FALSE, 0, 3252 NV_TRUE, 1); 3253 3254 nvswitch_inforom_ecc_log_err_event(device, &err_event); 3255 3256 // Clear associated LIMIT_ERR interrupt 3257 if (report.raw_pending & DRF_NUM(_EGRESS, _ERR_STATUS_1, _MCRSP_RAM_HDR_ECC_LIMIT_ERR, 1)) 3258 { 3259 NVSWITCH_ENG_WR32(device, NPORT, , link, _EGRESS, _ERR_STATUS_1, 3260 DRF_NUM(_EGRESS, _ERR_STATUS_1, _MCRSP_RAM_HDR_ECC_LIMIT_ERR, 1)); 3261 } 3262 } 3263 3264 NVSWITCH_UNHANDLED_CHECK(device, unhandled); 3265 3266 // Disable interrupts that have occurred after fatal error. 3267 // This helps prevent an interrupt storm if HW keeps triggering unnecessary stream of interrupts. 
    if (device->link[link].fatal_error_occurred)
    {
        NVSWITCH_ENG_WR32(device, NPORT, , link, _EGRESS, _ERR_FATAL_REPORT_EN_1,
            report.raw_enable ^ pending);
    }

    if (report.raw_first & report.mask)
    {
        NVSWITCH_ENG_WR32(device, NPORT, , link, _EGRESS, _ERR_FIRST_1,
            report.raw_first & report.mask);
    }

    NVSWITCH_ENG_WR32(device, NPORT, , link, _EGRESS, _ERR_STATUS_1, pending);

    // Clear all pending interrupts!
    NVSWITCH_ENG_WR32(device, NPORT, , link, _EGRESS, _ERR_STATUS_1, 0xFFFFFFFF);

    if (unhandled != 0)
    {
        status = -NVL_MORE_PROCESSING_REQUIRED;
    }

    return status;
}

//
// Service non-fatal SOURCETRACK interrupts on NPORT 'link'.
//
// Returns NVL_SUCCESS when all pending bits were recognized and handled,
// -NVL_NOT_FOUND when nothing relevant is pending, and
// -NVL_MORE_PROCESSING_REQUIRED when unrecognized bits remain set.
//
static NvlStatus
_nvswitch_service_sourcetrack_nonfatal_ls10
(
    nvswitch_device *device,
    NvU32 link
)
{
    ls10_device *chip_device = NVSWITCH_GET_CHIP_DEVICE_LS10(device);
    NVSWITCH_INTERRUPT_LOG_TYPE report = { 0 };
    NvU32 pending, bit, unhandled;
    INFOROM_NVS_ECC_ERROR_EVENT err_event = {0};

    // Only service bits that are both enabled in HW and unmasked by SW policy.
    report.raw_pending = NVSWITCH_ENG_RD32(device, NPORT, , link,
                            _SOURCETRACK, _ERR_STATUS_0);
    report.raw_enable = NVSWITCH_ENG_RD32(device, NPORT, , link,
                            _SOURCETRACK, _ERR_NON_FATAL_REPORT_EN_0);
    report.mask = report.raw_enable & chip_device->intr_mask.sourcetrack.nonfatal;

    pending = report.raw_pending & report.mask;

    if (pending == 0)
    {
        return -NVL_NOT_FOUND;
    }

    unhandled = pending;
    report.raw_first = NVSWITCH_ENG_RD32(device, NPORT, , link, _SOURCETRACK, _ERR_FIRST_0);

    bit = DRF_NUM(_SOURCETRACK, _ERR_STATUS_0, _CREQ_TCEN0_CRUMBSTORE_ECC_LIMIT_ERR, 1);
    if (nvswitch_test_flags(pending, bit))
    {
        // Ignore LIMIT error if DBE is pending
        if (!(nvswitch_test_flags(report.raw_pending,
                DRF_NUM(_SOURCETRACK, _ERR_STATUS_0, _CREQ_TCEN0_CRUMBSTORE_ECC_DBE_ERR, 1))))
        {
            NvBool bAddressValid = NV_FALSE;
            NvU32 address = 0;
            // ECC error address is only meaningful when HW flags it valid.
            NvU32 addressValid = NVSWITCH_ENG_RD32(device, NPORT, , link, _SOURCETRACK,
                    _ERR_CREQ_TCEN0_CRUMBSTORE_ECC_ERROR_ADDRESS_VALID);

            if (FLD_TEST_DRF(_SOURCETRACK_ERR_CREQ_TCEN0_CRUMBSTORE, _ECC_ERROR_ADDRESS_VALID,
                    _VALID, _VALID, addressValid))
            {
                address = NVSWITCH_ENG_RD32(device, NPORT, , link, _SOURCETRACK,
                        _ERR_CREQ_TCEN0_CRUMBSTORE_ECC_ERROR_ADDRESS);
                bAddressValid = NV_TRUE;
            }

            // Capture counter/address/valid snapshot for the error log.
            report.data[0] = NVSWITCH_ENG_RD32(device, NPORT, , link, _SOURCETRACK,
                                _ERR_CREQ_TCEN0_CRUMBSTORE_ECC_ERROR_COUNTER);
            report.data[1] = NVSWITCH_ENG_RD32(device, NPORT, , link, _SOURCETRACK,
                                _ERR_CREQ_TCEN0_CRUMBSTORE_ECC_ERROR_ADDRESS);
            report.data[2] = NVSWITCH_ENG_RD32(device, NPORT, , link, _SOURCETRACK,
                                _ERR_CREQ_TCEN0_CRUMBSTORE_ECC_ERROR_ADDRESS_VALID);
            NVSWITCH_REPORT_NONFATAL(_HW_NPORT_SOURCETRACK_CREQ_TCEN0_CRUMBSTORE_ECC_LIMIT_ERR,
                                     "sourcetrack TCEN0 crumbstore ECC limit err");

            // Log a correctable (non-uncorrectable) ECC event to the InfoROM.
            _nvswitch_construct_ecc_error_event_ls10(&err_event,
                NVSWITCH_ERR_HW_NPORT_SOURCETRACK_CREQ_TCEN0_CRUMBSTORE_ECC_LIMIT_ERR, link,
                bAddressValid, address, NV_FALSE, 1);

            nvswitch_inforom_ecc_log_err_event(device, &err_event);
        }

        nvswitch_clear_flags(&unhandled, bit);
    }

    NVSWITCH_UNHANDLED_CHECK(device, unhandled);

    //
    // Disable interrupts that have occurred after fatal error.
    // This helps prevent an interrupt storm if HW keeps triggering unnecessary stream of interrupts.
    //
    if (device->link[link].fatal_error_occurred)
    {
        // pending is a subset of raw_enable (masked above), so XOR clears
        // exactly the asserted bits from the enable register.
        NVSWITCH_ENG_WR32(device, NPORT, , link, _SOURCETRACK, _ERR_NON_FATAL_REPORT_EN_0,
            report.raw_enable ^ pending);
    }

    if (report.raw_first & report.mask)
    {
        NVSWITCH_ENG_WR32(device, NPORT, , link, _SOURCETRACK, _ERR_FIRST_0,
            report.raw_first & report.mask);
    }

    NVSWITCH_ENG_WR32(device, NPORT, , link, _SOURCETRACK, _ERR_STATUS_0, pending);

    if (unhandled != 0)
    {
        return -NVL_MORE_PROCESSING_REQUIRED;
    }

    return NVL_SUCCESS;
}
//
// Service fatal SOURCETRACK interrupts on NPORT 'link'.
//
// Returns NVL_SUCCESS when all pending bits were recognized and handled,
// -NVL_NOT_FOUND when nothing relevant is pending, and
// -NVL_MORE_PROCESSING_REQUIRED when unrecognized bits remain set.
//
static NvlStatus
_nvswitch_service_sourcetrack_fatal_ls10
(
    nvswitch_device *device,
    NvU32 link
)
{
    ls10_device *chip_device = NVSWITCH_GET_CHIP_DEVICE_LS10(device);
    NVSWITCH_INTERRUPT_LOG_TYPE report = { 0 };
    NvU32 pending, bit, contain, unhandled;
    INFOROM_NVS_ECC_ERROR_EVENT err_event = {0};

    report.raw_pending = NVSWITCH_ENG_RD32(device, NPORT, , link,
                            _SOURCETRACK, _ERR_STATUS_0);
    report.raw_enable = NVSWITCH_ENG_RD32(device, NPORT, , link,
                            _SOURCETRACK, _ERR_FATAL_REPORT_EN_0);
    report.mask = report.raw_enable & chip_device->intr_mask.sourcetrack.fatal;
    pending = report.raw_pending & report.mask;

    if (pending == 0)
    {
        return -NVL_NOT_FOUND;
    }

    unhandled = pending;
    report.raw_first = NVSWITCH_ENG_RD32(device, NPORT, , link, _SOURCETRACK, _ERR_FIRST_0);
    // NOTE(review): 'contain' has no direct use below; presumably the
    // NVSWITCH_REPORT_CONTAIN macro expands to reference it — confirm.
    contain = NVSWITCH_ENG_RD32(device, NPORT, , link, _SOURCETRACK, _ERR_CONTAIN_EN_0);

    bit = DRF_NUM(_SOURCETRACK, _ERR_STATUS_0, _CREQ_TCEN0_CRUMBSTORE_ECC_DBE_ERR, 1);
    if (nvswitch_test_flags(pending, bit))
    {
        NvBool bAddressValid = NV_FALSE;
        NvU32 address = 0;
        // ECC error address is only meaningful when HW flags it valid.
        NvU32 addressValid = NVSWITCH_ENG_RD32(device, NPORT, , link, _SOURCETRACK,
                _ERR_CREQ_TCEN0_CRUMBSTORE_ECC_ERROR_ADDRESS_VALID);

        if (FLD_TEST_DRF(_SOURCETRACK_ERR_CREQ_TCEN0_CRUMBSTORE, _ECC_ERROR_ADDRESS_VALID,
                _VALID, _VALID, addressValid))
        {
            address = NVSWITCH_ENG_RD32(device, NPORT, , link, _SOURCETRACK,
                    _ERR_CREQ_TCEN0_CRUMBSTORE_ECC_ERROR_ADDRESS);
            bAddressValid = NV_TRUE;
        }

        report.data[0] = NVSWITCH_ENG_RD32(device, NPORT, , link, _SOURCETRACK,
                            _ERR_CREQ_TCEN0_CRUMBSTORE_ECC_ERROR_ADDRESS);
        report.data[1] = NVSWITCH_ENG_RD32(device, NPORT, , link, _SOURCETRACK,
                            _ERR_CREQ_TCEN0_CRUMBSTORE_ECC_ERROR_ADDRESS_VALID);
        NVSWITCH_REPORT_CONTAIN(_HW_NPORT_SOURCETRACK_CREQ_TCEN0_CRUMBSTORE_ECC_DBE_ERR,
                                "sourcetrack TCEN0 crumbstore DBE", NV_FALSE);
        nvswitch_clear_flags(&unhandled, bit);

        // Uncorrectable (DBE) ECC event for the InfoROM log.
        _nvswitch_construct_ecc_error_event_ls10(&err_event,
            NVSWITCH_ERR_HW_NPORT_SOURCETRACK_CREQ_TCEN0_CRUMBSTORE_ECC_DBE_ERR,
            link, bAddressValid, address, NV_TRUE, 1);

        nvswitch_inforom_ecc_log_err_event(device, &err_event);

        // Clear associated LIMIT_ERR interrupt
        if (report.raw_pending & DRF_NUM(_SOURCETRACK, _ERR_STATUS_0, _CREQ_TCEN0_CRUMBSTORE_ECC_LIMIT_ERR, 1))
        {
            NVSWITCH_ENG_WR32(device, NPORT, , link, _SOURCETRACK, _ERR_STATUS_0,
                DRF_NUM(_SOURCETRACK, _ERR_STATUS_0, _CREQ_TCEN0_CRUMBSTORE_ECC_LIMIT_ERR, 1));
        }
    }

    bit = DRF_NUM(_SOURCETRACK, _ERR_STATUS_0, _DUP_CREQ_TCEN0_TAG_ERR, 1);
    if (nvswitch_test_flags(pending, bit))
    {
        NVSWITCH_REPORT_CONTAIN(_HW_NPORT_SOURCETRACK_DUP_CREQ_TCEN0_TAG_ERR,
                                "sourcetrack duplicate CREQ", NV_FALSE);
        nvswitch_clear_flags(&unhandled, bit);
    }

    bit = DRF_NUM(_SOURCETRACK, _ERR_STATUS_0, _INVALID_TCEN0_RSP_ERR, 1);
    if (nvswitch_test_flags(pending, bit))
    {
        NVSWITCH_REPORT_CONTAIN(_HW_NPORT_SOURCETRACK_INVALID_TCEN0_RSP_ERR,
                                "sourcetrack invalid TCEN0 CREQ", NV_FALSE);
        nvswitch_clear_flags(&unhandled, bit);
    }

    bit = DRF_NUM(_SOURCETRACK, _ERR_STATUS_0, _INVALID_TCEN1_RSP_ERR, 1);
    if (nvswitch_test_flags(pending, bit))
    {
        NVSWITCH_REPORT_CONTAIN(_HW_NPORT_SOURCETRACK_INVALID_TCEN1_RSP_ERR,
                                "sourcetrack invalid TCEN1 CREQ", NV_FALSE);
        nvswitch_clear_flags(&unhandled, bit);
    }

    bit = DRF_NUM(_SOURCETRACK, _ERR_STATUS_0, _SOURCETRACK_TIME_OUT_ERR, 1);
    if (nvswitch_test_flags(pending, bit))
    {
        NVSWITCH_REPORT_CONTAIN(_HW_NPORT_SOURCETRACK_SOURCETRACK_TIME_OUT_ERR,
                                "sourcetrack timeout error", NV_FALSE);
        nvswitch_clear_flags(&unhandled, bit);
    }

    NVSWITCH_UNHANDLED_CHECK(device, unhandled);

    //
    // Disable interrupts that have occurred after fatal error.
    // This helps prevent an interrupt storm if HW keeps triggering unnecessary stream of interrupts.
    //
    if (device->link[link].fatal_error_occurred)
    {
        // pending is a subset of raw_enable, so XOR clears exactly those bits.
        NVSWITCH_ENG_WR32(device, NPORT, , link, _SOURCETRACK, _ERR_FATAL_REPORT_EN_0,
            report.raw_enable ^ pending);
    }

    if (report.raw_first & report.mask)
    {
        NVSWITCH_ENG_WR32(device, NPORT, , link, _SOURCETRACK, _ERR_FIRST_0,
            report.raw_first & report.mask);
    }

    NVSWITCH_ENG_WR32(device, NPORT, , link, _SOURCETRACK, _ERR_STATUS_0, pending);

    if (unhandled != 0)
    {
        return -NVL_MORE_PROCESSING_REQUIRED;
    }

    return NVL_SUCCESS;

}
//
// Multicast Tstate
//

//
// Service non-fatal MULTICASTTSTATE interrupts on NPORT 'link'.
//
// Returns NVL_SUCCESS when all pending bits were recognized and handled,
// -NVL_NOT_FOUND when nothing relevant is pending, and
// -NVL_MORE_PROCESSING_REQUIRED when unrecognized bits remain set.
//
static NvlStatus
_nvswitch_service_multicast_nonfatal_ls10
(
    nvswitch_device *device,
    NvU32 link
)
{
    ls10_device *chip_device = NVSWITCH_GET_CHIP_DEVICE_LS10(device);
    NVSWITCH_INTERRUPT_LOG_TYPE report = { 0 };
    NvU32 pending, bit, unhandled;
    NVSWITCH_RAW_ERROR_LOG_TYPE data = {0, { 0 }};
    INFOROM_NVS_ECC_ERROR_EVENT err_event = {0};

    report.raw_pending = NVSWITCH_ENG_RD32(device, NPORT, , link, _MULTICASTTSTATE, _ERR_STATUS_0);
    report.raw_enable = NVSWITCH_ENG_RD32(device, NPORT, , link, _MULTICASTTSTATE, _ERR_NON_FATAL_REPORT_EN_0);
    report.mask = report.raw_enable & chip_device->intr_mask.mc_tstate.nonfatal;
    pending = report.raw_pending & report.mask;

    if (pending == 0)
    {
        return -NVL_NOT_FOUND;
    }

    unhandled = pending;
    report.raw_first = NVSWITCH_ENG_RD32(device, NPORT, , link, _MULTICASTTSTATE, _ERR_FIRST_0);
    // Snapshot multicast timing info once for all reports below.
    _nvswitch_collect_error_info_ls10(device, link,
        NVSWITCH_RAW_ERROR_LOG_DATA_FLAG_MC_TIME,
        &data);

    bit = DRF_NUM(_MULTICASTTSTATE, _ERR_STATUS_0, _TAGPOOL_ECC_LIMIT_ERR, 1);
    if (nvswitch_test_flags(pending, bit))
    {
        // Ignore LIMIT error if DBE is pending
        if(!(nvswitch_test_flags(report.raw_pending,
                DRF_NUM(_MULTICASTTSTATE, _ERR_STATUS_0, _TAGPOOL_ECC_DBE_ERR, 1))))
        {
            NvBool bAddressValid = NV_FALSE;
            NvU32 address = 0;
            // ECC error address is only meaningful when HW flags it valid.
            NvU32 addressValid = NVSWITCH_ENG_RD32(device, NPORT, , link, _MULTICASTTSTATE,
                    _ERR_TAGPOOL_ECC_ERROR_ADDRESS_VALID);

            if (FLD_TEST_DRF(_MULTICASTTSTATE_ERR_TAGPOOL, _ECC_ERROR_ADDRESS_VALID, _VALID, _VALID,
                    addressValid))
            {
                address = NVSWITCH_ENG_RD32(device, NPORT, , link, _MULTICASTTSTATE,
                        _ERR_TAGPOOL_ECC_ERROR_ADDRESS);
                bAddressValid = NV_TRUE;
            }

            // Record then reset the HW single-bit error counter.
            report.data[1] = NVSWITCH_ENG_RD32(device, NPORT, , link, _MULTICASTTSTATE, _ERR_TAGPOOL_ECC_ERROR_COUNTER);
            NVSWITCH_ENG_WR32(device, NPORT, , link, _MULTICASTTSTATE, _ERR_TAGPOOL_ECC_ERROR_COUNTER,
                DRF_DEF(_MULTICASTTSTATE, _ERR_TAGPOOL_ECC_ERROR_COUNTER, _ERROR_COUNT, _INIT));
            NVSWITCH_REPORT_NONFATAL(_HW_NPORT_MULTICASTTSTATE_TAGPOOL_ECC_LIMIT_ERR, "MC TS tag store single-bit threshold");
            NVSWITCH_REPORT_DATA(_HW_NPORT_MULTICASTTSTATE_TAGPOOL_ECC_LIMIT_ERR, data);

            // Correctable ECC event for the InfoROM log.
            _nvswitch_construct_ecc_error_event_ls10(&err_event,
                NVSWITCH_ERR_HW_NPORT_MULTICASTTSTATE_TAGPOOL_ECC_LIMIT_ERR, link,
                bAddressValid, address, NV_FALSE, 1);

            nvswitch_inforom_ecc_log_err_event(device, &err_event);
        }

        nvswitch_clear_flags(&unhandled, bit);
    }

    bit = DRF_NUM(_MULTICASTTSTATE, _ERR_STATUS_0, _CRUMBSTORE_ECC_LIMIT_ERR, 1);
    if (nvswitch_test_flags(pending, bit))
    {
        // Ignore LIMIT error if DBE is pending
        if(!(nvswitch_test_flags(report.raw_pending,
                DRF_NUM(_MULTICASTTSTATE, _ERR_STATUS_0, _CRUMBSTORE_ECC_DBE_ERR, 1))))
        {
            NvBool bAddressValid = NV_FALSE;
            NvU32 address = 0;
            NvU32 addressValid = NVSWITCH_ENG_RD32(device, NPORT, , link, _MULTICASTTSTATE,
                    _ERR_CRUMBSTORE_ECC_ERROR_ADDRESS_VALID);

            if (FLD_TEST_DRF(_MULTICASTTSTATE_ERR_CRUMBSTORE, _ECC_ERROR_ADDRESS_VALID, _VALID, _VALID,
                    addressValid))
            {
                address = NVSWITCH_ENG_RD32(device, NPORT, , link, _MULTICASTTSTATE,
                        _ERR_CRUMBSTORE_ECC_ERROR_ADDRESS);
                bAddressValid = NV_TRUE;
            }

            // Record then reset the HW single-bit error counter.
            report.data[1] = NVSWITCH_ENG_RD32(device, NPORT, , link, _MULTICASTTSTATE, _ERR_CRUMBSTORE_ECC_ERROR_COUNTER);
            NVSWITCH_ENG_WR32(device, NPORT, , link, _MULTICASTTSTATE, _ERR_CRUMBSTORE_ECC_ERROR_COUNTER,
                DRF_DEF(_MULTICASTTSTATE, _ERR_CRUMBSTORE_ECC_ERROR_COUNTER, _ERROR_COUNT, _INIT));
            NVSWITCH_REPORT_NONFATAL(_HW_NPORT_MULTICASTTSTATE_CRUMBSTORE_ECC_LIMIT_ERR, "MC TS crumbstore single-bit threshold");
            NVSWITCH_REPORT_DATA(_HW_NPORT_MULTICASTTSTATE_CRUMBSTORE_ECC_LIMIT_ERR, data);

            _nvswitch_construct_ecc_error_event_ls10(&err_event,
                NVSWITCH_ERR_HW_NPORT_MULTICASTTSTATE_CRUMBSTORE_ECC_LIMIT_ERR, link,
                bAddressValid, address, NV_FALSE, 1);

            nvswitch_inforom_ecc_log_err_event(device, &err_event);
        }

        nvswitch_clear_flags(&unhandled, bit);
    }

    bit = DRF_NUM(_MULTICASTTSTATE, _ERR_STATUS_0, _CRUMBSTORE_MCTO_ERR, 1);
    if (nvswitch_test_flags(pending, bit))
    {
        NVSWITCH_REPORT_NONFATAL(_HW_NPORT_MULTICASTTSTATE_CRUMBSTORE_MCTO_ERR, "MC TS crumbstore MCTO");
        // MCTO additionally captures egress-side context for the report.
        _nvswitch_collect_error_info_ls10(device, link,
            NVSWITCH_RAW_ERROR_LOG_DATA_FLAG_EGRESS_TIME |
            NVSWITCH_RAW_ERROR_LOG_DATA_FLAG_EGRESS_MISC |
            NVSWITCH_RAW_ERROR_LOG_DATA_FLAG_EGRESS_HDR,
            &data);
        NVSWITCH_REPORT_DATA(_HW_NPORT_MULTICASTTSTATE_CRUMBSTORE_MCTO_ERR, data);

        nvswitch_clear_flags(&unhandled, bit);
    }

    NVSWITCH_UNHANDLED_CHECK(device, unhandled);

    // Disable interrupts that have occurred after fatal error.
    // This helps prevent an interrupt storm if HW keeps triggering unnecessary stream of interrupts.
    if (device->link[link].fatal_error_occurred)
    {
        // pending is a subset of raw_enable, so XOR clears exactly those bits.
        NVSWITCH_ENG_WR32(device, NPORT, , link, _MULTICASTTSTATE, _ERR_NON_FATAL_REPORT_EN_0,
            report.raw_enable ^ pending);
    }

    if (report.raw_first & report.mask)
    {
        NVSWITCH_ENG_WR32(device, NPORT, , link, _MULTICASTTSTATE, _ERR_FIRST_0,
            report.raw_first & report.mask);
    }

    NVSWITCH_ENG_WR32(device, NPORT, , link, _MULTICASTTSTATE, _ERR_STATUS_0, pending);

    if (unhandled != 0)
    {
        return -NVL_MORE_PROCESSING_REQUIRED;
    }

    return NVL_SUCCESS;
}
//
// Service fatal MULTICASTTSTATE interrupts on NPORT 'link'.
//
// Returns NVL_SUCCESS when all pending bits were recognized and handled,
// -NVL_NOT_FOUND when nothing relevant is pending, and
// -NVL_MORE_PROCESSING_REQUIRED when unrecognized bits remain set.
//
static NvlStatus
_nvswitch_service_multicast_fatal_ls10
(
    nvswitch_device *device,
    NvU32 link
)
{
    ls10_device *chip_device = NVSWITCH_GET_CHIP_DEVICE_LS10(device);
    NVSWITCH_INTERRUPT_LOG_TYPE report = { 0 };
    NvU32 pending, bit, contain, unhandled;
    NVSWITCH_RAW_ERROR_LOG_TYPE data = {0, { 0 }};
    INFOROM_NVS_ECC_ERROR_EVENT err_event = {0};

    report.raw_pending = NVSWITCH_ENG_RD32(device, NPORT, , link, _MULTICASTTSTATE, _ERR_STATUS_0);
    report.raw_enable = NVSWITCH_ENG_RD32(device, NPORT, , link, _MULTICASTTSTATE, _ERR_FATAL_REPORT_EN_0);
    report.mask = report.raw_enable & chip_device->intr_mask.mc_tstate.fatal;
    pending = report.raw_pending & report.mask;

    if (pending == 0)
    {
        return -NVL_NOT_FOUND;
    }

    unhandled = pending;
    report.raw_first = NVSWITCH_ENG_RD32(device, NPORT, , link, _MULTICASTTSTATE, _ERR_FIRST_0);
    // NOTE(review): 'contain' has no direct use below; presumably the
    // NVSWITCH_REPORT_CONTAIN macro expands to reference it — confirm.
    contain = NVSWITCH_ENG_RD32(device, NPORT, , link, _MULTICASTTSTATE, _ERR_CONTAIN_EN_0);
    // Snapshot multicast timing info once for all reports below.
    _nvswitch_collect_error_info_ls10(device, link,
        NVSWITCH_RAW_ERROR_LOG_DATA_FLAG_MC_TIME,
        &data);

    bit = DRF_NUM(_MULTICASTTSTATE, _ERR_STATUS_0, _TAGPOOL_ECC_DBE_ERR, 1);
    if (nvswitch_test_flags(pending, bit))
    {
        NvBool bAddressValid = NV_FALSE;
        NvU32 address = 0;
        // ECC error address is only meaningful when HW flags it valid.
        NvU32 addressValid = NVSWITCH_ENG_RD32(device, NPORT, , link, _MULTICASTTSTATE,
                _ERR_TAGPOOL_ECC_ERROR_ADDRESS_VALID);

        if (FLD_TEST_DRF(_MULTICASTTSTATE_ERR_TAGPOOL, _ECC_ERROR_ADDRESS_VALID, _VALID, _VALID,
                addressValid))
        {
            address = NVSWITCH_ENG_RD32(device, NPORT, , link, _MULTICASTTSTATE,
                    _ERR_TAGPOOL_ECC_ERROR_ADDRESS);
            bAddressValid = NV_TRUE;
        }

        // Record then reset the HW error counter.
        report.data[1] = NVSWITCH_ENG_RD32(device, NPORT, , link, _MULTICASTTSTATE, _ERR_TAGPOOL_ECC_ERROR_COUNTER);
        NVSWITCH_ENG_WR32(device, NPORT, , link, _MULTICASTTSTATE, _ERR_TAGPOOL_ECC_ERROR_COUNTER,
            DRF_DEF(_MULTICASTTSTATE, _ERR_TAGPOOL_ECC_ERROR_COUNTER, _ERROR_COUNT, _INIT));
        NVSWITCH_REPORT_CONTAIN(_HW_NPORT_MULTICASTTSTATE_TAGPOOL_ECC_DBE_ERR, "MC TS tag store fatal ECC", NV_FALSE);
        NVSWITCH_REPORT_CONTAIN_DATA(_HW_NPORT_MULTICASTTSTATE_TAGPOOL_ECC_DBE_ERR, data);
        nvswitch_clear_flags(&unhandled, bit);

        // Uncorrectable (DBE) ECC event for the InfoROM log.
        _nvswitch_construct_ecc_error_event_ls10(&err_event,
            NVSWITCH_ERR_HW_NPORT_MULTICASTTSTATE_TAGPOOL_ECC_DBE_ERR, link, bAddressValid,
            address, NV_TRUE, 1);

        nvswitch_inforom_ecc_log_err_event(device, &err_event);

        // Clear associated LIMIT_ERR interrupt
        if (report.raw_pending & DRF_NUM(_MULTICASTTSTATE, _ERR_STATUS_0, _TAGPOOL_ECC_LIMIT_ERR, 1))
        {
            NVSWITCH_ENG_WR32(device, NPORT, , link, _MULTICASTTSTATE, _ERR_STATUS_0,
                DRF_NUM(_MULTICASTTSTATE, _ERR_STATUS_0, _TAGPOOL_ECC_LIMIT_ERR, 1));
        }
    }

    bit = DRF_NUM(_MULTICASTTSTATE, _ERR_STATUS_0, _CRUMBSTORE_ECC_DBE_ERR, 1);
    if (nvswitch_test_flags(pending, bit))
    {
        NvBool bAddressValid = NV_FALSE;
        NvU32 address = 0;
        NvU32 addressValid = NVSWITCH_ENG_RD32(device, NPORT, , link, _MULTICASTTSTATE,
                _ERR_CRUMBSTORE_ECC_ERROR_ADDRESS_VALID);

        if (FLD_TEST_DRF(_MULTICASTTSTATE_ERR_CRUMBSTORE, _ECC_ERROR_ADDRESS_VALID, _VALID, _VALID,
                addressValid))
        {
            address = NVSWITCH_ENG_RD32(device, NPORT, , link, _MULTICASTTSTATE,
                    _ERR_CRUMBSTORE_ECC_ERROR_ADDRESS);
            bAddressValid = NV_TRUE;
        }

        // Record then reset the HW error counter.
        report.data[1] = NVSWITCH_ENG_RD32(device, NPORT, , link, _MULTICASTTSTATE, _ERR_CRUMBSTORE_ECC_ERROR_COUNTER);
        NVSWITCH_ENG_WR32(device, NPORT, , link, _MULTICASTTSTATE, _ERR_CRUMBSTORE_ECC_ERROR_COUNTER,
            DRF_DEF(_MULTICASTTSTATE, _ERR_CRUMBSTORE_ECC_ERROR_COUNTER, _ERROR_COUNT, _INIT));
        NVSWITCH_REPORT_CONTAIN(_HW_NPORT_MULTICASTTSTATE_CRUMBSTORE_ECC_DBE_ERR, "MC TS crumbstore fatal ECC", NV_FALSE);
        NVSWITCH_REPORT_CONTAIN_DATA(_HW_NPORT_MULTICASTTSTATE_CRUMBSTORE_ECC_DBE_ERR, data);
        nvswitch_clear_flags(&unhandled, bit);

        _nvswitch_construct_ecc_error_event_ls10(&err_event,
            NVSWITCH_ERR_HW_NPORT_MULTICASTTSTATE_CRUMBSTORE_ECC_DBE_ERR, link, bAddressValid,
            address, NV_TRUE, 1);

        nvswitch_inforom_ecc_log_err_event(device, &err_event);

        // Clear associated LIMIT_ERR interrupt
        if (report.raw_pending & DRF_NUM(_MULTICASTTSTATE, _ERR_STATUS_0, _CRUMBSTORE_ECC_LIMIT_ERR, 1))
        {
            NVSWITCH_ENG_WR32(device, NPORT, , link, _MULTICASTTSTATE, _ERR_STATUS_0,
                DRF_NUM(_MULTICASTTSTATE, _ERR_STATUS_0, _CRUMBSTORE_ECC_LIMIT_ERR, 1));
        }
    }

    bit = DRF_NUM(_MULTICASTTSTATE, _ERR_STATUS_0, _CRUMBSTORE_BUF_OVERWRITE_ERR, 1);
    if (nvswitch_test_flags(pending, bit))
    {
        NVSWITCH_REPORT_CONTAIN(_HW_NPORT_MULTICASTTSTATE_CRUMBSTORE_BUF_OVERWRITE_ERR, "MC crumbstore overwrite", NV_FALSE);
        NVSWITCH_REPORT_CONTAIN_DATA(_HW_NPORT_MULTICASTTSTATE_CRUMBSTORE_BUF_OVERWRITE_ERR, data);
        nvswitch_clear_flags(&unhandled, bit);
    }

    NVSWITCH_UNHANDLED_CHECK(device, unhandled);

    // Disable interrupts that have occurred after fatal error.
    // This helps prevent an interrupt storm if HW keeps triggering unnecessary stream of interrupts.
    if (device->link[link].fatal_error_occurred)
    {
        // pending is a subset of raw_enable, so XOR clears exactly those bits.
        NVSWITCH_ENG_WR32(device, NPORT, , link, _MULTICASTTSTATE, _ERR_FATAL_REPORT_EN_0,
            report.raw_enable ^ pending);
    }

    if (report.raw_first & report.mask)
    {
        NVSWITCH_ENG_WR32(device, NPORT, , link, _MULTICASTTSTATE, _ERR_FIRST_0,
            report.raw_first & report.mask);
    }

    NVSWITCH_ENG_WR32(device, NPORT, , link, _MULTICASTTSTATE, _ERR_STATUS_0, pending);

    if (unhandled != 0)
    {
        return -NVL_MORE_PROCESSING_REQUIRED;
    }

    return NVL_SUCCESS;
}
overwrite", NV_FALSE); 3768 NVSWITCH_REPORT_CONTAIN_DATA(_HW_NPORT_MULTICASTTSTATE_CRUMBSTORE_BUF_OVERWRITE_ERR, data); 3769 nvswitch_clear_flags(&unhandled, bit); 3770 } 3771 3772 NVSWITCH_UNHANDLED_CHECK(device, unhandled); 3773 3774 // Disable interrupts that have occurred after fatal error. 3775 // This helps prevent an interrupt storm if HW keeps triggering unnecessary stream of interrupts. 3776 if (device->link[link].fatal_error_occurred) 3777 { 3778 NVSWITCH_ENG_WR32(device, NPORT, , link, _MULTICASTTSTATE, _ERR_FATAL_REPORT_EN_0, 3779 report.raw_enable ^ pending); 3780 } 3781 3782 if (report.raw_first & report.mask) 3783 { 3784 NVSWITCH_ENG_WR32(device, NPORT, , link, _MULTICASTTSTATE, _ERR_FIRST_0, 3785 report.raw_first & report.mask); 3786 } 3787 3788 NVSWITCH_ENG_WR32(device, NPORT, , link, _MULTICASTTSTATE, _ERR_STATUS_0, pending); 3789 3790 if (unhandled != 0) 3791 { 3792 return -NVL_MORE_PROCESSING_REQUIRED; 3793 } 3794 3795 return NVL_SUCCESS; 3796 } 3797 3798 // 3799 // Reduction Tstate 3800 // 3801 3802 static NvlStatus 3803 _nvswitch_service_reduction_nonfatal_ls10 3804 ( 3805 nvswitch_device *device, 3806 NvU32 link 3807 ) 3808 { 3809 ls10_device *chip_device = NVSWITCH_GET_CHIP_DEVICE_LS10(device); 3810 NVSWITCH_INTERRUPT_LOG_TYPE report = { 0 }; 3811 NvU32 pending, bit, unhandled; 3812 NVSWITCH_RAW_ERROR_LOG_TYPE data = {0, { 0 }}; 3813 INFOROM_NVS_ECC_ERROR_EVENT err_event = {0}; 3814 3815 report.raw_pending = NVSWITCH_ENG_RD32(device, NPORT, , link, _REDUCTIONTSTATE, _ERR_STATUS_0); 3816 report.raw_enable = NVSWITCH_ENG_RD32(device, NPORT, , link, _REDUCTIONTSTATE, _ERR_NON_FATAL_REPORT_EN_0); 3817 report.mask = report.raw_enable & chip_device->intr_mask.mc_tstate.nonfatal; 3818 pending = report.raw_pending & report.mask; 3819 3820 if (pending == 0) 3821 { 3822 return -NVL_NOT_FOUND; 3823 } 3824 3825 unhandled = pending; 3826 report.raw_first = NVSWITCH_ENG_RD32(device, NPORT, , link, _REDUCTIONTSTATE, _ERR_FIRST_0); 3827 
_nvswitch_collect_error_info_ls10(device, link, 3828 NVSWITCH_RAW_ERROR_LOG_DATA_FLAG_RED_TIME, 3829 &data); 3830 3831 bit = DRF_NUM(_REDUCTIONTSTATE, _ERR_STATUS_0, _TAGPOOL_ECC_LIMIT_ERR, 1); 3832 if (nvswitch_test_flags(pending, bit)) 3833 { 3834 // Ignore LIMIT error if DBE is pending 3835 if(!(nvswitch_test_flags(report.raw_pending, 3836 DRF_NUM(_REDUCTIONTSTATE, _ERR_STATUS_0, _TAGPOOL_ECC_DBE_ERR, 1)))) 3837 { 3838 NvBool bAddressValid = NV_FALSE; 3839 NvU32 address = 0; 3840 NvU32 addressValid = NVSWITCH_ENG_RD32(device, NPORT, , link, _REDUCTIONTSTATE, 3841 _ERR_TAGPOOL_ECC_ERROR_ADDRESS_VALID); 3842 3843 if (FLD_TEST_DRF(_REDUCTIONTSTATE_ERR_TAGPOOL, _ECC_ERROR_ADDRESS_VALID, _VALID, _VALID, 3844 addressValid)) 3845 { 3846 address = NVSWITCH_ENG_RD32(device, NPORT, , link, _REDUCTIONTSTATE, 3847 _ERR_TAGPOOL_ECC_ERROR_ADDRESS); 3848 bAddressValid = NV_TRUE; 3849 } 3850 3851 report.data[1] = NVSWITCH_ENG_RD32(device, NPORT, , link, _REDUCTIONTSTATE, _ERR_TAGPOOL_ECC_ERROR_COUNTER); 3852 NVSWITCH_ENG_WR32(device, NPORT, , link, _REDUCTIONTSTATE, _ERR_TAGPOOL_ECC_ERROR_COUNTER, 3853 DRF_DEF(_REDUCTIONTSTATE, _ERR_TAGPOOL_ECC_ERROR_COUNTER, _ERROR_COUNT, _INIT)); 3854 NVSWITCH_REPORT_NONFATAL(_HW_NPORT_REDUCTIONTSTATE_TAGPOOL_ECC_LIMIT_ERR, "Red TS tag store single-bit threshold"); 3855 NVSWITCH_REPORT_DATA(_HW_NPORT_REDUCTIONTSTATE_TAGPOOL_ECC_LIMIT_ERR, data); 3856 3857 _nvswitch_construct_ecc_error_event_ls10(&err_event, 3858 NVSWITCH_ERR_HW_NPORT_REDUCTIONTSTATE_TAGPOOL_ECC_LIMIT_ERR, link, 3859 bAddressValid, address, NV_FALSE, 1); 3860 3861 nvswitch_inforom_ecc_log_err_event(device, &err_event); 3862 } 3863 3864 nvswitch_clear_flags(&unhandled, bit); 3865 } 3866 3867 bit = DRF_NUM(_REDUCTIONTSTATE, _ERR_STATUS_0, _CRUMBSTORE_ECC_LIMIT_ERR, 1); 3868 if (nvswitch_test_flags(pending, bit)) 3869 { 3870 // Ignore LIMIT error if DBE is pending 3871 if(!(nvswitch_test_flags(report.raw_pending, 3872 DRF_NUM(_REDUCTIONTSTATE, _ERR_STATUS_0, 
_CRUMBSTORE_ECC_DBE_ERR, 1)))) 3873 { 3874 NvBool bAddressValid = NV_FALSE; 3875 NvU32 address = 0; 3876 NvU32 addressValid = NVSWITCH_ENG_RD32(device, NPORT, , link, _REDUCTIONTSTATE, 3877 _ERR_CRUMBSTORE_ECC_ERROR_ADDRESS_VALID); 3878 3879 if (FLD_TEST_DRF(_REDUCTIONTSTATE_ERR_CRUMBSTORE, _ECC_ERROR_ADDRESS_VALID, _VALID, _VALID, 3880 addressValid)) 3881 { 3882 address = NVSWITCH_ENG_RD32(device, NPORT, , link, _REDUCTIONTSTATE, 3883 _ERR_CRUMBSTORE_ECC_ERROR_ADDRESS); 3884 bAddressValid = NV_TRUE; 3885 } 3886 3887 report.data[1] = NVSWITCH_ENG_RD32(device, NPORT, , link, _REDUCTIONTSTATE, _ERR_CRUMBSTORE_ECC_ERROR_COUNTER); 3888 NVSWITCH_ENG_WR32(device, NPORT, , link, _REDUCTIONTSTATE, _ERR_CRUMBSTORE_ECC_ERROR_COUNTER, 3889 DRF_DEF(_REDUCTIONTSTATE, _ERR_CRUMBSTORE_ECC_ERROR_COUNTER, _ERROR_COUNT, _INIT)); 3890 NVSWITCH_REPORT_NONFATAL(_HW_NPORT_REDUCTIONTSTATE_CRUMBSTORE_ECC_LIMIT_ERR, "Red TS crumbstore single-bit threshold"); 3891 NVSWITCH_REPORT_DATA(_HW_NPORT_REDUCTIONTSTATE_CRUMBSTORE_ECC_LIMIT_ERR, data); 3892 3893 _nvswitch_construct_ecc_error_event_ls10(&err_event, 3894 NVSWITCH_ERR_HW_NPORT_REDUCTIONTSTATE_CRUMBSTORE_ECC_LIMIT_ERR, link, 3895 bAddressValid, address, NV_FALSE, 1); 3896 3897 nvswitch_inforom_ecc_log_err_event(device, &err_event); 3898 } 3899 3900 nvswitch_clear_flags(&unhandled, bit); 3901 } 3902 3903 bit = DRF_NUM(_REDUCTIONTSTATE, _ERR_STATUS_0, _CRUMBSTORE_RTO_ERR, 1); 3904 if (nvswitch_test_flags(pending, bit)) 3905 { 3906 NVSWITCH_REPORT_NONFATAL(_HW_NPORT_REDUCTIONTSTATE_CRUMBSTORE_RTO_ERR, "Red TS crumbstore RTO"); 3907 NVSWITCH_REPORT_DATA(_HW_NPORT_REDUCTIONTSTATE_CRUMBSTORE_RTO_ERR, data); 3908 3909 nvswitch_clear_flags(&unhandled, bit); 3910 } 3911 3912 NVSWITCH_UNHANDLED_CHECK(device, unhandled); 3913 3914 // Disable interrupts that have occurred after fatal error. 3915 // This helps prevent an interrupt storm if HW keeps triggering unnecessary stream of interrupts. 
3916 if (device->link[link].fatal_error_occurred) 3917 { 3918 NVSWITCH_ENG_WR32(device, NPORT, , link, _REDUCTIONTSTATE, _ERR_NON_FATAL_REPORT_EN_0, 3919 report.raw_enable ^ pending); 3920 } 3921 3922 if (report.raw_first & report.mask) 3923 { 3924 NVSWITCH_ENG_WR32(device, NPORT, , link, _REDUCTIONTSTATE, _ERR_FIRST_0, 3925 report.raw_first & report.mask); 3926 } 3927 3928 NVSWITCH_ENG_WR32(device, NPORT, , link, _REDUCTIONTSTATE, _ERR_STATUS_0, pending); 3929 3930 if (unhandled != 0) 3931 { 3932 return -NVL_MORE_PROCESSING_REQUIRED; 3933 } 3934 3935 return NVL_SUCCESS; 3936 } 3937 3938 static NvlStatus 3939 _nvswitch_service_reduction_fatal_ls10 3940 ( 3941 nvswitch_device *device, 3942 NvU32 link 3943 ) 3944 { 3945 ls10_device *chip_device = NVSWITCH_GET_CHIP_DEVICE_LS10(device); 3946 NVSWITCH_INTERRUPT_LOG_TYPE report = { 0 }; 3947 NvU32 pending, bit, contain, unhandled; 3948 NVSWITCH_RAW_ERROR_LOG_TYPE data = {0, { 0 }}; 3949 INFOROM_NVS_ECC_ERROR_EVENT err_event = {0}; 3950 3951 report.raw_pending = NVSWITCH_ENG_RD32(device, NPORT, , link, _REDUCTIONTSTATE, _ERR_STATUS_0); 3952 report.raw_enable = NVSWITCH_ENG_RD32(device, NPORT, , link, _REDUCTIONTSTATE, _ERR_FATAL_REPORT_EN_0); 3953 report.mask = report.raw_enable & chip_device->intr_mask.mc_tstate.fatal; 3954 pending = report.raw_pending & report.mask; 3955 3956 if (pending == 0) 3957 { 3958 return -NVL_NOT_FOUND; 3959 } 3960 3961 unhandled = pending; 3962 report.raw_first = NVSWITCH_ENG_RD32(device, NPORT, , link, _REDUCTIONTSTATE, _ERR_FIRST_0); 3963 contain = NVSWITCH_ENG_RD32(device, NPORT, , link, _REDUCTIONTSTATE, _ERR_CONTAIN_EN_0); 3964 _nvswitch_collect_error_info_ls10(device, link, 3965 NVSWITCH_RAW_ERROR_LOG_DATA_FLAG_RED_TIME, 3966 &data); 3967 3968 bit = DRF_NUM(_REDUCTIONTSTATE, _ERR_STATUS_0, _TAGPOOL_ECC_DBE_ERR, 1); 3969 if (nvswitch_test_flags(pending, bit)) 3970 { 3971 NvBool bAddressValid = NV_FALSE; 3972 NvU32 address = 0; 3973 NvU32 addressValid = NVSWITCH_ENG_RD32(device, NPORT, , 
link, _REDUCTIONTSTATE, 3974 _ERR_TAGPOOL_ECC_ERROR_ADDRESS_VALID); 3975 3976 if (FLD_TEST_DRF(_REDUCTIONTSTATE_ERR_TAGPOOL, _ECC_ERROR_ADDRESS_VALID, _VALID, _VALID, 3977 addressValid)) 3978 { 3979 address = NVSWITCH_ENG_RD32(device, NPORT, , link, _REDUCTIONTSTATE, 3980 _ERR_TAGPOOL_ECC_ERROR_ADDRESS); 3981 bAddressValid = NV_TRUE; 3982 } 3983 3984 report.data[1] = NVSWITCH_ENG_RD32(device, NPORT, , link, _REDUCTIONTSTATE, _ERR_TAGPOOL_ECC_ERROR_COUNTER); 3985 NVSWITCH_ENG_WR32(device, NPORT, , link, _REDUCTIONTSTATE, _ERR_TAGPOOL_ECC_ERROR_COUNTER, 3986 DRF_DEF(_REDUCTIONTSTATE, _ERR_TAGPOOL_ECC_ERROR_COUNTER, _ERROR_COUNT, _INIT)); 3987 NVSWITCH_REPORT_CONTAIN(_HW_NPORT_REDUCTIONTSTATE_TAGPOOL_ECC_DBE_ERR, "Red TS tag store fatal ECC", NV_FALSE); 3988 _nvswitch_collect_error_info_ls10(device, link, 3989 NVSWITCH_RAW_ERROR_LOG_DATA_FLAG_EGRESS_TIME | 3990 NVSWITCH_RAW_ERROR_LOG_DATA_FLAG_EGRESS_MISC | 3991 NVSWITCH_RAW_ERROR_LOG_DATA_FLAG_EGRESS_HDR, 3992 &data); 3993 NVSWITCH_REPORT_CONTAIN_DATA(_HW_NPORT_REDUCTIONTSTATE_TAGPOOL_ECC_DBE_ERR, data); 3994 nvswitch_clear_flags(&unhandled, bit); 3995 3996 _nvswitch_construct_ecc_error_event_ls10(&err_event, 3997 NVSWITCH_ERR_HW_NPORT_REDUCTIONTSTATE_TAGPOOL_ECC_DBE_ERR, link, bAddressValid, 3998 address, NV_TRUE, 1); 3999 4000 nvswitch_inforom_ecc_log_err_event(device, &err_event); 4001 4002 // Clear associated LIMIT_ERR interrupt 4003 if (report.raw_pending & DRF_NUM(_REDUCTIONTSTATE, _ERR_STATUS_0, _TAGPOOL_ECC_LIMIT_ERR, 1)) 4004 { 4005 NVSWITCH_ENG_WR32(device, NPORT, , link, _REDUCTIONTSTATE, _ERR_STATUS_0, 4006 DRF_NUM(_REDUCTIONTSTATE, _ERR_STATUS_0, _TAGPOOL_ECC_LIMIT_ERR, 1)); 4007 } 4008 } 4009 4010 bit = DRF_NUM(_REDUCTIONTSTATE, _ERR_STATUS_0, _CRUMBSTORE_ECC_DBE_ERR, 1); 4011 if (nvswitch_test_flags(pending, bit)) 4012 { 4013 NvBool bAddressValid = NV_FALSE; 4014 NvU32 address = 0; 4015 NvU32 addressValid = NVSWITCH_ENG_RD32(device, NPORT, , link, _REDUCTIONTSTATE, 4016 
_ERR_CRUMBSTORE_ECC_ERROR_ADDRESS_VALID);

        if (FLD_TEST_DRF(_REDUCTIONTSTATE_ERR_CRUMBSTORE, _ECC_ERROR_ADDRESS_VALID, _VALID, _VALID,
                         addressValid))
        {
            address = NVSWITCH_ENG_RD32(device, NPORT, , link, _REDUCTIONTSTATE,
                                        _ERR_CRUMBSTORE_ECC_ERROR_ADDRESS);
            bAddressValid = NV_TRUE;
        }

        report.data[1] = NVSWITCH_ENG_RD32(device, NPORT, , link, _REDUCTIONTSTATE, _ERR_CRUMBSTORE_ECC_ERROR_COUNTER);
        // Reset the ECC error counter after capturing it in the report
        NVSWITCH_ENG_WR32(device, NPORT, , link, _REDUCTIONTSTATE, _ERR_CRUMBSTORE_ECC_ERROR_COUNTER,
            DRF_DEF(_REDUCTIONTSTATE, _ERR_CRUMBSTORE_ECC_ERROR_COUNTER, _ERROR_COUNT, _INIT));
        NVSWITCH_REPORT_CONTAIN(_HW_NPORT_REDUCTIONTSTATE_CRUMBSTORE_ECC_DBE_ERR, "Red TS crumbstore fatal ECC", NV_FALSE);
        _nvswitch_collect_error_info_ls10(device, link,
            NVSWITCH_RAW_ERROR_LOG_DATA_FLAG_INGRESS_TIME |
            NVSWITCH_RAW_ERROR_LOG_DATA_FLAG_INGRESS_MISC |
            NVSWITCH_RAW_ERROR_LOG_DATA_FLAG_INGRESS_HDR,
            &data);
        NVSWITCH_REPORT_CONTAIN_DATA(_HW_NPORT_REDUCTIONTSTATE_CRUMBSTORE_ECC_DBE_ERR, data);
        nvswitch_clear_flags(&unhandled, bit);

        // Log the DBE to the InfoROM ECC error log
        _nvswitch_construct_ecc_error_event_ls10(&err_event,
            NVSWITCH_ERR_HW_NPORT_REDUCTIONTSTATE_CRUMBSTORE_ECC_DBE_ERR, link, bAddressValid,
            address, NV_TRUE, 1);

        nvswitch_inforom_ecc_log_err_event(device, &err_event);

        // Clear associated LIMIT_ERR interrupt
        if (report.raw_pending & DRF_NUM(_REDUCTIONTSTATE, _ERR_STATUS_0, _CRUMBSTORE_ECC_LIMIT_ERR, 1))
        {
            NVSWITCH_ENG_WR32(device, NPORT, , link, _REDUCTIONTSTATE, _ERR_STATUS_0,
                DRF_NUM(_REDUCTIONTSTATE, _ERR_STATUS_0, _CRUMBSTORE_ECC_LIMIT_ERR, 1));
        }
    }

    bit = DRF_NUM(_REDUCTIONTSTATE, _ERR_STATUS_0, _CRUMBSTORE_BUF_OVERWRITE_ERR, 1);
    if (nvswitch_test_flags(pending, bit))
    {
        NVSWITCH_REPORT_CONTAIN(_HW_NPORT_REDUCTIONTSTATE_CRUMBSTORE_BUF_OVERWRITE_ERR, "Red crumbstore overwrite", NV_FALSE);
        _nvswitch_collect_error_info_ls10(device, link,
            NVSWITCH_RAW_ERROR_LOG_DATA_FLAG_INGRESS_TIME |
            NVSWITCH_RAW_ERROR_LOG_DATA_FLAG_INGRESS_MISC |
            NVSWITCH_RAW_ERROR_LOG_DATA_FLAG_INGRESS_HDR,
            &data);
        NVSWITCH_REPORT_CONTAIN_DATA(_HW_NPORT_REDUCTIONTSTATE_CRUMBSTORE_BUF_OVERWRITE_ERR, data);
        nvswitch_clear_flags(&unhandled, bit);
    }

    NVSWITCH_UNHANDLED_CHECK(device, unhandled);

    // Disable interrupts that have occurred after fatal error.
    // This helps prevent an interrupt storm if HW keeps triggering unnecessary stream of interrupts.
    if (device->link[link].fatal_error_occurred)
    {
        NVSWITCH_ENG_WR32(device, NPORT, , link, _REDUCTIONTSTATE, _ERR_FATAL_REPORT_EN_0,
            report.raw_enable ^ pending);
    }

    if (report.raw_first & report.mask)
    {
        NVSWITCH_ENG_WR32(device, NPORT, , link, _REDUCTIONTSTATE, _ERR_FIRST_0,
            report.raw_first & report.mask);
    }

    // W1C the serviced pending bits
    NVSWITCH_ENG_WR32(device, NPORT, , link, _REDUCTIONTSTATE, _ERR_STATUS_0, pending);

    if (unhandled != 0)
    {
        return -NVL_MORE_PROCESSING_REQUIRED;
    }

    return NVL_SUCCESS;
}

//
// Service fatal interrupts for all sub-units of an NPORT (route, ingress,
// egress, tstate, sourcetrack, multicast tstate, reduction tstate).
// Returns NVL_SUCCESS if at least one sub-unit serviced an interrupt;
// -NVL_MORE_PROCESSING_REQUIRED if none of them found anything to handle.
//
static NvlStatus
_nvswitch_service_nport_fatal_ls10
(
    nvswitch_device *device,
    NvU32 link
)
{
    NvlStatus status[7];

    status[0] = _nvswitch_service_route_fatal_ls10(device, link);
    status[1] = _nvswitch_service_ingress_fatal_ls10(device, link);
    status[2] = _nvswitch_service_egress_fatal_ls10(device, link);
    status[3] = _nvswitch_service_tstate_fatal_ls10(device, link);
    status[4] = _nvswitch_service_sourcetrack_fatal_ls10(device, link);
    status[5] = _nvswitch_service_multicast_fatal_ls10(device, link);
    status[6] = _nvswitch_service_reduction_fatal_ls10(device, link);

    if ((status[0] != NVL_SUCCESS) &&
        (status[1] != NVL_SUCCESS) &&
        (status[2] != NVL_SUCCESS) &&
        (status[3] != NVL_SUCCESS) &&
        (status[4] != NVL_SUCCESS) &&
        (status[5] != NVL_SUCCESS) &&
        (status[6] != NVL_SUCCESS))
    {
        return -NVL_MORE_PROCESSING_REQUIRED;
    }

    return NVL_SUCCESS;
}

//
// Service fatal interrupts for an NPG: check each of the (up to 4) NPORT
// DEVn fatal status bits and dispatch to the per-NPORT fatal handler.
//
static NvlStatus
_nvswitch_service_npg_fatal_ls10
(
    nvswitch_device *device,
    NvU32 npg
)
{
    NvU32 pending, mask, bit, unhandled;
    NvU32 nport;
    NvU32 link;

    pending = NVSWITCH_ENG_RD32(device, NPG, , npg, _NPG, _NPG_INTERRUPT_STATUS);

    if (pending == 0)
    {
        return -NVL_NOT_FOUND;
    }

    // Only the per-device FATAL status encodings are serviced here
    mask =
        DRF_DEF(_NPG, _NPG_INTERRUPT_STATUS, _DEV0_INT_STATUS, _FATAL) |
        DRF_DEF(_NPG, _NPG_INTERRUPT_STATUS, _DEV1_INT_STATUS, _FATAL) |
        DRF_DEF(_NPG, _NPG_INTERRUPT_STATUS, _DEV2_INT_STATUS, _FATAL) |
        DRF_DEF(_NPG, _NPG_INTERRUPT_STATUS, _DEV3_INT_STATUS, _FATAL);
    pending &= mask;
    unhandled = pending;

    for (nport = 0; nport < NVSWITCH_NPORT_PER_NPG_LS10; nport++)
    {
        switch (nport)
        {
            case 0:
                bit = DRF_DEF(_NPG, _NPG_INTERRUPT_STATUS, _DEV0_INT_STATUS, _FATAL);
                break;
            case 1:
                bit = DRF_DEF(_NPG, _NPG_INTERRUPT_STATUS, _DEV1_INT_STATUS, _FATAL);
                break;
            case 2:
                bit = DRF_DEF(_NPG, _NPG_INTERRUPT_STATUS, _DEV2_INT_STATUS, _FATAL);
                break;
            case 3:
                bit = DRF_DEF(_NPG, _NPG_INTERRUPT_STATUS, _DEV3_INT_STATUS, _FATAL);
                break;
        }
        if (nvswitch_test_flags(pending, bit))
        {
            link = NPORT_TO_LINK_LS10(device, npg, nport);
            if (NVSWITCH_ENG_IS_VALID(device, NPORT, link))
            {
                if (_nvswitch_service_nport_fatal_ls10(device, link) == NVL_SUCCESS)
                {
                    nvswitch_clear_flags(&unhandled, bit);
                }
            }
        }
    }

    NVSWITCH_UNHANDLED_CHECK(device, unhandled);

    if (unhandled != 0)
    {
        return -NVL_MORE_PROCESSING_REQUIRED;
    }

    return NVL_SUCCESS;
}

//
// Service nonfatal interrupts for all sub-units of an NPORT.
// Mirrors _nvswitch_service_nport_fatal_ls10.
//
static NvlStatus
_nvswitch_service_nport_nonfatal_ls10
(
    nvswitch_device *device,
    NvU32 link
)
{
    NvlStatus status[7];

    status[0] = _nvswitch_service_route_nonfatal_ls10(device, link);
    status[1] = _nvswitch_service_ingress_nonfatal_ls10(device, link);
    status[2] = _nvswitch_service_egress_nonfatal_ls10(device, link);
    status[3] = _nvswitch_service_tstate_nonfatal_ls10(device, link);
    status[4] = _nvswitch_service_sourcetrack_nonfatal_ls10(device, link);
    status[5] = _nvswitch_service_multicast_nonfatal_ls10(device, link);
    status[6] = _nvswitch_service_reduction_nonfatal_ls10(device, link);

    if ((status[0] != NVL_SUCCESS) &&
        (status[1] != NVL_SUCCESS) &&
        (status[2] != NVL_SUCCESS) &&
        (status[3] != NVL_SUCCESS) &&
        (status[4] != NVL_SUCCESS) &&
        (status[5] != NVL_SUCCESS) &&
        (status[6] != NVL_SUCCESS))
    {
        return -NVL_MORE_PROCESSING_REQUIRED;
    }

    return NVL_SUCCESS;
}

//
// Service nonfatal interrupts for an NPG: check each of the (up to 4) NPORT
// DEVn nonfatal status bits and dispatch to the per-NPORT nonfatal handler.
//
static NvlStatus
_nvswitch_service_npg_nonfatal_ls10
(
    nvswitch_device *device,
    NvU32 npg
)
{
    NvU32 pending, mask, bit, unhandled;
    NvU32 nport;
    NvU32 link;

    pending = NVSWITCH_ENG_RD32(device, NPG, , npg, _NPG, _NPG_INTERRUPT_STATUS);

    if (pending == 0)
    {
        return -NVL_NOT_FOUND;
    }

    // Only the per-device NONFATAL status encodings are serviced here
    mask =
        DRF_DEF(_NPG, _NPG_INTERRUPT_STATUS, _DEV0_INT_STATUS, _NONFATAL) |
        DRF_DEF(_NPG, _NPG_INTERRUPT_STATUS, _DEV1_INT_STATUS, _NONFATAL) |
        DRF_DEF(_NPG, _NPG_INTERRUPT_STATUS, _DEV2_INT_STATUS, _NONFATAL) |
        DRF_DEF(_NPG, _NPG_INTERRUPT_STATUS, _DEV3_INT_STATUS, _NONFATAL);
    pending &= mask;
    unhandled = pending;

    for (nport = 0; nport < NVSWITCH_NPORT_PER_NPG_LS10; nport++)
    {
        switch (nport)
        {
            case 0:
                bit = DRF_DEF(_NPG, _NPG_INTERRUPT_STATUS, _DEV0_INT_STATUS, _NONFATAL);
                break;
            case 1:
                bit = DRF_DEF(_NPG, _NPG_INTERRUPT_STATUS, _DEV1_INT_STATUS, _NONFATAL);
                break;
            case 2:
                bit = DRF_DEF(_NPG, _NPG_INTERRUPT_STATUS, _DEV2_INT_STATUS, _NONFATAL);
                break;
            case 3:
                bit = DRF_DEF(_NPG, _NPG_INTERRUPT_STATUS, _DEV3_INT_STATUS, _NONFATAL);
                break;
        }
        if (nvswitch_test_flags(pending, bit))
        {
            link = NPORT_TO_LINK_LS10(device, npg, nport);
            if (NVSWITCH_ENG_IS_VALID(device, NPORT, link))
            {
                if (_nvswitch_service_nport_nonfatal_ls10(device, link) == NVL_SUCCESS)
                {
                    nvswitch_clear_flags(&unhandled, bit);
                }
            }
        }
    }

    NVSWITCH_UNHANDLED_CHECK(device, unhandled);

    if (unhandled != 0)
    {
        return -NVL_MORE_PROCESSING_REQUIRED;
    }

    return NVL_SUCCESS;
}

//
// Service NVLDL fatal interrupts for every enabled link local to the given
// NVLIPT instance, skipping links that are in reset or whose RX/TX clocks
// are off (their registers are inaccessible). Links that were serviced are
// accumulated into a runtime-error mask which is forwarded to SMBPBI.
//
static NvlStatus
_nvswitch_service_nvldl_fatal_ls10
(
    nvswitch_device *device,
    NvU32 nvlipt_instance
)
{
    NvU64 enabledLinkMask, localLinkMask, localEnabledLinkMask, runtimeErrorMask = 0;
    NvU32 i;
    nvlink_link *link;
    NvU32 clocksMask = NVSWITCH_PER_LINK_CLOCK_SET(RXCLK) | NVSWITCH_PER_LINK_CLOCK_SET(TXCLK);
    NvlStatus status = -NVL_MORE_PROCESSING_REQUIRED;
    NVSWITCH_LINK_TRAINING_ERROR_INFO linkTrainingErrorInfo = { 0 };
    NVSWITCH_LINK_RUNTIME_ERROR_INFO linkRuntimeErrorInfo = { 0 };

    enabledLinkMask = nvswitch_get_enabled_link_mask(device);
    localLinkMask = NVSWITCH_NVLIPT_GET_LOCAL_LINK_MASK64_LS10(nvlipt_instance);
    localEnabledLinkMask = enabledLinkMask & localLinkMask;

    FOR_EACH_INDEX_IN_MASK(64, i, localEnabledLinkMask)
    {
        link = nvswitch_get_link(device, i);
        if (link == NULL)
        {
            // An interrupt on an invalid link should never occur
            NVSWITCH_ASSERT(link != NULL);
            continue;
        }

        if (NVSWITCH_GET_LINK_ENG_INST(device, i, NVLIPT) != nvlipt_instance)
        {
            NVSWITCH_ASSERT(0);
            break;
        }

        if (nvswitch_is_link_in_reset(device, link) ||
            !nvswitch_are_link_clocks_on_ls10(device, link, clocksMask))
        {
            continue;
        }

        if (device->hal.nvswitch_service_nvldl_fatal_link(device, nvlipt_instance, i) == NVL_SUCCESS)
        {
            runtimeErrorMask |= NVBIT64(i);
            status = NVL_SUCCESS;
        }
    }
    FOR_EACH_INDEX_IN_MASK_END;

    linkTrainingErrorInfo.isValid = NV_FALSE;
    linkRuntimeErrorInfo.isValid = NV_TRUE;
    linkRuntimeErrorInfo.mask0 = runtimeErrorMask;

    // Check runtimeErrorMask is non-zero before consuming it further.
    if ((runtimeErrorMask != 0) &&
        (nvswitch_smbpbi_set_link_error_info(device,
            &linkTrainingErrorInfo, &linkRuntimeErrorInfo) != NVL_SUCCESS))
    {
        NVSWITCH_PRINT(device, ERROR,
                       "%s: NVLDL[0x%x, 0x%llx]: Unable to send Runtime Error bitmask: 0x%llx,\n",
                       __FUNCTION__,
                       nvlipt_instance, localLinkMask,
                       runtimeErrorMask);
    }

    return status;
}

//
// Service NVLTLC TX_SYS fatal interrupts for the given link.
//
static NvlStatus
_nvswitch_service_nvltlc_tx_sys_fatal_ls10
(
    nvswitch_device *device,
    NvU32 nvlipt_instance,
    NvU32 link
)
{
    NvU32 pending, bit, unhandled;
    NVSWITCH_INTERRUPT_LOG_TYPE report = { 0 };

    report.raw_pending = NVSWITCH_LINK_RD32_LS10(device, link, NVLTLC, _NVLTLC_TX_SYS, _ERR_STATUS_0);
    report.raw_enable = NVSWITCH_LINK_RD32_LS10(device, link, NVLTLC, _NVLTLC_TX_SYS, _ERR_FATAL_REPORT_EN_0);
    report.mask = report.raw_enable;
    pending = report.raw_pending & report.mask;

    if (pending == 0)
    {
        return -NVL_NOT_FOUND;
    }

    unhandled = pending;
    report.raw_first = NVSWITCH_LINK_RD32_LS10(device, link, NVLTLC, _NVLTLC_TX_SYS, _ERR_FIRST_0);

    bit = DRF_NUM(_NVLTLC_TX_SYS, _ERR_STATUS_0, _NCISOC_PARITY_ERR, 1);
    if (nvswitch_test_flags(pending, bit))
    {
        NVSWITCH_REPORT_FATAL(_HW_NVLTLC_TX_SYS_NCISOC_PARITY_ERR, "NCISOC Parity Error", NV_FALSE);
        nvswitch_clear_flags(&unhandled, bit);
    }

    bit = DRF_NUM(_NVLTLC_TX_SYS, _ERR_STATUS_0, _NCISOC_HDR_ECC_DBE_ERR, 1);
    if (nvswitch_test_flags(pending,
bit))
    {
        NVSWITCH_REPORT_FATAL(_HW_NVLTLC_TX_SYS_NCISOC_HDR_ECC_DBE_ERR, "NCISOC HDR ECC DBE Error", NV_FALSE);
        nvswitch_clear_flags(&unhandled, bit);
    }

    bit = DRF_NUM(_NVLTLC_TX_SYS, _ERR_STATUS_0, _NCISOC_DAT_ECC_DBE_ERR, 1);
    if (nvswitch_test_flags(pending, bit))
    {
        NVSWITCH_REPORT_FATAL(_HW_NVLTLC_TX_SYS_NCISOC_DAT_ECC_DBE_ERR, "NCISOC DAT ECC DBE Error", NV_FALSE);
        nvswitch_clear_flags(&unhandled, bit);
    }

    bit = DRF_NUM(_NVLTLC_TX_SYS, _ERR_STATUS_0, _NCISOC_ECC_LIMIT_ERR, 1);
    if (nvswitch_test_flags(pending, bit))
    {
        NVSWITCH_REPORT_FATAL(_HW_NVLTLC_TX_SYS_NCISOC_ECC_LIMIT_ERR, "NCISOC ECC Limit Error", NV_FALSE);
        nvswitch_clear_flags(&unhandled, bit);
    }

    bit = DRF_NUM(_NVLTLC_TX_SYS, _ERR_STATUS_0, _TXPOISONDET, 1);
    if (nvswitch_test_flags(pending, bit))
    {
        NVSWITCH_REPORT_FATAL(_HW_NVLTLC_TXPOISONDET, "Poison Error", NV_FALSE);
        nvswitch_clear_flags(&unhandled, bit);
    }

    bit = DRF_NUM(_NVLTLC_TX_SYS, _ERR_STATUS_0, _TXRSPSTATUS_HW_ERR, 1);
    if (nvswitch_test_flags(pending, bit))
    {
        NVSWITCH_REPORT_FATAL(_HW_NVLTLC_TX_SYS_TXRSPSTATUS_HW_ERR, "TX Response Status HW Error", NV_FALSE);
        nvswitch_clear_flags(&unhandled, bit);
    }

    bit = DRF_NUM(_NVLTLC_TX_SYS, _ERR_STATUS_0, _TXRSPSTATUS_UR_ERR, 1);
    if (nvswitch_test_flags(pending, bit))
    {
        NVSWITCH_REPORT_FATAL(_HW_NVLTLC_TX_SYS_TXRSPSTATUS_UR_ERR, "TX Response Status UR Error", NV_FALSE);
        nvswitch_clear_flags(&unhandled, bit);
    }

    bit = DRF_NUM(_NVLTLC_TX_SYS, _ERR_STATUS_0, _TXRSPSTATUS_PRIV_ERR, 1);
    if (nvswitch_test_flags(pending, bit))
    {
        NVSWITCH_REPORT_FATAL(_HW_NVLTLC_TX_SYS_TXRSPSTATUS_PRIV_ERR, "TX Response Status PRIV Error", NV_FALSE);
        nvswitch_clear_flags(&unhandled, bit);
    }

    if (report.raw_first & report.mask)
    {
        NVSWITCH_LINK_WR32_LS10(device, link, NVLTLC, _NVLTLC_TX_SYS, _ERR_FIRST_0,
            report.raw_first & report.mask);
    }

    NVSWITCH_UNHANDLED_CHECK(device, unhandled);

    // Disable interrupts that have occurred after fatal error.
    if (device->link[link].fatal_error_occurred)
    {
        NVSWITCH_LINK_WR32_LS10(device, link, NVLTLC, _NVLTLC_TX_SYS, _ERR_FATAL_REPORT_EN_0,
            report.raw_enable ^ pending);
    }

    // W1C the serviced pending bits
    NVSWITCH_LINK_WR32_LS10(device, link, NVLTLC, _NVLTLC_TX_SYS, _ERR_STATUS_0, pending);

    if (unhandled != 0)
    {
        NVSWITCH_PRINT(device, WARN,
                "%s: Unhandled NVLTLC_TX_SYS interrupts, link: %d pending: 0x%x enabled: 0x%x.\n",
                 __FUNCTION__, link, pending, report.raw_enable);
        return -NVL_MORE_PROCESSING_REQUIRED;
    }

    return NVL_SUCCESS;
}

//
// Service NVLTLC RX_SYS fatal interrupts for the given link.
//
static NvlStatus
_nvswitch_service_nvltlc_rx_sys_fatal_ls10
(
    nvswitch_device *device,
    NvU32 nvlipt_instance,
    NvU32 link
)
{
    NvU32 pending, bit, unhandled;
    NVSWITCH_INTERRUPT_LOG_TYPE report = { 0 };

    report.raw_pending = NVSWITCH_LINK_RD32_LS10(device, link, NVLTLC, _NVLTLC_RX_SYS, _ERR_STATUS_0);
    report.raw_enable = NVSWITCH_LINK_RD32_LS10(device, link, NVLTLC, _NVLTLC_RX_SYS, _ERR_FATAL_REPORT_EN_0);
    report.mask = report.raw_enable;
    pending = report.raw_pending & report.mask;

    if (pending == 0)
    {
        return -NVL_NOT_FOUND;
    }

    unhandled = pending;
    report.raw_first = NVSWITCH_LINK_RD32_LS10(device, link, NVLTLC, _NVLTLC_RX_SYS, _ERR_FIRST_0);

    bit = DRF_NUM(_NVLTLC_RX_SYS, _ERR_STATUS_0, _NCISOC_PARITY_ERR, 1);
    if (nvswitch_test_flags(pending, bit))
    {
        NVSWITCH_REPORT_FATAL(_HW_NVLTLC_RX_SYS_NCISOC_PARITY_ERR, "NCISOC Parity Error", NV_FALSE);
        nvswitch_clear_flags(&unhandled, bit);
    }

    bit = DRF_NUM(_NVLTLC_RX_SYS, _ERR_STATUS_0, _HDR_RAM_ECC_DBE_ERR, 1);
    if (nvswitch_test_flags(pending, bit))
    {
        NVSWITCH_REPORT_FATAL(_HW_NVLTLC_RX_SYS_HDR_RAM_ECC_DBE_ERR, "HDR RAM ECC DBE Error", NV_FALSE);
        nvswitch_clear_flags(&unhandled, bit);
    }

    bit = DRF_NUM(_NVLTLC_RX_SYS, _ERR_STATUS_0, _HDR_RAM_ECC_LIMIT_ERR, 1);
    if (nvswitch_test_flags(pending, bit))
    {
        NVSWITCH_REPORT_FATAL(_HW_NVLTLC_RX_SYS_HDR_RAM_ECC_LIMIT_ERR, "HDR RAM ECC Limit Error", NV_FALSE);
        nvswitch_clear_flags(&unhandled, bit);
    }

    bit = DRF_NUM(_NVLTLC_RX_SYS, _ERR_STATUS_0, _DAT0_RAM_ECC_DBE_ERR, 1);
    if (nvswitch_test_flags(pending, bit))
    {
        NVSWITCH_REPORT_FATAL(_HW_NVLTLC_RX_SYS_DAT0_RAM_ECC_DBE_ERR, "DAT0 RAM ECC DBE Error", NV_FALSE);
        nvswitch_clear_flags(&unhandled, bit);
    }

    bit = DRF_NUM(_NVLTLC_RX_SYS, _ERR_STATUS_0, _DAT0_RAM_ECC_LIMIT_ERR, 1);
    if (nvswitch_test_flags(pending, bit))
    {
        NVSWITCH_REPORT_FATAL(_HW_NVLTLC_RX_SYS_DAT0_RAM_ECC_LIMIT_ERR, "DAT0 RAM ECC Limit Error", NV_FALSE);
        nvswitch_clear_flags(&unhandled, bit);
    }

    bit = DRF_NUM(_NVLTLC_RX_SYS, _ERR_STATUS_0, _DAT1_RAM_ECC_DBE_ERR, 1);
    if (nvswitch_test_flags(pending, bit))
    {
        NVSWITCH_REPORT_FATAL(_HW_NVLTLC_RX_SYS_DAT1_RAM_ECC_DBE_ERR, "DAT1 RAM ECC DBE Error", NV_FALSE);
        nvswitch_clear_flags(&unhandled, bit);
    }

    bit = DRF_NUM(_NVLTLC_RX_SYS, _ERR_STATUS_0, _DAT1_RAM_ECC_LIMIT_ERR, 1);
    if (nvswitch_test_flags(pending, bit))
    {
        NVSWITCH_REPORT_FATAL(_HW_NVLTLC_RX_SYS_DAT1_RAM_ECC_LIMIT_ERR, "DAT1 RAM ECC Limit Error", NV_FALSE);
        nvswitch_clear_flags(&unhandled, bit);
    }

    if (report.raw_first & report.mask)
    {
        NVSWITCH_LINK_WR32_LS10(device, link, NVLTLC, _NVLTLC_RX_SYS, _ERR_FIRST_0,
            report.raw_first & report.mask);
    }

    NVSWITCH_UNHANDLED_CHECK(device, unhandled);

    // Disable interrupts that have occurred after fatal error.
    if (device->link[link].fatal_error_occurred)
    {
        NVSWITCH_LINK_WR32_LS10(device, link, NVLTLC, _NVLTLC_RX_SYS, _ERR_FATAL_REPORT_EN_0,
            report.raw_enable ^ pending);
    }

    // W1C the serviced pending bits
    NVSWITCH_LINK_WR32_LS10(device, link, NVLTLC, _NVLTLC_RX_SYS, _ERR_STATUS_0, pending);

    if (unhandled != 0)
    {
        NVSWITCH_PRINT(device, WARN,
                "%s: Unhandled NVLTLC_RX_SYS interrupts, link: %d pending: 0x%x enabled: 0x%x.\n",
                 __FUNCTION__, link, pending, report.raw_enable);
        return -NVL_MORE_PROCESSING_REQUIRED;
    }

    return NVL_SUCCESS;
}

//
// Service NVLTLC TX_LNK fatal interrupts (status register 0) for the given link.
//
static NvlStatus
_nvswitch_service_nvltlc_tx_lnk_fatal_0_ls10
(
    nvswitch_device *device,
    NvU32 nvlipt_instance,
    NvU32 link
)
{
    NvU32 pending, bit, unhandled;
    NVSWITCH_INTERRUPT_LOG_TYPE report = { 0 };

    report.raw_pending = NVSWITCH_LINK_RD32_LS10(device, link, NVLTLC, _NVLTLC_TX_LNK, _ERR_STATUS_0);
    report.raw_enable = NVSWITCH_LINK_RD32_LS10(device, link, NVLTLC, _NVLTLC_TX_LNK, _ERR_FATAL_REPORT_EN_0);
    report.mask = report.raw_enable;
    pending = report.raw_pending & report.mask;

    if (pending == 0)
    {
        return -NVL_NOT_FOUND;
    }

    unhandled = pending;
    report.raw_first = NVSWITCH_LINK_RD32_LS10(device, link, NVLTLC, _NVLTLC_TX_LNK, _ERR_FIRST_0);

    bit = DRF_NUM(_NVLTLC_TX_LNK, _ERR_STATUS_0, _TXDLCREDITPARITYERR, 1);
    if (nvswitch_test_flags(pending, bit))
    {
        NVSWITCH_REPORT_FATAL(_HW_NVLTLC_TXDLCREDITPARITYERR, "TX DL Credit Parity Error", NV_FALSE);
        nvswitch_clear_flags(&unhandled, bit);
    }

    bit = DRF_NUM(_NVLTLC_TX_LNK, _ERR_STATUS_0, _CREQ_RAM_HDR_ECC_DBE_ERR, 1);
    if (nvswitch_test_flags(pending, bit))
    {
        NVSWITCH_REPORT_FATAL(_HW_NVLTLC_TX_LNK_CREQ_RAM_HDR_ECC_DBE_ERR, "CREQ RAM HDR ECC DBE Error", NV_FALSE);
        nvswitch_clear_flags(&unhandled, bit);
    }

    bit = DRF_NUM(_NVLTLC_TX_LNK, _ERR_STATUS_0, _RSP_RAM_HDR_ECC_DBE_ERR, 1);
    if (nvswitch_test_flags(pending, bit))
    {
        NVSWITCH_REPORT_FATAL(_HW_NVLTLC_TX_LNK_RSP_RAM_HDR_ECC_DBE_ERR, "Response RAM HDR ECC DBE Error", NV_FALSE);
        nvswitch_clear_flags(&unhandled, bit);
    }

    bit = DRF_NUM(_NVLTLC_TX_LNK, _ERR_STATUS_0, _COM_RAM_HDR_ECC_DBE_ERR, 1);
    if (nvswitch_test_flags(pending, bit))
    {
        NVSWITCH_REPORT_FATAL(_HW_NVLTLC_TX_LNK_COM_RAM_HDR_ECC_DBE_ERR, "COM RAM HDR ECC DBE Error", NV_FALSE);
        nvswitch_clear_flags(&unhandled, bit);
    }

    bit = DRF_NUM(_NVLTLC_TX_LNK, _ERR_STATUS_0, _RSP1_RAM_HDR_ECC_DBE_ERR, 1);
    if (nvswitch_test_flags(pending, bit))
    {
        NVSWITCH_REPORT_FATAL(_HW_NVLTLC_TX_LNK_RSP1_RAM_HDR_ECC_DBE_ERR, "RSP1 RAM HDR ECC DBE Error", NV_FALSE);
        nvswitch_clear_flags(&unhandled, bit);
    }

    bit = DRF_NUM(_NVLTLC_TX_LNK, _ERR_STATUS_0, _RSP1_RAM_DAT_ECC_DBE_ERR, 1);
    if (nvswitch_test_flags(pending, bit))
    {
        NVSWITCH_REPORT_FATAL(_HW_NVLTLC_TX_LNK_RSP1_RAM_DAT_ECC_DBE_ERR, "RSP1 RAM DAT ECC DBE Error", NV_FALSE);
        nvswitch_clear_flags(&unhandled, bit);
    }

    NVSWITCH_UNHANDLED_CHECK(device, unhandled);

    // Disable interrupts that have occurred after fatal error.
    if (device->link[link].fatal_error_occurred)
    {
        NVSWITCH_LINK_WR32_LS10(device, link, NVLTLC, _NVLTLC_TX_LNK, _ERR_FATAL_REPORT_EN_0,
            report.raw_enable ^ pending);
    }

    if (report.raw_first & report.mask)
    {
        NVSWITCH_LINK_WR32_LS10(device, link, NVLTLC, _NVLTLC_TX_LNK, _ERR_FIRST_0,
            report.raw_first & report.mask);
    }
    // W1C the serviced pending bits
    NVSWITCH_LINK_WR32_LS10(device, link, NVLTLC, _NVLTLC_TX_LNK, _ERR_STATUS_0, pending);

    if (unhandled != 0)
    {
        NVSWITCH_PRINT(device, WARN,
                "%s: Unhandled NVLTLC_TX_LNK _0 interrupts, link: %d pending: 0x%x enabled: 0x%x.\n",
                 __FUNCTION__, link, pending, report.raw_enable);
        return -NVL_MORE_PROCESSING_REQUIRED;
    }

    return NVL_SUCCESS;
}

//
// Service NVLTLC RX_LNK fatal interrupts (status register 0) for the given link.
//
static NvlStatus
_nvswitch_service_nvltlc_rx_lnk_fatal_0_ls10
(
    nvswitch_device *device,
    NvU32 nvlipt_instance,
    NvU32 link
)
{
    NvU32 pending, bit, unhandled;
    NVSWITCH_INTERRUPT_LOG_TYPE report = { 0 };

    report.raw_pending = NVSWITCH_LINK_RD32_LS10(device, link, NVLTLC, _NVLTLC_RX_LNK, _ERR_STATUS_0);
    report.raw_enable = NVSWITCH_LINK_RD32_LS10(device, link, NVLTLC, _NVLTLC_RX_LNK, _ERR_FATAL_REPORT_EN_0);
    report.mask = report.raw_enable;
    pending = report.raw_pending & report.mask;
    if (pending == 0)
    {
        return -NVL_NOT_FOUND;
    }

    unhandled = pending;
    report.raw_first = NVSWITCH_LINK_RD32_LS10(device, link, NVLTLC, _NVLTLC_RX_LNK, _ERR_FIRST_0);

    bit = DRF_NUM(_NVLTLC_RX_LNK, _ERR_STATUS_0, _RXDLHDRPARITYERR, 1);
    if (nvswitch_test_flags(pending, bit))
    {
        NVSWITCH_REPORT_FATAL(_HW_NVLTLC_RXDLHDRPARITYERR, "RX DL HDR Parity Error", NV_FALSE);
        nvswitch_clear_flags(&unhandled, bit);
    }

    bit = DRF_NUM(_NVLTLC_RX_LNK, _ERR_STATUS_0, _RXDLDATAPARITYERR, 1);
    if (nvswitch_test_flags(pending, bit))
    {
        NVSWITCH_REPORT_FATAL(_HW_NVLTLC_RXDLDATAPARITYERR, "RX DL Data Parity Error", NV_FALSE);
        nvswitch_clear_flags(&unhandled, bit);
    }

    bit = DRF_NUM(_NVLTLC_RX_LNK, _ERR_STATUS_0, _RXDLCTRLPARITYERR, 1);
    if (nvswitch_test_flags(pending, bit))
    {
        NVSWITCH_REPORT_FATAL(_HW_NVLTLC_RXDLCTRLPARITYERR, "RX DL Ctrl Parity Error", NV_FALSE);
        nvswitch_clear_flags(&unhandled, bit);
    }

    bit = DRF_NUM(_NVLTLC_RX_LNK, _ERR_STATUS_0, _RXPKTLENERR, 1);
    if (nvswitch_test_flags(pending, bit))
    {
        NVSWITCH_REPORT_FATAL(_HW_NVLTLC_RXPKTLENERR, "RX Packet Length Error", NV_FALSE);
        nvswitch_clear_flags(&unhandled, bit);
    }

    bit = DRF_NUM(_NVLTLC_RX_LNK, _ERR_STATUS_0, _RSVCACHEATTRPROBEREQERR, 1);
    if (nvswitch_test_flags(pending, bit))
    {
        NVSWITCH_REPORT_FATAL(_HW_NVLTLC_RSVCACHEATTRPROBEREQERR, "RSV Packet Status Error", NV_FALSE);
        nvswitch_clear_flags(&unhandled, bit);
    }

    bit = DRF_NUM(_NVLTLC_RX_LNK, _ERR_STATUS_0, _RSVCACHEATTRPROBERSPERR, 1);
    if (nvswitch_test_flags(pending, bit))
    {
        NVSWITCH_REPORT_FATAL(_HW_NVLTLC_RSVCACHEATTRPROBERSPERR, "RSV CacheAttr Probe Rsp Error", NV_FALSE);
        nvswitch_clear_flags(&unhandled, bit);
    }

    bit = DRF_NUM(_NVLTLC_RX_LNK, _ERR_STATUS_0, _DATLENGTRMWREQMAXERR, 1);
    if (nvswitch_test_flags(pending, bit))
    {
        NVSWITCH_REPORT_FATAL(_HW_NVLTLC_DATLENGTRMWREQMAXERR, "Data Length RMW Req Max Error", NV_FALSE);
        nvswitch_clear_flags(&unhandled, bit);
    }

    bit = DRF_NUM(_NVLTLC_RX_LNK, _ERR_STATUS_0, _DATLENLTATRRSPMINERR, 1);
    if (nvswitch_test_flags(pending, bit))
    {
        NVSWITCH_REPORT_FATAL(_HW_NVLTLC_DATLENLTATRRSPMINERR, "Data Len Lt ATR RSP Min Error", NV_FALSE);
        nvswitch_clear_flags(&unhandled, bit);
    }

    bit = DRF_NUM(_NVLTLC_RX_LNK, _ERR_STATUS_0, _INVALIDCACHEATTRPOERR, 1);
    if (nvswitch_test_flags(pending, bit))
    {
        NVSWITCH_REPORT_FATAL(_HW_NVLTLC_INVALIDCACHEATTRPOERR, "Invalid Cache Attr PO Error",
NV_FALSE);
        nvswitch_clear_flags(&unhandled, bit);
    }

    bit = DRF_NUM(_NVLTLC_RX_LNK, _ERR_STATUS_0, _RXRSPSTATUS_HW_ERR, 1);
    if (nvswitch_test_flags(pending, bit))
    {
        NVSWITCH_REPORT_FATAL(_HW_NVLTLC_RX_LNK_RXRSPSTATUS_HW_ERR, "RX Rsp Status HW Error", NV_FALSE);
        nvswitch_clear_flags(&unhandled, bit);
    }

    bit = DRF_NUM(_NVLTLC_RX_LNK, _ERR_STATUS_0, _RXRSPSTATUS_UR_ERR, 1);
    if (nvswitch_test_flags(pending, bit))
    {
        NVSWITCH_REPORT_FATAL(_HW_NVLTLC_RX_LNK_RXRSPSTATUS_UR_ERR, "RX Rsp Status UR Error", NV_FALSE);
        nvswitch_clear_flags(&unhandled, bit);
    }

    bit = DRF_NUM(_NVLTLC_RX_LNK, _ERR_STATUS_0, _INVALID_COLLAPSED_RESPONSE_ERR, 1);
    if (nvswitch_test_flags(pending, bit))
    {
        NVSWITCH_REPORT_FATAL(_HW_NVLTLC_RX_LNK_INVALID_COLLAPSED_RESPONSE_ERR, "Invalid Collapsed Response Error", NV_FALSE);
        nvswitch_clear_flags(&unhandled, bit);
    }

    NVSWITCH_UNHANDLED_CHECK(device, unhandled);

    // Disable interrupts that have occurred after fatal error.
    if (device->link[link].fatal_error_occurred)
    {
        NVSWITCH_LINK_WR32_LS10(device, link, NVLTLC, _NVLTLC_RX_LNK, _ERR_FATAL_REPORT_EN_0,
            report.raw_enable ^ pending);
    }

    if (report.raw_first & report.mask)
    {
        NVSWITCH_LINK_WR32_LS10(device, link, NVLTLC, _NVLTLC_RX_LNK, _ERR_FIRST_0,
            report.raw_first & report.mask);
    }
    // W1C the serviced pending bits
    NVSWITCH_LINK_WR32_LS10(device, link, NVLTLC, _NVLTLC_RX_LNK, _ERR_STATUS_0, pending);

    if (unhandled != 0)
    {
        NVSWITCH_PRINT(device, WARN,
                "%s: Unhandled NVLTLC_RX_LNK _0 interrupts, link: %d pending: 0x%x enabled: 0x%x.\n",
                 __FUNCTION__, link, pending, report.raw_enable);
        return -NVL_MORE_PROCESSING_REQUIRED;
    }

    return NVL_SUCCESS;
}

//
// Service NVLTLC RX_LNK fatal interrupts (status register 1) for the given link.
//
static NvlStatus
_nvswitch_service_nvltlc_rx_lnk_fatal_1_ls10
(
    nvswitch_device *device,
    NvU32 nvlipt_instance,
    NvU32 link
)
{
    NvU32 pending, bit, unhandled;
    NVSWITCH_INTERRUPT_LOG_TYPE report = { 0 };

    report.raw_pending = NVSWITCH_LINK_RD32_LS10(device, link, NVLTLC, _NVLTLC_RX_LNK, _ERR_STATUS_1);
    report.raw_enable = NVSWITCH_LINK_RD32_LS10(device, link, NVLTLC, _NVLTLC_RX_LNK, _ERR_FATAL_REPORT_EN_1);
    report.mask = report.raw_enable;
    pending = report.raw_pending & report.mask;

    if (pending == 0)
    {
        return -NVL_NOT_FOUND;
    }

    unhandled = pending;
    report.raw_first = NVSWITCH_LINK_RD32_LS10(device, link, NVLTLC, _NVLTLC_RX_LNK, _ERR_FIRST_1);

    bit = DRF_NUM(_NVLTLC_RX_LNK, _ERR_STATUS_1, _RXHDROVFERR, 1);
    if (nvswitch_test_flags(pending, bit))
    {
        NVSWITCH_REPORT_FATAL(_HW_NVLTLC_RXHDROVFERR, "RX HDR OVF Error", NV_FALSE);
        nvswitch_clear_flags(&unhandled, bit);
    }

    bit = DRF_NUM(_NVLTLC_RX_LNK, _ERR_STATUS_1, _RXDATAOVFERR, 1);
    if (nvswitch_test_flags(pending, bit))
    {
        NVSWITCH_REPORT_FATAL(_HW_NVLTLC_RXDATAOVFERR, "RX Data OVF Error", NV_FALSE);
4824 nvswitch_clear_flags(&unhandled, bit); 4825 } 4826 4827 bit = DRF_NUM(_NVLTLC_RX_LNK, _ERR_STATUS_1, _STOMPDETERR, 1); 4828 if (nvswitch_test_flags(pending, bit)) 4829 { 4830 NVSWITCH_REPORT_FATAL(_HW_NVLTLC_STOMPDETERR, "Stomp Det Error", NV_FALSE); 4831 nvswitch_clear_flags(&unhandled, bit); 4832 } 4833 4834 bit = DRF_NUM(_NVLTLC_RX_LNK, _ERR_STATUS_1, _RXPOISONERR, 1); 4835 if (nvswitch_test_flags(pending, bit)) 4836 { 4837 NVSWITCH_REPORT_FATAL(_HW_NVLTLC_RXPOISONERR, "RX Poison Error", NV_FALSE); 4838 nvswitch_clear_flags(&unhandled, bit); 4839 } 4840 4841 NVSWITCH_UNHANDLED_CHECK(device, unhandled); 4842 4843 // Disable interrupts that have occurred after fatal error. 4844 if (device->link[link].fatal_error_occurred) 4845 { 4846 NVSWITCH_LINK_WR32_LS10(device, link, NVLTLC, _NVLTLC_RX_LNK, _ERR_FATAL_REPORT_EN_1, 4847 report.raw_enable ^ pending); 4848 } 4849 4850 if (report.raw_first & report.mask) 4851 { 4852 NVSWITCH_LINK_WR32_LS10(device, link, NVLTLC, _NVLTLC_RX_LNK, _ERR_FIRST_1, 4853 report.raw_first & report.mask); 4854 } 4855 NVSWITCH_LINK_WR32_LS10(device, link, NVLTLC, _NVLTLC_RX_LNK, _ERR_STATUS_1, pending); 4856 4857 if (unhandled != 0) 4858 { 4859 NVSWITCH_PRINT(device, WARN, 4860 "%s: Unhandled NVLTLC_RX_LNK _1 interrupts, link: %d pending: 0x%x enabled: 0x%x.\n", 4861 __FUNCTION__, link, pending, report.raw_enable); 4862 return -NVL_MORE_PROCESSING_REQUIRED; 4863 } 4864 4865 return NVL_SUCCESS; 4866 } 4867 4868 NvlStatus 4869 _nvswitch_service_nvltlc_fatal_ls10 4870 ( 4871 nvswitch_device *device, 4872 NvU32 nvlipt_instance 4873 ) 4874 { 4875 NvU64 enabledLinkMask, localLinkMask, localEnabledLinkMask; 4876 NvU32 i; 4877 nvlink_link *link; 4878 NvlStatus status = -NVL_MORE_PROCESSING_REQUIRED; 4879 4880 enabledLinkMask = nvswitch_get_enabled_link_mask(device); 4881 localLinkMask = NVSWITCH_NVLIPT_GET_LOCAL_LINK_MASK64_LS10(nvlipt_instance); 4882 localEnabledLinkMask = enabledLinkMask & localLinkMask; 4883 4884 FOR_EACH_INDEX_IN_MASK(64, i, 
localEnabledLinkMask) 4885 { 4886 link = nvswitch_get_link(device, i); 4887 if (link == NULL) 4888 { 4889 // An interrupt on an invalid link should never occur 4890 NVSWITCH_ASSERT(link != NULL); 4891 continue; 4892 } 4893 4894 if (NVSWITCH_GET_LINK_ENG_INST(device, i, NVLIPT) != nvlipt_instance) 4895 { 4896 NVSWITCH_ASSERT(0); 4897 break; 4898 } 4899 4900 // 4901 // If link is in reset or NCISOC clock is off then 4902 // don't need to check the link for NVLTLC errors 4903 // as the IP's registers are off 4904 // 4905 if (nvswitch_is_link_in_reset(device, link) || 4906 !nvswitch_are_link_clocks_on_ls10(device, link,NVSWITCH_PER_LINK_CLOCK_SET(NCISOCCLK))); 4907 { 4908 continue; 4909 } 4910 4911 if (_nvswitch_service_nvltlc_tx_sys_fatal_ls10(device, nvlipt_instance, i) == NVL_SUCCESS) 4912 { 4913 status = NVL_SUCCESS; 4914 } 4915 4916 if (_nvswitch_service_nvltlc_rx_sys_fatal_ls10(device, nvlipt_instance, i) == NVL_SUCCESS) 4917 { 4918 status = NVL_SUCCESS; 4919 } 4920 4921 if (_nvswitch_service_nvltlc_tx_lnk_fatal_0_ls10(device, nvlipt_instance, i) == NVL_SUCCESS) 4922 { 4923 status = NVL_SUCCESS; 4924 } 4925 4926 if (_nvswitch_service_nvltlc_rx_lnk_fatal_0_ls10(device, nvlipt_instance, i) == NVL_SUCCESS) 4927 { 4928 status = NVL_SUCCESS; 4929 } 4930 4931 if (_nvswitch_service_nvltlc_rx_lnk_fatal_1_ls10(device, nvlipt_instance, i) == NVL_SUCCESS) 4932 { 4933 status = NVL_SUCCESS; 4934 } 4935 } 4936 FOR_EACH_INDEX_IN_MASK_END; 4937 4938 return status; 4939 } 4940 4941 static NvlStatus 4942 _nvswitch_service_nvlipt_common_fatal_ls10 4943 ( 4944 nvswitch_device *device, 4945 NvU32 instance 4946 ) 4947 { 4948 NVSWITCH_INTERRUPT_LOG_TYPE report = { 0 }; 4949 NvU32 pending, bit, contain, unhandled; 4950 NvU32 link, local_link_idx; 4951 4952 report.raw_pending = NVSWITCH_ENG_RD32(device, NVLIPT, , instance, _NVLIPT_COMMON, _ERR_STATUS_0); 4953 report.raw_enable = NVSWITCH_ENG_RD32(device, NVLIPT, , instance, _NVLIPT_COMMON, _ERR_FATAL_REPORT_EN_0); 4954 report.mask = 
report.raw_enable & (DRF_NUM(_NVLIPT_COMMON, _ERR_STATUS_0, _CLKCTL_ILLEGAL_REQUEST, 1)); 4955 4956 pending = report.raw_pending & report.mask; 4957 if (pending == 0) 4958 { 4959 return -NVL_NOT_FOUND; 4960 } 4961 4962 unhandled = pending; 4963 report.raw_first = NVSWITCH_ENG_RD32(device, NVLIPT, , instance, _NVLIPT_COMMON, _ERR_FIRST_0); 4964 contain = NVSWITCH_ENG_RD32(device, NVLIPT, , instance, _NVLIPT_COMMON, _ERR_CONTAIN_EN_0); 4965 4966 bit = DRF_NUM(_NVLIPT_COMMON, _ERR_STATUS_0, _CLKCTL_ILLEGAL_REQUEST, 1); 4967 if (nvswitch_test_flags(pending, bit)) 4968 { 4969 for (local_link_idx = 0; local_link_idx < NVSWITCH_LINKS_PER_NVLIPT_LS10; local_link_idx++) 4970 { 4971 link = (instance * NVSWITCH_LINKS_PER_NVLIPT_LS10) + local_link_idx; 4972 if (nvswitch_is_link_valid(device, link)) 4973 { 4974 NVSWITCH_REPORT_CONTAIN(_HW_NVLIPT_CLKCTL_ILLEGAL_REQUEST, "CLKCTL_ILLEGAL_REQUEST", NV_FALSE); 4975 } 4976 } 4977 4978 nvswitch_clear_flags(&unhandled, bit); 4979 } 4980 4981 NVSWITCH_UNHANDLED_CHECK(device, unhandled); 4982 4983 // Disable interrupts that have occurred after fatal error. 
4984 for (local_link_idx = 0; local_link_idx < NVSWITCH_LINKS_PER_NVLIPT_LS10; local_link_idx++) 4985 { 4986 link = (instance * NVSWITCH_LINKS_PER_NVLIPT_LS10) + local_link_idx; 4987 if (nvswitch_is_link_valid(device, link) && 4988 (device->link[link].fatal_error_occurred)) 4989 { 4990 NVSWITCH_ENG_WR32(device, NVLIPT, , instance, _NVLIPT_COMMON, _ERR_FATAL_REPORT_EN_0, 4991 report.raw_enable ^ pending); 4992 break; 4993 } 4994 } 4995 4996 // clear the interrupts 4997 if (report.raw_first & report.mask) 4998 { 4999 NVSWITCH_ENG_WR32(device, NVLIPT, , instance, _NVLIPT_COMMON, _ERR_FIRST_0, 5000 report.raw_first & report.mask); 5001 } 5002 NVSWITCH_ENG_WR32(device, NVLIPT, , instance, _NVLIPT_COMMON, _ERR_STATUS_0, pending); 5003 5004 if (unhandled != 0) 5005 { 5006 NVSWITCH_PRINT(device, WARN, 5007 "%s: Unhandled NVLIPT_COMMON FATAL interrupts, pending: 0x%x enabled: 0x%x.\n", 5008 __FUNCTION__, pending, report.raw_enable); 5009 return -NVL_MORE_PROCESSING_REQUIRED; 5010 } 5011 5012 return NVL_SUCCESS; 5013 } 5014 5015 static NvlStatus 5016 _nvswitch_service_nxbar_tile_ls10 5017 ( 5018 nvswitch_device *device, 5019 NvU32 tile 5020 ) 5021 { 5022 ls10_device *chip_device = NVSWITCH_GET_CHIP_DEVICE_LS10(device); 5023 NvU32 pending, bit, unhandled; 5024 NVSWITCH_INTERRUPT_LOG_TYPE report = { 0 }; 5025 NvU32 link = tile; 5026 5027 report.raw_pending = NVSWITCH_TILE_RD32(device, tile, _NXBAR_TILE, _ERR_STATUS); 5028 report.raw_enable = NVSWITCH_TILE_RD32(device, tile, _NXBAR_TILE, _ERR_FATAL_INTR_EN); 5029 report.mask = chip_device->intr_mask.tile.fatal; 5030 pending = report.raw_pending & report.mask; 5031 5032 if (pending == 0) 5033 { 5034 return -NVL_NOT_FOUND; 5035 } 5036 5037 unhandled = pending; 5038 report.raw_first = NVSWITCH_TILE_RD32(device, tile, _NXBAR_TILE, _ERR_FIRST); 5039 5040 bit = DRF_NUM(_NXBAR_TILE, _ERR_STATUS, _INGRESS_BUFFER_OVERFLOW, 1); 5041 if (nvswitch_test_flags(pending, bit)) 5042 { 5043 
NVSWITCH_REPORT_FATAL(_HW_NXBAR_TILE_INGRESS_BUFFER_OVERFLOW, "ingress SRC-VC buffer overflow", NV_TRUE); 5044 nvswitch_clear_flags(&unhandled, bit); 5045 } 5046 5047 bit = DRF_NUM(_NXBAR_TILE, _ERR_STATUS, _INGRESS_BUFFER_UNDERFLOW, 1); 5048 if (nvswitch_test_flags(pending, bit)) 5049 { 5050 NVSWITCH_REPORT_FATAL(_HW_NXBAR_TILE_INGRESS_BUFFER_UNDERFLOW, "ingress SRC-VC buffer underflow", NV_TRUE); 5051 nvswitch_clear_flags(&unhandled, bit); 5052 } 5053 5054 bit = DRF_NUM(_NXBAR_TILE, _ERR_STATUS, _EGRESS_CREDIT_OVERFLOW, 1); 5055 if (nvswitch_test_flags(pending, bit)) 5056 { 5057 NVSWITCH_REPORT_FATAL(_HW_NXBAR_TILE_EGRESS_CREDIT_OVERFLOW, "egress DST-VC credit overflow", NV_TRUE); 5058 nvswitch_clear_flags(&unhandled, bit); 5059 } 5060 5061 bit = DRF_NUM(_NXBAR_TILE, _ERR_STATUS, _EGRESS_CREDIT_UNDERFLOW, 1); 5062 if (nvswitch_test_flags(pending, bit)) 5063 { 5064 NVSWITCH_REPORT_FATAL(_HW_NXBAR_TILE_EGRESS_CREDIT_UNDERFLOW, "egress DST-VC credit underflow", NV_TRUE); 5065 nvswitch_clear_flags(&unhandled, bit); 5066 } 5067 5068 bit = DRF_NUM(_NXBAR_TILE, _ERR_STATUS, _INGRESS_NON_BURSTY_PKT, 1); 5069 if (nvswitch_test_flags(pending, bit)) 5070 { 5071 NVSWITCH_REPORT_FATAL(_HW_NXBAR_TILE_INGRESS_NON_BURSTY_PKT, "ingress packet burst error", NV_TRUE); 5072 nvswitch_clear_flags(&unhandled, bit); 5073 } 5074 5075 bit = DRF_NUM(_NXBAR_TILE, _ERR_STATUS, _INGRESS_NON_STICKY_PKT, 1); 5076 if (nvswitch_test_flags(pending, bit)) 5077 { 5078 NVSWITCH_REPORT_FATAL(_HW_NXBAR_TILE_INGRESS_NON_STICKY_PKT, "ingress packet sticky error", NV_TRUE); 5079 nvswitch_clear_flags(&unhandled, bit); 5080 } 5081 5082 bit = DRF_NUM(_NXBAR_TILE, _ERR_STATUS, _INGRESS_BURST_GT_9_DATA_VC, 1); 5083 if (nvswitch_test_flags(pending, bit)) 5084 { 5085 NVSWITCH_REPORT_FATAL(_HW_NXBAR_TILE_INGRESS_BURST_GT_9_DATA_VC, "possible bubbles at ingress", NV_TRUE); 5086 nvswitch_clear_flags(&unhandled, bit); 5087 } 5088 5089 bit = DRF_NUM(_NXBAR_TILE, _ERR_STATUS, _INGRESS_PKT_INVALID_DST, 1); 5090 if 
(nvswitch_test_flags(pending, bit)) 5091 { 5092 NVSWITCH_REPORT_FATAL(_HW_NXBAR_TILE_INGRESS_PKT_INVALID_DST, "ingress packet invalid dst error", NV_TRUE); 5093 nvswitch_clear_flags(&unhandled, bit); 5094 } 5095 5096 bit = DRF_NUM(_NXBAR_TILE, _ERR_STATUS, _INGRESS_PKT_PARITY_ERROR, 1); 5097 if (nvswitch_test_flags(pending, bit)) 5098 { 5099 NVSWITCH_REPORT_FATAL(_HW_NXBAR_TILE_INGRESS_PKT_PARITY_ERROR, "ingress packet parity error", NV_TRUE); 5100 nvswitch_clear_flags(&unhandled, bit); 5101 } 5102 5103 if (report.raw_first & report.mask) 5104 { 5105 NVSWITCH_TILE_WR32(device, tile, _NXBAR_TILE, _ERR_FIRST, 5106 report.raw_first & report.mask); 5107 } 5108 5109 NVSWITCH_UNHANDLED_CHECK(device, unhandled); 5110 5111 // Disable interrupts that have occurred after fatal error. 5112 // This helps prevent an interrupt storm if HW keeps triggering unnecessary stream of interrupts. 5113 NVSWITCH_TILE_WR32(device, tile, _NXBAR_TILE, _ERR_FATAL_INTR_EN, 5114 report.raw_enable ^ pending); 5115 5116 NVSWITCH_TILE_WR32(device, link, _NXBAR_TILE, _ERR_STATUS, pending); 5117 5118 if (unhandled != 0) 5119 { 5120 return -NVL_MORE_PROCESSING_REQUIRED; 5121 } 5122 5123 return NVL_SUCCESS; 5124 } 5125 5126 static NvlStatus 5127 _nvswitch_service_nxbar_tileout_ls10 5128 ( 5129 nvswitch_device *device, 5130 NvU32 tileout 5131 ) 5132 { 5133 ls10_device *chip_device = NVSWITCH_GET_CHIP_DEVICE_LS10(device); 5134 NvU32 pending, bit, unhandled; 5135 NVSWITCH_INTERRUPT_LOG_TYPE report = { 0 }; 5136 NvU32 link = tileout; 5137 5138 report.raw_pending = NVSWITCH_TILEOUT_RD32(device, tileout, _NXBAR_TILEOUT, _ERR_STATUS); 5139 report.raw_enable = NVSWITCH_TILEOUT_RD32(device, tileout, _NXBAR_TILEOUT, _ERR_FATAL_INTR_EN); 5140 report.mask = chip_device->intr_mask.tileout.fatal; 5141 pending = report.raw_pending & report.mask; 5142 5143 if (pending == 0) 5144 { 5145 return -NVL_NOT_FOUND; 5146 } 5147 5148 unhandled = pending; 5149 report.raw_first = NVSWITCH_TILEOUT_RD32(device, tileout, 
_NXBAR_TILEOUT, _ERR_FIRST); 5150 5151 bit = DRF_NUM(_NXBAR_TILEOUT, _ERR_STATUS, _INGRESS_BUFFER_OVERFLOW, 1); 5152 if (nvswitch_test_flags(pending, bit)) 5153 { 5154 NVSWITCH_REPORT_FATAL(_HW_NXBAR_TILEOUT_INGRESS_BUFFER_OVERFLOW, "ingress SRC-VC buffer overflow", NV_TRUE); 5155 nvswitch_clear_flags(&unhandled, bit); 5156 } 5157 5158 bit = DRF_NUM(_NXBAR_TILEOUT, _ERR_STATUS, _INGRESS_BUFFER_UNDERFLOW, 1); 5159 if (nvswitch_test_flags(pending, bit)) 5160 { 5161 NVSWITCH_REPORT_FATAL(_HW_NXBAR_TILEOUT_INGRESS_BUFFER_UNDERFLOW, "ingress SRC-VC buffer underflow", NV_TRUE); 5162 nvswitch_clear_flags(&unhandled, bit); 5163 } 5164 5165 bit = DRF_NUM(_NXBAR_TILEOUT, _ERR_STATUS, _EGRESS_CREDIT_OVERFLOW, 1); 5166 if (nvswitch_test_flags(pending, bit)) 5167 { 5168 NVSWITCH_REPORT_FATAL(_HW_NXBAR_TILEOUT_EGRESS_CREDIT_OVERFLOW, "egress DST-VC credit overflow", NV_TRUE); 5169 nvswitch_clear_flags(&unhandled, bit); 5170 } 5171 5172 bit = DRF_NUM(_NXBAR_TILEOUT, _ERR_STATUS, _EGRESS_CREDIT_UNDERFLOW, 1); 5173 if (nvswitch_test_flags(pending, bit)) 5174 { 5175 NVSWITCH_REPORT_FATAL(_HW_NXBAR_TILEOUT_EGRESS_CREDIT_UNDERFLOW, "egress DST-VC credit underflow", NV_TRUE); 5176 nvswitch_clear_flags(&unhandled, bit); 5177 } 5178 5179 bit = DRF_NUM(_NXBAR_TILEOUT, _ERR_STATUS, _INGRESS_NON_BURSTY_PKT, 1); 5180 if (nvswitch_test_flags(pending, bit)) 5181 { 5182 NVSWITCH_REPORT_FATAL(_HW_NXBAR_TILEOUT_INGRESS_NON_BURSTY_PKT, "ingress packet burst error", NV_TRUE); 5183 nvswitch_clear_flags(&unhandled, bit); 5184 } 5185 5186 bit = DRF_NUM(_NXBAR_TILEOUT, _ERR_STATUS, _INGRESS_NON_STICKY_PKT, 1); 5187 if (nvswitch_test_flags(pending, bit)) 5188 { 5189 NVSWITCH_REPORT_FATAL(_HW_NXBAR_TILEOUT_INGRESS_NON_STICKY_PKT, "ingress packet sticky error", NV_TRUE); 5190 nvswitch_clear_flags(&unhandled, bit); 5191 } 5192 5193 bit = DRF_NUM(_NXBAR_TILEOUT, _ERR_STATUS, _INGRESS_BURST_GT_9_DATA_VC, 1); 5194 if (nvswitch_test_flags(pending, bit)) 5195 { 5196 
NVSWITCH_REPORT_FATAL(_HW_NXBAR_TILEOUT_INGRESS_BURST_GT_9_DATA_VC, "possible bubbles at ingress", NV_TRUE); 5197 nvswitch_clear_flags(&unhandled, bit); 5198 } 5199 5200 bit = DRF_NUM(_NXBAR_TILEOUT, _ERR_STATUS, _EGRESS_CDT_PARITY_ERROR, 1); 5201 if (nvswitch_test_flags(pending, bit)) 5202 { 5203 NVSWITCH_REPORT_FATAL(_HW_NXBAR_TILEOUT_EGRESS_CDT_PARITY_ERROR, "ingress credit parity error", NV_TRUE); 5204 nvswitch_clear_flags(&unhandled, bit); 5205 } 5206 5207 if (report.raw_first & report.mask) 5208 { 5209 NVSWITCH_TILEOUT_WR32(device, tileout, _NXBAR_TILEOUT, _ERR_FIRST, 5210 report.raw_first & report.mask); 5211 } 5212 5213 NVSWITCH_UNHANDLED_CHECK(device, unhandled); 5214 5215 // Disable interrupts that have occurred after fatal error. 5216 // This helps prevent an interrupt storm if HW keeps triggering unnecessary stream of interrupts. 5217 NVSWITCH_TILEOUT_WR32(device, tileout, _NXBAR_TILEOUT, _ERR_FATAL_INTR_EN, 5218 report.raw_enable ^ pending); 5219 5220 NVSWITCH_TILEOUT_WR32(device, tileout, _NXBAR_TILEOUT, _ERR_STATUS, pending); 5221 5222 if (unhandled != 0) 5223 { 5224 return -NVL_MORE_PROCESSING_REQUIRED; 5225 } 5226 5227 return NVL_SUCCESS; 5228 } 5229 5230 static NvlStatus 5231 _nvswitch_service_nxbar_fatal_ls10 5232 ( 5233 nvswitch_device *device, 5234 NvU32 nxbar 5235 ) 5236 { 5237 NvU32 pending, bit, unhandled; 5238 NvU32 tile_idx; 5239 NvU32 tile, tileout; 5240 5241 pending = NVSWITCH_ENG_RD32(device, NXBAR, , nxbar, _NXBAR, _TCP_ERROR_STATUS); 5242 if (pending == 0) 5243 { 5244 return -NVL_NOT_FOUND; 5245 } 5246 5247 unhandled = pending; 5248 5249 for (tile = 0; tile < NUM_NXBAR_TILES_PER_TC_LS10; tile++) 5250 { 5251 bit = DRF_NUM(_NXBAR, _TCP_ERROR_STATUS, _TILE0, 1) << tile; 5252 if (nvswitch_test_flags(pending, bit)) 5253 { 5254 tile_idx = TILE_INDEX_LS10(device, nxbar, tile); 5255 if (NVSWITCH_ENG_VALID_LS10(device, TILE, tile_idx)) 5256 { 5257 if (_nvswitch_service_nxbar_tile_ls10(device, tile_idx) == NVL_SUCCESS) 5258 { 5259 
                    nvswitch_clear_flags(&unhandled, bit);
                }
            }
        }
    }

    for (tileout = 0; tileout < NUM_NXBAR_TILEOUTS_PER_TC_LS10; tileout++)
    {
        bit = DRF_NUM(_NXBAR, _TCP_ERROR_STATUS, _TILEOUT0, 1) << tileout;
        if (nvswitch_test_flags(pending, bit))
        {
            // NOTE(review): the TILE index macro is reused for TILEOUT
            // indexing here — confirm the same index math applies to both
            // engine types.
            tile_idx = TILE_INDEX_LS10(device, nxbar, tileout);
            if (NVSWITCH_ENG_VALID_LS10(device, TILEOUT, tile_idx))
            {
                if (_nvswitch_service_nxbar_tileout_ls10(device, tile_idx) == NVL_SUCCESS)
                {
                    nvswitch_clear_flags(&unhandled, bit);
                }
            }
        }
    }

    // TODO: Perform hot_reset to recover NXBAR

    NVSWITCH_UNHANDLED_CHECK(device, unhandled);


    if (unhandled != 0)
    {
        return -NVL_MORE_PROCESSING_REQUIRED;
    }

    return NVL_SUCCESS;
}

//
// Re-emit deferred fatal NVLDL (DL-layer) errors for a link from the
// previously latched deferred interrupt mask. Only LTSSM_FAULT_UP is
// reported here.
//
static void
_nvswitch_emit_link_errors_nvldl_fatal_link_ls10
(
    nvswitch_device *device,
    NvU32 nvlipt_instance,
    NvU32 link
)
{
    ls10_device *chip_device = NVSWITCH_GET_CHIP_DEVICE_LS10(device);
    NVSWITCH_INTERRUPT_LOG_TYPE report = { 0 };
    NvU32 pending, bit;

    // Only enabled link errors are deferred, so the latched mask doubles as
    // pending, enable, and mask for the report.
    pending = chip_device->deferredLinkErrors[link].fatalIntrMask.dl;
    report.raw_pending = pending;
    report.raw_enable = pending;
    report.mask = report.raw_enable;

    bit = DRF_NUM(_NVLDL_TOP, _INTR, _LTSSM_FAULT_UP, 1);
    if (nvswitch_test_flags(pending, bit))
    {
        NVSWITCH_REPORT_FATAL(_HW_DLPL_LTSSM_FAULT_UP, "LTSSM Fault Up", NV_FALSE);
    }
}

//
// Re-emit deferred nonfatal NVLDL errors for a link. On RX_SHORT_ERROR_RATE
// the error-rate-threshold interrupt is disabled before reporting.
//
static void
_nvswitch_emit_link_errors_nvldl_nonfatal_link_ls10
(
    nvswitch_device *device,
    NvU32 link
)
{
    ls10_device *chip_device = NVSWITCH_GET_CHIP_DEVICE_LS10(device);
    NVSWITCH_INTERRUPT_LOG_TYPE report = { 0 };
    NvU32 pending, bit;

    // Only enabled link errors are deferred
    pending = chip_device->deferredLinkErrors[link].nonFatalIntrMask.dl;
    report.raw_pending = pending;
    report.raw_enable = pending;
    report.mask = report.raw_enable;

    bit = DRF_NUM(_NVLDL_TOP, _INTR, _RX_SHORT_ERROR_RATE, 1);
    if (nvswitch_test_flags(pending, bit))
    {
        // Disable further interrupts
        // NOTE(review): nvswitch_get_link() is NULL-checked elsewhere in this
        // file but not here — confirm a valid link is guaranteed on this path.
        nvlink_link *nvlink = nvswitch_get_link(device, link);
        nvlink->errorThreshold.bInterruptTrigerred = NV_TRUE;
        nvswitch_configure_error_rate_threshold_interrupt_ls10(nvlink, NV_FALSE);
        NVSWITCH_REPORT_NONFATAL(_HW_DLPL_RX_SHORT_ERROR_RATE, "RX Short Error Rate");
    }
}

//
// Re-emit deferred nonfatal NVLTLC RX link (_ERR_STATUS_1) errors for a link.
//
static void
_nvswitch_emit_link_errors_nvltlc_rx_lnk_nonfatal_1_ls10
(
    nvswitch_device *device,
    NvU32 nvlipt_instance,
    NvU32 link
)
{
    ls10_device *chip_device = NVSWITCH_GET_CHIP_DEVICE_LS10(device);
    NVSWITCH_INTERRUPT_LOG_TYPE report = { 0 };
    NvU32 pending, bit, injected;

    // Only enabled link errors are deferred
    pending = chip_device->deferredLinkErrors[link].nonFatalIntrMask.tlcRx1;
    injected = chip_device->deferredLinkErrors[link].nonFatalIntrMask.tlcRx1Injected;
    report.raw_pending = pending;
    report.raw_enable = pending;
    report.mask = report.raw_enable;


    bit = DRF_NUM(_NVLTLC_RX_LNK, _ERR_STATUS_1, _HEARTBEAT_TIMEOUT_ERR, 1);
    if (nvswitch_test_flags(pending, bit))
    {
        NVSWITCH_REPORT_NONFATAL(_HW_NVLTLC_RX_LNK_AN1_HEARTBEAT_TIMEOUT_ERR, "AN1 Heartbeat Timeout Error");

        if (FLD_TEST_DRF_NUM(_NVLTLC_RX_LNK, _ERR_REPORT_INJECT_1, _HEARTBEAT_TIMEOUT_ERR, 0x0, injected))
        {
            // NOTE(review): empty branch — appears to mirror the HBTO WAR in
            // the live interrupt handler but takes no action here; confirm
            // this is intentional and not a dropped statement.
        }
    }
}

//
// Re-emit deferred nonfatal NVLIPT link errors for a link.
//
static void
_nvswitch_emit_link_errors_nvlipt_lnk_nonfatal_ls10
(
    nvswitch_device *device,
    NvU32 nvlipt_instance,
    NvU32 link
)
{
    ls10_device *chip_device = NVSWITCH_GET_CHIP_DEVICE_LS10(device);
    NVSWITCH_INTERRUPT_LOG_TYPE report = { 0 };
    NvU32 pending, bit;

    // Only enabled link errors are deferred
    pending =
        chip_device->deferredLinkErrors[link].nonFatalIntrMask.liptLnk;
    report.raw_pending = pending;
    report.raw_enable = pending;
    report.mask = report.raw_enable;

    bit = DRF_NUM(_NVLIPT_LNK, _ERR_STATUS_0, _FAILEDMINIONREQUEST, 1);
    if (nvswitch_test_flags(pending, bit))
    {
        NVSWITCH_REPORT_NONFATAL(_HW_NVLIPT_LNK_FAILEDMINIONREQUEST, "_FAILEDMINIONREQUEST");

    }
}

//
// Emit all classes of deferred link errors (DL fatal, DL nonfatal, TLC RX1,
// NVLIPT link) that were latched for this link.
//
static void
_nvswitch_emit_deferred_link_errors_ls10
(
    nvswitch_device *device,
    NvU32 nvlipt_instance,
    NvU32 link
)
{
    _nvswitch_emit_link_errors_nvldl_fatal_link_ls10(device, nvlipt_instance, link);
    _nvswitch_emit_link_errors_nvldl_nonfatal_link_ls10(device, link);
    _nvswitch_emit_link_errors_nvltlc_rx_lnk_nonfatal_1_ls10(device, nvlipt_instance, link);
    _nvswitch_emit_link_errors_nvlipt_lnk_nonfatal_ls10(device, nvlipt_instance, link);
}

//
// Reset all deferred-error bookkeeping (latched masks and callback flags)
// for this link.
//
static void
_nvswitch_clear_deferred_link_errors_ls10
(
    nvswitch_device *device,
    NvU32 link
)
{
    ls10_device *chip_device = NVSWITCH_GET_CHIP_DEVICE_LS10(device);
    NVLINK_LINK_ERROR_REPORTING *pLinkErrors;

    pLinkErrors = &chip_device->deferredLinkErrors[link];

    nvswitch_os_memset(pLinkErrors, 0, sizeof(NVLINK_LINK_ERROR_REPORTING));
}

//
// Deferred task callback: if a reset_and_drain retry is flagged, re-arm the
// check; otherwise emit the deferred errors unless the link has recovered to
// HS or SLEEP. Frees the heap-allocated fn_args in every path.
//
static void
_nvswitch_deferred_link_state_check_ls10
(
    nvswitch_device *device,
    void *fn_args
)
{
    NVSWITCH_DEFERRED_ERROR_REPORTING_ARGS *pErrorReportParams =
        (NVSWITCH_DEFERRED_ERROR_REPORTING_ARGS*)fn_args;
    NvU32 nvlipt_instance = pErrorReportParams->nvlipt_instance;
    NvU32 link = pErrorReportParams->link;
    ls10_device *chip_device;
    nvlink_link *pLink;
    NvU64 linkState;

    chip_device = NVSWITCH_GET_CHIP_DEVICE_LS10(device);
    pLink = nvswitch_get_link(device, pErrorReportParams->link);

    // If there is a retry for reset_and_drain then re-create the state check
    // for the current link
    if (chip_device->deferredLinkErrors[link].bResetAndDrainRetry == NV_TRUE)
    {
        if (pErrorReportParams)
        {
            nvswitch_os_free(pErrorReportParams);
        }

        chip_device->deferredLinkErrors[link].bLinkErrorsCallBackEnabled = NV_FALSE;
        chip_device->deferredLinkErrors[link].bResetAndDrainRetry = NV_FALSE;
        nvswitch_create_deferred_link_state_check_task_ls10(device, nvlipt_instance, link);
        return;
    }

    // Emit the deferred errors unless the link came back up (HS) or is asleep.
    if ((pLink == NULL) ||
        (device->hal.nvswitch_corelib_get_dl_link_mode(pLink, &linkState) != NVL_SUCCESS) ||
        ((linkState != NVLINK_LINKSTATE_HS) && (linkState != NVLINK_LINKSTATE_SLEEP)))
    {
        _nvswitch_emit_deferred_link_errors_ls10(device, nvlipt_instance, link);
    }

    _nvswitch_clear_deferred_link_errors_ls10(device, link);
    nvswitch_os_free(pErrorReportParams);
    chip_device->deferredLinkErrors[link].bLinkStateCallBackEnabled = NV_FALSE;
}

//
// Schedule a one-shot deferred link state check task for (nvlipt_instance,
// link). No-op if one is already pending. On allocation/creation failure the
// deferred errors are emitted and cleared immediately. Ownership of the
// allocated args transfers to the task on success.
//
void
nvswitch_create_deferred_link_state_check_task_ls10
(
    nvswitch_device *device,
    NvU32 nvlipt_instance,
    NvU32 link
)
{
    ls10_device *chip_device = NVSWITCH_GET_CHIP_DEVICE_LS10(device);
    NVSWITCH_DEFERRED_ERROR_REPORTING_ARGS *pErrorReportParams;
    NvlStatus status;

    if (chip_device->deferredLinkErrors[link].bLinkStateCallBackEnabled)
    {
        return;
    }

    status = NVL_ERR_GENERIC;
    pErrorReportParams = nvswitch_os_malloc(sizeof(NVSWITCH_DEFERRED_ERROR_REPORTING_ARGS));
    if(pErrorReportParams != NULL)
    {
        pErrorReportParams->nvlipt_instance = nvlipt_instance;
        pErrorReportParams->link = link;

        status = nvswitch_task_create_args(device, (void*)pErrorReportParams,
                                           &_nvswitch_deferred_link_state_check_ls10,
                                           NVSWITCH_DEFERRED_LINK_STATE_CHECK_INTERVAL_NS,
                                           NVSWITCH_TASK_TYPE_FLAGS_RUN_ONCE |
                                           NVSWITCH_TASK_TYPE_FLAGS_VOID_PTR_ARGS);
    }

    if (status == NVL_SUCCESS)
    {
        chip_device->deferredLinkErrors[link].bLinkStateCallBackEnabled = NV_TRUE;
    }
    else
    {
        // Could not defer: report and clear now, and free the unowned args
        // (nvswitch_os_free(NULL) is assumed to be a safe no-op here).
        NVSWITCH_PRINT(device, ERROR,
            "%s: Failed to allocate memory. Cannot defer link state check.\n",
            __FUNCTION__);
        _nvswitch_emit_deferred_link_errors_ls10(device, nvlipt_instance, link);
        _nvswitch_clear_deferred_link_errors_ls10(device, link);
        nvswitch_os_free(pErrorReportParams);
    }
}

//
// Deferred task callback: if LTSSM_FAULT_UP was latched, escalate to a
// deferred link state check; otherwise emit and clear the deferred errors
// immediately. Frees the heap-allocated fn_args.
//
static void
_nvswitch_deferred_link_errors_check_ls10
(
    nvswitch_device *device,
    void *fn_args
)
{
    NVSWITCH_DEFERRED_ERROR_REPORTING_ARGS *pErrorReportParams =
        (NVSWITCH_DEFERRED_ERROR_REPORTING_ARGS*)fn_args;
    NvU32 nvlipt_instance = pErrorReportParams->nvlipt_instance;
    NvU32 link = pErrorReportParams->link;
    ls10_device *chip_device;
    NvU32 pending, bit;

    chip_device = NVSWITCH_GET_CHIP_DEVICE_LS10(device);

    pending = chip_device->deferredLinkErrors[link].fatalIntrMask.dl;
    bit = DRF_NUM(_NVLDL_TOP, _INTR, _LTSSM_FAULT_UP, 1);
    if (nvswitch_test_flags(pending, bit))
    {
        nvswitch_create_deferred_link_state_check_task_ls10(device, nvlipt_instance, link);
    }
    else
    {
        _nvswitch_emit_deferred_link_errors_ls10(device, nvlipt_instance, link);
        _nvswitch_clear_deferred_link_errors_ls10(device, link);
    }

    if (pErrorReportParams)
    {
        nvswitch_os_free(pErrorReportParams);
    }
    chip_device->deferredLinkErrors[link].bLinkErrorsCallBackEnabled = NV_FALSE;
}

//
// Schedule a one-shot deferred link errors check task for (nvlipt_instance,
// link). No-op if one is already pending.
//
static void
_nvswitch_create_deferred_link_errors_task_ls10
(
    nvswitch_device *device,
    NvU32 nvlipt_instance,
    NvU32 link
)
{
    ls10_device *chip_device = NVSWITCH_GET_CHIP_DEVICE_LS10(device);
    NVSWITCH_DEFERRED_ERROR_REPORTING_ARGS *pErrorReportParams;
    NvlStatus status;

    if (chip_device->deferredLinkErrors[link].bLinkErrorsCallBackEnabled)
    {
        return;
    }

    chip_device->deferredLinkErrors[link].bResetAndDrainRetry = NV_FALSE;

    status = NVL_ERR_GENERIC;
    pErrorReportParams = nvswitch_os_malloc(sizeof(NVSWITCH_DEFERRED_ERROR_REPORTING_ARGS));
    if(pErrorReportParams != NULL)
    {
        pErrorReportParams->nvlipt_instance = nvlipt_instance;
        pErrorReportParams->link = link;

        status = nvswitch_task_create_args(device, (void*)pErrorReportParams,
                                           &_nvswitch_deferred_link_errors_check_ls10,
                                           NVSWITCH_DEFERRED_FAULT_UP_CHECK_INTERVAL_NS,
                                           NVSWITCH_TASK_TYPE_FLAGS_RUN_ONCE |
                                           NVSWITCH_TASK_TYPE_FLAGS_VOID_PTR_ARGS);
    }

    if (status == NVL_SUCCESS)
    {
        chip_device->deferredLinkErrors[link].bLinkErrorsCallBackEnabled = NV_TRUE;
    }
    else
    {
        // Could not defer: report and clear now, and free the unowned args.
        NVSWITCH_PRINT(device, ERROR,
            "%s: Failed to create task. Cannot defer link error check.\n",
            __FUNCTION__);
        _nvswitch_emit_deferred_link_errors_ls10(device, nvlipt_instance, link);
        _nvswitch_clear_deferred_link_errors_ls10(device, link);
        nvswitch_os_free(pErrorReportParams);
    }
}

//
// Service nonfatal NVLDL (DL-layer) interrupts for one link. Recognized
// errors are reported immediately except RX_SHORT_ERROR_RATE, which is
// deferred; RX_CRC_COUNTER is masked after its first occurrence.
//
// Returns NVL_SUCCESS, -NVL_NOT_FOUND (nothing pending), or
// -NVL_MORE_PROCESSING_REQUIRED (unrecognized pending bit).
//
static NvlStatus
_nvswitch_service_nvldl_nonfatal_link_ls10
(
    nvswitch_device *device,
    NvU32 nvlipt_instance,
    NvU32 link
)
{
    ls10_device *chip_device = NVSWITCH_GET_CHIP_DEVICE_LS10(device);
    NvU32 pending, bit, unhandled;
    NVSWITCH_INTERRUPT_LOG_TYPE report = { 0 };

    report.raw_pending = NVSWITCH_LINK_RD32(device, link, NVLDL, _NVLDL_TOP, _INTR);
    report.raw_enable = NVSWITCH_LINK_RD32(device, link, NVLDL, _NVLDL_TOP, _INTR_NONSTALL_EN);
    report.mask = report.raw_enable;
    pending = report.raw_pending & report.mask;

    if (pending == 0)
    {
        return -NVL_NOT_FOUND;
    }

    unhandled = pending;

    bit = DRF_NUM(_NVLDL_TOP, _INTR, _TX_REPLAY, 1);
    if (nvswitch_test_flags(pending, bit))
    {
        NVSWITCH_REPORT_NONFATAL(_HW_DLPL_TX_REPLAY, "TX Replay Error");
        nvswitch_clear_flags(&unhandled, bit);
    }

    bit = DRF_NUM(_NVLDL_TOP, _INTR, _TX_RECOVERY_SHORT, 1);
    if (nvswitch_test_flags(pending, bit))
    {
        NVSWITCH_REPORT_NONFATAL(_HW_DLPL_TX_RECOVERY_SHORT, "TX Recovery Short");
        nvswitch_clear_flags(&unhandled, bit);
    }

    bit = DRF_NUM(_NVLDL_TOP, _INTR, _RX_SHORT_ERROR_RATE, 1);
    if (nvswitch_test_flags(pending, bit))
    {
        // Deferred: latched and reported later by the deferred-errors task.
        chip_device->deferredLinkErrors[link].nonFatalIntrMask.dl |= bit;
        _nvswitch_create_deferred_link_errors_task_ls10(device, nvlipt_instance, link);
        nvswitch_clear_flags(&unhandled, bit);
    }

    bit = DRF_NUM(_NVLDL_TOP, _INTR, _RX_LONG_ERROR_RATE, 1);
    if (nvswitch_test_flags(pending, bit))
    {
        NVSWITCH_REPORT_NONFATAL(_HW_DLPL_RX_LONG_ERROR_RATE, "RX Long Error Rate");
        nvswitch_clear_flags(&unhandled, bit);
    }

    bit = DRF_NUM(_NVLDL_TOP, _INTR, _RX_ILA_TRIGGER, 1);
    if (nvswitch_test_flags(pending, bit))
    {
        NVSWITCH_REPORT_NONFATAL(_HW_DLPL_RX_ILA_TRIGGER, "RX ILA Trigger");
        nvswitch_clear_flags(&unhandled, bit);
    }

    bit = DRF_NUM(_NVLDL_TOP, _INTR, _RX_CRC_COUNTER, 1);
    if (nvswitch_test_flags(pending, bit))
    {
        NVSWITCH_REPORT_NONFATAL(_HW_DLPL_RX_CRC_COUNTER, "RX CRC Counter");
        nvswitch_clear_flags(&unhandled, bit);

        //
        // Mask CRC counter after first occurrence - otherwise, this interrupt
        // will continue to fire once the CRC counter has hit the threshold
        // See Bug 3341528
        //
        report.raw_enable = report.raw_enable & (~bit);
        NVSWITCH_LINK_WR32(device, link, NVLDL, _NVLDL_TOP, _INTR_NONSTALL_EN,
            report.raw_enable);
    }

    NVSWITCH_UNHANDLED_CHECK(device, unhandled);

    // Write-1-to-clear the serviced interrupt bits.
    NVSWITCH_LINK_WR32(device, link, NVLDL, _NVLDL_TOP, _INTR, pending);

    if (unhandled != 0)
    {
        NVSWITCH_PRINT(device, WARN,
            "%s: Unhandled NVLDL nonfatal interrupts, link: %d pending: 0x%x enabled: 0x%x.\n",
            __FUNCTION__, link, pending, report.raw_enable);
        return -NVL_MORE_PROCESSING_REQUIRED;
    }

    return NVL_SUCCESS;
}

//
// Service nonfatal NVLDL interrupts for every enabled link of one NVLIPT
// instance, skipping links that are in reset or whose RX/TX clocks are off.
// Returns the last non-success link status, or -NVL_NOT_FOUND if no link
// had anything pending.
//
static NvlStatus
_nvswitch_service_nvldl_nonfatal_ls10
(
    nvswitch_device *device,
    NvU32 nvlipt_instance
)
{
    NvU64 enabledLinkMask, localLinkMask, localEnabledLinkMask;
    NvU32 i;
    nvlink_link *link;
    NvlStatus status;
    NvlStatus return_status = -NVL_NOT_FOUND;
    NvU32 clocksMask = NVSWITCH_PER_LINK_CLOCK_SET(RXCLK) | NVSWITCH_PER_LINK_CLOCK_SET(TXCLK);
    enabledLinkMask = nvswitch_get_enabled_link_mask(device);
    localLinkMask = NVSWITCH_NVLIPT_GET_LOCAL_LINK_MASK64_LS10(nvlipt_instance);
    localEnabledLinkMask = enabledLinkMask & localLinkMask;

    FOR_EACH_INDEX_IN_MASK(64, i, localEnabledLinkMask)
    {
        link = nvswitch_get_link(device, i);
        if (link == NULL)
        {
            // An interrupt on an invalid link should never occur
            NVSWITCH_ASSERT(link != NULL);
            continue;
        }

        if (NVSWITCH_GET_LINK_ENG_INST(device, i, NVLIPT) != nvlipt_instance)
        {
            NVSWITCH_ASSERT(0);
            break;
        }

        // Registers are inaccessible while the link is in reset or unclocked.
        if (nvswitch_is_link_in_reset(device, link) ||
            !nvswitch_are_link_clocks_on_ls10(device, link, clocksMask))
        {
            continue;
        }

        status = _nvswitch_service_nvldl_nonfatal_link_ls10(device, nvlipt_instance, i);
        if (status != NVL_SUCCESS)
        {
            return_status = status;
        }
    }
    FOR_EACH_INDEX_IN_MASK_END;

    return return_status;
}

//
// Service nonfatal NVLTLC RX link _ERR_STATUS_0 interrupts for one link.
//
static NvlStatus
_nvswitch_service_nvltlc_rx_lnk_nonfatal_0_ls10
(
    nvswitch_device *device,
    NvU32 nvlipt_instance,
    NvU32 link
)
{
    NvU32 pending, bit, unhandled;
    NVSWITCH_INTERRUPT_LOG_TYPE report = { 0 };

    report.raw_pending = NVSWITCH_LINK_RD32(device, link, NVLTLC, _NVLTLC_RX_LNK, _ERR_STATUS_0);
    report.raw_enable = NVSWITCH_LINK_RD32(device, link, NVLTLC, _NVLTLC_RX_LNK,
        _ERR_NON_FATAL_REPORT_EN_0);
    report.mask = report.raw_enable;

    pending = report.raw_pending & report.mask;
    if (pending == 0)
    {
        return -NVL_NOT_FOUND;
    }

    unhandled = pending;
    report.raw_first = NVSWITCH_LINK_RD32(device, link, NVLTLC, _NVLTLC_RX_LNK, _ERR_FIRST_0);

    bit = DRF_NUM(_NVLTLC_RX_LNK, _ERR_STATUS_0, _RXRSPSTATUS_PRIV_ERR, 1);
    if (nvswitch_test_flags(pending, bit))
    {
        NVSWITCH_REPORT_NONFATAL(_HW_NVLTLC_RX_LNK_RXRSPSTATUS_PRIV_ERR, "RX Rsp Status PRIV Error");
        nvswitch_clear_flags(&unhandled, bit);
    }

    NVSWITCH_UNHANDLED_CHECK(device, unhandled);

    // Clear _ERR_FIRST before the status bits so first-error capture re-arms.
    if (report.raw_first & report.mask)
    {
        NVSWITCH_LINK_WR32(device, link, NVLTLC, _NVLTLC_RX_LNK, _ERR_FIRST_0,
            report.raw_first & report.mask);
    }
    NVSWITCH_LINK_WR32(device, link, NVLTLC, _NVLTLC_RX_LNK, _ERR_STATUS_0, pending);

    if (unhandled != 0)
    {
        NVSWITCH_PRINT(device, WARN,
            "%s: Unhandled NVLTLC_RX_LNK _0 interrupts, link: %d pending: 0x%x enabled: 0x%x.\n",
            __FUNCTION__, link, pending, report.raw_enable);
        return -NVL_MORE_PROCESSING_REQUIRED;
    }

    return NVL_SUCCESS;
}

//
// Service nonfatal NVLTLC TX link _ERR_STATUS_0 interrupts (ECC DBE and
// ECC-limit errors on the CREQ/RSP/COM/RSP1 RAMs) for one link.
//
static NvlStatus
_nvswitch_service_nvltlc_tx_lnk_nonfatal_0_ls10
(
    nvswitch_device *device,
    NvU32 nvlipt_instance,
    NvU32 link
)
{
    NvU32 pending, bit, unhandled;
    NVSWITCH_INTERRUPT_LOG_TYPE report = { 0 };

    report.raw_pending = NVSWITCH_LINK_RD32(device, link, NVLTLC, _NVLTLC_TX_LNK, _ERR_STATUS_0);
    report.raw_enable = NVSWITCH_LINK_RD32(device, link, NVLTLC, _NVLTLC_TX_LNK, _ERR_NON_FATAL_REPORT_EN_0);
    report.mask = report.raw_enable;
    pending = report.raw_pending & report.mask;

    if (pending == 0)
    {
        return -NVL_NOT_FOUND;
    }

    unhandled = pending;
    report.raw_first = NVSWITCH_LINK_RD32(device, link, NVLTLC, _NVLTLC_TX_LNK, _ERR_FIRST_0);

    bit = DRF_NUM(_NVLTLC_TX_LNK, _ERR_STATUS_0, _CREQ_RAM_DAT_ECC_DBE_ERR, 1);
    if (nvswitch_test_flags(pending, bit))
    {
        NVSWITCH_REPORT_NONFATAL(_HW_NVLTLC_TX_LNK_CREQ_RAM_DAT_ECC_DBE_ERR, "CREQ RAM DAT ECC DBE Error");
        nvswitch_clear_flags(&unhandled, bit);
    }

    bit = DRF_NUM(_NVLTLC_TX_LNK, _ERR_STATUS_0, _CREQ_RAM_ECC_LIMIT_ERR, 1);
    if (nvswitch_test_flags(pending, bit))
    {
        NVSWITCH_REPORT_NONFATAL(_HW_NVLTLC_TX_LNK_CREQ_RAM_ECC_LIMIT_ERR, "CREQ RAM DAT ECC Limit Error");
        nvswitch_clear_flags(&unhandled, bit);
    }

    bit = DRF_NUM(_NVLTLC_TX_LNK, _ERR_STATUS_0, _RSP_RAM_DAT_ECC_DBE_ERR, 1);
    if (nvswitch_test_flags(pending, bit))
    {
        NVSWITCH_REPORT_NONFATAL(_HW_NVLTLC_TX_LNK_RSP_RAM_DAT_ECC_DBE_ERR, "Response RAM DAT ECC DBE Error");
        nvswitch_clear_flags(&unhandled, bit);
    }

    bit = DRF_NUM(_NVLTLC_TX_LNK, _ERR_STATUS_0, _RSP_RAM_ECC_LIMIT_ERR, 1);
    if (nvswitch_test_flags(pending, bit))
    {
        NVSWITCH_REPORT_NONFATAL(_HW_NVLTLC_TX_LNK_RSP_RAM_ECC_LIMIT_ERR, "Response RAM ECC Limit Error");
        nvswitch_clear_flags(&unhandled, bit);
    }

    bit = DRF_NUM(_NVLTLC_TX_LNK, _ERR_STATUS_0, _COM_RAM_DAT_ECC_DBE_ERR, 1);
    if (nvswitch_test_flags(pending, bit))
    {
        NVSWITCH_REPORT_NONFATAL(_HW_NVLTLC_TX_LNK_COM_RAM_DAT_ECC_DBE_ERR, "COM RAM DAT ECC DBE Error");
        nvswitch_clear_flags(&unhandled, bit);
    }

    bit = DRF_NUM(_NVLTLC_TX_LNK, _ERR_STATUS_0, _COM_RAM_ECC_LIMIT_ERR, 1);
    if (nvswitch_test_flags(pending, bit))
    {
        NVSWITCH_REPORT_NONFATAL(_HW_NVLTLC_TX_LNK_COM_RAM_ECC_LIMIT_ERR, "COM RAM ECC Limit Error");
        nvswitch_clear_flags(&unhandled, bit);
    }

    bit = DRF_NUM(_NVLTLC_TX_LNK, _ERR_STATUS_0, _RSP1_RAM_ECC_LIMIT_ERR, 1);
    if (nvswitch_test_flags(pending, bit))
    {
        NVSWITCH_REPORT_NONFATAL(_HW_NVLTLC_TX_LNK_RSP1_RAM_ECC_LIMIT_ERR, "RSP1 RAM ECC Limit Error");
        nvswitch_clear_flags(&unhandled, bit);
    }

    NVSWITCH_UNHANDLED_CHECK(device, unhandled);

    // Disable interrupts that have occurred after fatal error.
    // (Here pending is a subset of raw_enable, so XOR acts as clear-only.)
    if (device->link[link].fatal_error_occurred)
    {
        NVSWITCH_LINK_WR32(device, link, NVLTLC, _NVLTLC_TX_LNK, _ERR_NON_FATAL_REPORT_EN_0,
            report.raw_enable ^ pending);
    }

    if (report.raw_first & report.mask)
    {
        NVSWITCH_LINK_WR32(device, link, NVLTLC, _NVLTLC_TX_LNK, _ERR_FIRST_0,
            report.raw_first & report.mask);
    }
    NVSWITCH_LINK_WR32(device, link, NVLTLC, _NVLTLC_TX_LNK, _ERR_STATUS_0, pending);

    if (unhandled != 0)
    {
        NVSWITCH_PRINT(device, WARN,
            "%s: Unhandled NVLTLC_TX_LNK _0 interrupts, link: %d pending: 0x%x enabled: 0x%x.\n",
            __FUNCTION__, link, pending, report.raw_enable);
        return -NVL_MORE_PROCESSING_REQUIRED;
    }

    return NVL_SUCCESS;
}

//
// Service nonfatal NVLTLC RX link _ERR_STATUS_1 interrupts for one link.
// Heartbeat timeouts are deferred, and (unless injected) masked to prevent
// an interrupt storm during reset_and_drain.
//
static NvlStatus
_nvswitch_service_nvltlc_rx_lnk_nonfatal_1_ls10
(
    nvswitch_device *device,
    NvU32 nvlipt_instance,
    NvU32 link
)
{
    ls10_device *chip_device = NVSWITCH_GET_CHIP_DEVICE_LS10(device);
    NvU32 pending, bit, unhandled, injected;
    NVSWITCH_INTERRUPT_LOG_TYPE report = { 0 };

    report.raw_pending = NVSWITCH_LINK_RD32(device, link, NVLTLC, _NVLTLC_RX_LNK, _ERR_STATUS_1);
    report.raw_enable = NVSWITCH_LINK_RD32(device, link, NVLTLC, _NVLTLC_RX_LNK, _ERR_NON_FATAL_REPORT_EN_1);
    report.mask = report.raw_enable;
    pending = report.raw_pending & report.mask;

    if (pending == 0)
    {
        return -NVL_NOT_FOUND;
    }

    unhandled = pending;
    report.raw_first = NVSWITCH_LINK_RD32(device, link, NVLTLC, _NVLTLC_RX_LNK, _ERR_FIRST_1);
    injected = NVSWITCH_LINK_RD32(device, link, NVLTLC, _NVLTLC_RX_LNK, _ERR_REPORT_INJECT_1);

    bit = DRF_NUM(_NVLTLC_RX_LNK, _ERR_STATUS_1, _HEARTBEAT_TIMEOUT_ERR, 1);
    if (nvswitch_test_flags(pending, bit))
    {
        // Defer reporting; latch the pending and injected state for the task.
        chip_device->deferredLinkErrors[link].nonFatalIntrMask.tlcRx1 |= bit;
        chip_device->deferredLinkErrors[link].nonFatalIntrMask.tlcRx1Injected |= injected;
        _nvswitch_create_deferred_link_errors_task_ls10(device, nvlipt_instance, link);

        if (FLD_TEST_DRF_NUM(_NVLTLC_RX_LNK, _ERR_REPORT_INJECT_1, _HEARTBEAT_TIMEOUT_ERR, 0x0, injected))
        {
            //
            // WAR Bug 200627368: Mask off HBTO to avoid a storm
            // During the start of reset_and_drain, all links on the GPU
            // will go into contain, causing HBTO on other switch links connected
            // to that GPU. For the switch side, these interrupts are not fatal,
            // but until we get to reset_and_drain for this link, HBTO will continue
            // to fire repeatedly. After reset_and_drain, HBTO will be re-enabled
            // by MINION after links are trained.
            //
            report.raw_enable = report.raw_enable & (~bit);
            NVSWITCH_LINK_WR32(device, link, NVLTLC, _NVLTLC_RX_LNK, _ERR_NON_FATAL_REPORT_EN_1,
                report.raw_enable);
        }
        nvswitch_clear_flags(&unhandled, bit);
    }

    NVSWITCH_UNHANDLED_CHECK(device, unhandled);

    // Disable interrupts that have occurred after fatal error.
    if (device->link[link].fatal_error_occurred)
    {
        NVSWITCH_LINK_WR32(device, link, NVLTLC, _NVLTLC_RX_LNK, _ERR_NON_FATAL_REPORT_EN_1,
            report.raw_enable & (~pending));
    }

    if (report.raw_first & report.mask)
    {
        NVSWITCH_LINK_WR32(device, link, NVLTLC, _NVLTLC_RX_LNK, _ERR_FIRST_1,
            report.raw_first & report.mask);
    }
    NVSWITCH_LINK_WR32(device, link, NVLTLC, _NVLTLC_RX_LNK, _ERR_STATUS_1, pending);

    if (unhandled != 0)
    {
        NVSWITCH_PRINT(device, WARN,
            "%s: Unhandled NVLTLC_RX_LNK _1 interrupts, link: %d pending: 0x%x enabled: 0x%x.\n",
            __FUNCTION__, link, pending, report.raw_enable);
        return -NVL_MORE_PROCESSING_REQUIRED;
    }

    return NVL_SUCCESS;
}

//
// Service nonfatal NVLTLC TX link _ERR_STATUS_1 interrupts (AN1 per-VC
// timeouts, VC0-VC7) for one link.
//
static NvlStatus
_nvswitch_service_nvltlc_tx_lnk_nonfatal_1_ls10
(
    nvswitch_device *device,
    NvU32 nvlipt_instance,
    NvU32 link
)
{
    NvU32 pending, bit, unhandled;
    NVSWITCH_INTERRUPT_LOG_TYPE report = { 0 };

    report.raw_pending = NVSWITCH_LINK_RD32(device, link, NVLTLC, _NVLTLC_TX_LNK, _ERR_STATUS_1);
    report.raw_enable = NVSWITCH_LINK_RD32(device, link, NVLTLC, _NVLTLC_TX_LNK, _ERR_NON_FATAL_REPORT_EN_1);
    report.mask = report.raw_enable;
    pending = report.raw_pending & report.mask;

    if (pending == 0)
    {
        return -NVL_NOT_FOUND;
    }

    unhandled = pending;
    report.raw_first = NVSWITCH_LINK_RD32(device, link, NVLTLC, _NVLTLC_TX_LNK, _ERR_FIRST_1);

    bit = DRF_NUM(_NVLTLC_TX_LNK, _ERR_STATUS_1, _TIMEOUT_VC0, 1);
    if (nvswitch_test_flags(pending, bit))
    {
        NVSWITCH_REPORT_NONFATAL(_HW_NVLTLC_TX_LNK_AN1_TIMEOUT_VC0, "AN1 Timeout VC0");
        nvswitch_clear_flags(&unhandled, bit);
    }

    bit = DRF_NUM(_NVLTLC_TX_LNK, _ERR_STATUS_1, _TIMEOUT_VC1, 1);
    if (nvswitch_test_flags(pending, bit))
    {
        NVSWITCH_REPORT_NONFATAL(_HW_NVLTLC_TX_LNK_AN1_TIMEOUT_VC1, "AN1 Timeout VC1");
        nvswitch_clear_flags(&unhandled, bit);
    }

    bit = DRF_NUM(_NVLTLC_TX_LNK, _ERR_STATUS_1, _TIMEOUT_VC2, 1);
    if (nvswitch_test_flags(pending, bit))
    {
        NVSWITCH_REPORT_NONFATAL(_HW_NVLTLC_TX_LNK_AN1_TIMEOUT_VC2, "AN1 Timeout VC2");
        nvswitch_clear_flags(&unhandled, bit);
    }

    bit = DRF_NUM(_NVLTLC_TX_LNK, _ERR_STATUS_1, _TIMEOUT_VC3, 1);
    if (nvswitch_test_flags(pending, bit))
    {
        NVSWITCH_REPORT_NONFATAL(_HW_NVLTLC_TX_LNK_AN1_TIMEOUT_VC3, "AN1 Timeout VC3");
        nvswitch_clear_flags(&unhandled, bit);
    }

    bit = DRF_NUM(_NVLTLC_TX_LNK, _ERR_STATUS_1, _TIMEOUT_VC4, 1);
    if (nvswitch_test_flags(pending, bit))
    {
        NVSWITCH_REPORT_NONFATAL(_HW_NVLTLC_TX_LNK_AN1_TIMEOUT_VC4, "AN1 Timeout VC4");
        nvswitch_clear_flags(&unhandled, bit);
    }

    bit = DRF_NUM(_NVLTLC_TX_LNK, _ERR_STATUS_1, _TIMEOUT_VC5, 1);
    if (nvswitch_test_flags(pending, bit))
    {
        NVSWITCH_REPORT_NONFATAL(_HW_NVLTLC_TX_LNK_AN1_TIMEOUT_VC5, "AN1 Timeout VC5");
        nvswitch_clear_flags(&unhandled, bit);
    }

    bit = DRF_NUM(_NVLTLC_TX_LNK, _ERR_STATUS_1, _TIMEOUT_VC6, 1);
    if (nvswitch_test_flags(pending, bit))
    {
        NVSWITCH_REPORT_NONFATAL(_HW_NVLTLC_TX_LNK_AN1_TIMEOUT_VC6, "AN1 Timeout VC6");
        nvswitch_clear_flags(&unhandled, bit);
    }

    bit = DRF_NUM(_NVLTLC_TX_LNK, _ERR_STATUS_1, _TIMEOUT_VC7, 1);
    if (nvswitch_test_flags(pending, bit))
    {
        NVSWITCH_REPORT_NONFATAL(_HW_NVLTLC_TX_LNK_AN1_TIMEOUT_VC7, "AN1 Timeout VC7");
        nvswitch_clear_flags(&unhandled, bit);
    }

    NVSWITCH_UNHANDLED_CHECK(device, unhandled);

    // Disable interrupts that have occurred after fatal error.
    // (Here pending is a subset of raw_enable, so XOR acts as clear-only.)
    if (device->link[link].fatal_error_occurred)
    {
        NVSWITCH_LINK_WR32(device, link, NVLTLC, _NVLTLC_TX_LNK, _ERR_NON_FATAL_REPORT_EN_1,
            report.raw_enable ^ pending);
    }

    if (report.raw_first & report.mask)
    {
        NVSWITCH_LINK_WR32(device, link, NVLTLC, _NVLTLC_TX_LNK, _ERR_FIRST_1,
            report.raw_first & report.mask);
    }
    NVSWITCH_LINK_WR32(device, link, NVLTLC, _NVLTLC_TX_LNK, _ERR_STATUS_1, pending);

    if (unhandled != 0)
    {
        NVSWITCH_PRINT(device, WARN,
            "%s: Unhandled NVLTLC_TX_LNK _1 interrupts, link: %d pending: 0x%x enabled: 0x%x.\n",
            __FUNCTION__, link, pending, report.raw_enable);
        return -NVL_MORE_PROCESSING_REQUIRED;
    }

    return NVL_SUCCESS;
}

//
// Service nonfatal NVLTLC interrupts for every enabled link of one NVLIPT
// instance.
//
static NvlStatus
_nvswitch_service_nvltlc_nonfatal_ls10
(
    nvswitch_device *device,
    NvU32 nvlipt_instance
)
{
    NvU64 enabledLinkMask, localLinkMask, localEnabledLinkMask;
    NvU32 i;
    nvlink_link *link;
    NvlStatus status;
    NvlStatus return_status = NVL_SUCCESS;

    enabledLinkMask = nvswitch_get_enabled_link_mask(device);
    localLinkMask = NVSWITCH_NVLIPT_GET_LOCAL_LINK_MASK64_LS10(nvlipt_instance);
    localEnabledLinkMask = enabledLinkMask & localLinkMask;

    FOR_EACH_INDEX_IN_MASK(64, i, localEnabledLinkMask)
    {
        link = nvswitch_get_link(device, i);
        if (link == NULL)
        {
            // An interrupt on an invalid link should never occur
            NVSWITCH_ASSERT(link != NULL);
            continue;
        }

        if (NVSWITCH_GET_LINK_ENG_INST(device, i, NVLIPT) != nvlipt_instance)
        {
            NVSWITCH_ASSERT(0);
            break;
        }

        //
        // If link is in reset or NCISOC clock is off then
        // don't need to check the link for NVLTLC errors
        // as the IP's registers are off
        //
        if (nvswitch_is_link_in_reset(device, link) ||
            !nvswitch_are_link_clocks_on_ls10(device, link, NVSWITCH_PER_LINK_CLOCK_SET(NCISOCCLK)))
        {
continue; 6125 } 6126 6127 status = _nvswitch_service_nvltlc_rx_lnk_nonfatal_0_ls10(device, nvlipt_instance, i); 6128 if (status != NVL_SUCCESS) 6129 { 6130 return_status = status; 6131 } 6132 6133 status = _nvswitch_service_nvltlc_tx_lnk_nonfatal_0_ls10(device, nvlipt_instance, i); 6134 if (status != NVL_SUCCESS) 6135 { 6136 return_status = status; 6137 } 6138 6139 status = _nvswitch_service_nvltlc_rx_lnk_nonfatal_1_ls10(device, nvlipt_instance, i); 6140 if (status != NVL_SUCCESS) 6141 { 6142 return_status = status; 6143 } 6144 6145 status = _nvswitch_service_nvltlc_tx_lnk_nonfatal_1_ls10(device, nvlipt_instance, i); 6146 if (status != NVL_SUCCESS) 6147 { 6148 return_status = status; 6149 } 6150 } 6151 FOR_EACH_INDEX_IN_MASK_END; 6152 6153 return return_status; 6154 } 6155 6156 static NvlStatus 6157 _nvswitch_service_nvlipt_lnk_status_ls10 6158 ( 6159 nvswitch_device *device, 6160 NvU32 nvlipt_instance, 6161 NvU32 link_id 6162 ) 6163 { 6164 NvU32 pending, enabled, unhandled, bit; 6165 NvU64 mode; 6166 nvlink_link *link; 6167 link = nvswitch_get_link(device, link_id); 6168 6169 pending = NVSWITCH_LINK_RD32(device, link_id, NVLIPT_LNK, _NVLIPT_LNK, _INTR_STATUS); 6170 enabled = NVSWITCH_LINK_RD32(device, link_id, NVLIPT_LNK, _NVLIPT_LNK, _INTR_INT1_EN); 6171 pending &= enabled; 6172 unhandled = pending; 6173 6174 bit = DRF_NUM(_NVLIPT_LNK, _INTR_STATUS, _LINKSTATEREQUESTREADYSET, 1); 6175 if (nvswitch_test_flags(pending, bit)) 6176 { 6177 nvswitch_clear_flags(&unhandled, bit); 6178 if(nvswitch_corelib_get_dl_link_mode_ls10(link, &mode) != NVL_SUCCESS) 6179 { 6180 NVSWITCH_PRINT(device, ERROR, "%s: nvlipt_lnk_status: Failed to check link mode! LinkId %d\n", 6181 __FUNCTION__, link_id); 6182 } 6183 else if(mode == NVLINK_LINKSTATE_HS) 6184 { 6185 NVSWITCH_PRINT(device, INFO, "%s: nvlipt_lnk_status: Link is up!. 
LinkId %d\n", 6186 __FUNCTION__, link_id); 6187 if (nvswitch_lib_notify_client_events(device, 6188 NVSWITCH_DEVICE_EVENT_PORT_UP) != NVL_SUCCESS) 6189 { 6190 NVSWITCH_PRINT(device, ERROR, "%s: Failed to notify PORT_UP event. LinkId %d\n", 6191 __FUNCTION__, link_id); 6192 } 6193 6194 // 6195 // When a link comes up ensure that we finish off the post-training tasks: 6196 // -- enabling per-link DL interrupts 6197 // -- releasing buffer_ready on the link 6198 // 6199 nvswitch_corelib_training_complete_ls10(link); 6200 nvswitch_init_buffer_ready(device, link, NV_TRUE); 6201 } 6202 } 6203 6204 NVSWITCH_UNHANDLED_CHECK(device, unhandled); 6205 NVSWITCH_LINK_WR32(device, link_id, NVLIPT_LNK, _NVLIPT_LNK, _INTR_STATUS, pending); 6206 6207 if (unhandled != 0) 6208 { 6209 NVSWITCH_PRINT(device, WARN, 6210 "%s: Unhandled NVLIPT_LNK STATUS interrupts, pending: 0x%x enabled: 0x%x.\n", 6211 __FUNCTION__, pending, enabled); 6212 return -NVL_MORE_PROCESSING_REQUIRED; 6213 } 6214 6215 return NVL_SUCCESS; 6216 } 6217 6218 static NvlStatus 6219 _nvswitch_service_nvlipt_lnk_nonfatal_ls10 6220 ( 6221 nvswitch_device *device, 6222 NvU32 nvlipt_instance, 6223 NvU32 link 6224 ) 6225 { 6226 ls10_device *chip_device = NVSWITCH_GET_CHIP_DEVICE_LS10(device); 6227 nvlink_link *link_info = nvswitch_get_link(device, link); 6228 NvU32 lnkStateRequest, lnkStateStatus; 6229 NVSWITCH_INTERRUPT_LOG_TYPE report = { 0 }; 6230 NvU32 pending, bit, unhandled; 6231 6232 report.raw_pending = NVSWITCH_LINK_RD32(device, link, NVLIPT_LNK, _NVLIPT_LNK, _ERR_STATUS_0); 6233 report.raw_enable = NVSWITCH_LINK_RD32(device, link, NVLIPT_LNK, _NVLIPT_LNK, _ERR_NON_FATAL_REPORT_EN_0); 6234 report.mask = report.raw_enable; 6235 6236 pending = report.raw_pending & report.mask; 6237 if (pending == 0) 6238 { 6239 return -NVL_NOT_FOUND; 6240 } 6241 6242 unhandled = pending; 6243 report.raw_first = NVSWITCH_LINK_RD32(device, link, NVLIPT_LNK, _NVLIPT_LNK, _ERR_FIRST_0); 6244 6245 bit = DRF_NUM(_NVLIPT_LNK, _ERR_STATUS_0, 
_ILLEGALLINKSTATEREQUEST, 1); 6246 if (nvswitch_test_flags(pending, bit)) 6247 { 6248 NVSWITCH_REPORT_NONFATAL(_HW_NVLIPT_LNK_ILLEGALLINKSTATEREQUEST, "_HW_NVLIPT_LNK_ILLEGALLINKSTATEREQUEST"); 6249 nvswitch_clear_flags(&unhandled, bit); 6250 } 6251 6252 bit = DRF_NUM(_NVLIPT_LNK, _ERR_STATUS_0, _FAILEDMINIONREQUEST, 1); 6253 if (nvswitch_test_flags(pending, bit)) 6254 { 6255 // 6256 // Read back LINK_STATE_REQUESTS and LINK_STATE_STATUS registers 6257 // If request == ACTIVE, LINK_STATE_STATUS == ACTIVE_PENDING, request == ERROR 6258 // and there is a pending FAULT_UP interrupt then redo reset_and_drain since the 6259 // last try failed 6260 // 6261 // Mark that the defered link error mechanism as seeing a reset_and_train re-try so 6262 // the deferred task needs to re-create itself instead of continuing with the linkstate 6263 // checks 6264 // 6265 lnkStateStatus = NVSWITCH_LINK_RD32_LS10(device, link_info->linkNumber, NVLIPT_LNK, 6266 _NVLIPT_LNK, _CTRL_LINK_STATE_STATUS); 6267 6268 lnkStateRequest = NVSWITCH_LINK_RD32_LS10(device, link_info->linkNumber, 6269 NVLIPT_LNK , _NVLIPT_LNK , _CTRL_LINK_STATE_REQUEST); 6270 6271 if(FLD_TEST_DRF(_NVLIPT_LNK, _CTRL_LINK_STATE_REQUEST, _REQUEST, _ACTIVE, lnkStateRequest) && 6272 !(FLD_TEST_DRF(_NVLIPT_LNK, _CTRL_LINK_STATE_REQUEST, _STATUS, _REQUEST_SUCCESSFUL, lnkStateRequest) || 6273 FLD_TEST_DRF(_NVLIPT_LNK, _CTRL_LINK_STATE_REQUEST, _STATUS, _INIT, lnkStateRequest))&& 6274 FLD_TEST_DRF(_NVLIPT_LNK, _CTRL_LINK_STATE_STATUS, _CURRENTLINKSTATE, _ACTIVE_PENDING, lnkStateStatus) && 6275 DRF_NUM(_NVLDL_TOP, _INTR, _LTSSM_FAULT_UP, 1) & chip_device->deferredLinkErrors[link].fatalIntrMask.dl) 6276 { 6277 chip_device->deferredLinkErrors[link].bResetAndDrainRetry = NV_TRUE; 6278 device->hal.nvswitch_reset_and_drain_links(device, NVBIT64(link)); 6279 } 6280 6281 chip_device->deferredLinkErrors[link].nonFatalIntrMask.liptLnk |= bit; 6282 _nvswitch_create_deferred_link_errors_task_ls10(device, nvlipt_instance, link); 6283 
nvswitch_clear_flags(&unhandled, bit); 6284 } 6285 6286 bit = DRF_NUM(_NVLIPT_LNK, _ERR_STATUS_0, _RESERVEDREQUESTVALUE, 1); 6287 if (nvswitch_test_flags(pending, bit)) 6288 { 6289 NVSWITCH_REPORT_NONFATAL(_HW_NVLIPT_LNK_RESERVEDREQUESTVALUE, "_RESERVEDREQUESTVALUE"); 6290 nvswitch_clear_flags(&unhandled, bit); 6291 } 6292 6293 bit = DRF_NUM(_NVLIPT_LNK, _ERR_STATUS_0, _LINKSTATEWRITEWHILEBUSY, 1); 6294 if (nvswitch_test_flags(pending, bit)) 6295 { 6296 NVSWITCH_REPORT_NONFATAL(_HW_NVLIPT_LNK_LINKSTATEWRITEWHILEBUSY, "_LINKSTATEWRITEWHILEBUSY"); 6297 nvswitch_clear_flags(&unhandled, bit); 6298 } 6299 6300 bit = DRF_NUM(_NVLIPT_LNK, _ERR_STATUS_0, _LINK_STATE_REQUEST_TIMEOUT, 1); 6301 if (nvswitch_test_flags(pending, bit)) 6302 { 6303 NVSWITCH_REPORT_NONFATAL(_HW_NVLIPT_LNK_LINK_STATE_REQUEST_TIMEOUT, "_LINK_STATE_REQUEST_TIMEOUT"); 6304 nvswitch_clear_flags(&unhandled, bit); 6305 } 6306 6307 bit = DRF_NUM(_NVLIPT_LNK, _ERR_STATUS_0, _WRITE_TO_LOCKED_SYSTEM_REG_ERR, 1); 6308 if (nvswitch_test_flags(pending, bit)) 6309 { 6310 NVSWITCH_REPORT_NONFATAL(_HW_NVLIPT_LNK_WRITE_TO_LOCKED_SYSTEM_REG_ERR, "_WRITE_TO_LOCKED_SYSTEM_REG_ERR"); 6311 nvswitch_clear_flags(&unhandled, bit); 6312 } 6313 6314 NVSWITCH_UNHANDLED_CHECK(device, unhandled); 6315 6316 if (report.raw_first & report.mask) 6317 { 6318 NVSWITCH_LINK_WR32(device, link, NVLIPT_LNK, _NVLIPT_LNK, _ERR_FIRST_0, 6319 report.raw_first & report.mask); 6320 } 6321 NVSWITCH_LINK_WR32(device, link, NVLIPT_LNK, _NVLIPT_LNK, _ERR_STATUS_0, pending); 6322 6323 if (unhandled != 0) 6324 { 6325 NVSWITCH_PRINT(device, WARN, 6326 "%s: Unhandled NVLIPT_LNK NON_FATAL interrupts, pending: 0x%x enabled: 0x%x.\n", 6327 __FUNCTION__, pending, report.raw_enable); 6328 return -NVL_MORE_PROCESSING_REQUIRED; 6329 } 6330 6331 return NVL_SUCCESS; 6332 } 6333 6334 static NvlStatus 6335 _nvswitch_service_nvlipt_link_nonfatal_ls10 6336 ( 6337 nvswitch_device *device, 6338 NvU32 instance 6339 ) 6340 { 6341 NvU32 i, globalLink, bit, intrLink; 
6342 NvU32 interruptingLinks = 0; 6343 NvU32 lnkStatusChangeLinks = 0; 6344 NvlStatus status; 6345 NvU64 link_enable_mask; 6346 6347 link_enable_mask = ((NvU64)device->regkeys.link_enable_mask2 << 32 | 6348 (NvU64)device->regkeys.link_enable_mask); 6349 for (i = 0; i < NVSWITCH_LINKS_PER_NVLIPT_LS10; ++i) 6350 { 6351 globalLink = (instance * NVSWITCH_LINKS_PER_NVLIPT_LS10) + i; 6352 if ((NVBIT64(globalLink) & link_enable_mask) == 0) 6353 { 6354 continue; 6355 } 6356 intrLink = NVSWITCH_LINK_RD32(device, globalLink, NVLIPT_LNK, _NVLIPT_LNK, _ERR_STATUS_0); 6357 6358 if(intrLink) 6359 { 6360 interruptingLinks |= NVBIT(i); 6361 } 6362 6363 intrLink = NVSWITCH_LINK_RD32(device, globalLink, NVLIPT_LNK, _NVLIPT_LNK, _INTR_STATUS); 6364 6365 if(intrLink) 6366 { 6367 lnkStatusChangeLinks |= NVBIT(i); 6368 } 6369 } 6370 6371 if(lnkStatusChangeLinks) 6372 { 6373 for (i = 0; i < NVSWITCH_LINKS_PER_NVLIPT_LS10; ++i) 6374 { 6375 bit = NVBIT(i); 6376 globalLink = (instance * NVSWITCH_LINKS_PER_NVLIPT_LS10) + i; 6377 if (nvswitch_test_flags(lnkStatusChangeLinks, bit)) 6378 { 6379 if( _nvswitch_service_nvlipt_lnk_status_ls10(device, instance, globalLink) != NVL_SUCCESS) 6380 { 6381 NVSWITCH_PRINT(device, WARN, "%s: Could not process nvlipt link status interrupt. Continuing. 
LinkId %d\n", 6382 __FUNCTION__, globalLink); 6383 } 6384 } 6385 } 6386 } 6387 6388 if(interruptingLinks) 6389 { 6390 for (i = 0; i < NVSWITCH_LINKS_PER_NVLIPT_LS10; ++i) 6391 { 6392 bit = NVBIT(i); 6393 globalLink = (instance * NVSWITCH_LINKS_PER_NVLIPT_LS10) + i; 6394 if (nvswitch_test_flags(interruptingLinks, bit)) 6395 { 6396 status = _nvswitch_service_nvlipt_lnk_nonfatal_ls10(device, instance, globalLink); 6397 if (status != NVL_SUCCESS && status != -NVL_NOT_FOUND) 6398 { 6399 return -NVL_MORE_PROCESSING_REQUIRED; 6400 } 6401 } 6402 } 6403 return NVL_SUCCESS; 6404 } 6405 else 6406 { 6407 return -NVL_NOT_FOUND; 6408 } 6409 } 6410 6411 6412 NvlStatus 6413 _nvswitch_service_minion_fatal_ls10 6414 ( 6415 nvswitch_device *device, 6416 NvU32 instance 6417 ) 6418 { 6419 NvU32 pending, bit, unhandled, mask; 6420 6421 pending = NVSWITCH_MINION_RD32_LS10(device, instance, _MINION, _MINION_INTR); 6422 mask = NVSWITCH_MINION_RD32_LS10(device, instance, _MINION, _MINION_INTR_STALL_EN); 6423 6424 // Don't consider MINION Link interrupts in this handler 6425 mask &= ~(DRF_NUM(_MINION, _MINION_INTR_STALL_EN, _LINK, NV_MINION_MINION_INTR_STALL_EN_LINK_ENABLE_ALL)); 6426 6427 pending &= mask; 6428 6429 if (pending == 0) 6430 { 6431 return -NVL_NOT_FOUND; 6432 } 6433 6434 unhandled = pending; 6435 6436 bit = DRF_NUM(_MINION, _MINION_INTR, _FALCON_STALL, 0x1); 6437 if (nvswitch_test_flags(pending, bit)) 6438 { 6439 if (nvswitch_minion_service_falcon_interrupts_ls10(device, instance) == NVL_SUCCESS) 6440 { 6441 nvswitch_clear_flags(&unhandled, bit); 6442 } 6443 } 6444 6445 bit = DRF_NUM(_MINION, _MINION_INTR, _NONFATAL, 0x1); 6446 if (nvswitch_test_flags(pending, bit)) 6447 { 6448 NVSWITCH_PRINT(device, ERROR, "%s: servicing minion nonfatal interrupt\n", 6449 __FUNCTION__); 6450 NVSWITCH_MINION_WR32_LS10(device, instance, _MINION, _MINION_INTR, bit); 6451 nvswitch_clear_flags(&unhandled, bit); 6452 } 6453 6454 bit = DRF_NUM(_MINION, _MINION_INTR, _FATAL, 0x1); 6455 if 
(nvswitch_test_flags(pending, bit)) 6456 { 6457 NVSWITCH_PRINT(device, ERROR, "%s: servicing minion fatal interrupt\n", 6458 __FUNCTION__); 6459 NVSWITCH_MINION_WR32_LS10(device, instance, _MINION, _MINION_INTR, bit); 6460 nvswitch_clear_flags(&unhandled, bit); 6461 } 6462 6463 NVSWITCH_UNHANDLED_CHECK(device, unhandled); 6464 6465 if (unhandled != 0) 6466 { 6467 return -NVL_MORE_PROCESSING_REQUIRED; 6468 } 6469 6470 return NVL_SUCCESS; 6471 } 6472 6473 static NvlStatus 6474 _nvswitch_service_nvlw_nonfatal_ls10 6475 ( 6476 nvswitch_device *device, 6477 NvU32 instance 6478 ) 6479 { 6480 NvlStatus status[3]; 6481 6482 // TODO: @achaudhry invert handling so nvlipt_lnk is first 6483 status[0] = _nvswitch_service_nvldl_nonfatal_ls10(device, instance); 6484 status[1] = _nvswitch_service_nvltlc_nonfatal_ls10(device, instance); 6485 status[2] = _nvswitch_service_nvlipt_link_nonfatal_ls10(device, instance); 6486 6487 if ((status[0] != NVL_SUCCESS) && (status[0] != -NVL_NOT_FOUND) && 6488 (status[1] != NVL_SUCCESS) && (status[1] != -NVL_NOT_FOUND) && 6489 (status[2] != NVL_SUCCESS) && (status[2] != -NVL_NOT_FOUND)) 6490 { 6491 return -NVL_MORE_PROCESSING_REQUIRED; 6492 } 6493 6494 return NVL_SUCCESS; 6495 } 6496 6497 #if 0 6498 static NvlStatus 6499 _nvswitch_service_soe_fatal_ls10 6500 ( 6501 nvswitch_device *device 6502 ) 6503 { 6504 // We only support 1 SOE as of LS10. 
6505 if (soeService_HAL(device, (PSOE)device->pSoe) != 0) 6506 { 6507 return -NVL_MORE_PROCESSING_REQUIRED; 6508 } 6509 6510 return NVL_SUCCESS; 6511 } 6512 #endif //0 6513 6514 static NvlStatus 6515 _nvswitch_service_nvlipt_lnk_fatal_ls10 6516 ( 6517 nvswitch_device *device, 6518 NvU32 nvlipt_instance, 6519 NvU32 link 6520 ) 6521 { 6522 NVSWITCH_INTERRUPT_LOG_TYPE report = { 0 }; 6523 NvU32 pending, bit, unhandled; 6524 6525 report.raw_pending = NVSWITCH_LINK_RD32(device, link, NVLIPT_LNK, _NVLIPT_LNK, _ERR_STATUS_0); 6526 report.raw_enable = NVSWITCH_LINK_RD32(device, link, NVLIPT_LNK, _NVLIPT_LNK, _ERR_FATAL_REPORT_EN_0); 6527 report.mask = report.raw_enable; 6528 6529 pending = report.raw_pending & report.mask; 6530 if (pending == 0) 6531 { 6532 return -NVL_NOT_FOUND; 6533 } 6534 6535 unhandled = pending; 6536 report.raw_first = NVSWITCH_LINK_RD32(device, link, NVLIPT_LNK, _NVLIPT_LNK, _ERR_FIRST_0); 6537 6538 bit = DRF_NUM(_NVLIPT_LNK, _ERR_STATUS_0, _SLEEPWHILEACTIVELINK, 1); 6539 if (nvswitch_test_flags(pending, bit)) 6540 { 6541 NVSWITCH_REPORT_FATAL(_HW_NVLIPT_LNK_SLEEPWHILEACTIVELINK, "No non-empty link is detected", NV_FALSE); 6542 nvswitch_clear_flags(&unhandled, bit); 6543 } 6544 6545 bit = DRF_NUM(_NVLIPT_LNK, _ERR_STATUS_0, _RSTSEQ_PHYCTL_TIMEOUT, 1); 6546 if (nvswitch_test_flags(pending, bit)) 6547 { 6548 NVSWITCH_REPORT_FATAL(_HW_NVLIPT_LNK_RSTSEQ_PHYCTL_TIMEOUT, "Reset sequencer timed out waiting for a handshake from PHYCTL", NV_FALSE); 6549 nvswitch_clear_flags(&unhandled, bit); 6550 } 6551 6552 bit = DRF_NUM(_NVLIPT_LNK, _ERR_STATUS_0, _RSTSEQ_CLKCTL_TIMEOUT, 1); 6553 if (nvswitch_test_flags(pending, bit)) 6554 { 6555 NVSWITCH_REPORT_FATAL(_HW_NVLIPT_LNK_RSTSEQ_CLKCTL_TIMEOUT, "Reset sequencer timed out waiting for a handshake from CLKCTL", NV_FALSE); 6556 nvswitch_clear_flags(&unhandled, bit); 6557 } 6558 6559 NVSWITCH_UNHANDLED_CHECK(device, unhandled); 6560 6561 // Disable interrupts that have occurred after fatal error. 
6562 if (device->link[link].fatal_error_occurred) 6563 { 6564 NVSWITCH_LINK_WR32(device, link, NVLIPT_LNK, _NVLIPT_LNK, _ERR_FATAL_REPORT_EN_0, 6565 report.raw_enable ^ pending); 6566 } 6567 6568 // clear interrupts 6569 if (report.raw_first & report.mask) 6570 { 6571 NVSWITCH_LINK_WR32(device, link, NVLIPT_LNK, _NVLIPT_LNK, _ERR_FIRST_0, 6572 report.raw_first & report.mask); 6573 } 6574 NVSWITCH_LINK_WR32(device, link, NVLIPT_LNK, _NVLIPT_LNK, _ERR_STATUS_0, pending); 6575 6576 if (unhandled != 0) 6577 { 6578 NVSWITCH_PRINT(device, WARN, 6579 "%s: Unhandled NVLIPT_LNK FATAL interrupts, pending: 0x%x enabled: 0x%x.\n", 6580 __FUNCTION__, pending, report.raw_enable); 6581 return -NVL_MORE_PROCESSING_REQUIRED; 6582 } 6583 6584 return NVL_SUCCESS; 6585 } 6586 6587 static NvlStatus 6588 _nvswitch_service_nvlipt_link_fatal_ls10 6589 ( 6590 nvswitch_device *device, 6591 NvU32 instance 6592 ) 6593 { 6594 NvU32 i, globalLink, bit, intrLink; 6595 NvU32 interruptingLinks = 0; 6596 6597 //read in error status of current link 6598 for (i = 0; i < NVSWITCH_LINKS_PER_NVLIPT_LS10; ++i) 6599 { 6600 globalLink = (instance * NVSWITCH_LINKS_PER_NVLIPT_LS10) + i; 6601 6602 intrLink = NVSWITCH_LINK_RD32(device, globalLink, NVLIPT_LNK, _NVLIPT_LNK, _ERR_STATUS_0); 6603 6604 if(intrLink) 6605 { 6606 interruptingLinks |= NVBIT(i); 6607 } 6608 } 6609 6610 if(interruptingLinks) 6611 { 6612 for (i = 0; i < NVSWITCH_LINKS_PER_NVLIPT_LS10; ++i) 6613 { 6614 bit = NVBIT(i); 6615 globalLink = (instance * NVSWITCH_LINKS_PER_NVLIPT_LS10) + i; 6616 if (nvswitch_test_flags(interruptingLinks, bit)) 6617 { 6618 if( _nvswitch_service_nvlipt_lnk_fatal_ls10(device, instance, globalLink) != NVL_SUCCESS) 6619 { 6620 return -NVL_MORE_PROCESSING_REQUIRED; 6621 } 6622 } 6623 } 6624 return NVL_SUCCESS; 6625 } 6626 else 6627 { 6628 return -NVL_NOT_FOUND; 6629 } 6630 } 6631 6632 static NvlStatus 6633 _nvswitch_service_nvlw_fatal_ls10 6634 ( 6635 nvswitch_device *device, 6636 NvU32 instance 6637 ) 6638 { 6639 
    NvlStatus status[6];

    status[0] = device->hal.nvswitch_service_minion_link(device, instance);
    status[1] = _nvswitch_service_nvldl_fatal_ls10(device, instance);
    status[2] = _nvswitch_service_nvltlc_fatal_ls10(device, instance);
    status[3] = _nvswitch_service_minion_fatal_ls10(device, instance);
    status[4] = _nvswitch_service_nvlipt_common_fatal_ls10(device, instance);
    status[5] = _nvswitch_service_nvlipt_link_fatal_ls10(device, instance);


    // Only report failure if every sub-handler neither succeeded nor
    // reported "nothing pending".
    if (status[0] != NVL_SUCCESS && status[0] != -NVL_NOT_FOUND &&
        status[1] != NVL_SUCCESS && status[1] != -NVL_NOT_FOUND &&
        status[2] != NVL_SUCCESS && status[2] != -NVL_NOT_FOUND &&
        status[3] != NVL_SUCCESS && status[3] != -NVL_NOT_FOUND &&
        status[4] != NVL_SUCCESS && status[4] != -NVL_NOT_FOUND &&
        status[5] != NVL_SUCCESS && status[5] != -NVL_NOT_FOUND)
    {
        return -NVL_MORE_PROCESSING_REQUIRED;
    }

    return NVL_SUCCESS;
}

/*
 * @Brief : Enable top level HW interrupts.
 *
 * @Description : Arms the GIN CPU interrupt leaf enables for the NPG, NVLW
 *                and NXBAR trees plus the selected UNITS sources, then sets
 *                the top-level enable. Correctable leaves are armed with 0
 *                (correctable reporting disabled).
 *
 * @param[in] device        operate on this device
 */
void
nvswitch_lib_enable_interrupts_ls10
(
    nvswitch_device *device
)
{
    NVSWITCH_ENG_WR32(device, GIN, , 0, _CTRL, _CPU_INTR_LEAF_EN_SET(NV_CTRL_CPU_INTR_NPG_FATAL_IDX), 0xFFFF);
    NVSWITCH_ENG_WR32(device, GIN, , 0, _CTRL, _CPU_INTR_LEAF_EN_SET(NV_CTRL_CPU_INTR_NPG_NON_FATAL_IDX), 0xFFFF);
    NVSWITCH_ENG_WR32(device, GIN, , 0, _CTRL, _CPU_INTR_LEAF_EN_SET(NV_CTRL_CPU_INTR_NPG_CORRECTABLE_IDX), 0);

    NVSWITCH_ENG_WR32(device, GIN, , 0, _CTRL, _CPU_INTR_LEAF_EN_SET(NV_CTRL_CPU_INTR_NVLW_FATAL_IDX), 0xFFFF);
    NVSWITCH_ENG_WR32(device, GIN, , 0, _CTRL, _CPU_INTR_LEAF_EN_SET(NV_CTRL_CPU_INTR_NVLW_NON_FATAL_IDX), 0xFFFF);
    NVSWITCH_ENG_WR32(device, GIN, , 0, _CTRL, _CPU_INTR_LEAF_EN_SET(NV_CTRL_CPU_INTR_NVLW_CORRECTABLE_IDX), 0);

    NVSWITCH_ENG_WR32(device, GIN, , 0, _CTRL, _CPU_INTR_LEAF_EN_SET(NV_CTRL_CPU_INTR_NXBAR_FATAL_IDX), 0x7);

    // Clear all UNITS enables first, then set exactly the sources we service.
    NVSWITCH_ENG_WR32(device, GIN, , 0, _CTRL, _CPU_INTR_LEAF_EN_CLEAR(NV_CTRL_CPU_INTR_UNITS_IDX), 0xFFFFFFFF);
    NVSWITCH_ENG_WR32(device, GIN, , 0, _CTRL, _CPU_INTR_LEAF_EN_SET(NV_CTRL_CPU_INTR_UNITS_IDX),
        DRF_NUM(_CTRL, _CPU_INTR_UNITS, _PMGR_HOST, 1) |
        DRF_NUM(_CTRL, _CPU_INTR_UNITS, _PTIMER, 1) |
        DRF_NUM(_CTRL, _CPU_INTR_UNITS, _PTIMER_ALARM, 1) |
        DRF_NUM(_CTRL, _CPU_INTR_UNITS, _XTL_CPU, 1) |
        DRF_NUM(_CTRL, _CPU_INTR_UNITS, _XAL_EP, 1) |
        DRF_NUM(_CTRL, _CPU_INTR_UNITS, _PRIV_RING, 1));

    NVSWITCH_ENG_WR32(device, GIN, , 0, _CTRL, _CPU_INTR_TOP_EN_SET(0), 0xFFFFFFFF);
}

/*
 * @Brief : Disable top level HW interrupts.
 *
 * @Description : Mirror of nvswitch_lib_enable_interrupts_ls10 — clears the
 *                same leaf enables and the top-level enable.
 *
 * @param[in] device        operate on this device
 */
void
nvswitch_lib_disable_interrupts_ls10
(
    nvswitch_device *device
)
{
    NVSWITCH_ENG_WR32(device, GIN, , 0, _CTRL, _CPU_INTR_LEAF_EN_CLEAR(NV_CTRL_CPU_INTR_NPG_FATAL_IDX), 0xFFFF);
    NVSWITCH_ENG_WR32(device, GIN, , 0, _CTRL, _CPU_INTR_LEAF_EN_CLEAR(NV_CTRL_CPU_INTR_NPG_NON_FATAL_IDX), 0xFFFF);
    NVSWITCH_ENG_WR32(device, GIN, , 0, _CTRL, _CPU_INTR_LEAF_EN_CLEAR(NV_CTRL_CPU_INTR_NPG_CORRECTABLE_IDX), 0);

    NVSWITCH_ENG_WR32(device, GIN, , 0, _CTRL, _CPU_INTR_LEAF_EN_CLEAR(NV_CTRL_CPU_INTR_NVLW_FATAL_IDX), 0xFFFF);
    NVSWITCH_ENG_WR32(device, GIN, , 0, _CTRL, _CPU_INTR_LEAF_EN_CLEAR(NV_CTRL_CPU_INTR_NVLW_NON_FATAL_IDX), 0xFFFF);
    NVSWITCH_ENG_WR32(device, GIN, , 0, _CTRL, _CPU_INTR_LEAF_EN_CLEAR(NV_CTRL_CPU_INTR_NVLW_CORRECTABLE_IDX), 0);

    NVSWITCH_ENG_WR32(device, GIN, , 0, _CTRL, _CPU_INTR_LEAF_EN_CLEAR(NV_CTRL_CPU_INTR_NXBAR_FATAL_IDX), 0x7);

    NVSWITCH_ENG_WR32(device, GIN, , 0, _CTRL, _CPU_INTR_LEAF_EN_CLEAR(NV_CTRL_CPU_INTR_UNITS_IDX),
        DRF_NUM(_CTRL, _CPU_INTR_UNITS, _PMGR_HOST, 1) |
        DRF_NUM(_CTRL, _CPU_INTR_UNITS, _PTIMER, 1) |
        DRF_NUM(_CTRL, _CPU_INTR_UNITS, _PTIMER_ALARM, 1) |
        DRF_NUM(_CTRL, _CPU_INTR_UNITS, _XTL_CPU, 1) |
        DRF_NUM(_CTRL, _CPU_INTR_UNITS, _XAL_EP, 1) |
        DRF_NUM(_CTRL, _CPU_INTR_UNITS, _PRIV_RING, 1));

    NVSWITCH_ENG_WR32(device, GIN, , 0, _CTRL, _CPU_INTR_TOP_EN_CLEAR(0), 0xFFFFFFFF);
}

//
// Check if there are interrupts pending.
//
// On silicon/emulation we only use MSIs which are not shared, so this
// function does not need to be called.
//
NvlStatus
nvswitch_lib_check_interrupts_ls10
(
    nvswitch_device *device
)
{
    NvlStatus retval = NVL_SUCCESS;
    NvU32 val;

    val = NVSWITCH_ENG_RD32(device, GIN, , 0, _CTRL, _CPU_INTR_TOP(0));
    if (DRF_NUM(_CTRL, _CPU_INTR_TOP, _VALUE, val) != 0)
    {
        retval = -NVL_MORE_PROCESSING_REQUIRED;
    }

    return retval;
}

static void
_nvswitch_retrigger_engine_intr_ls10
(
    nvswitch_device *device
)
{

    // re-trigger engine to gin interrupts for CPR and NPG on the FATAL and NONFATAL trees
    NVSWITCH_BCAST_WR32_LS10(device, CPR, _CPR_SYS, _INTR_RETRIGGER(0), 1);
    NVSWITCH_BCAST_WR32_LS10(device, CPR, _CPR_SYS, _INTR_RETRIGGER(1), 1);

    NVSWITCH_BCAST_WR32_LS10(device, NPG, _NPG, _INTR_RETRIGGER(0), 1);
    NVSWITCH_BCAST_WR32_LS10(device, NPG, _NPG, _INTR_RETRIGGER(1), 1);
}

//
// Ack and service only the NVLW fatal leaf (used to drive MINION servicing
// for all links outside the full interrupt-service path).
//
void
nvswitch_service_minion_all_links_ls10
(
    nvswitch_device *device
)
{
    NvU32 val, i;

    // Check NVLW
    val = NVSWITCH_ENG_RD32(device, GIN, , 0, _CTRL, _CPU_INTR_NVLW_FATAL);
    val = DRF_NUM(_CTRL, _CPU_INTR_NVLW_FATAL, _MASK, val);
    if (val != 0)
    {
        NVSWITCH_ENG_WR32(device, GIN, , 0, _CTRL,
            _CPU_INTR_LEAF(NV_CTRL_CPU_INTR_NVLW_FATAL_IDX), val);

        for (i = 0; i < DRF_SIZE(NV_CTRL_CPU_INTR_NVLW_FATAL_MASK); i++)
        {
            if (val & NVBIT(i))
                (void)_nvswitch_service_nvlw_fatal_ls10(device, i);
        }
    }
}

//
// Service interrupt and re-enable interrupts. Interrupts should disabled when
// this is called.
//
NvlStatus
nvswitch_lib_service_interrupts_ls10
(
    nvswitch_device *device
)
{
    NvlStatus status = NVL_SUCCESS;
    NvlStatus return_status = NVL_SUCCESS;
    NvU32 val;
    NvU32 i;

    //
    // Interrupt handler steps:
    // 1. Read Leaf interrupt
    // 2. Clear leaf interrupt
    // 3. Run leaf specific interrupt handler
    //
    val = NVSWITCH_ENG_RD32(device, GIN, , 0, _CTRL, _CPU_INTR_NVLW_FATAL);
    val = DRF_NUM(_CTRL, _CPU_INTR_NVLW_FATAL, _MASK, val);
    if (val != 0)
    {
        NVSWITCH_PRINT(device, INFO, "%s: NVLW FATAL interrupts pending = 0x%x\n",
            __FUNCTION__, val);

        NVSWITCH_ENG_WR32(device, GIN, , 0, _CTRL, _CPU_INTR_LEAF(NV_CTRL_CPU_INTR_NVLW_FATAL_IDX), val);

        for (i = 0; i < DRF_SIZE(NV_CTRL_CPU_INTR_NVLW_FATAL_MASK); i++)
        {
            if (val & NVBIT(i))
            {
                status = _nvswitch_service_nvlw_fatal_ls10(device, i);
                if (status != NVL_SUCCESS)
                {
                    NVSWITCH_PRINT(device, INFO, "%s: NVLW[%d] FATAL interrupt handling status = %d\n",
                        __FUNCTION__, i, status);
                    return_status = status;
                }
            }
        }
    }

    val = NVSWITCH_ENG_RD32(device, GIN, , 0, _CTRL, _CPU_INTR_NVLW_NON_FATAL);
    val = DRF_NUM(_CTRL, _CPU_INTR_NVLW_NON_FATAL, _MASK, val);
    if (val != 0)
    {
        NVSWITCH_PRINT(device, INFO, "%s: NVLW NON_FATAL interrupts pending = 0x%x\n",
            __FUNCTION__, val);
        NVSWITCH_ENG_WR32(device, GIN, , 0, _CTRL, _CPU_INTR_LEAF(NV_CTRL_CPU_INTR_NVLW_NON_FATAL_IDX), val);
        for (i = 0; i < DRF_SIZE(NV_CTRL_CPU_INTR_NVLW_NON_FATAL_MASK); i++)
        {
            if (val & NVBIT(i))
            {
                status = _nvswitch_service_nvlw_nonfatal_ls10(device, i);
                if (status != NVL_SUCCESS)
                {
                    NVSWITCH_PRINT(device, INFO, "%s: NVLW[%d] NON_FATAL interrupt handling status = %d\n",
                        __FUNCTION__, i, status);
                    return_status = status;
                }
            }
        }
    }

    // Correctable interrupts have no handler: report and flag for reprocessing.
    val = NVSWITCH_ENG_RD32(device, GIN, , 0, _CTRL, _CPU_INTR_NVLW_CORRECTABLE);
    val = DRF_NUM(_CTRL, _CPU_INTR_NVLW_CORRECTABLE, _MASK, val);
    if (val != 0)
    {
        NVSWITCH_PRINT(device, ERROR, "%s: NVLW CORRECTABLE interrupts pending = 0x%x\n",
            __FUNCTION__, val);
        return_status = -NVL_MORE_PROCESSING_REQUIRED;
    }

    // Check NPG
    val = NVSWITCH_ENG_RD32(device, GIN, , 0, _CTRL, _CPU_INTR_NPG_FATAL);
    val = DRF_NUM(_CTRL, _CPU_INTR_NPG_FATAL, _MASK, val);
    if (val != 0)
    {
        NVSWITCH_PRINT(device, INFO, "%s: NPG FATAL interrupts pending = 0x%x\n",
            __FUNCTION__, val);
        NVSWITCH_ENG_WR32(device, GIN, , 0, _CTRL, _CPU_INTR_LEAF(NV_CTRL_CPU_INTR_NPG_FATAL_IDX), val);
        for (i = 0; i < DRF_SIZE(NV_CTRL_CPU_INTR_NPG_FATAL_MASK); i++)
        {
            if (val & NVBIT(i))
            {
                status = _nvswitch_service_npg_fatal_ls10(device, i);
                if (status != NVL_SUCCESS)
                {
                    NVSWITCH_PRINT(device, INFO, "%s: NPG[%d] FATAL interrupt handling status = %d\n",
                        __FUNCTION__, i, status);
                    return_status = status;
                }
            }
        }
    }

    val = NVSWITCH_ENG_RD32(device, GIN, , 0, _CTRL, _CPU_INTR_NPG_NON_FATAL);
    val = DRF_NUM(_CTRL, _CPU_INTR_NPG_NON_FATAL, _MASK, val);
    if (val != 0)
    {
        NVSWITCH_PRINT(device, INFO, "%s: NPG NON_FATAL interrupts pending = 0x%x\n",
            __FUNCTION__, val);
        NVSWITCH_ENG_WR32(device, GIN, , 0, _CTRL, _CPU_INTR_LEAF(NV_CTRL_CPU_INTR_NPG_NON_FATAL_IDX), val);
        for (i = 0; i < DRF_SIZE(NV_CTRL_CPU_INTR_NPG_NON_FATAL_MASK); i++)
        {
            if (val & NVBIT(i))
            {
                status = _nvswitch_service_npg_nonfatal_ls10(device, i);
                if (status != NVL_SUCCESS)
                {
                    NVSWITCH_PRINT(device, INFO, "%s: NPG[%d] NON_FATAL interrupt handling status = %d\n",
                        __FUNCTION__, i, status);
                    return_status = status;
                }
            }
        }
    }

    val = NVSWITCH_ENG_RD32(device, GIN, , 0, _CTRL, _CPU_INTR_NPG_CORRECTABLE);
    val = DRF_NUM(_CTRL, _CPU_INTR_NPG_CORRECTABLE, _MASK, val);
    if (val != 0)
    {
        NVSWITCH_PRINT(device, ERROR, "%s: NPG CORRECTABLE interrupts pending = 0x%x\n",
            __FUNCTION__, val);
        return_status = -NVL_MORE_PROCESSING_REQUIRED;
    }

    // Check NXBAR
    val = NVSWITCH_ENG_RD32(device, GIN, , 0, _CTRL, _CPU_INTR_NXBAR_FATAL);
    val = DRF_NUM(_CTRL, _CPU_INTR_NXBAR_FATAL, _MASK, val);
    if (val != 0)
    {
        NVSWITCH_PRINT(device, INFO, "%s: NXBAR FATAL interrupts pending = 0x%x\n",
            __FUNCTION__, val);
        NVSWITCH_ENG_WR32(device, GIN, , 0, _CTRL, _CPU_INTR_LEAF(NV_CTRL_CPU_INTR_NXBAR_FATAL_IDX), val);
        for (i = 0; i < DRF_SIZE(NV_CTRL_CPU_INTR_NXBAR_FATAL_MASK); i++)
        {
            if (val & NVBIT(i))
            {
                status = _nvswitch_service_nxbar_fatal_ls10(device, i);
                if (status != NVL_SUCCESS)
                {
                    NVSWITCH_PRINT(device, INFO, "%s: NXBAR[%d] FATAL interrupt handling status = %d\n",
                        __FUNCTION__, i, status);
                    return_status = status;
                }
            }
        }
    }

    // Check UNITS
    val = NVSWITCH_ENG_RD32(device, GIN, , 0, _CTRL, _CPU_INTR_UNITS);
    if (val != 0)
    {
        NVSWITCH_PRINT(device, INFO, "%s: UNIT interrupts pending = 0x%x\n",
            __FUNCTION__, val);

        NVSWITCH_ENG_WR32(device, GIN, , 0, _CTRL, _CPU_INTR_LEAF(NV_CTRL_CPU_INTR_UNITS_IDX), val);
        if (FLD_TEST_DRF_NUM(_CTRL, _CPU_INTR_UNITS, _PMGR_HOST, 1, val))
        {
            NVSWITCH_PRINT(device, ERROR, "%s: _PMGR_HOST interrupt pending\n",
                __FUNCTION__);
            return_status = -NVL_MORE_PROCESSING_REQUIRED;
        }
        if (FLD_TEST_DRF_NUM(_CTRL, _CPU_INTR_UNITS, _PTIMER, 1, val))
        {
            NVSWITCH_PRINT(device, ERROR, "%s: _PTIMER interrupt pending\n",
                __FUNCTION__);
            return_status = -NVL_MORE_PROCESSING_REQUIRED;
        }
        if (FLD_TEST_DRF_NUM(_CTRL, _CPU_INTR_UNITS, _PTIMER_ALARM, 1, val))
        {
            NVSWITCH_PRINT(device, ERROR, "%s: _PTIMER_ALARM interrupt pending\n",
                __FUNCTION__);
            return_status = -NVL_MORE_PROCESSING_REQUIRED;
        }
        if (FLD_TEST_DRF_NUM(_CTRL, _CPU_INTR_UNITS, _XTL_CPU, 1, val))
        {
            NVSWITCH_PRINT(device, ERROR, "%s: _XTL_CPU interrupt pending\n",
                __FUNCTION__);
            return_status = -NVL_MORE_PROCESSING_REQUIRED;
        }
        if (FLD_TEST_DRF_NUM(_CTRL, _CPU_INTR_UNITS, _XAL_EP, 1, val))
        {
            NVSWITCH_PRINT(device, ERROR, "%s: _XAL_EP interrupt pending\n",
                __FUNCTION__);
            return_status = -NVL_MORE_PROCESSING_REQUIRED;
        }
        if (FLD_TEST_DRF_NUM(_CTRL, _CPU_INTR_UNITS, _PRIV_RING, 1, val))
        {
            status = _nvswitch_service_priv_ring_ls10(device);
            if (status != NVL_SUCCESS)
            {
                NVSWITCH_PRINT(device, ERROR, "%s: Problem handling PRI errors\n",
                    __FUNCTION__);
                return_status = status;
            }
        }
    }

    // step 4 -- retrigger engine interrupts
    _nvswitch_retrigger_engine_intr_ls10(device);

    // step 5 -- retrigger top level GIN interrupts
    NVSWITCH_ENG_WR32(device, GIN, , 0, _CTRL, _CPU_INTR_TOP_EN_CLEAR(0), 0xFFFFFFFF);
    NVSWITCH_ENG_WR32(device, GIN, , 0, _CTRL, _CPU_INTR_TOP_EN_SET(0), 0xFFFFFFFF);

    return return_status;
}

/*
 * Initialize interrupt tree HW for all units.
 *
 * Init and servicing both depend on bits matching across STATUS/MASK
 * and IErr STATUS/LOG/REPORT/CONTAIN registers.
*/
void
nvswitch_initialize_interrupt_tree_ls10
(
    nvswitch_device *device
)
{
    NvU64 link_mask = nvswitch_get_enabled_link_mask(device);
    NvU32 i, val;

    // NPG/NPORT
    _nvswitch_initialize_nport_interrupts_ls10(device);

    // NXBAR
    _nvswitch_initialize_nxbar_interrupts_ls10(device);

    //
    // Enable every interrupt class (fatal, nonfatal, correctable, intr0,
    // intr1) in the per-link _LINK_INTR_0_MASK for each enabled link.
    //
    FOR_EACH_INDEX_IN_MASK(64, i, link_mask)
    {
        val = NVSWITCH_LINK_RD32(device, i,
                  NVLW, _NVLW, _LINK_INTR_0_MASK(i));
        val = FLD_SET_DRF(_NVLW, _LINK_INTR_0_MASK, _FATAL, _ENABLE, val);
        val = FLD_SET_DRF(_NVLW, _LINK_INTR_0_MASK, _NONFATAL, _ENABLE, val);
        val = FLD_SET_DRF(_NVLW, _LINK_INTR_0_MASK, _CORRECTABLE, _ENABLE, val);
        val = FLD_SET_DRF(_NVLW, _LINK_INTR_0_MASK, _INTR0, _ENABLE, val);
        val = FLD_SET_DRF(_NVLW, _LINK_INTR_0_MASK, _INTR1, _ENABLE, val);
        NVSWITCH_LINK_WR32(device, i, NVLW, _NVLW, _LINK_INTR_0_MASK(i), val);
    }
    FOR_EACH_INDEX_IN_MASK_END;

    // Same enables for the second per-link interrupt tree (_LINK_INTR_1_MASK).
    FOR_EACH_INDEX_IN_MASK(64, i, link_mask)
    {
        val = NVSWITCH_LINK_RD32(device, i,
                  NVLW, _NVLW, _LINK_INTR_1_MASK(i));
        val = FLD_SET_DRF(_NVLW, _LINK_INTR_1_MASK, _FATAL, _ENABLE, val);
        val = FLD_SET_DRF(_NVLW, _LINK_INTR_1_MASK, _NONFATAL, _ENABLE, val);
        val = FLD_SET_DRF(_NVLW, _LINK_INTR_1_MASK, _CORRECTABLE, _ENABLE, val);
        val = FLD_SET_DRF(_NVLW, _LINK_INTR_1_MASK, _INTR0, _ENABLE, val);
        val = FLD_SET_DRF(_NVLW, _LINK_INTR_1_MASK, _INTR1, _ENABLE, val);
        NVSWITCH_LINK_WR32(device, i, NVLW, _NVLW, _LINK_INTR_1_MASK(i), val);
    }
    FOR_EACH_INDEX_IN_MASK_END;

    // NVLIPT
    _nvswitch_initialize_nvlipt_interrupts_ls10(device);
}

//
// Service Nvswitch NVLDL Fatal interrupts
//
// Reads the link's _NVLDL_TOP _INTR status, masks it with the stall-enabled
// set, reports each pending fatal condition, and clears the serviced bits.
//
// Returns:
//   -NVL_NOT_FOUND                  if no enabled interrupt is pending
//   -NVL_MORE_PROCESSING_REQUIRED   if a pending bit was not recognized
//   NVL_SUCCESS                     otherwise
//
NvlStatus
nvswitch_service_nvldl_fatal_link_ls10
(
    nvswitch_device *device,
    NvU32 nvlipt_instance,
    NvU32 link
)
{
    ls10_device *chip_device = NVSWITCH_GET_CHIP_DEVICE_LS10(device);
    NvU32 pending, bit, unhandled;
    // Set when reset_and_drain runs; the final _INTR clear write is then
    // skipped (see LTSSM_FAULT_UP handling below).
    NvBool bSkipIntrClear = NV_FALSE;

    NVSWITCH_INTERRUPT_LOG_TYPE report = { 0 };

    report.raw_pending = NVSWITCH_LINK_RD32(device, link, NVLDL, _NVLDL_TOP, _INTR);
    report.raw_enable = NVSWITCH_LINK_RD32(device, link, NVLDL, _NVLDL_TOP, _INTR_STALL_EN);
    report.mask = report.raw_enable;
    // Only service interrupts that are both pending and stall-enabled.
    pending = report.raw_pending & report.mask;

    if (pending == 0)
    {
        return -NVL_NOT_FOUND;
    }

    unhandled = pending;

    bit = DRF_NUM(_NVLDL_TOP, _INTR, _TX_FAULT_RAM, 1);
    if (nvswitch_test_flags(pending, bit))
    {
        NVSWITCH_REPORT_FATAL(_HW_DLPL_TX_FAULT_RAM, "TX Fault Ram", NV_FALSE);
        nvswitch_clear_flags(&unhandled, bit);
    }

    bit = DRF_NUM(_NVLDL_TOP, _INTR, _TX_FAULT_INTERFACE, 1);
    if (nvswitch_test_flags(pending, bit))
    {
        NVSWITCH_REPORT_FATAL(_HW_DLPL_TX_FAULT_INTERFACE, "TX Fault Interface", NV_FALSE);
        nvswitch_clear_flags(&unhandled, bit);
    }

    bit = DRF_NUM(_NVLDL_TOP, _INTR, _TX_FAULT_SUBLINK_CHANGE, 1);
    if (nvswitch_test_flags(pending, bit))
    {
        NVSWITCH_REPORT_FATAL(_HW_DLPL_TX_FAULT_SUBLINK_CHANGE, "TX Fault Sublink Change", NV_FALSE);
        nvswitch_clear_flags(&unhandled, bit);
    }

    bit = DRF_NUM(_NVLDL_TOP, _INTR, _RX_FAULT_SUBLINK_CHANGE, 1);
    if (nvswitch_test_flags(pending, bit))
    {
        NVSWITCH_REPORT_FATAL(_HW_DLPL_RX_FAULT_SUBLINK_CHANGE, "RX Fault Sublink Change", NV_FALSE);
        nvswitch_clear_flags(&unhandled, bit);
    }

    bit = DRF_NUM(_NVLDL_TOP, _INTR, _RX_FAULT_DL_PROTOCOL, 1);
    if (nvswitch_test_flags(pending, bit))
    {
        NVSWITCH_REPORT_FATAL(_HW_DLPL_RX_FAULT_DL_PROTOCOL, "RX Fault DL Protocol", NV_FALSE);
        nvswitch_clear_flags(&unhandled, bit);
    }

    bit = DRF_NUM(_NVLDL_TOP, _INTR, _LTSSM_FAULT_DOWN, 1);
    if (nvswitch_test_flags(pending, bit))
    {
        NVSWITCH_REPORT_FATAL(_HW_DLPL_LTSSM_FAULT_DOWN, "LTSSM Fault Down", NV_FALSE);
        nvswitch_clear_flags(&unhandled, bit);
    }

    bit = DRF_NUM(_NVLDL_TOP, _INTR, _LTSSM_PROTOCOL, 1);
    if (nvswitch_test_flags(pending, bit))
    {
        NVSWITCH_REPORT_FATAL(_HW_DLPL_LTSSM_PROTOCOL, "LTSSM Protocol Error", NV_FALSE);
        nvswitch_clear_flags(&unhandled, bit);
    }

    bit = DRF_NUM(_NVLDL_TOP, _INTR, _PHY_A, 1);
    if (nvswitch_test_flags(pending, bit))
    {
        NVSWITCH_REPORT_FATAL(_HW_DLPL_PHY_A, "PHY_A Error", NV_FALSE);
        nvswitch_clear_flags(&unhandled, bit);
    }

    bit = DRF_NUM(_NVLDL_TOP, _INTR, _TX_PL_ERROR, 1);
    if (nvswitch_test_flags(pending, bit))
    {
        NVSWITCH_REPORT_FATAL(_HW_DLPL_TX_PL_ERROR, "TX_PL Error", NV_FALSE);
        nvswitch_clear_flags(&unhandled, bit);
    }

    bit = DRF_NUM(_NVLDL_TOP, _INTR, _RX_PL_ERROR, 1);
    if (nvswitch_test_flags(pending, bit))
    {
        NVSWITCH_REPORT_FATAL(_HW_DLPL_RX_PL_ERROR, "RX_PL Error", NV_FALSE);
        nvswitch_clear_flags(&unhandled, bit);
    }

    //
    // Note: LTSSM_FAULT_UP must be the last interrupt serviced in the NVLDL
    // Fatal tree. The last step of handling this interrupt is going into the
    // reset_and_drain flow for the given link which will shutdown and reset
    // the link. The reset portion will also wipe away any link state including
    // pending DL interrupts. In order to log all errors before wiping that
    // state, service all other interrupts before this one
    //
    bit = DRF_NUM(_NVLDL_TOP, _INTR, _LTSSM_FAULT_UP, 1);
    if (nvswitch_test_flags(pending, bit))
    {

        // Defer the error report so it survives the link reset below.
        chip_device->deferredLinkErrors[link].fatalIntrMask.dl |= bit;
        _nvswitch_create_deferred_link_errors_task_ls10(device, nvlipt_instance, link);

        nvswitch_clear_flags(&unhandled, bit);
        device->hal.nvswitch_reset_and_drain_links(device, NVBIT64(link));

        //
        // Since reset and drain will reset the link, including clearing
        // pending interrupts, skip the clear write below. There are cases
        // where link clocks will not be on after reset and drain so there
        // maybe PRI errors on writing to the register
        //
        bSkipIntrClear = NV_TRUE;
    }

    NVSWITCH_UNHANDLED_CHECK(device, unhandled);

    // Disable interrupts that have occurred after fatal error.
    if (device->link[link].fatal_error_occurred)
    {
        NVSWITCH_LINK_WR32(device, link, NVLDL, _NVLDL_TOP, _INTR_STALL_EN,
                report.raw_enable ^ pending);
    }

    // Clear the serviced bits (write-1-to-clear), unless reset_and_drain
    // already wiped the link's interrupt state.
    if (!bSkipIntrClear)
    {
        NVSWITCH_LINK_WR32(device, link, NVLDL, _NVLDL_TOP, _INTR, pending);
    }

    if (unhandled != 0)
    {
        NVSWITCH_PRINT(device, WARN,
                "%s: Unhandled NVLDL fatal interrupts, link: %d pending: 0x%x enabled: 0x%x.\n",
                 __FUNCTION__, link, pending, report.raw_enable);
        return -NVL_MORE_PROCESSING_REQUIRED;
    }

    return NVL_SUCCESS;
}

//
// Service MINION link interrupts for the given MINION instance.
//
// Reads the set of interrupting links from _MINION_INTR, filters it by
// _MINION_INTR_STALL_EN, then for each pending link decodes
// _NVLINK_LINK_INTR _CODE and reports the corresponding fatal or nonfatal
// error. Each link interrupt is acknowledged by writing 1 to
// _NVLINK_LINK_INTR _STATE.
//
// Returns:
//   -NVL_NOT_FOUND                  if no enabled link interrupt is pending
//   -NVL_MORE_PROCESSING_REQUIRED   if a pending link was not handled
//   NVL_SUCCESS                     otherwise
//
NvlStatus
nvswitch_service_minion_link_ls10
(
    nvswitch_device *device,
    NvU32 instance
)
{
    NVSWITCH_INTERRUPT_LOG_TYPE report = { 0 };
    NvU32 pending, unhandled, minionIntr, linkIntr, reg, enabledLinks, bit;
    NvU32 localLinkIdx, link;

    //
    // _MINION_MINION_INTR shows all interrupts currently at the host on this minion
    // Note: _MINION_MINION_INTR is not used to clear link specific interrupts
    //
    minionIntr = NVSWITCH_MINION_RD32_LS10(device, instance, _MINION, _MINION_INTR);

    // get all possible interrupting links associated with this minion
    report.raw_pending = DRF_VAL(_MINION, _MINION_INTR, _LINK, minionIntr);

    // read in the enabled minion interrupts on this minion
    reg = NVSWITCH_MINION_RD32_LS10(device, instance, _MINION, _MINION_INTR_STALL_EN);

    // get the links with enabled interrupts on this minion
    enabledLinks = DRF_VAL(_MINION, _MINION_INTR_STALL_EN, _LINK, reg);

    report.raw_enable = enabledLinks;
    report.mask = report.raw_enable;

    // pending bit field contains interrupting links after being filtered
    pending = report.raw_pending & report.mask;

    if (pending == 0)
    {
        return -NVL_NOT_FOUND;
    }

    unhandled = pending;

    FOR_EACH_INDEX_IN_MASK(32, localLinkIdx, pending)
    {
        // Convert the minion-local link index to the device-global link id.
        link = (instance * NVSWITCH_LINKS_PER_NVLIPT_LS10) + localLinkIdx;
        bit = NVBIT(localLinkIdx);

        // read in the interrupt register for the given link
        linkIntr = NVSWITCH_MINION_LINK_RD32_LS10(device, link, _MINION, _NVLINK_LINK_INTR(localLinkIdx));

        // _STATE must be set for _CODE to be valid
        if (!DRF_VAL(_MINION, _NVLINK_LINK_INTR, _STATE, linkIntr))
        {
            continue;
        }

        NVSWITCH_PRINT(device, INFO,
                "%s: link[%d] {%d, %d} linkIntr = 0x%x\n",
                 __FUNCTION__, link, instance, localLinkIdx, linkIntr);

        //
        // _MINION_INTR_LINK is a read-only register field for the host
        // Host must write 1 to _NVLINK_LINK_INTR_STATE to clear the interrupt on the link
        //
        reg = DRF_NUM(_MINION, _NVLINK_LINK_INTR, _STATE, 1);
        NVSWITCH_MINION_WR32_LS10(device, instance, _MINION, _NVLINK_LINK_INTR(localLinkIdx), reg);

        report.data[0] = linkIntr;

        // Dispatch on the interrupt code reported by the MINION for this link.
        switch(DRF_VAL(_MINION, _NVLINK_LINK_INTR, _CODE, linkIntr))
        {
            case NV_MINION_NVLINK_LINK_INTR_CODE_NA:
                NVSWITCH_REPORT_FATAL(_HW_MINION_FATAL_LINK_INTR, "Minion Link NA interrupt", NV_FALSE);
                break;
            case NV_MINION_NVLINK_LINK_INTR_CODE_SWREQ:
                NVSWITCH_PRINT(device, INFO,
                      "%s: Received MINION Link SW Generate interrupt on MINION %d : link %d.\n",
                      __FUNCTION__, instance, link);
                break;
            case NV_MINION_NVLINK_LINK_INTR_CODE_DLREQ:
                NVSWITCH_REPORT_NONFATAL(_HW_MINION_NONFATAL, "Minion Link DLREQ interrupt");
                break;
            case NV_MINION_NVLINK_LINK_INTR_CODE_PMDISABLED:
                NVSWITCH_REPORT_NONFATAL(_HW_MINION_NONFATAL, "Minion Link PMDISABLED interrupt");
                break;
            case NV_MINION_NVLINK_LINK_INTR_CODE_DLCMDFAULT:
                NVSWITCH_REPORT_FATAL(_HW_MINION_FATAL_LINK_INTR, "Minion Link DLCMDFAULT interrupt", NV_FALSE);
                break;
            case NV_MINION_NVLINK_LINK_INTR_CODE_TLREQ:
                NVSWITCH_REPORT_NONFATAL(_HW_MINION_NONFATAL, "Minion Link TLREQ interrupt");
                break;
            case NV_MINION_NVLINK_LINK_INTR_CODE_NOINIT:
                NVSWITCH_REPORT_FATAL(_HW_MINION_FATAL_LINK_INTR, "Minion Link NOINIT interrupt", NV_FALSE);
                break;
            case NV_MINION_NVLINK_LINK_INTR_CODE_NOTIFY:
                NVSWITCH_PRINT(device, INFO,
                      "%s: Received MINION NOTIFY interrupt on MINION %d : link %d.\n",
                      __FUNCTION__, instance, link);
                break;
            case NV_MINION_NVLINK_LINK_INTR_CODE_LOCAL_CONFIG_ERR:
                NVSWITCH_REPORT_FATAL(_HW_MINION_FATAL_LINK_INTR, "Minion Link Local-Config-Error interrupt", NV_FALSE);
                break;
            case NV_MINION_NVLINK_LINK_INTR_CODE_NEGOTIATION_CONFIG_ERR:
                NVSWITCH_REPORT_FATAL(_HW_MINION_FATAL_LINK_INTR, "Minion Link Negotiation Config Err Interrupt", NV_FALSE);
                break;
            case NV_MINION_NVLINK_LINK_INTR_CODE_BADINIT:
                NVSWITCH_REPORT_FATAL(_HW_MINION_FATAL_LINK_INTR, "Minion Link BADINIT interrupt", NV_FALSE);
                break;
            case NV_MINION_NVLINK_LINK_INTR_CODE_PMFAIL:
                NVSWITCH_REPORT_FATAL(_HW_MINION_FATAL_LINK_INTR, "Minion Link PMFAIL interrupt", NV_FALSE);
                break;
            case NV_MINION_NVLINK_LINK_INTR_CODE_INBAND_BUFFER_AVAILABLE:
            {
                NVSWITCH_PRINT(device, INFO,
                      "Received INBAND_BUFFER_AVAILABLE interrupt on MINION %d,\n", instance);
                nvswitch_minion_receive_inband_data_ls10(device, link);
                break;
            }

            default:
                NVSWITCH_REPORT_FATAL(_HW_MINION_FATAL_LINK_INTR, "Minion Interrupt code unknown", NV_FALSE);
        }
        nvswitch_clear_flags(&unhandled, bit);

        // Disable interrupt bit for the given link - fatal error occurred before
        if (device->link[link].fatal_error_occurred)
        {
            enabledLinks &= ~bit;
            reg = DRF_NUM(_MINION, _MINION_INTR_STALL_EN, _LINK, enabledLinks);
            NVSWITCH_MINION_LINK_WR32_LS10(device, link, _MINION, _MINION_INTR_STALL_EN, reg);
        }
    }
    FOR_EACH_INDEX_IN_MASK_END;

    NVSWITCH_UNHANDLED_CHECK(device, unhandled);

    if (unhandled != 0)
    {
        return -NVL_MORE_PROCESSING_REQUIRED;
    }

    return NVL_SUCCESS;
}