1 /* 2 * SPDX-FileCopyrightText: Copyright (c) 2020-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 3 * SPDX-License-Identifier: MIT 4 * 5 * Permission is hereby granted, free of charge, to any person obtaining a 6 * copy of this software and associated documentation files (the "Software"), 7 * to deal in the Software without restriction, including without limitation 8 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 9 * and/or sell copies of the Software, and to permit persons to whom the 10 * Software is furnished to do so, subject to the following conditions: 11 * 12 * The above copyright notice and this permission notice shall be included in 13 * all copies or substantial portions of the Software. 14 * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 21 * DEALINGS IN THE SOFTWARE. 
 */

#include "common_nvswitch.h"
#include "intr_nvswitch.h"
#include "regkey_nvswitch.h"
#include "soe/soe_nvswitch.h"
#include "cci/cci_nvswitch.h"

#include "ls10/ls10.h"
#include "ls10/minion_ls10.h"
#include "ls10/soe_ls10.h"

#include "nvswitch/ls10/dev_ctrl_ip.h"
#include "nvswitch/ls10/dev_pri_masterstation_ip.h"
#include "nvswitch/ls10/dev_pri_hub_sys_ip.h"
#include "nvswitch/ls10/dev_pri_hub_sysb_ip.h"
#include "nvswitch/ls10/dev_pri_hub_prt_ip.h"

#include "nvswitch/ls10/dev_npg_ip.h"
#include "nvswitch/ls10/dev_nport_ip.h"
#include "nvswitch/ls10/dev_route_ip.h"
#include "nvswitch/ls10/dev_ingress_ip.h"
#include "nvswitch/ls10/dev_sourcetrack_ip.h"
#include "nvswitch/ls10/dev_egress_ip.h"
#include "nvswitch/ls10/dev_tstate_ip.h"
#include "nvswitch/ls10/dev_multicasttstate_ip.h"
#include "nvswitch/ls10/dev_reductiontstate_ip.h"

#include "nvswitch/ls10/dev_nvlw_ip.h"
#include "nvswitch/ls10/dev_minion_ip.h"
#include "nvswitch/ls10/dev_minion_ip_addendum.h"
#include "nvswitch/ls10/dev_cpr_ip.h"
#include "nvswitch/ls10/dev_nvlipt_ip.h"
#include "nvswitch/ls10/dev_nvlipt_lnk_ip.h"
#include "nvswitch/ls10/dev_nvltlc_ip.h"
#include "nvswitch/ls10/dev_nvldl_ip.h"

#include "nvswitch/ls10/dev_nxbar_tcp_global_ip.h"
#include "nvswitch/ls10/dev_nxbar_tile_ip.h"
#include "nvswitch/ls10/dev_nxbar_tileout_ip.h"

#include "nvswitch/ls10/dev_ctrl_ip_addendum.h"

// Forward declaration; definition appears later in this file.
static void _nvswitch_create_deferred_link_errors_task_ls10(nvswitch_device *device, NvU32 nvlipt_instance, NvU32 link);

//
// Populate an InfoROM NVS ECC error event structure from the caller-supplied
// attributes. Pure field assignment: no device access and no validation of
// the arguments is performed here.
//
static void
_nvswitch_construct_ecc_error_event_ls10
(
    INFOROM_NVS_ECC_ERROR_EVENT *err_event,   // out: event record to fill
    NvU32 sxid,                               // SXID error code to log
    NvU32 linkId,                             // link associated with the error
    NvBool bAddressValid,                     // NV_TRUE if 'address' is meaningful
    NvU32 address,                            // faulting address (if valid)
    NvBool bUncErr,                           // NV_TRUE for uncorrectable (DBE) errors
    NvU32 errorCount                          // number of errors being reported
)
{
    err_event->sxid = sxid;
    err_event->linkId = linkId;
    err_event->bAddressValid = bAddressValid;
    err_event->address = address;
    err_event->bUncErr = bUncErr;
    err_event->errorCount = errorCount;
}

//
// Program the interrupt enables for one MINION instance.
// Non-stall tree (tree 1) is fully disabled; all MINION interrupt sources are
// routed to the stall tree (tree 0), with per-link enables limited to the
// links that were discovered as valid for this MINION.
// The final enable write is skipped if the regkey requests MINION interrupts
// be disabled.
//
static void
_nvswitch_initialize_minion_interrupts
(
    nvswitch_device *device,
    NvU32 instance      // MINION engine instance to program
)
{
    NvU32 intrEn, localDiscoveredLinks, globalLink, i;
    localDiscoveredLinks = 0;

    // Tree 1 (non-stall) is disabled until there is a need
    NVSWITCH_MINION_WR32_LS10(device, instance, _MINION, _MINION_INTR_NONSTALL_EN, 0);

    // Tree 0 (stall) is where we route _all_ MINION interrupts for now
    intrEn = DRF_DEF(_MINION, _MINION_INTR_STALL_EN, _FATAL,          _ENABLE) |
             DRF_DEF(_MINION, _MINION_INTR_STALL_EN, _NONFATAL,       _ENABLE) |
             DRF_DEF(_MINION, _MINION_INTR_STALL_EN, _FALCON_STALL,   _ENABLE) |
             DRF_DEF(_MINION, _MINION_INTR_STALL_EN, _FALCON_NOSTALL, _DISABLE);

    for (i = 0; i < NVSWITCH_LINKS_PER_MINION_LS10; ++i)
    {
        // get the global link number of the link we are iterating over
        globalLink = (instance * NVSWITCH_LINKS_PER_MINION_LS10) + i;

        // if the link is valid, place its bit in the local link mask
        if (device->link[globalLink].valid)
        {
            localDiscoveredLinks |= NVBIT(i);
        }
    }

    // Fold the discovered-link mask into the per-link interrupt enables
    intrEn = FLD_SET_DRF_NUM(_MINION, _MINION_INTR_STALL_EN, _LINK,
                             localDiscoveredLinks, intrEn);

    {
        // Disable interrupts only if explicitly requested to. Default to enable.
        if (device->regkeys.minion_intr != NV_SWITCH_REGKEY_MINION_INTERRUPTS_DISABLE)
        {
            NVSWITCH_MINION_WR32_LS10(device, instance, _MINION, _MINION_INTR_STALL_EN, intrEn);
        }
    }
}

//
// Enable the top-level NVLink interrupt trees: NVLW common and per-link
// masks, NVLIPT_COMMON, NVLIPT_LNK, every valid MINION, and the CPR block.
// Routing convention (as programmed below): tree 0 carries fatal, tree 1
// carries nonfatal/correctable, tree 2 is left disabled.
//
static void
_nvswitch_initialize_nvlipt_interrupts_ls10
(
    nvswitch_device *device
)
{
    NvU32 i;
    NvU32 regval = 0;

    //
    // NVLipt interrupt routing (NVLIPT_COMMON, NVLIPT_LNK, NVLDL, NVLTLC)
    // will be initialized by MINION NVLPROD flow
    //
    // We must enable interrupts at the top levels in NVLW, NVLIPT_COMMON,
    // NVLIPT_LNK and MINION
    //

    // NVLW: tree 0 = fatal
    regval = DRF_NUM(_NVLW_COMMON, _INTR_0_MASK, _FATAL,       0x1) |
             DRF_NUM(_NVLW_COMMON, _INTR_0_MASK, _NONFATAL,    0x0) |
             DRF_NUM(_NVLW_COMMON, _INTR_0_MASK, _CORRECTABLE, 0x0) |
             DRF_NUM(_NVLW_COMMON, _INTR_0_MASK, _INTR0,       0x1) |
             DRF_NUM(_NVLW_COMMON, _INTR_0_MASK, _INTR1,       0x0);
    NVSWITCH_BCAST_WR32_LS10(device, NVLW, _NVLW_COMMON, _INTR_0_MASK, regval);

    // NVLW: tree 1 = nonfatal + correctable
    regval = DRF_NUM(_NVLW_COMMON, _INTR_1_MASK, _FATAL,       0x0) |
             DRF_NUM(_NVLW_COMMON, _INTR_1_MASK, _NONFATAL,    0x1) |
             DRF_NUM(_NVLW_COMMON, _INTR_1_MASK, _CORRECTABLE, 0x1) |
             DRF_NUM(_NVLW_COMMON, _INTR_1_MASK, _INTR0,       0x0) |
             DRF_NUM(_NVLW_COMMON, _INTR_1_MASK, _INTR1,       0x1);
    NVSWITCH_BCAST_WR32_LS10(device, NVLW, _NVLW_COMMON, _INTR_1_MASK, regval);

    // NVLW: tree 2 = unused, all sources masked off
    regval = DRF_NUM(_NVLW_COMMON, _INTR_2_MASK, _FATAL,       0x0) |
             DRF_NUM(_NVLW_COMMON, _INTR_2_MASK, _NONFATAL,    0x0) |
             DRF_NUM(_NVLW_COMMON, _INTR_2_MASK, _CORRECTABLE, 0x0) |
             DRF_NUM(_NVLW_COMMON, _INTR_2_MASK, _INTR0,       0x0) |
             DRF_NUM(_NVLW_COMMON, _INTR_2_MASK, _INTR1,       0x0);
    NVSWITCH_BCAST_WR32_LS10(device, NVLW, _NVLW_COMMON, _INTR_2_MASK, regval);

    // NVLW link: same routing as the common masks, applied per link index
    for (i = 0; i < NV_NVLW_LINK_INTR_0_MASK__SIZE_1; i++)
    {
        regval = DRF_NUM(_NVLW_LINK, _INTR_0_MASK, _FATAL,       0x1) |
                 DRF_NUM(_NVLW_LINK, _INTR_0_MASK, _NONFATAL,    0x0) |
                 DRF_NUM(_NVLW_LINK, _INTR_0_MASK, _CORRECTABLE, 0x0) |
                 DRF_NUM(_NVLW_LINK, _INTR_0_MASK, _INTR0,       0x1) |
                 DRF_NUM(_NVLW_LINK, _INTR_0_MASK, _INTR1,       0x0);
        NVSWITCH_BCAST_WR32_LS10(device, NVLW, _NVLW_LINK, _INTR_0_MASK(i), regval);

        regval = DRF_NUM(_NVLW_LINK, _INTR_1_MASK, _FATAL,       0x0) |
                 DRF_NUM(_NVLW_LINK, _INTR_1_MASK, _NONFATAL,    0x1) |
                 DRF_NUM(_NVLW_LINK, _INTR_1_MASK, _CORRECTABLE, 0x1) |
                 DRF_NUM(_NVLW_LINK, _INTR_1_MASK, _INTR0,       0x0) |
                 DRF_NUM(_NVLW_LINK, _INTR_1_MASK, _INTR1,       0x1);
        NVSWITCH_BCAST_WR32_LS10(device, NVLW, _NVLW_LINK, _INTR_1_MASK(i), regval);

        regval = DRF_NUM(_NVLW_LINK, _INTR_2_MASK, _FATAL,       0x0) |
                 DRF_NUM(_NVLW_LINK, _INTR_2_MASK, _NONFATAL,    0x0) |
                 DRF_NUM(_NVLW_LINK, _INTR_2_MASK, _CORRECTABLE, 0x0) |
                 DRF_NUM(_NVLW_LINK, _INTR_2_MASK, _INTR0,       0x0) |
                 DRF_NUM(_NVLW_LINK, _INTR_2_MASK, _INTR1,       0x0);
        NVSWITCH_BCAST_WR32_LS10(device, NVLW, _NVLW_LINK, _INTR_2_MASK(i), regval);
    }

    // NVLIPT_COMMON: enable both interrupt outputs
    regval = DRF_NUM(_NVLIPT_COMMON, _INTR_CONTROL_COMMON, _INT0_EN, 0x1) |
             DRF_NUM(_NVLIPT_COMMON, _INTR_CONTROL_COMMON, _INT1_EN, 0x1);

    NVSWITCH_BCAST_WR32_LS10(device, NVLIPT, _NVLIPT_COMMON, _INTR_CONTROL_COMMON, regval);

    // NVLIPT_LNK: enable both interrupt outputs
    regval = DRF_NUM(_NVLIPT_LNK, _INTR_CONTROL_LINK, _INT0_EN, 0x1) |
             DRF_NUM(_NVLIPT_LNK, _INTR_CONTROL_LINK, _INT1_EN, 0x1);
    NVSWITCH_BCAST_WR32_LS10(device, NVLIPT_LNK, _NVLIPT_LNK, _INTR_CONTROL_LINK, regval);

    // NVLIPT_LNK_INTR_1: link-state-request-ready routed to INT1
    regval = DRF_NUM(_NVLIPT_LNK, _INTR_INT1_EN, _LINKSTATEREQUESTREADYSET, 0x1);
    NVSWITCH_BCAST_WR32_LS10(device, NVLIPT_LNK, _NVLIPT_LNK, _INTR_INT1_EN, regval);

    // MINION: program each present MINION instance
    for (i = 0; i < NUM_MINION_ENGINE_LS10; ++i)
    {
        if (!NVSWITCH_ENG_VALID_LS10(device, MINION, i))
        {
            continue;
        }

        _nvswitch_initialize_minion_interrupts(device,i);
    }

    // CPR

    // Read-modify-write to set engine-reset error logging to its __PROD value
    regval = NVSWITCH_ENG_RD32(device, CPR, _BCAST, 0, _CPR_SYS, _ERR_LOG_EN_0);
    regval = FLD_SET_DRF(_CPR_SYS, _ERR_LOG_EN_0, _ENGINE_RESET_ERR, __PROD, regval);
    NVSWITCH_ENG_WR32(device, CPR, _BCAST, 0, _CPR_SYS, _ERR_LOG_EN_0, regval);

    // Route CPR interrupts to NVLW tree 0 only
    regval = DRF_DEF(_CPR_SYS, _NVLW_INTR_0_MASK, _CPR_INTR, _ENABLE) |
             DRF_DEF(_CPR_SYS, _NVLW_INTR_0_MASK, _INTR0,    _ENABLE);
    NVSWITCH_ENG_WR32(device, CPR, _BCAST, 0, _CPR_SYS, _NVLW_INTR_0_MASK, regval);

    regval = DRF_DEF(_CPR_SYS, _NVLW_INTR_1_MASK, _CPR_INTR, _DISABLE) |
             DRF_DEF(_CPR_SYS, _NVLW_INTR_1_MASK, _INTR1,    _ENABLE);
    NVSWITCH_ENG_WR32(device, CPR, _BCAST, 0, _CPR_SYS, _NVLW_INTR_1_MASK, regval);

    regval = DRF_DEF(_CPR_SYS, _NVLW_INTR_2_MASK, _CPR_INTR, _DISABLE) |
             DRF_DEF(_CPR_SYS, _NVLW_INTR_2_MASK, _INTR2,    _ENABLE);
    NVSWITCH_ENG_WR32(device, CPR, _BCAST, 0, _CPR_SYS, _NVLW_INTR_2_MASK, regval);
}

//
// Build the cached ROUTE fatal/nonfatal report-enable masks.
// Only the software-side cache (chip_device->intr_mask) is written here;
// the masks are applied to hardware elsewhere.
//
static void
_nvswitch_initialize_route_interrupts
(
    nvswitch_device *device
)
{
    ls10_device *chip_device = NVSWITCH_GET_CHIP_DEVICE_LS10(device);

    chip_device->intr_mask.route.fatal =
        DRF_DEF(_ROUTE, _ERR_FATAL_REPORT_EN_0, _ROUTEBUFERR, _ENABLE)          |
        DRF_DEF(_ROUTE, _ERR_FATAL_REPORT_EN_0, _GLT_ECC_DBE_ERR, _ENABLE)      |
        DRF_DEF(_ROUTE, _ERR_FATAL_REPORT_EN_0, _PDCTRLPARERR, _ENABLE)         |
        DRF_DEF(_ROUTE, _ERR_FATAL_REPORT_EN_0, _NVS_ECC_DBE_ERR, _ENABLE)      |
        DRF_DEF(_ROUTE, _ERR_FATAL_REPORT_EN_0, _CDTPARERR, _ENABLE)            |
        DRF_DEF(_ROUTE, _ERR_FATAL_REPORT_EN_0, _MCRID_ECC_DBE_ERR, _ENABLE)    |
        DRF_DEF(_ROUTE, _ERR_FATAL_REPORT_EN_0, _EXTMCRID_ECC_DBE_ERR, _ENABLE) |
        DRF_DEF(_ROUTE, _ERR_FATAL_REPORT_EN_0, _RAM_ECC_DBE_ERR, _ENABLE);

    chip_device->intr_mask.route.nonfatal =
        DRF_DEF(_ROUTE, _ERR_NON_FATAL_REPORT_EN_0, _NOPORTDEFINEDERR, _ENABLE)       |
        DRF_DEF(_ROUTE, _ERR_NON_FATAL_REPORT_EN_0, _INVALIDROUTEPOLICYERR, _ENABLE)  |
        DRF_DEF(_ROUTE, _ERR_NON_FATAL_REPORT_EN_0, _GLT_ECC_LIMIT_ERR, _ENABLE)      |
        DRF_DEF(_ROUTE, _ERR_NON_FATAL_REPORT_EN_0, _NVS_ECC_LIMIT_ERR, _ENABLE)      |
        DRF_DEF(_ROUTE, _ERR_NON_FATAL_REPORT_EN_0, _MCRID_ECC_LIMIT_ERR, _ENABLE)    |
        DRF_DEF(_ROUTE, _ERR_NON_FATAL_REPORT_EN_0, _EXTMCRID_ECC_LIMIT_ERR, _ENABLE) |
        DRF_DEF(_ROUTE, _ERR_NON_FATAL_REPORT_EN_0, _RAM_ECC_LIMIT_ERR, _ENABLE)      |
        DRF_DEF(_ROUTE, _ERR_NON_FATAL_REPORT_EN_0, _INVALID_MCRID_ERR, _ENABLE);
    // NOTE: _MC_TRIGGER_ERR is debug-use only
}

//
// Build the cached INGRESS fatal/nonfatal report-enable masks for both
// report-enable registers (index 0 and 1). Cache-only, like the ROUTE init.
//
static void
_nvswitch_initialize_ingress_interrupts
(
    nvswitch_device *device
)
{
    ls10_device *chip_device = NVSWITCH_GET_CHIP_DEVICE_LS10(device);

    chip_device->intr_mask.ingress[0].fatal =
        DRF_DEF(_INGRESS, _ERR_FATAL_REPORT_EN_0, _CMDDECODEERR, _ENABLE)              |
        DRF_DEF(_INGRESS, _ERR_FATAL_REPORT_EN_0, _EXTAREMAPTAB_ECC_DBE_ERR, _ENABLE)  |
        DRF_DEF(_INGRESS, _ERR_FATAL_REPORT_EN_0, _NCISOC_HDR_ECC_DBE_ERR, _ENABLE)    |
        DRF_DEF(_INGRESS, _ERR_FATAL_REPORT_EN_0, _INVALIDVCSET, _ENABLE)              |
        DRF_DEF(_INGRESS, _ERR_FATAL_REPORT_EN_0, _REMAPTAB_ECC_DBE_ERR, _ENABLE)      |
        DRF_DEF(_INGRESS, _ERR_FATAL_REPORT_EN_0, _RIDTAB_ECC_DBE_ERR, _ENABLE)        |
        DRF_DEF(_INGRESS, _ERR_FATAL_REPORT_EN_0, _RLANTAB_ECC_DBE_ERR, _ENABLE)       |
        DRF_DEF(_INGRESS, _ERR_FATAL_REPORT_EN_0, _NCISOC_PARITY_ERR, _ENABLE)         |
        DRF_DEF(_INGRESS, _ERR_FATAL_REPORT_EN_0, _EXTBREMAPTAB_ECC_DBE_ERR, _ENABLE)  |
        DRF_DEF(_INGRESS, _ERR_FATAL_REPORT_EN_0, _MCREMAPTAB_ECC_DBE_ERR, _ENABLE);

    chip_device->intr_mask.ingress[0].nonfatal =
        DRF_DEF(_INGRESS, _ERR_NON_FATAL_REPORT_EN_0, _REQCONTEXTMISMATCHERR, _ENABLE)              |
        DRF_DEF(_INGRESS, _ERR_NON_FATAL_REPORT_EN_0, _ACLFAIL, _ENABLE)                            |
        DRF_DEF(_INGRESS, _ERR_NON_FATAL_REPORT_EN_0, _NCISOC_HDR_ECC_LIMIT_ERR, _ENABLE)           |
        DRF_DEF(_INGRESS, _ERR_NON_FATAL_REPORT_EN_0, _ADDRBOUNDSERR, _ENABLE)                      |
        DRF_DEF(_INGRESS, _ERR_NON_FATAL_REPORT_EN_0, _RIDTABCFGERR, _ENABLE)                       |
        DRF_DEF(_INGRESS, _ERR_NON_FATAL_REPORT_EN_0, _RLANTABCFGERR, _ENABLE)                      |
        DRF_DEF(_INGRESS, _ERR_NON_FATAL_REPORT_EN_0, _REMAPTAB_ECC_LIMIT_ERR, _ENABLE)             |
        DRF_DEF(_INGRESS, _ERR_NON_FATAL_REPORT_EN_0, _RIDTAB_ECC_LIMIT_ERR, _ENABLE)               |
        DRF_DEF(_INGRESS, _ERR_NON_FATAL_REPORT_EN_0, _RLANTAB_ECC_LIMIT_ERR, _ENABLE)              |
        DRF_DEF(_INGRESS, _ERR_NON_FATAL_REPORT_EN_0, _ADDRTYPEERR, _ENABLE)                        |
        DRF_DEF(_INGRESS, _ERR_NON_FATAL_REPORT_EN_0, _EXTAREMAPTAB_INDEX_ERR, _ENABLE)             |
        DRF_DEF(_INGRESS, _ERR_NON_FATAL_REPORT_EN_0, _EXTBREMAPTAB_INDEX_ERR, _ENABLE)             |
        DRF_DEF(_INGRESS, _ERR_NON_FATAL_REPORT_EN_0, _MCREMAPTAB_INDEX_ERR, _ENABLE)               |
        DRF_DEF(_INGRESS, _ERR_NON_FATAL_REPORT_EN_0, _EXTAREMAPTAB_REQCONTEXTMISMATCHERR, _ENABLE) |
        DRF_DEF(_INGRESS, _ERR_NON_FATAL_REPORT_EN_0, _EXTBREMAPTAB_REQCONTEXTMISMATCHERR, _ENABLE) |
        DRF_DEF(_INGRESS, _ERR_NON_FATAL_REPORT_EN_0, _MCREMAPTAB_REQCONTEXTMISMATCHERR, _ENABLE)   |
        DRF_DEF(_INGRESS, _ERR_NON_FATAL_REPORT_EN_0, _EXTAREMAPTAB_ACLFAIL, _ENABLE)               |
        DRF_DEF(_INGRESS, _ERR_NON_FATAL_REPORT_EN_0, _EXTBREMAPTAB_ACLFAIL, _ENABLE)               |
        DRF_DEF(_INGRESS, _ERR_NON_FATAL_REPORT_EN_0, _MCREMAPTAB_ACLFAIL, _ENABLE)                 |
        DRF_DEF(_INGRESS, _ERR_NON_FATAL_REPORT_EN_0, _EXTAREMAPTAB_ADDRBOUNDSERR, _ENABLE)         |
        DRF_DEF(_INGRESS, _ERR_NON_FATAL_REPORT_EN_0, _EXTBREMAPTAB_ADDRBOUNDSERR, _ENABLE)         |
        DRF_DEF(_INGRESS, _ERR_NON_FATAL_REPORT_EN_0, _MCREMAPTAB_ADDRBOUNDSERR, _ENABLE);

    // Report-enable register 1 has no fatal sources enabled
    chip_device->intr_mask.ingress[1].fatal = 0;

    chip_device->intr_mask.ingress[1].nonfatal =
        DRF_DEF(_INGRESS, _ERR_NON_FATAL_REPORT_EN_1, _EXTAREMAPTAB_ECC_LIMIT_ERR, _ENABLE) |
        DRF_DEF(_INGRESS, _ERR_NON_FATAL_REPORT_EN_1, _EXTBREMAPTAB_ECC_LIMIT_ERR, _ENABLE) |
        DRF_DEF(_INGRESS, _ERR_NON_FATAL_REPORT_EN_1, _MCREMAPTAB_ECC_LIMIT_ERR, _ENABLE)   |
        DRF_DEF(_INGRESS, _ERR_NON_FATAL_REPORT_EN_1, _MCCMDTOUCADDRERR, _ENABLE)           |
        DRF_DEF(_INGRESS, _ERR_NON_FATAL_REPORT_EN_1, _READMCREFLECTMEMERR, _ENABLE)        |
        DRF_DEF(_INGRESS, _ERR_NON_FATAL_REPORT_EN_1, _EXTAREMAPTAB_ADDRTYPEERR, _ENABLE)   |
        DRF_DEF(_INGRESS, _ERR_NON_FATAL_REPORT_EN_1, _EXTBREMAPTAB_ADDRTYPEERR, _ENABLE)   |
        DRF_DEF(_INGRESS, _ERR_NON_FATAL_REPORT_EN_1, _MCREMAPTAB_ADDRTYPEERR, _ENABLE);
}

//
// Build the cached EGRESS fatal/nonfatal report-enable masks for both
// report-enable registers (index 0 and 1). Cache-only.
//
static void
_nvswitch_initialize_egress_interrupts
(
    nvswitch_device *device
)
{
    ls10_device *chip_device = NVSWITCH_GET_CHIP_DEVICE_LS10(device);

    chip_device->intr_mask.egress[0].fatal =
        DRF_DEF(_EGRESS, _ERR_FATAL_REPORT_EN_0, _EGRESSBUFERR, _ENABLE)                  |
        DRF_DEF(_EGRESS, _ERR_FATAL_REPORT_EN_0, _PKTROUTEERR, _ENABLE)                   |
        DRF_DEF(_EGRESS, _ERR_FATAL_REPORT_EN_0, _SEQIDERR, _ENABLE)                      |
        DRF_DEF(_EGRESS, _ERR_FATAL_REPORT_EN_0, _NXBAR_HDR_ECC_DBE_ERR, _ENABLE)         |
        DRF_DEF(_EGRESS, _ERR_FATAL_REPORT_EN_0, _RAM_OUT_HDR_ECC_DBE_ERR, _ENABLE)       |
        DRF_DEF(_EGRESS, _ERR_FATAL_REPORT_EN_0, _NCISOCCREDITOVFL, _ENABLE)              |
        DRF_DEF(_EGRESS, _ERR_FATAL_REPORT_EN_0, _REQTGTIDMISMATCHERR, _ENABLE)           |
        DRF_DEF(_EGRESS, _ERR_FATAL_REPORT_EN_0, _RSPREQIDMISMATCHERR, _ENABLE)           |
        DRF_DEF(_EGRESS, _ERR_FATAL_REPORT_EN_0, _NXBAR_HDR_PARITY_ERR, _ENABLE)          |
        DRF_DEF(_EGRESS, _ERR_FATAL_REPORT_EN_0, _NCISOC_CREDIT_PARITY_ERR, _ENABLE)      |
        DRF_DEF(_EGRESS, _ERR_FATAL_REPORT_EN_0, _NXBAR_FLITTYPE_MISMATCH_ERR, _ENABLE)   |
        DRF_DEF(_EGRESS, _ERR_FATAL_REPORT_EN_0, _CREDIT_TIME_OUT_ERR, _ENABLE)           |
        DRF_DEF(_EGRESS, _ERR_FATAL_REPORT_EN_0, _INVALIDVCSET_ERR, _ENABLE)              |
        DRF_DEF(_EGRESS, _ERR_FATAL_REPORT_EN_0, _NXBAR_SIDEBAND_PD_PARITY_ERR, _ENABLE)  |
        DRF_DEF(_EGRESS, _ERR_FATAL_REPORT_EN_0, _URRSPERR, _ENABLE)                      |
        DRF_DEF(_EGRESS, _ERR_FATAL_REPORT_EN_0, _HWRSPERR, _ENABLE);

    chip_device->intr_mask.egress[0].nonfatal =
        DRF_DEF(_EGRESS, _ERR_NON_FATAL_REPORT_EN_0, _NXBAR_HDR_ECC_LIMIT_ERR, _ENABLE)   |
        DRF_DEF(_EGRESS, _ERR_NON_FATAL_REPORT_EN_0, _RAM_OUT_HDR_ECC_LIMIT_ERR, _ENABLE) |
        DRF_DEF(_EGRESS, _ERR_NON_FATAL_REPORT_EN_0, _PRIVRSPERR, _ENABLE)                |
        DRF_DEF(_EGRESS, _ERR_NON_FATAL_REPORT_EN_0, _RFU, _DISABLE);

    chip_device->intr_mask.egress[1].fatal =

        DRF_DEF(_EGRESS, _ERR_FATAL_REPORT_EN_1, _MCRSPCTRLSTORE_ECC_DBE_ERR, _ENABLE)  |
        DRF_DEF(_EGRESS, _ERR_FATAL_REPORT_EN_1, _RBCTRLSTORE_ECC_DBE_ERR, _ENABLE)     |
        DRF_DEF(_EGRESS, _ERR_FATAL_REPORT_EN_1, _MCREDSGT_ECC_DBE_ERR, _ENABLE)        |
        DRF_DEF(_EGRESS, _ERR_FATAL_REPORT_EN_1, _MCRSP_RAM_HDR_ECC_DBE_ERR, _ENABLE);

    // NOTE(review): several _DBE_/_PARITY_ sources appear in the *nonfatal*
    // register-1 mask below — looks intentional (register layout), but worth
    // confirming against the _EGRESS manual.
    chip_device->intr_mask.egress[1].nonfatal =
        DRF_DEF(_EGRESS, _ERR_NON_FATAL_REPORT_EN_1, _NXBAR_REDUCTION_HDR_ECC_LIMIT_ERR, _ENABLE)       |
        DRF_DEF(_EGRESS, _ERR_NON_FATAL_REPORT_EN_1, _MCRSPCTRLSTORE_ECC_LIMIT_ERR, _ENABLE)            |
        DRF_DEF(_EGRESS, _ERR_NON_FATAL_REPORT_EN_1, _RBCTRLSTORE_ECC_LIMIT_ERR, _ENABLE)               |
        DRF_DEF(_EGRESS, _ERR_NON_FATAL_REPORT_EN_1, _MCREDSGT_ECC_LIMIT_ERR, _ENABLE)                  |
        DRF_DEF(_EGRESS, _ERR_NON_FATAL_REPORT_EN_1, _MCREDBUF_ECC_LIMIT_ERR, _ENABLE)                  |
        DRF_DEF(_EGRESS, _ERR_NON_FATAL_REPORT_EN_1, _MCRSP_RAM_HDR_ECC_LIMIT_ERR, _ENABLE)             |
        DRF_DEF(_EGRESS, _ERR_NON_FATAL_REPORT_EN_1, _NXBAR_REDUCTION_HDR_ECC_DBE_ERR, _ENABLE)         |
        DRF_DEF(_EGRESS, _ERR_NON_FATAL_REPORT_EN_1, _NXBAR_REDUCTION_HDR_PARITY_ERR, _ENABLE)          |
        DRF_DEF(_EGRESS, _ERR_NON_FATAL_REPORT_EN_1, _NXBAR_REDUCTION_FLITTYPE_MISMATCH_ERR, _ENABLE)   |
        DRF_DEF(_EGRESS, _ERR_NON_FATAL_REPORT_EN_1, _MCREDBUF_ECC_DBE_ERR, _ENABLE)                    |
        DRF_DEF(_EGRESS, _ERR_NON_FATAL_REPORT_EN_1, _MCRSP_CNT_ERR, _ENABLE)                           |
        DRF_DEF(_EGRESS, _ERR_NON_FATAL_REPORT_EN_1, _RBRSP_CNT_ERR, _ENABLE);
}

//
// Build the cached TSTATE fatal/nonfatal report-enable masks. Cache-only.
//
static void
_nvswitch_initialize_tstate_interrupts
(
    nvswitch_device *device
)
{
    ls10_device *chip_device = NVSWITCH_GET_CHIP_DEVICE_LS10(device);

    chip_device->intr_mask.tstate.fatal =
        DRF_DEF(_TSTATE, _ERR_FATAL_REPORT_EN_0, _TAGPOOLBUFERR, _ENABLE)          |
        DRF_DEF(_TSTATE, _ERR_FATAL_REPORT_EN_0, _TAGPOOL_ECC_DBE_ERR, _ENABLE)    |
        DRF_DEF(_TSTATE, _ERR_FATAL_REPORT_EN_0, _CRUMBSTOREBUFERR, _ENABLE)       |
        DRF_DEF(_TSTATE, _ERR_FATAL_REPORT_EN_0, _CRUMBSTORE_ECC_DBE_ERR, _ENABLE) |
        DRF_DEF(_TSTATE, _ERR_FATAL_REPORT_EN_0, _ATO_ERR, _ENABLE)                |
        DRF_DEF(_TSTATE, _ERR_FATAL_REPORT_EN_0, _CAMRSP_ERR, _ENABLE);

    chip_device->intr_mask.tstate.nonfatal =
        DRF_DEF(_TSTATE, _ERR_NON_FATAL_REPORT_EN_0, _TAGPOOL_ECC_LIMIT_ERR, _ENABLE) |
        DRF_DEF(_TSTATE, _ERR_NON_FATAL_REPORT_EN_0, _CRUMBSTORE_ECC_LIMIT_ERR, _ENABLE);
}

//
// Build the cached SOURCETRACK fatal/nonfatal report-enable masks. Cache-only.
//
static void
_nvswitch_initialize_sourcetrack_interrupts
(
    nvswitch_device *device
)
{
    ls10_device *chip_device = NVSWITCH_GET_CHIP_DEVICE_LS10(device);

    chip_device->intr_mask.sourcetrack.fatal =
        DRF_DEF(_SOURCETRACK, _ERR_FATAL_REPORT_EN_0, _CREQ_TCEN0_CRUMBSTORE_ECC_DBE_ERR, _ENABLE) |
        DRF_DEF(_SOURCETRACK, _ERR_FATAL_REPORT_EN_0, _DUP_CREQ_TCEN0_TAG_ERR, _ENABLE)            |
        DRF_DEF(_SOURCETRACK, _ERR_FATAL_REPORT_EN_0, _INVALID_TCEN0_RSP_ERR, _ENABLE)             |
        DRF_DEF(_SOURCETRACK, _ERR_FATAL_REPORT_EN_0, _INVALID_TCEN1_RSP_ERR, _ENABLE)             |
        DRF_DEF(_SOURCETRACK, _ERR_FATAL_REPORT_EN_0, _SOURCETRACK_TIME_OUT_ERR, _ENABLE);

    chip_device->intr_mask.sourcetrack.nonfatal =
        DRF_DEF(_SOURCETRACK, _ERR_NON_FATAL_REPORT_EN_0, _CREQ_TCEN0_CRUMBSTORE_ECC_LIMIT_ERR, _ENABLE);
}

//
// Build the cached MULTICASTTSTATE fatal/nonfatal report-enable masks.
// Cache-only.
//
static void
_nvswitch_initialize_multicast_tstate_interrupts
(
    nvswitch_device *device
)
{
    ls10_device *chip_device = NVSWITCH_GET_CHIP_DEVICE_LS10(device);

    chip_device->intr_mask.mc_tstate.fatal =
        DRF_DEF(_MULTICASTTSTATE, _ERR_FATAL_REPORT_EN_0, _TAGPOOL_ECC_DBE_ERR, _ENABLE)          |
        DRF_DEF(_MULTICASTTSTATE, _ERR_FATAL_REPORT_EN_0, _CRUMBSTORE_BUF_OVERWRITE_ERR, _ENABLE) |
        DRF_DEF(_MULTICASTTSTATE, _ERR_FATAL_REPORT_EN_0, _CRUMBSTORE_ECC_DBE_ERR, _ENABLE);

    chip_device->intr_mask.mc_tstate.nonfatal =
        DRF_DEF(_MULTICASTTSTATE, _ERR_NON_FATAL_REPORT_EN_0, _TAGPOOL_ECC_LIMIT_ERR, _ENABLE)    |
        DRF_DEF(_MULTICASTTSTATE, _ERR_NON_FATAL_REPORT_EN_0, _CRUMBSTORE_ECC_LIMIT_ERR, _ENABLE) |
        DRF_DEF(_MULTICASTTSTATE, _ERR_NON_FATAL_REPORT_EN_0, _CRUMBSTORE_MCTO_ERR, _ENABLE);
}
438 439 static void 440 _nvswitch_initialize_reduction_tstate_interrupts 441 ( 442 nvswitch_device *device 443 ) 444 { 445 ls10_device *chip_device = NVSWITCH_GET_CHIP_DEVICE_LS10(device); 446 447 chip_device->intr_mask.red_tstate.fatal = 448 DRF_DEF(_REDUCTIONTSTATE, _ERR_FATAL_REPORT_EN_0, _TAGPOOL_ECC_DBE_ERR, _ENABLE) | 449 DRF_DEF(_REDUCTIONTSTATE, _ERR_FATAL_REPORT_EN_0, _CRUMBSTORE_BUF_OVERWRITE_ERR, _ENABLE) | 450 DRF_DEF(_REDUCTIONTSTATE, _ERR_FATAL_REPORT_EN_0, _CRUMBSTORE_ECC_DBE_ERR, _ENABLE); 451 452 chip_device->intr_mask.red_tstate.nonfatal = 453 DRF_DEF(_REDUCTIONTSTATE, _ERR_NON_FATAL_REPORT_EN_0, _TAGPOOL_ECC_LIMIT_ERR, _ENABLE) | 454 DRF_DEF(_REDUCTIONTSTATE, _ERR_NON_FATAL_REPORT_EN_0, _CRUMBSTORE_ECC_LIMIT_ERR, _ENABLE) | 455 DRF_DEF(_REDUCTIONTSTATE, _ERR_NON_FATAL_REPORT_EN_0, _CRUMBSTORE_RTO_ERR, _ENABLE); 456 } 457 458 void 459 _nvswitch_initialize_nport_interrupts_ls10 460 ( 461 nvswitch_device *device 462 ) 463 { 464 // Moving this L2 register access to SOE. Refer bug #3747687 465 #if 0 466 NvU32 val; 467 468 val = 469 DRF_NUM(_NPORT, _ERR_CONTROL_COMMON_NPORT, _CORRECTABLEENABLE, 1) | 470 DRF_NUM(_NPORT, _ERR_CONTROL_COMMON_NPORT, _FATALENABLE, 1) | 471 DRF_NUM(_NPORT, _ERR_CONTROL_COMMON_NPORT, _NONFATALENABLE, 1); 472 NVSWITCH_NPORT_BCAST_WR32_LS10(device, _NPORT, _ERR_CONTROL_COMMON_NPORT, val); 473 #endif // 0 474 475 _nvswitch_initialize_route_interrupts(device); 476 _nvswitch_initialize_ingress_interrupts(device); 477 _nvswitch_initialize_egress_interrupts(device); 478 _nvswitch_initialize_tstate_interrupts(device); 479 _nvswitch_initialize_sourcetrack_interrupts(device); 480 _nvswitch_initialize_multicast_tstate_interrupts(device); 481 _nvswitch_initialize_reduction_tstate_interrupts(device); 482 } 483 484 void 485 _nvswitch_initialize_nxbar_interrupts_ls10 486 ( 487 nvswitch_device *device 488 ) 489 { 490 ls10_device *chip_device = NVSWITCH_GET_CHIP_DEVICE_LS10(device); 491 NvU32 report_fatal; 492 493 report_fatal = 494 
DRF_NUM(_NXBAR_TILE, _ERR_FATAL_INTR_EN, _INGRESS_BUFFER_OVERFLOW, 1) | 495 DRF_NUM(_NXBAR_TILE, _ERR_FATAL_INTR_EN, _INGRESS_BUFFER_UNDERFLOW, 1) | 496 DRF_NUM(_NXBAR_TILE, _ERR_FATAL_INTR_EN, _EGRESS_CREDIT_OVERFLOW, 1) | 497 DRF_NUM(_NXBAR_TILE, _ERR_FATAL_INTR_EN, _EGRESS_CREDIT_UNDERFLOW, 1) | 498 DRF_NUM(_NXBAR_TILE, _ERR_FATAL_INTR_EN, _INGRESS_NON_BURSTY_PKT, 1) | 499 DRF_NUM(_NXBAR_TILE, _ERR_FATAL_INTR_EN, _INGRESS_NON_STICKY_PKT, 1) | 500 DRF_NUM(_NXBAR_TILE, _ERR_FATAL_INTR_EN, _INGRESS_BURST_GT_9_DATA_VC, 1) | 501 DRF_NUM(_NXBAR_TILE, _ERR_FATAL_INTR_EN, _INGRESS_PKT_INVALID_DST, 1) | 502 DRF_NUM(_NXBAR_TILE, _ERR_FATAL_INTR_EN, _INGRESS_PKT_PARITY_ERROR, 1) | 503 DRF_NUM(_NXBAR_TILE, _ERR_FATAL_INTR_EN, _INGRESS_SIDEBAND_PARITY_ERROR, 1) | 504 DRF_NUM(_NXBAR_TILE, _ERR_FATAL_INTR_EN, _INGRESS_REDUCTION_PKT_ERROR, 1); 505 506 // Moving this L2 register access to SOE. Refer bug #3747687 507 #if 0 508 NVSWITCH_BCAST_WR32_LS10(device, NXBAR, _NXBAR_TILE, _ERR_FATAL_INTR_EN, report_fatal); 509 #endif // 0 510 511 chip_device->intr_mask.tile.fatal = report_fatal; 512 chip_device->intr_mask.tile.nonfatal = 0; 513 514 report_fatal = 515 DRF_NUM(_NXBAR_TILEOUT, _ERR_FATAL_INTR_EN, _INGRESS_BUFFER_OVERFLOW, 1) | 516 DRF_NUM(_NXBAR_TILEOUT, _ERR_FATAL_INTR_EN, _INGRESS_BUFFER_UNDERFLOW, 1) | 517 DRF_NUM(_NXBAR_TILEOUT, _ERR_FATAL_INTR_EN, _EGRESS_CREDIT_OVERFLOW, 1) | 518 DRF_NUM(_NXBAR_TILEOUT, _ERR_FATAL_INTR_EN, _EGRESS_CREDIT_UNDERFLOW, 1) | 519 DRF_NUM(_NXBAR_TILEOUT, _ERR_FATAL_INTR_EN, _INGRESS_NON_BURSTY_PKT, 1) | 520 DRF_NUM(_NXBAR_TILEOUT, _ERR_FATAL_INTR_EN, _INGRESS_NON_STICKY_PKT, 1) | 521 DRF_NUM(_NXBAR_TILEOUT, _ERR_FATAL_INTR_EN, _INGRESS_BURST_GT_9_DATA_VC, 1) | 522 DRF_NUM(_NXBAR_TILEOUT, _ERR_FATAL_INTR_EN, _EGRESS_CDT_PARITY_ERROR, 1); 523 524 // Moving this L2 register access to SOE. 
Refer bug #3747687 525 #if 0 526 NVSWITCH_BCAST_WR32_LS10(device, NXBAR, _NXBAR_TILEOUT, _ERR_FATAL_INTR_EN, report_fatal); 527 #endif // 0 528 529 chip_device->intr_mask.tileout.fatal = report_fatal; 530 chip_device->intr_mask.tileout.nonfatal = 0; 531 } 532 533 /* 534 * @brief Service MINION Falcon interrupts on the requested interrupt tree 535 * Falcon Interrupts are a little unique in how they are handled:#include <assert.h> 536 * IRQSTAT is used to read in interrupt status from FALCON 537 * IRQMASK is used to read in mask of interrupts 538 * IRQDEST is used to read in enabled interrupts that are routed to the HOST 539 * 540 * IRQSTAT & IRQMASK gives the pending interrupting on this minion 541 * 542 * @param[in] device MINION on this device 543 * @param[in] instance MINION instance 544 * 545 */ 546 NvlStatus 547 nvswitch_minion_service_falcon_interrupts_ls10 548 ( 549 nvswitch_device *device, 550 NvU32 instance 551 ) 552 { 553 ls10_device *chip_device = NVSWITCH_GET_CHIP_DEVICE_LS10(device); 554 NVSWITCH_INTERRUPT_LOG_TYPE report = { 0 }; 555 NvU32 pending, bit, unhandled, intr, link; 556 557 link = instance * NVSWITCH_LINKS_PER_MINION_LS10; 558 report.raw_pending = NVSWITCH_MINION_RD32_LS10(device, instance, _CMINION, _FALCON_IRQSTAT); 559 report.raw_enable = chip_device->intr_minion_dest; 560 report.mask = NVSWITCH_MINION_RD32_LS10(device, instance, _CMINION, _FALCON_IRQMASK); 561 562 pending = report.raw_pending & report.mask; 563 564 if (pending == 0) 565 { 566 return -NVL_NOT_FOUND; 567 } 568 569 unhandled = pending; 570 571 bit = DRF_NUM(_CMINION_FALCON, _IRQSTAT, _WDTMR, 1); 572 if (nvswitch_test_flags(pending, bit)) 573 { 574 NVSWITCH_REPORT_FATAL(_HW_MINION_WATCHDOG, "MINION Watchdog timer ran out", NV_TRUE); 575 nvswitch_clear_flags(&unhandled, bit); 576 } 577 578 bit = DRF_NUM(_CMINION_FALCON, _IRQSTAT, _HALT, 1); 579 if (nvswitch_test_flags(pending, bit)) 580 { 581 NVSWITCH_REPORT_FATAL(_HW_MINION_HALT, "MINION HALT", NV_TRUE); 582 
nvswitch_clear_flags(&unhandled, bit); 583 } 584 585 bit = DRF_NUM(_CMINION_FALCON, _IRQSTAT, _EXTERR, 1); 586 if (nvswitch_test_flags(pending, bit)) 587 { 588 NVSWITCH_REPORT_FATAL(_HW_MINION_EXTERR, "MINION EXTERR", NV_TRUE); 589 nvswitch_clear_flags(&unhandled, bit); 590 } 591 592 bit = DRF_NUM(_CMINION_FALCON, _IRQSTAT, _SWGEN0, 1); 593 if (nvswitch_test_flags(pending, bit)) 594 { 595 NVSWITCH_PRINT(device, INFO, 596 "%s: Received MINION Falcon SWGEN0 interrupt on MINION %d.\n", 597 __FUNCTION__, instance); 598 nvswitch_clear_flags(&unhandled, bit); 599 } 600 601 bit = DRF_NUM(_CMINION_FALCON, _IRQSTAT, _SWGEN1, 1); 602 if (nvswitch_test_flags(pending, bit)) 603 { 604 NVSWITCH_PRINT(device, INFO, 605 "%s: Received MINION Falcon SWGEN1 interrupt on MINION %d.\n", 606 __FUNCTION__, instance); 607 nvswitch_clear_flags(&unhandled, bit); 608 } 609 610 NVSWITCH_UNHANDLED_CHECK(device, unhandled); 611 612 if (device->link[link].fatal_error_occurred) 613 { 614 intr = NVSWITCH_MINION_RD32_LS10(device, instance, _MINION, _MINION_INTR_STALL_EN); 615 intr = FLD_SET_DRF(_MINION, _MINION_INTR_STALL_EN, _FATAL, _DISABLE, intr); 616 intr = FLD_SET_DRF(_MINION, _MINION_INTR_STALL_EN, _FALCON_STALL, _DISABLE, intr); 617 intr = FLD_SET_DRF(_MINION, _MINION_INTR_STALL_EN, _FATAL, _DISABLE, intr); 618 intr = FLD_SET_DRF(_MINION, _MINION_INTR_STALL_EN, _NONFATAL, _DISABLE, intr); 619 NVSWITCH_MINION_WR32_LS10(device, instance, _MINION, _MINION_INTR_STALL_EN, intr); 620 } 621 622 // Write to IRQSCLR to clear status of interrupt 623 NVSWITCH_MINION_WR32_LS10(device, instance, _CMINION, _FALCON_IRQSCLR, pending); 624 625 if (unhandled != 0) 626 { 627 return -NVL_MORE_PROCESSING_REQUIRED; 628 } 629 630 return NVL_SUCCESS; 631 } 632 633 /* 634 * @Brief : Send priv ring command and wait for completion 635 * 636 * @Description : 637 * 638 * @param[in] device a reference to the device to initialize 639 * @param[in] cmd encoded priv ring command 640 */ 641 static NvlStatus 642 
_nvswitch_ring_master_cmd_ls10 643 ( 644 nvswitch_device *device, 645 NvU32 cmd 646 ) 647 { 648 NvU32 value; 649 NVSWITCH_TIMEOUT timeout; 650 NvBool keepPolling; 651 652 NVSWITCH_ENG_WR32(device, PRI_MASTER_RS, , 0, _PPRIV_MASTER, _RING_COMMAND, cmd); 653 654 nvswitch_timeout_create(NVSWITCH_INTERVAL_5MSEC_IN_NS, &timeout); 655 do 656 { 657 keepPolling = (nvswitch_timeout_check(&timeout)) ? NV_FALSE : NV_TRUE; 658 659 value = NVSWITCH_ENG_RD32(device, PRI_MASTER_RS, , 0, _PPRIV_MASTER, _RING_COMMAND); 660 if (FLD_TEST_DRF(_PPRIV_MASTER, _RING_COMMAND, _CMD, _NO_CMD, value)) 661 { 662 break; 663 } 664 665 nvswitch_os_sleep(1); 666 } 667 while (keepPolling); 668 669 if (!FLD_TEST_DRF(_PPRIV_MASTER, _RING_COMMAND, _CMD, _NO_CMD, value)) 670 { 671 NVSWITCH_PRINT(device, ERROR, 672 "%s: Timeout waiting for RING_COMMAND == NO_CMD (cmd=0x%x).\n", 673 __FUNCTION__, cmd); 674 return -NVL_INITIALIZATION_TOTAL_FAILURE; 675 } 676 677 return NVL_SUCCESS; 678 } 679 680 static NvlStatus 681 _nvswitch_service_priv_ring_ls10 682 ( 683 nvswitch_device *device 684 ) 685 { 686 NvU32 pending, i; 687 NVSWITCH_PRI_ERROR_LOG_TYPE pri_error; 688 NvlStatus status = NVL_SUCCESS; 689 690 pending = NVSWITCH_ENG_RD32(device, PRI_MASTER_RS, , 0, _PPRIV_MASTER, _RING_INTERRUPT_STATUS0); 691 if (pending == 0) 692 { 693 return -NVL_NOT_FOUND; 694 } 695 696 // 697 // SYS 698 // 699 700 if (FLD_TEST_DRF_NUM(_PPRIV_MASTER, _RING_INTERRUPT_STATUS0, 701 _GBL_WRITE_ERROR_SYS, 1, pending)) 702 { 703 pri_error.addr = NVSWITCH_ENG_RD32(device, SYS_PRI_HUB, , 0, _PPRIV_SYS, _PRIV_ERROR_ADR); 704 pri_error.data = NVSWITCH_ENG_RD32(device, SYS_PRI_HUB, , 0, _PPRIV_SYS, _PRIV_ERROR_WRDAT); 705 pri_error.info = NVSWITCH_ENG_RD32(device, SYS_PRI_HUB, , 0, _PPRIV_SYS, _PRIV_ERROR_INFO); 706 pri_error.code = NVSWITCH_ENG_RD32(device, SYS_PRI_HUB, , 0, _PPRIV_SYS, _PRIV_ERROR_CODE); 707 708 NVSWITCH_REPORT_PRI_ERROR_NONFATAL(_HW_HOST_PRIV_ERROR, "PRI WRITE SYS error", NVSWITCH_PPRIV_WRITE_SYS, 0, pri_error); 709 
        // Log the SYS PRI write error details read above.
        NVSWITCH_PRINT(device, ERROR,
            "SYS PRI write error addr: 0x%08x data: 0x%08x info: 0x%08x code: 0x%08x\n",
            pri_error.addr, pri_error.data,
            pri_error.info, pri_error.code);

        // Mark the SYS write error as handled in the local pending mask.
        pending = FLD_SET_DRF_NUM(_PPRIV_MASTER, _RING_INTERRUPT_STATUS0,
            _GBL_WRITE_ERROR_SYS, 0, pending);
    }

    //
    // SYSB
    //

    if (FLD_TEST_DRF_NUM(_PPRIV_MASTER, _RING_INTERRUPT_STATUS0,
        _GBL_WRITE_ERROR_SYSB, 1, pending))
    {
        // Read the PRI error description registers from the SYSB PRI hub.
        pri_error.addr = NVSWITCH_ENG_RD32(device, SYSB_PRI_HUB, , 0, _PPRIV_SYS, _PRIV_ERROR_ADR);
        pri_error.data = NVSWITCH_ENG_RD32(device, SYSB_PRI_HUB, , 0, _PPRIV_SYS, _PRIV_ERROR_WRDAT);
        pri_error.info = NVSWITCH_ENG_RD32(device, SYSB_PRI_HUB, , 0, _PPRIV_SYS, _PRIV_ERROR_INFO);
        pri_error.code = NVSWITCH_ENG_RD32(device, SYSB_PRI_HUB, , 0, _PPRIV_SYS, _PRIV_ERROR_CODE);

        // NOTE(review): reported as NVSWITCH_PPRIV_WRITE_SYS instance 1 — presumably
        // SYSB is modeled as the second SYS instance; confirm against error catalog.
        NVSWITCH_REPORT_PRI_ERROR_NONFATAL(_HW_HOST_PRIV_ERROR, "PRI WRITE SYSB error", NVSWITCH_PPRIV_WRITE_SYS, 1, pri_error);

        NVSWITCH_PRINT(device, ERROR,
            "SYSB PRI write error addr: 0x%08x data: 0x%08x info: 0x%08x code: 0x%08x\n",
            pri_error.addr, pri_error.data,
            pri_error.info, pri_error.code);

        // Mark the SYSB write error as handled.
        pending = FLD_SET_DRF_NUM(_PPRIV_MASTER, _RING_INTERRUPT_STATUS0,
            _GBL_WRITE_ERROR_SYSB, 0, pending);
    }

    //
    // per-PRT
    //

    // One interrupt status bit per PRT PRI hub instance; service each set bit.
    for (i = 0; i < NUM_PRT_PRI_HUB_ENGINE_LS10; i++)
    {
        if (DRF_VAL(_PPRIV_MASTER, _RING_INTERRUPT_STATUS0,
            _GBL_WRITE_ERROR_FBP, pending) & NVBIT(i))
        {
            // Read the PRI error description registers for PRT instance i.
            pri_error.addr = NVSWITCH_ENG_RD32(device, PRT_PRI_HUB, , i, _PPRIV_PRT, _PRIV_ERROR_ADR);
            pri_error.data = NVSWITCH_ENG_RD32(device, PRT_PRI_HUB, , i, _PPRIV_PRT, _PRIV_ERROR_WRDAT);
            pri_error.info = NVSWITCH_ENG_RD32(device, PRT_PRI_HUB, , i, _PPRIV_PRT, _PRIV_ERROR_INFO);
            pri_error.code = NVSWITCH_ENG_RD32(device, PRT_PRI_HUB, , i, _PPRIV_PRT, _PRIV_ERROR_CODE);

            NVSWITCH_REPORT_PRI_ERROR_NONFATAL(_HW_HOST_PRIV_ERROR, "PRI WRITE PRT error", NVSWITCH_PPRIV_WRITE_PRT, i, pri_error);

            NVSWITCH_PRINT(device, ERROR,
                "PRT%d PRI write error addr: 0x%08x data: 0x%08x info: 0x%08x code: 0x%08x\n",
                i, pri_error.addr, pri_error.data, pri_error.info, pri_error.code);

            // Clear this PRT's bit from the pending mask.
            pending &= ~DRF_NUM(_PPRIV_MASTER, _RING_INTERRUPT_STATUS0,
                _GBL_WRITE_ERROR_FBP, NVBIT(i));
        }
    }

    // Anything still set in 'pending' was not one of the recognized write-error
    // sources above; log it as a fatal, unexpected PRI error and bail out
    // before ACK'ing, so more processing can be requested.
    if (pending != 0)
    {
        NVSWITCH_PRINT_SXID(device, NVSWITCH_ERR_HW_HOST_PRIV_ERROR,
            "Fatal, Unexpected PRI error\n");
        NVSWITCH_LOG_FATAL_DATA(device, _HW, _HW_HOST_PRIV_ERROR, 2, 0, NV_FALSE, &pending);

        NVSWITCH_PRINT(device, ERROR,
            "Unexpected PRI error 0x%08x\n", pending);
        return -NVL_MORE_PROCESSING_REQUIRED;
    }

    // acknowledge the interrupt to the ringmaster
    status = _nvswitch_ring_master_cmd_ls10(device,
        DRF_DEF(_PPRIV_MASTER, _RING_COMMAND, _CMD, _ACK_INTERRUPT));
    if (status != NVL_SUCCESS)
    {
        NVSWITCH_PRINT(device, ERROR, "Timeout ACK'ing PRI error\n");
        //
        // Don't return error code -- there is nothing kernel SW can do about it if ACK failed.
        // Likely it is PLM protected and SOE needs to handle it.
        //
    }

    return NVL_SUCCESS;
}

//
// Copy an inclusive, 32-bit-aligned range of NPORT registers
// [register_start, register_end] into the raw error log buffer, starting at
// buffer index *idx. On success *idx is advanced past the copied words.
//
// Returns -NVL_BAD_ARGS for an inverted or misaligned range, or if the copy
// would overflow NVSWITCH_RAW_ERROR_LOG_DATA_SIZE; NVL_SUCCESS otherwise.
//
static NvlStatus
_nvswitch_collect_nport_error_info_ls10
(
    nvswitch_device *device,
    NvU32 link,
    NVSWITCH_RAW_ERROR_LOG_TYPE *data,
    NvU32 *idx,
    NvU32 register_start,
    NvU32 register_end
)
{
    NvU32 register_block_size;
    NvU32 i = *idx;

    // Reject inverted ranges and offsets that are not 32-bit aligned.
    if ((register_start > register_end) ||
        (register_start % sizeof(NvU32) != 0) ||
        (register_end % sizeof(NvU32) != 0))
    {
        return -NVL_BAD_ARGS;
    }

    // Number of 32-bit words in the inclusive range.
    register_block_size = (register_end - register_start)/sizeof(NvU32) + 1;
    // Second test also rejects the case where (i + register_block_size)
    // wrapped around; keeps the bounds check safe for any inputs.
    if ((i + register_block_size > NVSWITCH_RAW_ERROR_LOG_DATA_SIZE) ||
        (register_block_size > NVSWITCH_RAW_ERROR_LOG_DATA_SIZE))
    {
        return -NVL_BAD_ARGS;
    }

    // Copy each register in the range into the packed log buffer.
    do
    {
        data->data[i] = NVSWITCH_ENG_OFF_RD32(device, NPORT, , link, register_start);
        register_start += sizeof(NvU32);
        i++;

    }
    while (register_start <= register_end);

    *idx = i;
    return NVL_SUCCESS;
}

//
// Gather the NPORT error logs (timestamp/misc/header per unit) requested by
// 'collect_flags' into 'data', packed in order; data->flags records which
// blocks were actually captured.
//
static void
_nvswitch_collect_error_info_ls10
(
    nvswitch_device *device,
    NvU32 link,
    NvU32 collect_flags, // NVSWITCH_RAW_ERROR_LOG_DATA_FLAG_*
    NVSWITCH_RAW_ERROR_LOG_TYPE *data
)
{
    NvU32 val;
    NvU32 i = 0;
    NvlStatus status = NVL_SUCCESS;

    //
    // The requested data 'collect_flags' is captured, if valid.
    // if the error log buffer fills, then the currently captured data block
    // could be truncated and subsequent blocks will be skipped.
    // The 'flags' field in the log structure describes which blocks are
    // actually captured.
    // Captured blocks are packed, in order.
    //

    data->flags = 0;

    // ROUTE
    if (collect_flags & NVSWITCH_RAW_ERROR_LOG_DATA_FLAG_ROUTE_TIME)
    {
        status = _nvswitch_collect_nport_error_info_ls10(device, link, data, &i,
                     NV_ROUTE_ERR_TIMESTAMP_LOG,
                     NV_ROUTE_ERR_TIMESTAMP_LOG);
        if (status == NVL_SUCCESS)
        {
            data->flags |= NVSWITCH_RAW_ERROR_LOG_DATA_FLAG_ROUTE_TIME;
            NVSWITCH_PRINT(device, INFO,
                "ROUTE: TIMESTAMP: 0x%08x\n", data->data[i-1]);
        }
    }

    // MISC/HEADER logs are only meaningful when HW flags the header log valid.
    val = NVSWITCH_ENG_RD32(device, NPORT, , link, _ROUTE, _ERR_HEADER_LOG_VALID);
    if (FLD_TEST_DRF_NUM(_ROUTE, _ERR_HEADER_LOG_VALID, _HEADERVALID0, 1, val))
    {
        if (collect_flags & NVSWITCH_RAW_ERROR_LOG_DATA_FLAG_ROUTE_MISC)
        {
            status = _nvswitch_collect_nport_error_info_ls10(device, link, data, &i,
                         NV_ROUTE_ERR_MISC_LOG_0,
                         NV_ROUTE_ERR_MISC_LOG_0);
            if (status == NVL_SUCCESS)
            {
                data->flags |= NVSWITCH_RAW_ERROR_LOG_DATA_FLAG_ROUTE_MISC;
                NVSWITCH_PRINT(device, INFO,
                    "ROUTE: MISC: 0x%08x\n", data->data[i-1]);
            }
        }

        if (collect_flags & NVSWITCH_RAW_ERROR_LOG_DATA_FLAG_ROUTE_HDR)
        {
            status = _nvswitch_collect_nport_error_info_ls10(device, link, data, &i,
                         NV_ROUTE_ERR_HEADER_LOG_4,
                         NV_ROUTE_ERR_HEADER_LOG_10);
            if (status == NVL_SUCCESS)
            {
                data->flags |= NVSWITCH_RAW_ERROR_LOG_DATA_FLAG_ROUTE_HDR;
                // NOTE(review): prints 8 words but the HDR capture above adds
                // only 7 (LOG_4..LOG_10); data->data[i-8] is the MISC word when
                // MISC was captured just before — confirm this is intended.
                NVSWITCH_PRINT(device, INFO,
                    "ROUTE: HEADER: 0x%08x, 0x%08x, 0x%08x, 0x%08x, 0x%08x, 0x%08x, 0x%08x, 0x%08x,\n",
                    data->data[i-8], data->data[i-7], data->data[i-6], data->data[i-5],
                    data->data[i-4], data->data[i-3], data->data[i-2], data->data[i-1]);
            }
        }
    }

    // INGRESS
    if (collect_flags & NVSWITCH_RAW_ERROR_LOG_DATA_FLAG_INGRESS_TIME)
    {
        status = _nvswitch_collect_nport_error_info_ls10(device, link, data, &i,
                     NV_INGRESS_ERR_TIMESTAMP_LOG,
                     NV_INGRESS_ERR_TIMESTAMP_LOG);
        if (status == NVL_SUCCESS)
        {
            data->flags |= NVSWITCH_RAW_ERROR_LOG_DATA_FLAG_INGRESS_TIME;

            NVSWITCH_PRINT(device, INFO,
                "INGRESS: TIMESTAMP: 0x%08x\n", data->data[i-1]);
        }
    }

    val = NVSWITCH_ENG_RD32(device, NPORT, , link, _INGRESS, _ERR_HEADER_LOG_VALID);
    if (FLD_TEST_DRF_NUM(_INGRESS, _ERR_HEADER_LOG_VALID, _HEADERVALID0, 1, val))
    {
        if (collect_flags & NVSWITCH_RAW_ERROR_LOG_DATA_FLAG_INGRESS_MISC)
        {
            status = _nvswitch_collect_nport_error_info_ls10(device, link, data, &i,
                         NV_INGRESS_ERR_MISC_LOG_0,
                         NV_INGRESS_ERR_MISC_LOG_0);
            if (status == NVL_SUCCESS)
            {
                data->flags |= NVSWITCH_RAW_ERROR_LOG_DATA_FLAG_INGRESS_MISC;
                NVSWITCH_PRINT(device, INFO,
                    "INGRESS: MISC: 0x%08x\n", data->data[i-1]);
            }
        }

        if (collect_flags & NVSWITCH_RAW_ERROR_LOG_DATA_FLAG_INGRESS_HDR)
        {
            status = _nvswitch_collect_nport_error_info_ls10(device, link, data, &i,
                         NV_INGRESS_ERR_HEADER_LOG_4,
                         NV_INGRESS_ERR_HEADER_LOG_9);
            if (status == NVL_SUCCESS)
            {
                data->flags |= NVSWITCH_RAW_ERROR_LOG_DATA_FLAG_INGRESS_HDR;
                // NOTE(review): prints 7 words but the HDR capture above adds
                // only 6 (LOG_4..LOG_9); data->data[i-7] is the preceding MISC
                // word when captured — confirm this is intended.
                NVSWITCH_PRINT(device, INFO,
                    "INGRESS: HEADER: 0x%08x, 0x%08x, 0x%08x, 0x%08x, 0x%08x, 0x%08x, 0x%08x,\n",
                    data->data[i-7], data->data[i-6], data->data[i-5], data->data[i-4],
                    data->data[i-3], data->data[i-2], data->data[i-1]);
            }
        }
    }

    // EGRESS
    if (collect_flags & NVSWITCH_RAW_ERROR_LOG_DATA_FLAG_EGRESS_TIME)
    {
        status = _nvswitch_collect_nport_error_info_ls10(device, link, data, &i,
                     NV_EGRESS_ERR_TIMESTAMP_LOG,
                     NV_EGRESS_ERR_TIMESTAMP_LOG);
        if (status == NVL_SUCCESS)
        {
            data->flags |= NVSWITCH_RAW_ERROR_LOG_DATA_FLAG_EGRESS_TIME;
            NVSWITCH_PRINT(device, INFO,
                "EGRESS: TIMESTAMP: 0x%08x\n", data->data[i-1]);
        }
    }

    val = NVSWITCH_ENG_RD32(device, NPORT, , link, _EGRESS, _ERR_HEADER_LOG_VALID);
    if (FLD_TEST_DRF_NUM(_EGRESS, _ERR_HEADER_LOG_VALID, _HEADERVALID0, 1, val))
    {
        if (collect_flags & NVSWITCH_RAW_ERROR_LOG_DATA_FLAG_EGRESS_MISC)
        {
            status = _nvswitch_collect_nport_error_info_ls10(device, link, data, &i,
                         NV_EGRESS_ERR_MISC_LOG_0,
                         NV_EGRESS_ERR_MISC_LOG_0);
            if (status == NVL_SUCCESS)
            {
                data->flags |= NVSWITCH_RAW_ERROR_LOG_DATA_FLAG_EGRESS_MISC;
                NVSWITCH_PRINT(device, INFO,
                    "EGRESS: MISC: 0x%08x\n", data->data[i-1]);
            }
        }

        if (collect_flags & NVSWITCH_RAW_ERROR_LOG_DATA_FLAG_EGRESS_HDR)
        {
            status = _nvswitch_collect_nport_error_info_ls10(device, link, data, &i,
                         NV_EGRESS_ERR_HEADER_LOG_4,
                         NV_EGRESS_ERR_HEADER_LOG_10);
            if (status == NVL_SUCCESS)
            {
                data->flags |= NVSWITCH_RAW_ERROR_LOG_DATA_FLAG_EGRESS_HDR;
                NVSWITCH_PRINT(device, INFO,
                    "EGRESS: HEADER: 0x%08x, 0x%08x, 0x%08x, 0x%08x, 0x%08x, 0x%08x, 0x%08x\n",
                    data->data[i-7], data->data[i-6], data->data[i-5], data->data[i-4],
                    data->data[i-3], data->data[i-2], data->data[i-1]);
            }
        }
    }

    // EGRESS multicast logs
    if (collect_flags & NVSWITCH_RAW_ERROR_LOG_DATA_FLAG_EGRESS_MC_TIME)
    {
        status = _nvswitch_collect_nport_error_info_ls10(device, link, data, &i,
                     NV_EGRESS_MC_ERR_TIMESTAMP_LOG,
                     NV_EGRESS_MC_ERR_TIMESTAMP_LOG);
        if (status == NVL_SUCCESS)
        {
            data->flags |= NVSWITCH_RAW_ERROR_LOG_DATA_FLAG_EGRESS_MC_TIME;
            NVSWITCH_PRINT(device, INFO,
                "EGRESS: TIME MC: 0x%08x\n", data->data[i-1]);
        }
    }

    val = NVSWITCH_ENG_RD32(device, NPORT, , link, _EGRESS, _MC_ERR_HEADER_LOG_VALID);
    if (FLD_TEST_DRF_NUM(_EGRESS, _MC_ERR_HEADER_LOG_VALID, _HEADERVALID0, 1, val))
    {
        if (collect_flags & NVSWITCH_RAW_ERROR_LOG_DATA_FLAG_EGRESS_MC_MISC)
        {
            status = _nvswitch_collect_nport_error_info_ls10(device, link, data, &i,
                         NV_EGRESS_MC_ERR_MISC_LOG_0,
                         NV_EGRESS_MC_ERR_MISC_LOG_0);
            if (status == NVL_SUCCESS)
            {
                data->flags |= NVSWITCH_RAW_ERROR_LOG_DATA_FLAG_EGRESS_MC_MISC;
                NVSWITCH_PRINT(device, INFO,
                    "EGRESS: MISC MC: 0x%08x\n", data->data[i-1]);
            }
        }

        if (collect_flags & NVSWITCH_RAW_ERROR_LOG_DATA_FLAG_EGRESS_MC_HDR)
        {
            status = _nvswitch_collect_nport_error_info_ls10(device, link, data, &i,
                         NV_EGRESS_MC_ERR_HEADER_LOG_4,
                         NV_EGRESS_MC_ERR_HEADER_LOG_10);
            if (status == NVL_SUCCESS)
            {
                data->flags |= NVSWITCH_RAW_ERROR_LOG_DATA_FLAG_EGRESS_MC_HDR;
                NVSWITCH_PRINT(device, INFO,
                    "EGRESS MC: HEADER: 0x%08x, 0x%08x, 0x%08x, 0x%08x, 0x%08x, 0x%08x, 0x%08x\n",
                    data->data[i-7], data->data[i-6], data->data[i-5], data->data[i-4],
                    data->data[i-3], data->data[i-2], data->data[i-1]);
            }
        }
    }

    // Multicast / reduction TSTATE timestamps
    if (collect_flags & NVSWITCH_RAW_ERROR_LOG_DATA_FLAG_MC_TIME)
    {
        status = _nvswitch_collect_nport_error_info_ls10(device, link, data, &i,
                     NV_MULTICASTTSTATE_ERR_TIMESTAMP_LOG,
                     NV_MULTICASTTSTATE_ERR_TIMESTAMP_LOG);
        if (status == NVL_SUCCESS)
        {
            data->flags |= NVSWITCH_RAW_ERROR_LOG_DATA_FLAG_MC_TIME;
            NVSWITCH_PRINT(device, INFO,
                "MC TSTATE MC: 0x%08x\n",
                data->data[i-1]);
        }
    }

    if (collect_flags & NVSWITCH_RAW_ERROR_LOG_DATA_FLAG_RED_TIME)
    {
        status = _nvswitch_collect_nport_error_info_ls10(device, link, data, &i,
                     NV_REDUCTIONTSTATE_ERR_TIMESTAMP_LOG,
                     NV_REDUCTIONTSTATE_ERR_TIMESTAMP_LOG);
        if (status == NVL_SUCCESS)
        {
            data->flags |= NVSWITCH_RAW_ERROR_LOG_DATA_FLAG_RED_TIME;
            NVSWITCH_PRINT(device, INFO,
                "MC TSTATE RED: 0x%08x\n",
                data->data[i-1]);
        }
    }

    // Zero-fill the unused tail of the log buffer.
    while (i < NVSWITCH_RAW_ERROR_LOG_DATA_SIZE)
    {
        data->data[i++] = 0;
    }
}

//
// Service fatal ROUTE interrupts for one NPORT 'link': report each pending
// error, log ECC events to inforom, then clear/ack the HW status.
//
static NvlStatus
_nvswitch_service_route_fatal_ls10
(
    nvswitch_device *device,
    NvU32 link
)
{
    ls10_device *chip_device = NVSWITCH_GET_CHIP_DEVICE_LS10(device);
    NVSWITCH_INTERRUPT_LOG_TYPE report = { 0 };
    NvU32 pending, bit, contain, unhandled;
    NVSWITCH_RAW_ERROR_LOG_TYPE data = {0, { 0 }};
    INFOROM_NVS_ECC_ERROR_EVENT err_event = {0};

    // Pending = raw HW status masked by the fatal report-enable and SW mask.
    report.raw_pending = NVSWITCH_ENG_RD32(device, NPORT, , link, _ROUTE, _ERR_STATUS_0);
    report.raw_enable = NVSWITCH_ENG_RD32(device, NPORT, , link, _ROUTE, _ERR_FATAL_REPORT_EN_0);
    report.mask = report.raw_enable & chip_device->intr_mask.route.fatal;
    pending = report.raw_pending & report.mask;

    if (pending == 0)
    {
        return -NVL_NOT_FOUND;
    }

    unhandled = pending;

    report.raw_first = NVSWITCH_ENG_RD32(device, NPORT, , link, _ROUTE, _ERR_FIRST_0);
    contain = NVSWITCH_ENG_RD32(device, NPORT, , link, _ROUTE, _ERR_CONTAIN_EN_0);

    // Route buffer over/underflow
    bit = DRF_NUM(_ROUTE, _ERR_STATUS_0, _ROUTEBUFERR, 1);
    if (nvswitch_test_flags(pending, bit))
    {
        _nvswitch_collect_error_info_ls10(device, link,
            NVSWITCH_RAW_ERROR_LOG_DATA_FLAG_ROUTE_TIME,
            &data);
        NVSWITCH_REPORT_CONTAIN(_HW_NPORT_ROUTE_ROUTEBUFERR, "route buffer over/underflow", NV_FALSE);
        NVSWITCH_REPORT_CONTAIN_DATA(_HW_NPORT_ROUTE_ROUTEBUFERR, data);
        nvswitch_clear_flags(&unhandled, bit);
    }

    // GLT ECC double-bit error; capture the failing address when HW says it is valid.
    bit = DRF_NUM(_ROUTE, _ERR_STATUS_0, _GLT_ECC_DBE_ERR, 1);
    if (nvswitch_test_flags(pending, bit))
    {
        NvBool bAddressValid = NV_FALSE;
        NvU32 address = 0;
        NvU32 addressValid = NVSWITCH_ENG_RD32(device, NPORT, , link, _ROUTE,
                                 _ERR_GLT_ECC_ERROR_ADDRESS_VALID);

        if (FLD_TEST_DRF(_ROUTE_ERR_GLT, _ECC_ERROR_ADDRESS_VALID, _VALID, _VALID,
            addressValid))
        {
            address = NVSWITCH_ENG_RD32(device, NPORT, , link, _ROUTE,
                          _ERR_GLT_ECC_ERROR_ADDRESS);
            bAddressValid = NV_TRUE;
        }

        _nvswitch_collect_error_info_ls10(device, link,
            NVSWITCH_RAW_ERROR_LOG_DATA_FLAG_ROUTE_TIME |
            NVSWITCH_RAW_ERROR_LOG_DATA_FLAG_ROUTE_MISC |
            NVSWITCH_RAW_ERROR_LOG_DATA_FLAG_ROUTE_HDR,
            &data);
        NVSWITCH_REPORT_CONTAIN(_HW_NPORT_ROUTE_GLT_ECC_DBE_ERR, "route GLT DBE", NV_FALSE);
        NVSWITCH_REPORT_CONTAIN_DATA(_HW_NPORT_ROUTE_GLT_ECC_DBE_ERR, data);
        nvswitch_clear_flags(&unhandled, bit);

        _nvswitch_construct_ecc_error_event_ls10(&err_event,
            NVSWITCH_ERR_HW_NPORT_ROUTE_GLT_ECC_DBE_ERR, link, bAddressValid,
            address, NV_TRUE, 1);

        nvswitch_inforom_ecc_log_err_event(device, &err_event);
    }

    // PD control parity error
    bit = DRF_NUM(_ROUTE, _ERR_STATUS_0, _PDCTRLPARERR, 1);
    if (nvswitch_test_flags(pending, bit))
    {
        _nvswitch_collect_error_info_ls10(device, link,
            NVSWITCH_RAW_ERROR_LOG_DATA_FLAG_ROUTE_TIME |
            NVSWITCH_RAW_ERROR_LOG_DATA_FLAG_ROUTE_MISC |
            NVSWITCH_RAW_ERROR_LOG_DATA_FLAG_ROUTE_HDR,
            &data);
        NVSWITCH_REPORT_CONTAIN(_HW_NPORT_ROUTE_PDCTRLPARERR, "route parity", NV_FALSE);
        NVSWITCH_REPORT_CONTAIN_DATA(_HW_NPORT_ROUTE_PDCTRLPARERR, data);
        nvswitch_clear_flags(&unhandled, bit);
    }

    // Incoming NVS ECC double-bit error
    bit = DRF_NUM(_ROUTE, _ERR_STATUS_0, _NVS_ECC_DBE_ERR, 1);
    if (nvswitch_test_flags(pending, bit))
    {
        _nvswitch_collect_error_info_ls10(device, link,
            NVSWITCH_RAW_ERROR_LOG_DATA_FLAG_ROUTE_TIME |
            NVSWITCH_RAW_ERROR_LOG_DATA_FLAG_ROUTE_MISC |
            NVSWITCH_RAW_ERROR_LOG_DATA_FLAG_ROUTE_HDR,
            &data);
        NVSWITCH_REPORT_CONTAIN(_HW_NPORT_ROUTE_NVS_ECC_DBE_ERR, "route incoming DBE", NV_FALSE);
        NVSWITCH_REPORT_CONTAIN_DATA(_HW_NPORT_ROUTE_NVS_ECC_DBE_ERR, data);
        nvswitch_clear_flags(&unhandled, bit);

        _nvswitch_construct_ecc_error_event_ls10(&err_event,
            NVSWITCH_ERR_HW_NPORT_ROUTE_NVS_ECC_DBE_ERR, link, NV_FALSE, 0,
            NV_TRUE, 1);

        nvswitch_inforom_ecc_log_err_event(device, &err_event);

        // Clear associated LIMIT_ERR interrupt
        if (report.raw_pending & DRF_NUM(_ROUTE, _ERR_STATUS_0, _NVS_ECC_LIMIT_ERR, 1))
        {
            NVSWITCH_ENG_WR32(device, NPORT, , link, _ROUTE, _ERR_STATUS_0,
                DRF_NUM(_ROUTE, _ERR_STATUS_0, _NVS_ECC_LIMIT_ERR, 1));
        }
    }

    // Credit parity error
    bit = DRF_NUM(_ROUTE, _ERR_STATUS_0, _CDTPARERR, 1);
    if (nvswitch_test_flags(pending, bit))
    {

        _nvswitch_collect_error_info_ls10(device, link,
            NVSWITCH_RAW_ERROR_LOG_DATA_FLAG_ROUTE_TIME,
            &data);
        NVSWITCH_REPORT_CONTAIN(_HW_NPORT_ROUTE_CDTPARERR, "route credit parity", NV_FALSE);
        NVSWITCH_REPORT_CONTAIN_DATA(_HW_NPORT_ROUTE_CDTPARERR, data);
        nvswitch_clear_flags(&unhandled, bit);

        _nvswitch_construct_ecc_error_event_ls10(&err_event,
            NVSWITCH_ERR_HW_NPORT_ROUTE_CDTPARERR, link, NV_FALSE, 0,
            NV_TRUE, 1);

        nvswitch_inforom_ecc_log_err_event(device, &err_event);
    }

    // Multicast route ID ECC double-bit error
    bit = DRF_NUM(_ROUTE, _ERR_STATUS_0, _MCRID_ECC_DBE_ERR, 1);
    if (nvswitch_test_flags(pending, bit))
    {
        _nvswitch_collect_error_info_ls10(device, link,
            NVSWITCH_RAW_ERROR_LOG_DATA_FLAG_ROUTE_TIME |
            NVSWITCH_RAW_ERROR_LOG_DATA_FLAG_ROUTE_MISC |
            NVSWITCH_RAW_ERROR_LOG_DATA_FLAG_ROUTE_HDR,
            &data);
        NVSWITCH_REPORT_CONTAIN(_HW_NPORT_ROUTE_MCRID_ECC_DBE_ERR, "MC route ECC", NV_FALSE);
        NVSWITCH_REPORT_CONTAIN_DATA(_HW_NPORT_ROUTE_MCRID_ECC_DBE_ERR, data);
        nvswitch_clear_flags(&unhandled, bit);

        _nvswitch_construct_ecc_error_event_ls10(&err_event,
            NVSWITCH_ERR_HW_NPORT_ROUTE_MCRID_ECC_DBE_ERR, link, NV_FALSE, 0,
            NV_TRUE, 1);

        nvswitch_inforom_ecc_log_err_event(device, &err_event);
    }

    // Extended multicast route ID ECC double-bit error
    bit = DRF_NUM(_ROUTE, _ERR_STATUS_0, _EXTMCRID_ECC_DBE_ERR, 1);
    if (nvswitch_test_flags(pending, bit))
    {
        _nvswitch_collect_error_info_ls10(device, link,
            NVSWITCH_RAW_ERROR_LOG_DATA_FLAG_ROUTE_TIME |
            NVSWITCH_RAW_ERROR_LOG_DATA_FLAG_ROUTE_MISC |
            NVSWITCH_RAW_ERROR_LOG_DATA_FLAG_ROUTE_HDR,
            &data);
        NVSWITCH_REPORT_CONTAIN(_HW_NPORT_ROUTE_EXTMCRID_ECC_DBE_ERR, "Extd MC route ECC", NV_FALSE);
        NVSWITCH_REPORT_CONTAIN_DATA(_HW_NPORT_ROUTE_EXTMCRID_ECC_DBE_ERR, data);
        nvswitch_clear_flags(&unhandled, bit);

        _nvswitch_construct_ecc_error_event_ls10(&err_event,
            NVSWITCH_ERR_HW_NPORT_ROUTE_EXTMCRID_ECC_DBE_ERR, link, NV_FALSE, 0,
            NV_TRUE, 1);

        nvswitch_inforom_ecc_log_err_event(device, &err_event);
    }

    // Route RAM ECC double-bit error
    bit = DRF_NUM(_ROUTE, _ERR_STATUS_0, _RAM_ECC_DBE_ERR, 1);
    if (nvswitch_test_flags(pending, bit))
    {
        _nvswitch_collect_error_info_ls10(device, link,
            NVSWITCH_RAW_ERROR_LOG_DATA_FLAG_ROUTE_TIME |
            NVSWITCH_RAW_ERROR_LOG_DATA_FLAG_ROUTE_MISC |
            NVSWITCH_RAW_ERROR_LOG_DATA_FLAG_ROUTE_HDR,
            &data);
        NVSWITCH_REPORT_CONTAIN(_HW_NPORT_ROUTE_RAM_ECC_DBE_ERR, "route RAM ECC", NV_FALSE);
        NVSWITCH_REPORT_CONTAIN_DATA(_HW_NPORT_ROUTE_RAM_ECC_DBE_ERR, data);
        nvswitch_clear_flags(&unhandled, bit);

        _nvswitch_construct_ecc_error_event_ls10(&err_event,
            NVSWITCH_ERR_HW_NPORT_ROUTE_RAM_ECC_DBE_ERR, link, NV_FALSE, 0,
            NV_TRUE, 1);

        nvswitch_inforom_ecc_log_err_event(device, &err_event);
    }

    NVSWITCH_UNHANDLED_CHECK(device, unhandled);

    // Disable interrupts that have occurred after fatal error.
    // This helps prevent an interrupt storm if HW keeps triggering unnecessary stream of interrupts.
    if (device->link[link].fatal_error_occurred)
    {
        // When SOE is available, it owns the (possibly PLM-protected) disable.
        if (nvswitch_is_soe_supported(device))
        {
            nvswitch_soe_disable_nport_fatal_interrupts_ls10(device, link,
                report.raw_enable & ~pending, RM_SOE_CORE_NPORT_ROUTE_INTERRUPT);
        }
        else
        {
            NVSWITCH_ENG_WR32(device, NPORT, , link, _ROUTE, _ERR_FATAL_REPORT_EN_0,
                report.raw_enable & ~pending);
        }
    }

    // Clear the FIRST bits we serviced, then the status bits themselves.
    if (report.raw_first & report.mask)
    {
        NVSWITCH_ENG_WR32(device, NPORT, , link, _ROUTE, _ERR_FIRST_0,
            report.raw_first & report.mask);
    }
    NVSWITCH_ENG_WR32(device, NPORT, , link, _ROUTE, _ERR_STATUS_0, pending);

    if (unhandled != 0)
    {
        return -NVL_MORE_PROCESSING_REQUIRED;
    }

    return NVL_SUCCESS;
}

//
// Service non-fatal ROUTE interrupts for one NPORT 'link'.
//
static NvlStatus
_nvswitch_service_route_nonfatal_ls10
(
    nvswitch_device *device,
    NvU32 link
)
{
    ls10_device *chip_device = NVSWITCH_GET_CHIP_DEVICE_LS10(device);
    NVSWITCH_INTERRUPT_LOG_TYPE report = { 0 };
    NvU32 pending, bit, unhandled;
    NVSWITCH_RAW_ERROR_LOG_TYPE data = {0, { 0 }};
    INFOROM_NVS_ECC_ERROR_EVENT err_event = {0};

    // Pending = raw HW status masked by the non-fatal report-enable and SW mask.
    report.raw_pending = NVSWITCH_ENG_RD32(device, NPORT, , link, _ROUTE, _ERR_STATUS_0);
    report.raw_enable = NVSWITCH_ENG_RD32(device, NPORT, , link, _ROUTE, _ERR_NON_FATAL_REPORT_EN_0);
    report.mask = report.raw_enable & chip_device->intr_mask.route.nonfatal;
    pending = report.raw_pending & report.mask;

    if (pending == 0)
    {
        return -NVL_NOT_FOUND;
    }

    unhandled = pending;
    report.raw_first = NVSWITCH_ENG_RD32(device, NPORT, , link, _ROUTE, _ERR_FIRST_0);

    // No port defined for the route
    bit = DRF_NUM(_ROUTE, _ERR_STATUS_0, _NOPORTDEFINEDERR, 1);
    if (nvswitch_test_flags(pending, bit))
    {
        _nvswitch_collect_error_info_ls10(device, link,
            NVSWITCH_RAW_ERROR_LOG_DATA_FLAG_ROUTE_TIME |
            NVSWITCH_RAW_ERROR_LOG_DATA_FLAG_ROUTE_MISC |
NVSWITCH_RAW_ERROR_LOG_DATA_FLAG_ROUTE_HDR, 1325 &data); 1326 NVSWITCH_REPORT_NONFATAL(_HW_NPORT_ROUTE_NOPORTDEFINEDERR, "route undefined route"); 1327 NVSWITCH_REPORT_DATA(_HW_NPORT_ROUTE_NOPORTDEFINEDERR, data); 1328 nvswitch_clear_flags(&unhandled, bit); 1329 } 1330 1331 bit = DRF_NUM(_ROUTE, _ERR_STATUS_0, _INVALIDROUTEPOLICYERR, 1); 1332 if (nvswitch_test_flags(pending, bit)) 1333 { 1334 _nvswitch_collect_error_info_ls10(device, link, 1335 NVSWITCH_RAW_ERROR_LOG_DATA_FLAG_ROUTE_TIME | 1336 NVSWITCH_RAW_ERROR_LOG_DATA_FLAG_ROUTE_MISC | 1337 NVSWITCH_RAW_ERROR_LOG_DATA_FLAG_ROUTE_HDR, 1338 &data); 1339 NVSWITCH_REPORT_NONFATAL(_HW_NPORT_ROUTE_INVALIDROUTEPOLICYERR, "route invalid policy"); 1340 NVSWITCH_REPORT_DATA(_HW_NPORT_ROUTE_INVALIDROUTEPOLICYERR, data); 1341 nvswitch_clear_flags(&unhandled, bit); 1342 } 1343 1344 bit = DRF_NUM(_ROUTE, _ERR_STATUS_0, _NVS_ECC_LIMIT_ERR, 1); 1345 if (nvswitch_test_flags(pending, bit)) 1346 { 1347 // Ignore LIMIT error if DBE is pending 1348 if (!(nvswitch_test_flags(report.raw_pending, 1349 DRF_NUM(_ROUTE, _ERR_STATUS_0, _NVS_ECC_DBE_ERR, 1)))) 1350 { 1351 report.data[0] = NVSWITCH_ENG_RD32(device, NPORT, , link, _ROUTE, _ERR_NVS_ECC_ERROR_COUNTER); 1352 _nvswitch_collect_error_info_ls10(device, link, 1353 NVSWITCH_RAW_ERROR_LOG_DATA_FLAG_ROUTE_TIME, 1354 &data); 1355 NVSWITCH_REPORT_NONFATAL(_HW_NPORT_ROUTE_NVS_ECC_LIMIT_ERR, "route incoming ECC limit"); 1356 NVSWITCH_REPORT_DATA(_HW_NPORT_ROUTE_NVS_ECC_LIMIT_ERR, data); 1357 1358 _nvswitch_construct_ecc_error_event_ls10(&err_event, 1359 NVSWITCH_ERR_HW_NPORT_ROUTE_NVS_ECC_LIMIT_ERR, link, NV_FALSE, 0, 1360 NV_FALSE, 1); 1361 1362 nvswitch_inforom_ecc_log_err_event(device, &err_event); 1363 } 1364 1365 nvswitch_clear_flags(&unhandled, bit); 1366 } 1367 1368 bit = DRF_NUM(_ROUTE, _ERR_STATUS_0, _GLT_ECC_LIMIT_ERR, 1); 1369 if (nvswitch_test_flags(pending, bit)) 1370 { 1371 // Ignore LIMIT error if DBE is pending 1372 if (!(nvswitch_test_flags(report.raw_pending, 1373 
DRF_NUM(_ROUTE, _ERR_STATUS_0, _GLT_ECC_DBE_ERR, 1)))) 1374 { 1375 report.data[0] = NVSWITCH_ENG_RD32(device, NPORT, , link, _ROUTE, _ERR_GLT_ECC_ERROR_COUNTER); 1376 _nvswitch_collect_error_info_ls10(device, link, 1377 NVSWITCH_RAW_ERROR_LOG_DATA_FLAG_ROUTE_TIME, 1378 &data); 1379 NVSWITCH_REPORT_NONFATAL(_HW_NPORT_ROUTE_NVS_ECC_LIMIT_ERR, "GLT ECC limit"); 1380 NVSWITCH_REPORT_DATA(_HW_NPORT_ROUTE_GLT_ECC_LIMIT_ERR, data); 1381 1382 _nvswitch_construct_ecc_error_event_ls10(&err_event, 1383 NVSWITCH_ERR_HW_NPORT_ROUTE_GLT_ECC_LIMIT_ERR, link, NV_FALSE, 0, 1384 NV_FALSE, 1); 1385 1386 nvswitch_inforom_ecc_log_err_event(device, &err_event); 1387 } 1388 1389 nvswitch_clear_flags(&unhandled, bit); 1390 } 1391 1392 bit = DRF_NUM(_ROUTE, _ERR_STATUS_0, _MCRID_ECC_LIMIT_ERR, 1); 1393 if (nvswitch_test_flags(pending, bit)) 1394 { 1395 // Ignore LIMIT error if DBE is pending 1396 if (!(nvswitch_test_flags(report.raw_pending, 1397 DRF_NUM(_ROUTE, _ERR_STATUS_0, _MCRID_ECC_DBE_ERR, 1)))) 1398 { 1399 report.data[0] = NVSWITCH_ENG_RD32(device, NPORT, , link, _ROUTE, _ERR_MCRID_ECC_ERROR_COUNTER); 1400 _nvswitch_collect_error_info_ls10(device, link, 1401 NVSWITCH_RAW_ERROR_LOG_DATA_FLAG_ROUTE_TIME, 1402 &data); 1403 NVSWITCH_REPORT_NONFATAL(_HW_NPORT_ROUTE_NVS_ECC_LIMIT_ERR, "MCRID ECC limit"); 1404 NVSWITCH_REPORT_DATA(_HW_NPORT_ROUTE_MCRID_ECC_LIMIT_ERR, data); 1405 1406 _nvswitch_construct_ecc_error_event_ls10(&err_event, 1407 NVSWITCH_ERR_HW_NPORT_ROUTE_MCRID_ECC_LIMIT_ERR, link, NV_FALSE, 0, 1408 NV_FALSE, 1); 1409 1410 nvswitch_inforom_ecc_log_err_event(device, &err_event); 1411 } 1412 1413 nvswitch_clear_flags(&unhandled, bit); 1414 } 1415 1416 bit = DRF_NUM(_ROUTE, _ERR_STATUS_0, _EXTMCRID_ECC_LIMIT_ERR, 1); 1417 if (nvswitch_test_flags(pending, bit)) 1418 { 1419 // Ignore LIMIT error if DBE is pending 1420 if (!(nvswitch_test_flags(report.raw_pending, 1421 DRF_NUM(_ROUTE, _ERR_STATUS_0, _EXTMCRID_ECC_DBE_ERR, 1)))) 1422 { 1423 report.data[0] = NVSWITCH_ENG_RD32(device, 
NPORT, , link, _ROUTE, _ERR_EXTMCRID_ECC_ERROR_COUNTER); 1424 _nvswitch_collect_error_info_ls10(device, link, 1425 NVSWITCH_RAW_ERROR_LOG_DATA_FLAG_ROUTE_TIME, 1426 &data); 1427 NVSWITCH_REPORT_NONFATAL(_HW_NPORT_ROUTE_NVS_ECC_LIMIT_ERR, "EXTMCRID ECC limit"); 1428 NVSWITCH_REPORT_DATA(_HW_NPORT_ROUTE_EXTMCRID_ECC_LIMIT_ERR, data); 1429 1430 _nvswitch_construct_ecc_error_event_ls10(&err_event, 1431 NVSWITCH_ERR_HW_NPORT_ROUTE_EXTMCRID_ECC_LIMIT_ERR, link, NV_FALSE, 0, 1432 NV_FALSE, 1); 1433 1434 nvswitch_inforom_ecc_log_err_event(device, &err_event); 1435 } 1436 1437 nvswitch_clear_flags(&unhandled, bit); 1438 } 1439 1440 bit = DRF_NUM(_ROUTE, _ERR_STATUS_0, _RAM_ECC_LIMIT_ERR, 1); 1441 if (nvswitch_test_flags(pending, bit)) 1442 { 1443 // Ignore LIMIT error if DBE is pending 1444 if (!(nvswitch_test_flags(report.raw_pending, 1445 DRF_NUM(_ROUTE, _ERR_STATUS_0, _RAM_ECC_DBE_ERR, 1)))) 1446 { 1447 report.data[0] = NVSWITCH_ENG_RD32(device, NPORT, , link, _ROUTE, _ERR_RAM_ECC_ERROR_COUNTER); 1448 _nvswitch_collect_error_info_ls10(device, link, 1449 NVSWITCH_RAW_ERROR_LOG_DATA_FLAG_ROUTE_TIME, 1450 &data); 1451 NVSWITCH_REPORT_NONFATAL(_HW_NPORT_ROUTE_RAM_ECC_LIMIT_ERR, "RAM ECC limit"); 1452 NVSWITCH_REPORT_DATA(_HW_NPORT_ROUTE_RAM_ECC_LIMIT_ERR, data); 1453 1454 _nvswitch_construct_ecc_error_event_ls10(&err_event, 1455 NVSWITCH_ERR_HW_NPORT_ROUTE_RAM_ECC_LIMIT_ERR, link, NV_FALSE, 0, 1456 NV_FALSE, 1); 1457 1458 nvswitch_inforom_ecc_log_err_event(device, &err_event); 1459 } 1460 1461 nvswitch_clear_flags(&unhandled, bit); 1462 } 1463 1464 bit = DRF_NUM(_ROUTE, _ERR_STATUS_0, _INVALID_MCRID_ERR, 1); 1465 if (nvswitch_test_flags(pending, bit)) 1466 { 1467 _nvswitch_collect_error_info_ls10(device, link, 1468 NVSWITCH_RAW_ERROR_LOG_DATA_FLAG_ROUTE_TIME, 1469 &data); 1470 NVSWITCH_REPORT_NONFATAL(_HW_NPORT_ROUTE_INVALID_MCRID_ERR, "invalid MC route"); 1471 NVSWITCH_REPORT_DATA(_HW_NPORT_ROUTE_INVALID_MCRID_ERR, data); 1472 nvswitch_clear_flags(&unhandled, bit); 1473 } 

    NVSWITCH_UNHANDLED_CHECK(device, unhandled);

    // Disable interrupts that have occurred after fatal error.
    // This helps prevent an interrupt storm if HW keeps triggering unnecessary stream of interrupts.
    if (device->link[link].fatal_error_occurred)
    {
        NVSWITCH_ENG_WR32(device, NPORT, , link, _ROUTE, _ERR_NON_FATAL_REPORT_EN_0,
            report.raw_enable & ~pending);
    }

    // Clear the FIRST bits we serviced, then the status bits themselves.
    if (report.raw_first & report.mask)
    {
        NVSWITCH_ENG_WR32(device, NPORT, , link, _ROUTE, _ERR_FIRST_0,
            report.raw_first & report.mask);
    }

    NVSWITCH_ENG_WR32(device, NPORT, , link, _ROUTE, _ERR_STATUS_0, pending);

    //
    // Note, when traffic is flowing, if we reset ERR_COUNT before ERR_STATUS
    // register, we won't see an interrupt again until counter wraps around.
    // In that case, we will miss writing back many ECC victim entries. Hence,
    // always clear _ERR_COUNT only after _ERR_STATUS register is cleared!
    //
    NVSWITCH_ENG_WR32(device, NPORT, , link, _ROUTE, _ERR_NVS_ECC_ERROR_COUNTER, 0x0);

    if (unhandled != 0)
    {
        return -NVL_MORE_PROCESSING_REQUIRED;
    }

    return NVL_SUCCESS;
}

//
// Ingress
//

//
// Service fatal INGRESS interrupts for one NPORT 'link'.
//
static NvlStatus
_nvswitch_service_ingress_fatal_ls10
(
    nvswitch_device *device,
    NvU32 link
)
{
    ls10_device *chip_device = NVSWITCH_GET_CHIP_DEVICE_LS10(device);
    NVSWITCH_INTERRUPT_LOG_TYPE report = { 0 };
    NvU32 pending, bit, contain, unhandled;
    NVSWITCH_RAW_ERROR_LOG_TYPE data = {0, { 0 }};
    INFOROM_NVS_ECC_ERROR_EVENT err_event = {0};

    // Pending = raw HW status masked by the fatal report-enable and SW mask.
    report.raw_pending = NVSWITCH_ENG_RD32(device, NPORT, , link, _INGRESS, _ERR_STATUS_0);
    report.raw_enable = NVSWITCH_ENG_RD32(device, NPORT, , link, _INGRESS, _ERR_FATAL_REPORT_EN_0);
    report.mask = report.raw_enable & chip_device->intr_mask.ingress[0].fatal;
    pending = report.raw_pending & report.mask;

    if (pending == 0)
    {
        return -NVL_NOT_FOUND;
    }

    unhandled = pending;
    report.raw_first = NVSWITCH_ENG_RD32(device, NPORT, , link, _INGRESS, _ERR_FIRST_0);
    contain = NVSWITCH_ENG_RD32(device, NPORT, , link, _INGRESS, _ERR_CONTAIN_EN_0);
    // Ingress logs are captured once up front and shared by all handlers below.
    _nvswitch_collect_error_info_ls10(device, link,
        NVSWITCH_RAW_ERROR_LOG_DATA_FLAG_INGRESS_TIME |
        NVSWITCH_RAW_ERROR_LOG_DATA_FLAG_INGRESS_MISC |
        NVSWITCH_RAW_ERROR_LOG_DATA_FLAG_INGRESS_HDR,
        &data);

    // Invalid command decode
    bit = DRF_NUM(_INGRESS, _ERR_STATUS_0, _CMDDECODEERR, 1);
    if (nvswitch_test_flags(pending, bit))
    {
        NVSWITCH_REPORT_CONTAIN(_HW_NPORT_INGRESS_CMDDECODEERR, "ingress invalid command", NV_FALSE);
        NVSWITCH_REPORT_CONTAIN_DATA(_HW_NPORT_INGRESS_CMDDECODEERR, data);
        nvswitch_clear_flags(&unhandled, bit);
    }

    // ExtA remap table ECC double-bit error
    bit = DRF_NUM(_INGRESS, _ERR_STATUS_0, _EXTAREMAPTAB_ECC_DBE_ERR, 1);
    if (nvswitch_test_flags(pending, bit))
    {
        report.data[0] = NVSWITCH_ENG_RD32(device, NPORT, , link, _INGRESS, _ERR_EXTAREMAPTAB_ECC_ERROR_COUNTER);
        report.data[1] = NVSWITCH_ENG_RD32(device, NPORT, , link, _INGRESS, _ERR_EXTAREMAPTAB_ECC_ERROR_ADDRESS);
        report.data[2] = NVSWITCH_ENG_RD32(device, NPORT, , link, _INGRESS, _ERR_EXTAREMAPTAB_ECC_ERROR_ADDRESS_VALID);
        NVSWITCH_REPORT_CONTAIN(_HW_NPORT_INGRESS_EXTAREMAPTAB_ECC_DBE_ERR, "ingress ExtA remap DBE", NV_FALSE);
        NVSWITCH_REPORT_CONTAIN_DATA(_HW_NPORT_INGRESS_EXTAREMAPTAB_ECC_DBE_ERR, data);
        nvswitch_clear_flags(&unhandled, bit);

        _nvswitch_construct_ecc_error_event_ls10(&err_event,
            NVSWITCH_ERR_HW_NPORT_INGRESS_EXTAREMAPTAB_ECC_DBE_ERR, link, NV_FALSE, 0,
            NV_TRUE, 1);

        nvswitch_inforom_ecc_log_err_event(device, &err_event);
    }

    // NCISOC header ECC double-bit error
    bit = DRF_NUM(_INGRESS, _ERR_STATUS_0, _NCISOC_HDR_ECC_DBE_ERR, 1);
    if (nvswitch_test_flags(pending, bit))
    {
        report.data[0] = NVSWITCH_ENG_RD32(device, NPORT, , link, _INGRESS, _ERR_NCISOC_HDR_ECC_ERROR_COUNTER);
1574 NVSWITCH_REPORT_CONTAIN(_HW_NPORT_INGRESS_NCISOC_HDR_ECC_DBE_ERR, "ingress header DBE", NV_FALSE); 1575 NVSWITCH_REPORT_CONTAIN_DATA(_HW_NPORT_INGRESS_NCISOC_HDR_ECC_DBE_ERR, data); 1576 nvswitch_clear_flags(&unhandled, bit); 1577 1578 _nvswitch_construct_ecc_error_event_ls10(&err_event, 1579 NVSWITCH_ERR_HW_NPORT_INGRESS_NCISOC_HDR_ECC_DBE_ERR, link, NV_FALSE, 0, 1580 NV_TRUE, 1); 1581 1582 nvswitch_inforom_ecc_log_err_event(device, &err_event); 1583 1584 // Clear associated LIMIT_ERR interrupt 1585 if (report.raw_pending & DRF_NUM(_INGRESS, _ERR_STATUS_0, _NCISOC_HDR_ECC_LIMIT_ERR, 1)) 1586 { 1587 NVSWITCH_ENG_WR32(device, NPORT, , link, _INGRESS, _ERR_STATUS_0, 1588 DRF_NUM(_INGRESS, _ERR_STATUS_0, _NCISOC_HDR_ECC_LIMIT_ERR, 1)); 1589 } 1590 } 1591 1592 bit = DRF_NUM(_INGRESS, _ERR_STATUS_0, _INVALIDVCSET, 1); 1593 if (nvswitch_test_flags(pending, bit)) 1594 { 1595 NVSWITCH_REPORT_CONTAIN(_HW_NPORT_INGRESS_INVALIDVCSET, "ingress invalid VCSet", NV_FALSE); 1596 NVSWITCH_REPORT_CONTAIN_DATA(_HW_NPORT_INGRESS_INVALIDVCSET, data); 1597 nvswitch_clear_flags(&unhandled, bit); 1598 } 1599 1600 bit = DRF_NUM(_INGRESS, _ERR_STATUS_0, _REMAPTAB_ECC_DBE_ERR, 1); 1601 if (nvswitch_test_flags(pending, bit)) 1602 { 1603 NvBool bAddressValid = NV_FALSE; 1604 NvU32 address = 0; 1605 NvU32 addressValid = NVSWITCH_ENG_RD32(device, NPORT, , link, _INGRESS, 1606 _ERR_REMAPTAB_ECC_ERROR_ADDRESS); 1607 1608 if (FLD_TEST_DRF(_INGRESS_ERR_REMAPTAB, _ECC_ERROR_ADDRESS_VALID, _VALID, _VALID, 1609 addressValid)) 1610 { 1611 address = NVSWITCH_ENG_RD32(device, NPORT, , link, _INGRESS, 1612 _ERR_REMAPTAB_ECC_ERROR_ADDRESS); 1613 bAddressValid = NV_TRUE; 1614 } 1615 1616 report.data[0] = NVSWITCH_ENG_RD32(device, NPORT, , link, _INGRESS, _ERR_REMAPTAB_ECC_ERROR_COUNTER); 1617 report.data[1] = NVSWITCH_ENG_RD32(device, NPORT, , link, _INGRESS, _ERR_REMAPTAB_ECC_ERROR_ADDRESS); 1618 report.data[2] = NVSWITCH_ENG_RD32(device, NPORT, , link, _INGRESS, _ERR_REMAPTAB_ECC_ERROR_ADDRESS_VALID); 
        NVSWITCH_REPORT_CONTAIN(_HW_NPORT_INGRESS_REMAPTAB_ECC_DBE_ERR, "ingress Remap DBE", NV_FALSE);
        NVSWITCH_REPORT_CONTAIN_DATA(_HW_NPORT_INGRESS_REMAPTAB_ECC_DBE_ERR, data);
        nvswitch_clear_flags(&unhandled, bit);

        _nvswitch_construct_ecc_error_event_ls10(&err_event,
            NVSWITCH_ERR_HW_NPORT_INGRESS_REMAPTAB_ECC_DBE_ERR, link, bAddressValid,
            address, NV_TRUE, 1);

        nvswitch_inforom_ecc_log_err_event(device, &err_event);
    }

    // RID table ECC double-bit error; capture the failing address when valid.
    bit = DRF_NUM(_INGRESS, _ERR_STATUS_0, _RIDTAB_ECC_DBE_ERR, 1);
    if (nvswitch_test_flags(pending, bit))
    {
        NvBool bAddressValid = NV_FALSE;
        NvU32 address = 0;
        NvU32 addressValid = NVSWITCH_ENG_RD32(device, NPORT, , link, _INGRESS,
                                 _ERR_RIDTAB_ECC_ERROR_ADDRESS_VALID);

        if (FLD_TEST_DRF(_INGRESS_ERR_RIDTAB, _ECC_ERROR_ADDRESS_VALID, _VALID, _VALID,
            addressValid))
        {
            address = NVSWITCH_ENG_RD32(device, NPORT, , link, _INGRESS,
                          _ERR_RIDTAB_ECC_ERROR_ADDRESS);
            bAddressValid = NV_TRUE;
        }

        report.data[0] = NVSWITCH_ENG_RD32(device, NPORT, , link, _INGRESS, _ERR_RIDTAB_ECC_ERROR_COUNTER);
        report.data[1] = NVSWITCH_ENG_RD32(device, NPORT, , link, _INGRESS, _ERR_RIDTAB_ECC_ERROR_ADDRESS);
        report.data[2] = NVSWITCH_ENG_RD32(device, NPORT, , link, _INGRESS, _ERR_RIDTAB_ECC_ERROR_ADDRESS_VALID);
        NVSWITCH_REPORT_CONTAIN(_HW_NPORT_INGRESS_RIDTAB_ECC_DBE_ERR, "ingress RID DBE", NV_FALSE);
        NVSWITCH_REPORT_CONTAIN_DATA(_HW_NPORT_INGRESS_RIDTAB_ECC_DBE_ERR, data);
        nvswitch_clear_flags(&unhandled, bit);

        _nvswitch_construct_ecc_error_event_ls10(&err_event,
            NVSWITCH_ERR_HW_NPORT_INGRESS_RIDTAB_ECC_DBE_ERR, link, bAddressValid,
            address, NV_TRUE, 1);

        nvswitch_inforom_ecc_log_err_event(device, &err_event);
    }

    // RLAN table ECC double-bit error; capture the failing address when valid.
    bit = DRF_NUM(_INGRESS, _ERR_STATUS_0, _RLANTAB_ECC_DBE_ERR, 1);
    if (nvswitch_test_flags(pending, bit))
    {
        NvBool bAddressValid = NV_FALSE;
        NvU32 address = 0;
        NvU32 addressValid = NVSWITCH_ENG_RD32(device, NPORT, , link, _INGRESS,
                                 _ERR_RLANTAB_ECC_ERROR_ADDRESS_VALID);

        if (FLD_TEST_DRF(_INGRESS_ERR_RLANTAB, _ECC_ERROR_ADDRESS_VALID, _VALID, _VALID,
            addressValid))
        {
            address = NVSWITCH_ENG_RD32(device, NPORT, , link, _INGRESS,
                          _ERR_RLANTAB_ECC_ERROR_ADDRESS);
            bAddressValid = NV_TRUE;
        }

        report.data[0] = NVSWITCH_ENG_RD32(device, NPORT, , link, _INGRESS, _ERR_RLANTAB_ECC_ERROR_COUNTER);
        report.data[1] = NVSWITCH_ENG_RD32(device, NPORT, , link, _INGRESS, _ERR_RLANTAB_ECC_ERROR_ADDRESS);
        report.data[2] = NVSWITCH_ENG_RD32(device, NPORT, , link, _INGRESS, _ERR_RLANTAB_ECC_ERROR_ADDRESS_VALID);
        NVSWITCH_REPORT_CONTAIN(_HW_NPORT_INGRESS_RLANTAB_ECC_DBE_ERR, "ingress RLAN DBE", NV_FALSE);
        NVSWITCH_REPORT_CONTAIN_DATA(_HW_NPORT_INGRESS_RLANTAB_ECC_DBE_ERR, data);
        nvswitch_clear_flags(&unhandled, bit);

        _nvswitch_construct_ecc_error_event_ls10(&err_event,
            NVSWITCH_ERR_HW_NPORT_INGRESS_RLANTAB_ECC_DBE_ERR, link, bAddressValid,
            address, NV_TRUE, 1);

        nvswitch_inforom_ecc_log_err_event(device, &err_event);
    }

    // NCISOC control parity error; logged to inforom as an uncorrectable event.
    bit = DRF_NUM(_INGRESS, _ERR_STATUS_0, _NCISOC_PARITY_ERR, 1);
    if (nvswitch_test_flags(pending, bit))
    {
        NVSWITCH_REPORT_CONTAIN(_HW_NPORT_INGRESS_NCISOC_PARITY_ERR, "ingress control parity", NV_FALSE);
        NVSWITCH_REPORT_CONTAIN_DATA(_HW_NPORT_INGRESS_NCISOC_PARITY_ERR, data);
        nvswitch_clear_flags(&unhandled, bit);

        _nvswitch_construct_ecc_error_event_ls10(&err_event,
            NVSWITCH_ERR_HW_NPORT_INGRESS_NCISOC_PARITY_ERR, link, NV_FALSE, 0,
            NV_TRUE, 1);

        nvswitch_inforom_ecc_log_err_event(device, &err_event);
    }

    // ExtB remap table ECC double-bit error
    bit = DRF_NUM(_INGRESS, _ERR_STATUS_0, _EXTBREMAPTAB_ECC_DBE_ERR, 1);
    if (nvswitch_test_flags(pending, bit))
    {
        report.data[0] = NVSWITCH_ENG_RD32(device, NPORT, , link, _INGRESS, _ERR_EXTBREMAPTAB_ECC_ERROR_COUNTER);
report.data[1] = NVSWITCH_ENG_RD32(device, NPORT, , link, _INGRESS, _ERR_EXTBREMAPTAB_ECC_ERROR_ADDRESS); 1709 report.data[2] = NVSWITCH_ENG_RD32(device, NPORT, , link, _INGRESS, _ERR_EXTBREMAPTAB_ECC_ERROR_ADDRESS_VALID); 1710 NVSWITCH_REPORT_CONTAIN(_HW_NPORT_INGRESS_EXTBREMAPTAB_ECC_DBE_ERR, "ingress ExtB remap DBE", NV_FALSE); 1711 NVSWITCH_REPORT_CONTAIN_DATA(_HW_NPORT_INGRESS_EXTBREMAPTAB_ECC_DBE_ERR, data); 1712 nvswitch_clear_flags(&unhandled, bit); 1713 1714 _nvswitch_construct_ecc_error_event_ls10(&err_event, 1715 NVSWITCH_ERR_HW_NPORT_INGRESS_EXTBREMAPTAB_ECC_DBE_ERR, link, NV_FALSE, 0, 1716 NV_TRUE, 1); 1717 1718 nvswitch_inforom_ecc_log_err_event(device, &err_event); 1719 } 1720 1721 bit = DRF_NUM(_INGRESS, _ERR_STATUS_0, _MCREMAPTAB_ECC_DBE_ERR, 1); 1722 if (nvswitch_test_flags(pending, bit)) 1723 { 1724 report.data[0] = NVSWITCH_ENG_RD32(device, NPORT, , link, _INGRESS, _ERR_MCREMAPTAB_ECC_ERROR_COUNTER); 1725 report.data[1] = NVSWITCH_ENG_RD32(device, NPORT, , link, _INGRESS, _ERR_MCREMAPTAB_ECC_ERROR_ADDRESS); 1726 report.data[2] = NVSWITCH_ENG_RD32(device, NPORT, , link, _INGRESS, _ERR_MCREMAPTAB_ECC_ERROR_ADDRESS_VALID); 1727 NVSWITCH_REPORT_CONTAIN(_HW_NPORT_INGRESS_MCREMAPTAB_ECC_DBE_ERR, "ingress MC remap DBE", NV_FALSE); 1728 NVSWITCH_REPORT_CONTAIN_DATA(_HW_NPORT_INGRESS_MCREMAPTAB_ECC_DBE_ERR, data); 1729 nvswitch_clear_flags(&unhandled, bit); 1730 1731 _nvswitch_construct_ecc_error_event_ls10(&err_event, 1732 NVSWITCH_ERR_HW_NPORT_INGRESS_MCREMAPTAB_ECC_DBE_ERR, link, NV_FALSE, 0, 1733 NV_TRUE, 1); 1734 1735 nvswitch_inforom_ecc_log_err_event(device, &err_event); 1736 } 1737 1738 NVSWITCH_UNHANDLED_CHECK(device, unhandled); 1739 1740 // Disable interrupts that have occurred after fatal error. 1741 // This helps prevent an interrupt storm if HW keeps triggering unnecessary stream of interrupts. 
1742 if (device->link[link].fatal_error_occurred) 1743 { 1744 if (nvswitch_is_soe_supported(device)) 1745 { 1746 nvswitch_soe_disable_nport_fatal_interrupts_ls10(device, link, 1747 report.raw_enable & ~pending, RM_SOE_CORE_NPORT_INGRESS_INTERRUPT); 1748 } 1749 else 1750 { 1751 NVSWITCH_ENG_WR32(device, NPORT, , link, _INGRESS, _ERR_FATAL_REPORT_EN_0, 1752 report.raw_enable & ~pending); 1753 } 1754 } 1755 1756 if (report.raw_first & report.mask) 1757 { 1758 NVSWITCH_ENG_WR32(device, NPORT, , link, _INGRESS, _ERR_FIRST_0, 1759 report.raw_first & report.mask); 1760 } 1761 1762 NVSWITCH_ENG_WR32(device, NPORT, , link, _INGRESS, _ERR_STATUS_0, pending); 1763 1764 if (unhandled != 0) 1765 { 1766 return -NVL_MORE_PROCESSING_REQUIRED; 1767 } 1768 1769 return NVL_SUCCESS; 1770 } 1771 1772 static NvlStatus 1773 _nvswitch_service_ingress_nonfatal_ls10 1774 ( 1775 nvswitch_device *device, 1776 NvU32 link 1777 ) 1778 { 1779 ls10_device *chip_device = NVSWITCH_GET_CHIP_DEVICE_LS10(device); 1780 NVSWITCH_INTERRUPT_LOG_TYPE report = { 0 }; 1781 NvU32 pending, bit, unhandled; 1782 NvU32 pending_0, pending_1; 1783 NvU32 raw_pending_0; 1784 NVSWITCH_RAW_ERROR_LOG_TYPE data = {0, { 0 }}; 1785 INFOROM_NVS_ECC_ERROR_EVENT err_event = {0}; 1786 NvlStatus status = NVL_SUCCESS; 1787 1788 // 1789 // _ERR_STATUS_0 1790 // 1791 report.raw_pending = NVSWITCH_ENG_RD32(device, NPORT, , link, _INGRESS, _ERR_STATUS_0); 1792 report.raw_enable = NVSWITCH_ENG_RD32(device, NPORT, , link, _INGRESS, _ERR_NON_FATAL_REPORT_EN_0); 1793 report.mask = report.raw_enable & chip_device->intr_mask.ingress[0].nonfatal; 1794 1795 raw_pending_0 = report.raw_pending; 1796 pending = (report.raw_pending & report.mask); 1797 pending_0 = pending; 1798 1799 if (pending == 0) 1800 { 1801 goto _nvswitch_service_ingress_nonfatal_ls10_err_status_1; 1802 } 1803 1804 unhandled = pending; 1805 report.raw_first = NVSWITCH_ENG_RD32(device, NPORT, , link, _INGRESS, _ERR_FIRST_0); 1806 _nvswitch_collect_error_info_ls10(device, 
link, 1807 NVSWITCH_RAW_ERROR_LOG_DATA_FLAG_INGRESS_TIME | 1808 NVSWITCH_RAW_ERROR_LOG_DATA_FLAG_INGRESS_MISC | 1809 NVSWITCH_RAW_ERROR_LOG_DATA_FLAG_INGRESS_HDR, 1810 &data); 1811 1812 bit = DRF_NUM(_INGRESS, _ERR_STATUS_0, _REQCONTEXTMISMATCHERR, 1); 1813 if (nvswitch_test_flags(pending, bit)) 1814 { 1815 NVSWITCH_REPORT_NONFATAL(_HW_NPORT_INGRESS_REQCONTEXTMISMATCHERR, "ingress request context mismatch"); 1816 NVSWITCH_REPORT_DATA(_HW_NPORT_INGRESS_REQCONTEXTMISMATCHERR, data); 1817 nvswitch_clear_flags(&unhandled, bit); 1818 } 1819 1820 bit = DRF_NUM(_INGRESS, _ERR_STATUS_0, _ACLFAIL, 1); 1821 if (nvswitch_test_flags(pending, bit)) 1822 { 1823 NVSWITCH_REPORT_NONFATAL(_HW_NPORT_INGRESS_ACLFAIL, "ingress invalid ACL"); 1824 NVSWITCH_REPORT_DATA(_HW_NPORT_INGRESS_ACLFAIL, data); 1825 nvswitch_clear_flags(&unhandled, bit); 1826 } 1827 1828 bit = DRF_NUM(_INGRESS, _ERR_STATUS_0, _NCISOC_HDR_ECC_LIMIT_ERR, 1); 1829 if (nvswitch_test_flags(pending, bit)) 1830 { 1831 // Ignore LIMIT error if DBE is pending 1832 if (!(nvswitch_test_flags(report.raw_pending, 1833 DRF_NUM(_INGRESS, _ERR_STATUS_0, _NCISOC_HDR_ECC_DBE_ERR, 1)))) 1834 { 1835 report.data[0] = NVSWITCH_ENG_RD32(device, NPORT, , link, _INGRESS, _ERR_NCISOC_HDR_ECC_ERROR_COUNTER); 1836 NVSWITCH_REPORT_NONFATAL(_HW_NPORT_INGRESS_NCISOC_HDR_ECC_LIMIT_ERR, "ingress header ECC"); 1837 NVSWITCH_REPORT_DATA(_HW_NPORT_INGRESS_NCISOC_HDR_ECC_LIMIT_ERR, data); 1838 1839 _nvswitch_construct_ecc_error_event_ls10(&err_event, 1840 NVSWITCH_ERR_HW_NPORT_INGRESS_NCISOC_HDR_ECC_LIMIT_ERR, link, NV_FALSE, 0, 1841 NV_FALSE, 1); 1842 1843 nvswitch_inforom_ecc_log_err_event(device, &err_event); 1844 } 1845 1846 nvswitch_clear_flags(&unhandled, bit); 1847 } 1848 1849 bit = DRF_NUM(_INGRESS, _ERR_STATUS_0, _ADDRBOUNDSERR, 1); 1850 if (nvswitch_test_flags(pending, bit)) 1851 { 1852 NVSWITCH_REPORT_NONFATAL(_HW_NPORT_INGRESS_ADDRBOUNDSERR, "ingress address bounds"); 1853 NVSWITCH_REPORT_DATA(_HW_NPORT_INGRESS_ADDRBOUNDSERR, data); 
1854 nvswitch_clear_flags(&unhandled, bit); 1855 } 1856 1857 bit = DRF_NUM(_INGRESS, _ERR_STATUS_0, _RIDTABCFGERR, 1); 1858 if (nvswitch_test_flags(pending, bit)) 1859 { 1860 NVSWITCH_REPORT_NONFATAL(_HW_NPORT_INGRESS_RIDTABCFGERR, "ingress RID packet"); 1861 NVSWITCH_REPORT_DATA(_HW_NPORT_INGRESS_RIDTABCFGERR, data); 1862 nvswitch_clear_flags(&unhandled, bit); 1863 } 1864 1865 bit = DRF_NUM(_INGRESS, _ERR_STATUS_0, _RLANTABCFGERR, 1); 1866 if (nvswitch_test_flags(pending, bit)) 1867 { 1868 NVSWITCH_REPORT_NONFATAL(_HW_NPORT_INGRESS_RLANTABCFGERR, "ingress RLAN packet"); 1869 NVSWITCH_REPORT_DATA(_HW_NPORT_INGRESS_RLANTABCFGERR, data); 1870 nvswitch_clear_flags(&unhandled, bit); 1871 } 1872 1873 1874 bit = DRF_NUM(_INGRESS, _ERR_STATUS_0, _REMAPTAB_ECC_LIMIT_ERR, 1); 1875 if (nvswitch_test_flags(pending, bit)) 1876 { 1877 // Ignore LIMIT error if DBE is pending 1878 if (!(nvswitch_test_flags(report.raw_pending, 1879 DRF_NUM(_INGRESS, _ERR_STATUS_0, _REMAPTAB_ECC_DBE_ERR, 1)))) 1880 { 1881 report.data[0] = NVSWITCH_ENG_RD32(device, NPORT, , link, _INGRESS, _ERR_REMAPTAB_ECC_ERROR_COUNTER); 1882 NVSWITCH_REPORT_NONFATAL(_HW_NPORT_INGRESS_REMAPTAB_ECC_LIMIT_ERR, "ingress remap ECC"); 1883 NVSWITCH_REPORT_DATA(_HW_NPORT_INGRESS_REMAPTAB_ECC_LIMIT_ERR, data); 1884 1885 _nvswitch_construct_ecc_error_event_ls10(&err_event, 1886 NVSWITCH_ERR_HW_NPORT_INGRESS_REMAPTAB_ECC_LIMIT_ERR, link, NV_FALSE, 0, 1887 NV_FALSE, 1); 1888 1889 nvswitch_inforom_ecc_log_err_event(device, &err_event); 1890 } 1891 1892 nvswitch_clear_flags(&unhandled, bit); 1893 } 1894 1895 bit = DRF_NUM(_INGRESS, _ERR_STATUS_0, _RIDTAB_ECC_LIMIT_ERR, 1); 1896 if (nvswitch_test_flags(pending, bit)) 1897 { 1898 // Ignore LIMIT error if DBE is pending 1899 if (!(nvswitch_test_flags(report.raw_pending, 1900 DRF_NUM(_INGRESS, _ERR_STATUS_0, _RIDTAB_ECC_DBE_ERR, 1)))) 1901 { 1902 report.data[0] = NVSWITCH_ENG_RD32(device, NPORT, , link, _INGRESS, _ERR_RIDTAB_ECC_ERROR_COUNTER); 1903 
NVSWITCH_REPORT_NONFATAL(_HW_NPORT_INGRESS_RIDTAB_ECC_LIMIT_ERR, "ingress RID ECC"); 1904 NVSWITCH_REPORT_DATA(_HW_NPORT_INGRESS_RIDTAB_ECC_LIMIT_ERR, data); 1905 1906 _nvswitch_construct_ecc_error_event_ls10(&err_event, 1907 NVSWITCH_ERR_HW_NPORT_INGRESS_RIDTAB_ECC_LIMIT_ERR, link, NV_FALSE, 0, 1908 NV_FALSE, 1); 1909 1910 nvswitch_inforom_ecc_log_err_event(device, &err_event); 1911 } 1912 1913 nvswitch_clear_flags(&unhandled, bit); 1914 } 1915 1916 bit = DRF_NUM(_INGRESS, _ERR_STATUS_0, _RLANTAB_ECC_LIMIT_ERR, 1); 1917 if (nvswitch_test_flags(pending, bit)) 1918 { 1919 // Ignore LIMIT error if DBE is pending 1920 if (!(nvswitch_test_flags(report.raw_pending, 1921 DRF_NUM(_INGRESS, _ERR_STATUS_0, _RLANTAB_ECC_DBE_ERR, 1)))) 1922 { 1923 report.data[0] = NVSWITCH_ENG_RD32(device, NPORT, , link, _INGRESS, _ERR_RLANTAB_ECC_ERROR_COUNTER); 1924 NVSWITCH_REPORT_NONFATAL(_HW_NPORT_INGRESS_RLANTAB_ECC_LIMIT_ERR, "ingress RLAN ECC"); 1925 NVSWITCH_REPORT_DATA(_HW_NPORT_INGRESS_RLANTAB_ECC_LIMIT_ERR, data); 1926 1927 _nvswitch_construct_ecc_error_event_ls10(&err_event, 1928 NVSWITCH_ERR_HW_NPORT_INGRESS_RLANTAB_ECC_LIMIT_ERR, link, NV_FALSE, 0, 1929 NV_FALSE, 1); 1930 1931 nvswitch_inforom_ecc_log_err_event(device, &err_event); 1932 } 1933 1934 nvswitch_clear_flags(&unhandled, bit); 1935 } 1936 1937 1938 bit = DRF_NUM(_INGRESS, _ERR_STATUS_0, _ADDRTYPEERR, 1); 1939 if (nvswitch_test_flags(pending, bit)) 1940 { 1941 NVSWITCH_REPORT_NONFATAL(_HW_NPORT_INGRESS_ADDRTYPEERR, "ingress illegal address"); 1942 NVSWITCH_REPORT_DATA(_HW_NPORT_INGRESS_ADDRTYPEERR, data); 1943 nvswitch_clear_flags(&unhandled, bit); 1944 } 1945 1946 1947 bit = DRF_NUM(_INGRESS, _ERR_STATUS_0, _EXTAREMAPTAB_INDEX_ERR, 1); 1948 if (nvswitch_test_flags(pending, bit)) 1949 { 1950 NVSWITCH_REPORT_NONFATAL(_HW_NPORT_INGRESS_EXTAREMAPTAB_INDEX_ERR, "ingress ExtA remap index"); 1951 NVSWITCH_REPORT_DATA(_HW_NPORT_INGRESS_EXTAREMAPTAB_INDEX_ERR, data); 1952 nvswitch_clear_flags(&unhandled, bit); 1953 } 1954 1955 
bit = DRF_NUM(_INGRESS, _ERR_STATUS_0, _EXTBREMAPTAB_INDEX_ERR, 1); 1956 if (nvswitch_test_flags(pending, bit)) 1957 { 1958 NVSWITCH_REPORT_NONFATAL(_HW_NPORT_INGRESS_EXTBREMAPTAB_INDEX_ERR, "ingress ExtB remap index"); 1959 NVSWITCH_REPORT_DATA(_HW_NPORT_INGRESS_EXTBREMAPTAB_INDEX_ERR, data); 1960 nvswitch_clear_flags(&unhandled, bit); 1961 } 1962 1963 bit = DRF_NUM(_INGRESS, _ERR_STATUS_0, _MCREMAPTAB_INDEX_ERR, 1); 1964 if (nvswitch_test_flags(pending, bit)) 1965 { 1966 NVSWITCH_REPORT_NONFATAL(_HW_NPORT_INGRESS_MCREMAPTAB_INDEX_ERR, "ingress MC remap index"); 1967 NVSWITCH_REPORT_DATA(_HW_NPORT_INGRESS_MCREMAPTAB_INDEX_ERR, data); 1968 nvswitch_clear_flags(&unhandled, bit); 1969 } 1970 1971 bit = DRF_NUM(_INGRESS, _ERR_STATUS_0, _EXTAREMAPTAB_REQCONTEXTMISMATCHERR, 1); 1972 if (nvswitch_test_flags(pending, bit)) 1973 { 1974 NVSWITCH_REPORT_NONFATAL(_HW_NPORT_INGRESS_EXTAREMAPTAB_REQCONTEXTMISMATCHERR, "ingress ExtA request context mismatch"); 1975 NVSWITCH_REPORT_DATA(_HW_NPORT_INGRESS_EXTAREMAPTAB_REQCONTEXTMISMATCHERR, data); 1976 nvswitch_clear_flags(&unhandled, bit); 1977 } 1978 1979 bit = DRF_NUM(_INGRESS, _ERR_STATUS_0, _EXTBREMAPTAB_REQCONTEXTMISMATCHERR, 1); 1980 if (nvswitch_test_flags(pending, bit)) 1981 { 1982 NVSWITCH_REPORT_NONFATAL(_HW_NPORT_INGRESS_EXTBREMAPTAB_REQCONTEXTMISMATCHERR, "ingress ExtB request context mismatch"); 1983 NVSWITCH_REPORT_DATA(_HW_NPORT_INGRESS_EXTBREMAPTAB_REQCONTEXTMISMATCHERR, data); 1984 nvswitch_clear_flags(&unhandled, bit); 1985 } 1986 1987 bit = DRF_NUM(_INGRESS, _ERR_STATUS_0, _MCREMAPTAB_REQCONTEXTMISMATCHERR, 1); 1988 if (nvswitch_test_flags(pending, bit)) 1989 { 1990 NVSWITCH_REPORT_NONFATAL(_HW_NPORT_INGRESS_MCREMAPTAB_REQCONTEXTMISMATCHERR, "ingress MC request context mismatch"); 1991 NVSWITCH_REPORT_DATA(_HW_NPORT_INGRESS_MCREMAPTAB_REQCONTEXTMISMATCHERR, data); 1992 nvswitch_clear_flags(&unhandled, bit); 1993 } 1994 1995 bit = DRF_NUM(_INGRESS, _ERR_STATUS_0, _EXTAREMAPTAB_ACLFAIL, 1); 1996 if 
(nvswitch_test_flags(pending, bit)) 1997 { 1998 NVSWITCH_REPORT_NONFATAL(_HW_NPORT_INGRESS_EXTAREMAPTAB_ACLFAIL, "ingress invalid ExtA ACL"); 1999 NVSWITCH_REPORT_DATA(_HW_NPORT_INGRESS_EXTAREMAPTAB_ACLFAIL, data); 2000 nvswitch_clear_flags(&unhandled, bit); 2001 } 2002 2003 bit = DRF_NUM(_INGRESS, _ERR_STATUS_0, _EXTBREMAPTAB_ACLFAIL, 1); 2004 if (nvswitch_test_flags(pending, bit)) 2005 { 2006 NVSWITCH_REPORT_NONFATAL(_HW_NPORT_INGRESS_EXTBREMAPTAB_ACLFAIL, "ingress invalid ExtB ACL"); 2007 NVSWITCH_REPORT_DATA(_HW_NPORT_INGRESS_EXTBREMAPTAB_ACLFAIL, data); 2008 nvswitch_clear_flags(&unhandled, bit); 2009 } 2010 2011 bit = DRF_NUM(_INGRESS, _ERR_STATUS_0, _MCREMAPTAB_ACLFAIL, 1); 2012 if (nvswitch_test_flags(pending, bit)) 2013 { 2014 NVSWITCH_REPORT_NONFATAL(_HW_NPORT_INGRESS_MCREMAPTAB_ACLFAIL, "ingress invalid MC ACL"); 2015 NVSWITCH_REPORT_DATA(_HW_NPORT_INGRESS_MCREMAPTAB_ACLFAIL, data); 2016 nvswitch_clear_flags(&unhandled, bit); 2017 } 2018 2019 bit = DRF_NUM(_INGRESS, _ERR_STATUS_0, _EXTAREMAPTAB_ADDRBOUNDSERR, 1); 2020 if (nvswitch_test_flags(pending, bit)) 2021 { 2022 NVSWITCH_REPORT_NONFATAL(_HW_NPORT_INGRESS_EXTAREMAPTAB_ADDRBOUNDSERR, "ingress ExtA address bounds"); 2023 NVSWITCH_REPORT_DATA(_HW_NPORT_INGRESS_EXTAREMAPTAB_ADDRBOUNDSERR, data); 2024 nvswitch_clear_flags(&unhandled, bit); 2025 } 2026 2027 bit = DRF_NUM(_INGRESS, _ERR_STATUS_0, _EXTBREMAPTAB_ADDRBOUNDSERR, 1); 2028 if (nvswitch_test_flags(pending, bit)) 2029 { 2030 NVSWITCH_REPORT_NONFATAL(_HW_NPORT_INGRESS_EXTBREMAPTAB_ADDRBOUNDSERR, "ingress ExtB address bounds"); 2031 NVSWITCH_REPORT_DATA(_HW_NPORT_INGRESS_EXTBREMAPTAB_ADDRBOUNDSERR, data); 2032 nvswitch_clear_flags(&unhandled, bit); 2033 } 2034 2035 bit = DRF_NUM(_INGRESS, _ERR_STATUS_0, _MCREMAPTAB_ADDRBOUNDSERR, 1); 2036 if (nvswitch_test_flags(pending, bit)) 2037 { 2038 NVSWITCH_REPORT_NONFATAL(_HW_NPORT_INGRESS_MCREMAPTAB_ADDRBOUNDSERR, "ingress MC address bounds"); 2039 
NVSWITCH_REPORT_DATA(_HW_NPORT_INGRESS_MCREMAPTAB_ADDRBOUNDSERR, data); 2040 nvswitch_clear_flags(&unhandled, bit); 2041 } 2042 2043 NVSWITCH_UNHANDLED_CHECK(device, unhandled); 2044 2045 // Disable interrupts that have occurred after fatal error. 2046 // This helps prevent an interrupt storm if HW keeps triggering unnecessary stream of interrupts. 2047 if (device->link[link].fatal_error_occurred) 2048 { 2049 NVSWITCH_ENG_WR32(device, NPORT, , link, _INGRESS, _ERR_NON_FATAL_REPORT_EN_0, 2050 report.raw_enable & ~pending); 2051 } 2052 2053 if (report.raw_first & report.mask) 2054 { 2055 NVSWITCH_ENG_WR32(device, NPORT, , link, _INGRESS, _ERR_FIRST_0, 2056 report.raw_first & report.mask); 2057 } 2058 2059 if (unhandled != 0) 2060 { 2061 status = -NVL_MORE_PROCESSING_REQUIRED; 2062 } 2063 2064 _nvswitch_service_ingress_nonfatal_ls10_err_status_1: 2065 // 2066 // _ERR_STATUS_1 2067 // 2068 report.raw_pending = NVSWITCH_ENG_RD32(device, NPORT, , link, _INGRESS, _ERR_STATUS_1); 2069 report.raw_enable = NVSWITCH_ENG_RD32(device, NPORT, , link, _INGRESS, _ERR_NON_FATAL_REPORT_EN_1); 2070 report.mask = report.raw_enable & chip_device->intr_mask.ingress[1].nonfatal; 2071 2072 pending = (report.raw_pending & report.mask); 2073 pending_1 = pending; 2074 2075 if ((pending_0 == 0) && (pending_1 == 0)) 2076 { 2077 return -NVL_NOT_FOUND; 2078 } 2079 2080 unhandled = pending; 2081 report.raw_first = NVSWITCH_ENG_RD32(device, NPORT, , link, _INGRESS, _ERR_FIRST_1); 2082 2083 bit = DRF_NUM(_INGRESS, _ERR_STATUS_1, _EXTAREMAPTAB_ECC_LIMIT_ERR, 1); 2084 if (nvswitch_test_flags(pending, bit)) 2085 { 2086 // Ignore LIMIT error if DBE is pending 2087 if (!(nvswitch_test_flags(raw_pending_0, 2088 DRF_NUM(_INGRESS, _ERR_STATUS_0, _EXTAREMAPTAB_ECC_DBE_ERR, 1)))) 2089 { 2090 report.data[0] = NVSWITCH_ENG_RD32(device, NPORT, , link, _INGRESS, _ERR_EXTAREMAPTAB_ECC_ERROR_COUNTER); 2091 NVSWITCH_REPORT_NONFATAL(_HW_NPORT_INGRESS_EXTAREMAPTAB_ECC_LIMIT_ERR, "ingress ExtA remap ECC"); 2092 
NVSWITCH_REPORT_DATA(_HW_NPORT_INGRESS_EXTAREMAPTAB_ECC_LIMIT_ERR, data); 2093 2094 _nvswitch_construct_ecc_error_event_ls10(&err_event, 2095 NVSWITCH_ERR_HW_NPORT_INGRESS_EXTAREMAPTAB_ECC_LIMIT_ERR, link, NV_FALSE, 0, 2096 NV_FALSE, 1); 2097 2098 nvswitch_inforom_ecc_log_err_event(device, &err_event); 2099 } 2100 2101 nvswitch_clear_flags(&unhandled, bit); 2102 } 2103 2104 bit = DRF_NUM(_INGRESS, _ERR_STATUS_1, _EXTBREMAPTAB_ECC_LIMIT_ERR, 1); 2105 if (nvswitch_test_flags(pending, bit)) 2106 { 2107 // Ignore LIMIT error if DBE is pending 2108 if (!(nvswitch_test_flags(raw_pending_0, 2109 DRF_NUM(_INGRESS, _ERR_STATUS_0, _EXTBREMAPTAB_ECC_DBE_ERR, 1)))) 2110 { 2111 report.data[0] = NVSWITCH_ENG_RD32(device, NPORT, , link, _INGRESS, _ERR_EXTBREMAPTAB_ECC_ERROR_COUNTER); 2112 NVSWITCH_REPORT_NONFATAL(_HW_NPORT_INGRESS_EXTBREMAPTAB_ECC_LIMIT_ERR, "ingress ExtB remap ECC"); 2113 NVSWITCH_REPORT_DATA(_HW_NPORT_INGRESS_EXTBREMAPTAB_ECC_LIMIT_ERR, data); 2114 2115 _nvswitch_construct_ecc_error_event_ls10(&err_event, 2116 NVSWITCH_ERR_HW_NPORT_INGRESS_EXTBREMAPTAB_ECC_LIMIT_ERR, link, NV_FALSE, 0, 2117 NV_FALSE, 1); 2118 2119 nvswitch_inforom_ecc_log_err_event(device, &err_event); 2120 } 2121 2122 nvswitch_clear_flags(&unhandled, bit); 2123 } 2124 2125 bit = DRF_NUM(_INGRESS, _ERR_STATUS_1, _MCREMAPTAB_ECC_LIMIT_ERR, 1); 2126 if (nvswitch_test_flags(pending, bit)) 2127 { 2128 // Ignore LIMIT error if DBE is pending 2129 if (!(nvswitch_test_flags(raw_pending_0, 2130 DRF_NUM(_INGRESS, _ERR_STATUS_0, _MCREMAPTAB_ECC_DBE_ERR, 1)))) 2131 { 2132 report.data[0] = NVSWITCH_ENG_RD32(device, NPORT, , link, _INGRESS, _ERR_MCREMAPTAB_ECC_ERROR_COUNTER); 2133 NVSWITCH_REPORT_NONFATAL(_HW_NPORT_INGRESS_MCREMAPTAB_ECC_LIMIT_ERR, "ingress MC remap ECC"); 2134 NVSWITCH_REPORT_DATA(_HW_NPORT_INGRESS_MCREMAPTAB_ECC_LIMIT_ERR, data); 2135 2136 _nvswitch_construct_ecc_error_event_ls10(&err_event, 2137 NVSWITCH_ERR_HW_NPORT_INGRESS_MCREMAPTAB_ECC_LIMIT_ERR, link, NV_FALSE, 0, 2138 NV_FALSE, 1); 
2139 2140 nvswitch_inforom_ecc_log_err_event(device, &err_event); 2141 } 2142 2143 nvswitch_clear_flags(&unhandled, bit); 2144 } 2145 2146 bit = DRF_NUM(_INGRESS, _ERR_STATUS_1, _MCCMDTOUCADDRERR, 1); 2147 if (nvswitch_test_flags(pending, bit)) 2148 { 2149 NVSWITCH_REPORT_NONFATAL(_HW_NPORT_INGRESS_MCCMDTOUCADDRERR, "ingress MC command to uc"); 2150 NVSWITCH_REPORT_DATA(_HW_NPORT_INGRESS_MCCMDTOUCADDRERR, data); 2151 nvswitch_clear_flags(&unhandled, bit); 2152 } 2153 2154 bit = DRF_NUM(_INGRESS, _ERR_STATUS_1, _READMCREFLECTMEMERR, 1); 2155 if (nvswitch_test_flags(pending, bit)) 2156 { 2157 NVSWITCH_REPORT_NONFATAL(_HW_NPORT_INGRESS_READMCREFLECTMEMERR, "ingress read reflective"); 2158 NVSWITCH_REPORT_DATA(_HW_NPORT_INGRESS_READMCREFLECTMEMERR, data); 2159 nvswitch_clear_flags(&unhandled, bit); 2160 } 2161 2162 bit = DRF_NUM(_INGRESS, _ERR_STATUS_1, _EXTAREMAPTAB_ADDRTYPEERR, 1); 2163 if (nvswitch_test_flags(pending, bit)) 2164 { 2165 NVSWITCH_REPORT_NONFATAL(_HW_NPORT_INGRESS_EXTAREMAPTAB_ADDRTYPEERR, "ingress ExtA address type"); 2166 NVSWITCH_REPORT_DATA(_HW_NPORT_INGRESS_EXTAREMAPTAB_ADDRTYPEERR, data); 2167 nvswitch_clear_flags(&unhandled, bit); 2168 } 2169 2170 bit = DRF_NUM(_INGRESS, _ERR_STATUS_1, _EXTBREMAPTAB_ADDRTYPEERR, 1); 2171 if (nvswitch_test_flags(pending, bit)) 2172 { 2173 NVSWITCH_REPORT_NONFATAL(_HW_NPORT_INGRESS_EXTBREMAPTAB_ADDRTYPEERR, "ingress ExtB address type"); 2174 NVSWITCH_REPORT_DATA(_HW_NPORT_INGRESS_EXTBREMAPTAB_ADDRTYPEERR, data); 2175 nvswitch_clear_flags(&unhandled, bit); 2176 } 2177 2178 bit = DRF_NUM(_INGRESS, _ERR_STATUS_1, _MCREMAPTAB_ADDRTYPEERR, 1); 2179 if (nvswitch_test_flags(pending, bit)) 2180 { 2181 NVSWITCH_REPORT_NONFATAL(_HW_NPORT_INGRESS_MCREMAPTAB_ADDRTYPEERR, "ingress MC address type"); 2182 NVSWITCH_REPORT_DATA(_HW_NPORT_INGRESS_MCREMAPTAB_ADDRTYPEERR, data); 2183 nvswitch_clear_flags(&unhandled, bit); 2184 } 2185 2186 NVSWITCH_UNHANDLED_CHECK(device, unhandled); 2187 2188 // Disable interrupts that have occurred 
after fatal error. 2189 // This helps prevent an interrupt storm if HW keeps triggering unnecessary stream of interrupts. 2190 if (device->link[link].fatal_error_occurred) 2191 { 2192 NVSWITCH_ENG_WR32(device, NPORT, , link, _INGRESS, _ERR_NON_FATAL_REPORT_EN_1, 2193 report.raw_enable & ~pending); 2194 } 2195 2196 if (report.raw_first & report.mask) 2197 { 2198 NVSWITCH_ENG_WR32(device, NPORT, , link, _INGRESS, _ERR_FIRST_1, 2199 report.raw_first & report.mask); 2200 } 2201 2202 NVSWITCH_ENG_WR32(device, NPORT, , link, _INGRESS, _ERR_STATUS_0, pending_0); 2203 NVSWITCH_ENG_WR32(device, NPORT, , link, _INGRESS, _ERR_STATUS_1, pending_1); 2204 2205 if (unhandled != 0) 2206 { 2207 status = -NVL_MORE_PROCESSING_REQUIRED; 2208 } 2209 2210 return status; 2211 } 2212 2213 // 2214 // Tstate 2215 // 2216 2217 static NvlStatus 2218 _nvswitch_service_tstate_nonfatal_ls10 2219 ( 2220 nvswitch_device *device, 2221 NvU32 link 2222 ) 2223 { 2224 ls10_device *chip_device = NVSWITCH_GET_CHIP_DEVICE_LS10(device); 2225 NVSWITCH_INTERRUPT_LOG_TYPE report = { 0 }; 2226 NvU32 pending, bit, unhandled; 2227 NVSWITCH_RAW_ERROR_LOG_TYPE data = {0, { 0 }}; 2228 INFOROM_NVS_ECC_ERROR_EVENT err_event = {0}; 2229 2230 report.raw_pending = NVSWITCH_ENG_RD32(device, NPORT, , link, _TSTATE, _ERR_STATUS_0); 2231 report.raw_enable = NVSWITCH_ENG_RD32(device, NPORT, , link, _TSTATE, _ERR_NON_FATAL_REPORT_EN_0); 2232 report.mask = report.raw_enable & chip_device->intr_mask.tstate.nonfatal; 2233 report.data[0] = NVSWITCH_ENG_RD32(device, NPORT, , link, _TSTATE, _ERR_MISC_LOG_0); 2234 pending = report.raw_pending & report.mask; 2235 2236 if (pending == 0) 2237 { 2238 return -NVL_NOT_FOUND; 2239 } 2240 2241 unhandled = pending; 2242 report.raw_first = NVSWITCH_ENG_RD32(device, NPORT, , link, _TSTATE, _ERR_FIRST_0); 2243 2244 bit = DRF_NUM(_TSTATE, _ERR_STATUS_0, _TAGPOOL_ECC_LIMIT_ERR, 1); 2245 if (nvswitch_test_flags(pending, bit)) 2246 { 2247 // Ignore LIMIT error if DBE is pending 2248 
if(!(nvswitch_test_flags(report.raw_pending, 2249 DRF_NUM(_TSTATE, _ERR_STATUS_0, _TAGPOOL_ECC_DBE_ERR, 1)))) 2250 { 2251 NvBool bAddressValid = NV_FALSE; 2252 NvU32 address = 0; 2253 NvU32 addressValid = NVSWITCH_ENG_RD32(device, NPORT, , link, _TSTATE, 2254 _ERR_TAGPOOL_ECC_ERROR_ADDRESS_VALID); 2255 2256 if (FLD_TEST_DRF(_TSTATE_ERR_TAGPOOL, _ECC_ERROR_ADDRESS_VALID, _VALID, _VALID, 2257 addressValid)) 2258 { 2259 address = NVSWITCH_ENG_RD32(device, NPORT, , link, _TSTATE, 2260 _ERR_TAGPOOL_ECC_ERROR_ADDRESS); 2261 bAddressValid = NV_TRUE; 2262 } 2263 2264 report.data[1] = NVSWITCH_ENG_RD32(device, NPORT, , link, _TSTATE, _ERR_TAGPOOL_ECC_ERROR_COUNTER); 2265 NVSWITCH_ENG_WR32(device, NPORT, , link, _TSTATE, _ERR_TAGPOOL_ECC_ERROR_COUNTER, 2266 DRF_DEF(_TSTATE, _ERR_TAGPOOL_ECC_ERROR_COUNTER, _ERROR_COUNT, _INIT)); 2267 NVSWITCH_REPORT_NONFATAL(_HW_NPORT_TSTATE_TAGPOOL_ECC_LIMIT_ERR, "TS tag store single-bit threshold"); 2268 _nvswitch_collect_error_info_ls10(device, link, 2269 NVSWITCH_RAW_ERROR_LOG_DATA_FLAG_EGRESS_TIME | 2270 NVSWITCH_RAW_ERROR_LOG_DATA_FLAG_EGRESS_MISC | 2271 NVSWITCH_RAW_ERROR_LOG_DATA_FLAG_EGRESS_HDR, 2272 &data); 2273 NVSWITCH_REPORT_DATA(_HW_NPORT_TSTATE_TAGPOOL_ECC_LIMIT_ERR, data); 2274 2275 _nvswitch_construct_ecc_error_event_ls10(&err_event, 2276 NVSWITCH_ERR_HW_NPORT_TSTATE_TAGPOOL_ECC_LIMIT_ERR, link, 2277 bAddressValid, address, NV_FALSE, 1); 2278 2279 nvswitch_inforom_ecc_log_err_event(device, &err_event); 2280 } 2281 2282 nvswitch_clear_flags(&unhandled, bit); 2283 } 2284 2285 bit = DRF_NUM(_TSTATE, _ERR_STATUS_0, _CRUMBSTORE_ECC_LIMIT_ERR, 1); 2286 if (nvswitch_test_flags(pending, bit)) 2287 { 2288 // Ignore LIMIT error if DBE is pending 2289 if(!(nvswitch_test_flags(report.raw_pending, 2290 DRF_NUM(_TSTATE, _ERR_STATUS_0, _CRUMBSTORE_ECC_DBE_ERR, 1)))) 2291 { 2292 NvBool bAddressValid = NV_FALSE; 2293 NvU32 address = 0; 2294 NvU32 addressValid = NVSWITCH_ENG_RD32(device, NPORT, , link, _TSTATE, 2295 
_ERR_CRUMBSTORE_ECC_ERROR_ADDRESS_VALID); 2296 2297 if (FLD_TEST_DRF(_TSTATE_ERR_CRUMBSTORE, _ECC_ERROR_ADDRESS_VALID, _VALID, _VALID, 2298 addressValid)) 2299 { 2300 address = NVSWITCH_ENG_RD32(device, NPORT, , link, _TSTATE, 2301 _ERR_CRUMBSTORE_ECC_ERROR_ADDRESS); 2302 bAddressValid = NV_TRUE; 2303 } 2304 2305 report.data[1] = NVSWITCH_ENG_RD32(device, NPORT, , link, _TSTATE, _ERR_CRUMBSTORE_ECC_ERROR_COUNTER); 2306 NVSWITCH_ENG_WR32(device, NPORT, , link, _TSTATE, _ERR_CRUMBSTORE_ECC_ERROR_COUNTER, 2307 DRF_DEF(_TSTATE, _ERR_CRUMBSTORE_ECC_ERROR_COUNTER, _ERROR_COUNT, _INIT)); 2308 NVSWITCH_REPORT_NONFATAL(_HW_NPORT_TSTATE_CRUMBSTORE_ECC_LIMIT_ERR, "TS crumbstore single-bit threshold"); 2309 _nvswitch_collect_error_info_ls10(device, link, 2310 NVSWITCH_RAW_ERROR_LOG_DATA_FLAG_INGRESS_TIME | 2311 NVSWITCH_RAW_ERROR_LOG_DATA_FLAG_INGRESS_MISC | 2312 NVSWITCH_RAW_ERROR_LOG_DATA_FLAG_INGRESS_HDR, 2313 &data); 2314 NVSWITCH_REPORT_DATA(_HW_NPORT_TSTATE_CRUMBSTORE_ECC_LIMIT_ERR, data); 2315 2316 _nvswitch_construct_ecc_error_event_ls10(&err_event, 2317 NVSWITCH_ERR_HW_NPORT_TSTATE_CRUMBSTORE_ECC_LIMIT_ERR, link, 2318 bAddressValid, address, NV_FALSE, 1); 2319 2320 nvswitch_inforom_ecc_log_err_event(device, &err_event); 2321 } 2322 2323 nvswitch_clear_flags(&unhandled, bit); 2324 } 2325 2326 NVSWITCH_UNHANDLED_CHECK(device, unhandled); 2327 2328 // Disable interrupts that have occurred after fatal error. 2329 // This helps prevent an interrupt storm if HW keeps triggering unnecessary stream of interrupts. 
2330 if (device->link[link].fatal_error_occurred) 2331 { 2332 NVSWITCH_ENG_WR32(device, NPORT, , link, _TSTATE, _ERR_NON_FATAL_REPORT_EN_0, 2333 report.raw_enable & ~pending); 2334 } 2335 2336 if (report.raw_first & report.mask) 2337 { 2338 NVSWITCH_ENG_WR32(device, NPORT, , link, _TSTATE, _ERR_FIRST_0, 2339 report.raw_first & report.mask); 2340 } 2341 2342 NVSWITCH_ENG_WR32(device, NPORT, , link, _TSTATE, _ERR_STATUS_0, pending); 2343 2344 if (unhandled != 0) 2345 { 2346 return -NVL_MORE_PROCESSING_REQUIRED; 2347 } 2348 2349 return NVL_SUCCESS; 2350 } 2351 2352 static NvlStatus 2353 _nvswitch_service_tstate_fatal_ls10 2354 ( 2355 nvswitch_device *device, 2356 NvU32 link 2357 ) 2358 { 2359 ls10_device *chip_device = NVSWITCH_GET_CHIP_DEVICE_LS10(device); 2360 NVSWITCH_INTERRUPT_LOG_TYPE report = { 0 }; 2361 NvU32 pending, bit, contain, unhandled; 2362 NVSWITCH_RAW_ERROR_LOG_TYPE data = {0, { 0 }}; 2363 INFOROM_NVS_ECC_ERROR_EVENT err_event = {0}; 2364 2365 report.raw_pending = NVSWITCH_ENG_RD32(device, NPORT, , link, _TSTATE, _ERR_STATUS_0); 2366 report.raw_enable = NVSWITCH_ENG_RD32(device, NPORT, , link, _TSTATE, _ERR_FATAL_REPORT_EN_0); 2367 report.mask = report.raw_enable & chip_device->intr_mask.tstate.fatal; 2368 report.data[0] = NVSWITCH_ENG_RD32(device, NPORT, , link, _TSTATE, _ERR_MISC_LOG_0); 2369 pending = report.raw_pending & report.mask; 2370 2371 if (pending == 0) 2372 { 2373 return -NVL_NOT_FOUND; 2374 } 2375 2376 unhandled = pending; 2377 report.raw_first = NVSWITCH_ENG_RD32(device, NPORT, , link, _TSTATE, _ERR_FIRST_0); 2378 contain = NVSWITCH_ENG_RD32(device, NPORT, , link, _TSTATE, _ERR_CONTAIN_EN_0); 2379 2380 bit = DRF_NUM(_TSTATE, _ERR_STATUS_0, _TAGPOOLBUFERR, 1); 2381 if (nvswitch_test_flags(pending, bit)) 2382 { 2383 NVSWITCH_REPORT_CONTAIN(_HW_NPORT_TSTATE_TAGPOOLBUFERR, "TS pointer crossover", NV_FALSE); 2384 _nvswitch_collect_error_info_ls10(device, link, 2385 NVSWITCH_RAW_ERROR_LOG_DATA_FLAG_EGRESS_TIME | 2386 
NVSWITCH_RAW_ERROR_LOG_DATA_FLAG_EGRESS_MISC | 2387 NVSWITCH_RAW_ERROR_LOG_DATA_FLAG_EGRESS_HDR, 2388 &data); 2389 NVSWITCH_REPORT_CONTAIN_DATA(_HW_NPORT_TSTATE_TAGPOOLBUFERR, data); 2390 nvswitch_clear_flags(&unhandled, bit); 2391 } 2392 2393 bit = DRF_NUM(_TSTATE, _ERR_STATUS_0, _TAGPOOL_ECC_DBE_ERR, 1); 2394 if (nvswitch_test_flags(pending, bit)) 2395 { 2396 NvBool bAddressValid = NV_FALSE; 2397 NvU32 address = 0; 2398 NvU32 addressValid = NVSWITCH_ENG_RD32(device, NPORT, , link, _TSTATE, 2399 _ERR_TAGPOOL_ECC_ERROR_ADDRESS_VALID); 2400 2401 if (FLD_TEST_DRF(_TSTATE_ERR_TAGPOOL, _ECC_ERROR_ADDRESS_VALID, _VALID, _VALID, 2402 addressValid)) 2403 { 2404 address = NVSWITCH_ENG_RD32(device, NPORT, , link, _TSTATE, 2405 _ERR_TAGPOOL_ECC_ERROR_ADDRESS); 2406 bAddressValid = NV_TRUE; 2407 } 2408 2409 report.data[1] = NVSWITCH_ENG_RD32(device, NPORT, , link, _TSTATE, _ERR_TAGPOOL_ECC_ERROR_COUNTER); 2410 NVSWITCH_ENG_WR32(device, NPORT, , link, _TSTATE, _ERR_TAGPOOL_ECC_ERROR_COUNTER, 2411 DRF_DEF(_TSTATE, _ERR_TAGPOOL_ECC_ERROR_COUNTER, _ERROR_COUNT, _INIT)); 2412 NVSWITCH_REPORT_CONTAIN(_HW_NPORT_TSTATE_TAGPOOL_ECC_DBE_ERR, "TS tag store fatal ECC", NV_FALSE); 2413 _nvswitch_collect_error_info_ls10(device, link, 2414 NVSWITCH_RAW_ERROR_LOG_DATA_FLAG_EGRESS_TIME | 2415 NVSWITCH_RAW_ERROR_LOG_DATA_FLAG_EGRESS_MISC | 2416 NVSWITCH_RAW_ERROR_LOG_DATA_FLAG_EGRESS_HDR, 2417 &data); 2418 NVSWITCH_REPORT_CONTAIN_DATA(_HW_NPORT_TSTATE_TAGPOOL_ECC_DBE_ERR, data); 2419 nvswitch_clear_flags(&unhandled, bit); 2420 2421 _nvswitch_construct_ecc_error_event_ls10(&err_event, 2422 NVSWITCH_ERR_HW_NPORT_TSTATE_TAGPOOL_ECC_DBE_ERR, link, bAddressValid, 2423 address, NV_TRUE, 1); 2424 2425 nvswitch_inforom_ecc_log_err_event(device, &err_event); 2426 2427 // Clear associated LIMIT_ERR interrupt 2428 if (report.raw_pending & DRF_NUM(_TSTATE, _ERR_STATUS_0, _TAGPOOL_ECC_LIMIT_ERR, 1)) 2429 { 2430 NVSWITCH_ENG_WR32(device, NPORT, , link, _TSTATE, _ERR_STATUS_0, 2431 DRF_NUM(_TSTATE, 
_ERR_STATUS_0, _TAGPOOL_ECC_LIMIT_ERR, 1)); 2432 } 2433 } 2434 2435 bit = DRF_NUM(_TSTATE, _ERR_STATUS_0, _CRUMBSTOREBUFERR, 1); 2436 if (nvswitch_test_flags(pending, bit)) 2437 { 2438 NVSWITCH_REPORT_CONTAIN(_HW_NPORT_TSTATE_CRUMBSTOREBUFERR, "TS crumbstore", NV_FALSE); 2439 _nvswitch_collect_error_info_ls10(device, link, 2440 NVSWITCH_RAW_ERROR_LOG_DATA_FLAG_EGRESS_TIME | 2441 NVSWITCH_RAW_ERROR_LOG_DATA_FLAG_EGRESS_MISC | 2442 NVSWITCH_RAW_ERROR_LOG_DATA_FLAG_EGRESS_HDR, 2443 &data); 2444 NVSWITCH_REPORT_CONTAIN_DATA(_HW_NPORT_TSTATE_CRUMBSTOREBUFERR, data); 2445 nvswitch_clear_flags(&unhandled, bit); 2446 } 2447 2448 bit = DRF_NUM(_TSTATE, _ERR_STATUS_0, _CRUMBSTORE_ECC_DBE_ERR, 1); 2449 if (nvswitch_test_flags(pending, bit)) 2450 { 2451 NvBool bAddressValid = NV_FALSE; 2452 NvU32 address = 0; 2453 NvU32 addressValid = NVSWITCH_ENG_RD32(device, NPORT, , link, _TSTATE, 2454 _ERR_CRUMBSTORE_ECC_ERROR_ADDRESS_VALID); 2455 2456 if (FLD_TEST_DRF(_TSTATE_ERR_CRUMBSTORE, _ECC_ERROR_ADDRESS_VALID, _VALID, _VALID, 2457 addressValid)) 2458 { 2459 address = NVSWITCH_ENG_RD32(device, NPORT, , link, _TSTATE, 2460 _ERR_CRUMBSTORE_ECC_ERROR_ADDRESS); 2461 bAddressValid = NV_TRUE; 2462 } 2463 2464 report.data[1] = NVSWITCH_ENG_RD32(device, NPORT, , link, _TSTATE, _ERR_CRUMBSTORE_ECC_ERROR_COUNTER); 2465 NVSWITCH_ENG_WR32(device, NPORT, , link, _TSTATE, _ERR_CRUMBSTORE_ECC_ERROR_COUNTER, 2466 DRF_DEF(_TSTATE, _ERR_CRUMBSTORE_ECC_ERROR_COUNTER, _ERROR_COUNT, _INIT)); 2467 NVSWITCH_REPORT_CONTAIN(_HW_NPORT_TSTATE_CRUMBSTORE_ECC_DBE_ERR, "TS crumbstore fatal ECC", NV_FALSE); 2468 _nvswitch_collect_error_info_ls10(device, link, 2469 NVSWITCH_RAW_ERROR_LOG_DATA_FLAG_INGRESS_TIME | 2470 NVSWITCH_RAW_ERROR_LOG_DATA_FLAG_INGRESS_MISC | 2471 NVSWITCH_RAW_ERROR_LOG_DATA_FLAG_INGRESS_HDR, 2472 &data); 2473 NVSWITCH_REPORT_CONTAIN_DATA(_HW_NPORT_TSTATE_CRUMBSTORE_ECC_DBE_ERR, data); 2474 nvswitch_clear_flags(&unhandled, bit); 2475 2476 _nvswitch_construct_ecc_error_event_ls10(&err_event, 
2477 NVSWITCH_ERR_HW_NPORT_TSTATE_CRUMBSTORE_ECC_DBE_ERR, link, bAddressValid, 2478 address, NV_TRUE, 1); 2479 2480 nvswitch_inforom_ecc_log_err_event(device, &err_event); 2481 2482 // Clear associated LIMIT_ERR interrupt 2483 if (report.raw_pending & DRF_NUM(_TSTATE, _ERR_STATUS_0, _CRUMBSTORE_ECC_LIMIT_ERR, 1)) 2484 { 2485 NVSWITCH_ENG_WR32(device, NPORT, , link, _TSTATE, _ERR_STATUS_0, 2486 DRF_NUM(_TSTATE, _ERR_STATUS_0, _CRUMBSTORE_ECC_LIMIT_ERR, 1)); 2487 } 2488 } 2489 2490 bit = DRF_NUM(_TSTATE, _ERR_STATUS_0, _ATO_ERR, 1); 2491 if (nvswitch_test_flags(pending, bit)) 2492 { 2493 if (FLD_TEST_DRF_NUM(_TSTATE, _ERR_FIRST_0, _ATO_ERR, 1, report.raw_first)) 2494 { 2495 report.data[1] = NVSWITCH_ENG_RD32(device, NPORT, , link, _TSTATE, _ERR_DEBUG); 2496 } 2497 NVSWITCH_REPORT_CONTAIN(_HW_NPORT_TSTATE_ATO_ERR, "TS ATO timeout", NV_FALSE); 2498 nvswitch_clear_flags(&unhandled, bit); 2499 } 2500 2501 bit = DRF_NUM(_TSTATE, _ERR_STATUS_0, _CAMRSP_ERR, 1); 2502 if (nvswitch_test_flags(pending, bit)) 2503 { 2504 NVSWITCH_REPORT_CONTAIN(_HW_NPORT_TSTATE_CAMRSP_ERR, "Rsp Tag value out of range", NV_FALSE); 2505 _nvswitch_collect_error_info_ls10(device, link, 2506 NVSWITCH_RAW_ERROR_LOG_DATA_FLAG_INGRESS_TIME | 2507 NVSWITCH_RAW_ERROR_LOG_DATA_FLAG_INGRESS_MISC | 2508 NVSWITCH_RAW_ERROR_LOG_DATA_FLAG_INGRESS_HDR, 2509 &data); 2510 NVSWITCH_REPORT_CONTAIN_DATA(_HW_NPORT_TSTATE_CAMRSP_ERR, data); 2511 nvswitch_clear_flags(&unhandled, bit); 2512 } 2513 2514 NVSWITCH_UNHANDLED_CHECK(device, unhandled); 2515 2516 // Disable interrupts that have occurred after fatal error. 2517 // This helps prevent an interrupt storm if HW keeps triggering unnecessary stream of interrupts. 
2518 if (device->link[link].fatal_error_occurred) 2519 { 2520 if (nvswitch_is_soe_supported(device)) 2521 { 2522 nvswitch_soe_disable_nport_fatal_interrupts_ls10(device, link, 2523 report.raw_enable & ~pending, RM_SOE_CORE_NPORT_TSTATE_INTERRUPT); 2524 } 2525 else 2526 { 2527 NVSWITCH_ENG_WR32(device, NPORT, , link, _TSTATE, _ERR_FATAL_REPORT_EN_0, 2528 report.raw_enable & ~pending); 2529 } 2530 } 2531 2532 if (report.raw_first & report.mask) 2533 { 2534 NVSWITCH_ENG_WR32(device, NPORT, , link, _TSTATE, _ERR_FIRST_0, 2535 report.raw_first & report.mask); 2536 } 2537 2538 NVSWITCH_ENG_WR32(device, NPORT, , link, _TSTATE, _ERR_STATUS_0, pending); 2539 2540 if (unhandled != 0) 2541 { 2542 return -NVL_MORE_PROCESSING_REQUIRED; 2543 } 2544 2545 return NVL_SUCCESS; 2546 } 2547 2548 // 2549 // Egress 2550 // 2551 2552 static NvlStatus 2553 _nvswitch_service_egress_nonfatal_ls10 2554 ( 2555 nvswitch_device *device, 2556 NvU32 link 2557 ) 2558 { 2559 ls10_device *chip_device = NVSWITCH_GET_CHIP_DEVICE_LS10(device); 2560 NVSWITCH_INTERRUPT_LOG_TYPE report = { 0 }; 2561 NvU32 pending, bit, unhandled; 2562 NvU32 pending_0, pending_1; 2563 NVSWITCH_RAW_ERROR_LOG_TYPE data = {0, { 0 }}; 2564 INFOROM_NVS_ECC_ERROR_EVENT err_event = {0}; 2565 NvlStatus status = NVL_SUCCESS; 2566 2567 report.raw_pending = NVSWITCH_ENG_RD32(device, NPORT, , link, _EGRESS, _ERR_STATUS_0); 2568 report.raw_enable = NVSWITCH_ENG_RD32(device, NPORT, , link, _EGRESS, _ERR_NON_FATAL_REPORT_EN_0); 2569 report.mask = report.raw_enable & chip_device->intr_mask.egress[0].nonfatal; 2570 pending = report.raw_pending & report.mask; 2571 pending_0 = pending; 2572 2573 if (pending == 0) 2574 { 2575 goto _nvswitch_service_egress_nonfatal_ls10_err_status_1; 2576 } 2577 2578 unhandled = pending; 2579 report.raw_first = NVSWITCH_ENG_RD32(device, NPORT, , link, _EGRESS, _ERR_FIRST_0); 2580 _nvswitch_collect_error_info_ls10(device, link, 2581 NVSWITCH_RAW_ERROR_LOG_DATA_FLAG_EGRESS_TIME | 2582 
NVSWITCH_RAW_ERROR_LOG_DATA_FLAG_EGRESS_MISC | 2583 NVSWITCH_RAW_ERROR_LOG_DATA_FLAG_EGRESS_HDR, 2584 &data); 2585 2586 bit = DRF_NUM(_EGRESS, _ERR_STATUS_0, _NXBAR_HDR_ECC_LIMIT_ERR, 1); 2587 if (nvswitch_test_flags(pending, bit)) 2588 { 2589 // Ignore LIMIT error if DBE is pending 2590 if (!(nvswitch_test_flags(report.raw_pending, 2591 DRF_NUM(_EGRESS, _ERR_STATUS_0, _NXBAR_HDR_ECC_DBE_ERR, 1)))) 2592 { 2593 report.data[0] = NVSWITCH_ENG_RD32(device, NPORT, , link, _EGRESS, _ERR_NXBAR_ECC_ERROR_COUNTER); 2594 NVSWITCH_REPORT_NONFATAL(_HW_NPORT_EGRESS_NXBAR_HDR_ECC_LIMIT_ERR, "egress input ECC error limit"); 2595 NVSWITCH_REPORT_DATA(_HW_NPORT_EGRESS_NXBAR_HDR_ECC_LIMIT_ERR, data); 2596 2597 _nvswitch_construct_ecc_error_event_ls10(&err_event, 2598 NVSWITCH_ERR_HW_NPORT_EGRESS_NXBAR_HDR_ECC_LIMIT_ERR, link, NV_FALSE, 0, 2599 NV_FALSE, 1); 2600 2601 nvswitch_inforom_ecc_log_err_event(device, &err_event); 2602 } 2603 2604 nvswitch_clear_flags(&unhandled, bit); 2605 } 2606 2607 bit = DRF_NUM(_EGRESS, _ERR_STATUS_0, _RAM_OUT_HDR_ECC_LIMIT_ERR, 1); 2608 if (nvswitch_test_flags(pending, bit)) 2609 { 2610 // Ignore LIMIT error if DBE is pending 2611 if(!(nvswitch_test_flags(report.raw_pending, 2612 DRF_NUM(_EGRESS, _ERR_STATUS_0, _RAM_OUT_HDR_ECC_DBE_ERR, 1)))) 2613 { 2614 NvBool bAddressValid = NV_FALSE; 2615 NvU32 address = 0; 2616 NvU32 addressValid = NVSWITCH_ENG_RD32(device, NPORT, , link, _EGRESS, 2617 _ERR_RAM_OUT_ECC_ERROR_ADDRESS_VALID); 2618 2619 if (FLD_TEST_DRF(_EGRESS_ERR_RAM_OUT, _ECC_ERROR_ADDRESS_VALID, _VALID, _VALID, 2620 addressValid)) 2621 { 2622 address = NVSWITCH_ENG_RD32(device, NPORT, , link, _EGRESS, 2623 _ERR_RAM_OUT_ECC_ERROR_ADDRESS); 2624 bAddressValid = NV_TRUE; 2625 } 2626 2627 report.data[0] = NVSWITCH_ENG_RD32(device, NPORT, , link, _EGRESS, _ERR_RAM_OUT_ECC_ERROR_COUNTER); 2628 report.data[1] = NVSWITCH_ENG_RD32(device, NPORT, , link, _EGRESS, _ERR_RAM_OUT_ECC_ERROR_ADDRESS); 2629 
NVSWITCH_REPORT_NONFATAL(_HW_NPORT_EGRESS_RAM_OUT_HDR_ECC_LIMIT_ERR, "egress output ECC error limit"); 2630 NVSWITCH_REPORT_DATA(_HW_NPORT_EGRESS_RAM_OUT_HDR_ECC_LIMIT_ERR, data); 2631 2632 _nvswitch_construct_ecc_error_event_ls10(&err_event, 2633 NVSWITCH_ERR_HW_NPORT_EGRESS_RAM_OUT_HDR_ECC_LIMIT_ERR, link, bAddressValid, address, 2634 NV_FALSE, 1); 2635 2636 nvswitch_inforom_ecc_log_err_event(device, &err_event); 2637 } 2638 2639 nvswitch_clear_flags(&unhandled, bit); 2640 } 2641 2642 bit = DRF_NUM(_EGRESS, _ERR_STATUS_0, _PRIVRSPERR, 1); 2643 if (nvswitch_test_flags(pending, bit)) 2644 { 2645 NVSWITCH_REPORT_NONFATAL(_HW_NPORT_EGRESS_PRIVRSPERR, "egress non-posted PRIV error"); 2646 NVSWITCH_REPORT_DATA(_HW_NPORT_EGRESS_PRIVRSPERR, data); 2647 nvswitch_clear_flags(&unhandled, bit); 2648 } 2649 2650 NVSWITCH_UNHANDLED_CHECK(device, unhandled); 2651 2652 // Disable interrupts that have occurred after fatal error. 2653 // This helps prevent an interrupt storm if HW keeps triggering unnecessary stream of interrupts. 2654 if (device->link[link].fatal_error_occurred) 2655 { 2656 NVSWITCH_ENG_WR32(device, NPORT, , link, _EGRESS, _ERR_NON_FATAL_REPORT_EN_0, 2657 report.raw_enable & ~pending); 2658 } 2659 2660 if (report.raw_first & report.mask) 2661 { 2662 NVSWITCH_ENG_WR32(device, NPORT, , link, _EGRESS, _ERR_FIRST_0, 2663 report.raw_first & report.mask); 2664 } 2665 2666 NVSWITCH_ENG_WR32(device, NPORT, , link, _EGRESS, _ERR_STATUS_0, pending); 2667 2668 // HACK: Clear all pending interrupts! 
2669 NVSWITCH_ENG_WR32(device, NPORT, , link, _EGRESS, _ERR_STATUS_0, 0xFFFFFFFF); 2670 2671 if (unhandled != 0) 2672 { 2673 status = -NVL_MORE_PROCESSING_REQUIRED; 2674 } 2675 2676 _nvswitch_service_egress_nonfatal_ls10_err_status_1: 2677 report.raw_pending = NVSWITCH_ENG_RD32(device, NPORT, , link, _EGRESS, _ERR_STATUS_1); 2678 report.raw_enable = NVSWITCH_ENG_RD32(device, NPORT, , link, _EGRESS, _ERR_NON_FATAL_REPORT_EN_1); 2679 report.mask = report.raw_enable & chip_device->intr_mask.egress[1].nonfatal; 2680 pending = report.raw_pending & report.mask; 2681 pending_1 = pending; 2682 2683 if ((pending_0 == 0) && (pending_1 == 0)) 2684 { 2685 return -NVL_NOT_FOUND; 2686 } 2687 2688 unhandled = pending; 2689 report.raw_first = NVSWITCH_ENG_RD32(device, NPORT, , link, _EGRESS, _ERR_FIRST_1); 2690 2691 bit = DRF_NUM(_EGRESS, _ERR_STATUS_1, _NXBAR_REDUCTION_HDR_ECC_LIMIT_ERR, 1); 2692 if (nvswitch_test_flags(pending, bit)) 2693 { 2694 // Ignore LIMIT error if DBE is pending 2695 if (!(nvswitch_test_flags(report.raw_pending, 2696 DRF_NUM(_EGRESS, _ERR_STATUS_1, _NXBAR_REDUCTION_HDR_ECC_DBE_ERR, 1)))) 2697 { 2698 NVSWITCH_REPORT_NONFATAL(_HW_NPORT_EGRESS_NXBAR_REDUCTION_HDR_ECC_LIMIT_ERR, "egress reduction header ECC error limit"); 2699 NVSWITCH_REPORT_DATA(_HW_NPORT_EGRESS_NXBAR_HDR_ECC_LIMIT_ERR, data); 2700 2701 _nvswitch_construct_ecc_error_event_ls10(&err_event, 2702 NVSWITCH_ERR_HW_NPORT_EGRESS_NXBAR_REDUCTION_HDR_ECC_LIMIT_ERR, link, NV_FALSE, 0, 2703 NV_FALSE, 1); 2704 2705 nvswitch_inforom_ecc_log_err_event(device, &err_event); 2706 } 2707 2708 nvswitch_clear_flags(&unhandled, bit); 2709 } 2710 2711 bit = DRF_NUM(_EGRESS, _ERR_STATUS_1, _MCRSPCTRLSTORE_ECC_LIMIT_ERR, 1); 2712 if (nvswitch_test_flags(pending, bit)) 2713 { 2714 // Ignore LIMIT error if DBE is pending 2715 if (!(nvswitch_test_flags(report.raw_pending, 2716 DRF_NUM(_EGRESS, _ERR_STATUS_1, _MCRSPCTRLSTORE_ECC_DBE_ERR, 1)))) 2717 { 2718 
NVSWITCH_REPORT_NONFATAL(_HW_NPORT_EGRESS_MCRSPCTRLSTORE_ECC_LIMIT_ERR, "egress MC response ECC error limit"); 2719 NVSWITCH_REPORT_DATA(_HW_NPORT_EGRESS_MCRSPCTRLSTORE_ECC_LIMIT_ERR, data); 2720 2721 _nvswitch_construct_ecc_error_event_ls10(&err_event, 2722 NVSWITCH_ERR_HW_NPORT_EGRESS_MCRSPCTRLSTORE_ECC_LIMIT_ERR, link, NV_FALSE, 0, 2723 NV_FALSE, 1); 2724 2725 nvswitch_inforom_ecc_log_err_event(device, &err_event); 2726 } 2727 2728 nvswitch_clear_flags(&unhandled, bit); 2729 } 2730 2731 bit = DRF_NUM(_EGRESS, _ERR_STATUS_1, _RBCTRLSTORE_ECC_LIMIT_ERR, 1); 2732 if (nvswitch_test_flags(pending, bit)) 2733 { 2734 // Ignore LIMIT error if DBE is pending 2735 if (!(nvswitch_test_flags(report.raw_pending, 2736 DRF_NUM(_EGRESS, _ERR_STATUS_1, _RBCTRLSTORE_ECC_DBE_ERR, 1)))) 2737 { 2738 NVSWITCH_REPORT_NONFATAL(_HW_NPORT_EGRESS_RBCTRLSTORE_ECC_LIMIT_ERR, "egress RB ECC error limit"); 2739 NVSWITCH_REPORT_DATA(_HW_NPORT_EGRESS_RBCTRLSTORE_ECC_LIMIT_ERR, data); 2740 2741 _nvswitch_construct_ecc_error_event_ls10(&err_event, 2742 NVSWITCH_ERR_HW_NPORT_EGRESS_RBCTRLSTORE_ECC_LIMIT_ERR, link, NV_FALSE, 0, 2743 NV_FALSE, 1); 2744 2745 nvswitch_inforom_ecc_log_err_event(device, &err_event); 2746 } 2747 2748 nvswitch_clear_flags(&unhandled, bit); 2749 } 2750 2751 bit = DRF_NUM(_EGRESS, _ERR_STATUS_1, _MCREDSGT_ECC_LIMIT_ERR, 1); 2752 if (nvswitch_test_flags(pending, bit)) 2753 { 2754 // Ignore LIMIT error if DBE is pending 2755 if (!(nvswitch_test_flags(report.raw_pending, 2756 DRF_NUM(_EGRESS, _ERR_STATUS_1, _MCREDSGT_ECC_DBE_ERR, 1)))) 2757 { 2758 NVSWITCH_REPORT_NONFATAL(_HW_NPORT_EGRESS_MCREDSGT_ECC_LIMIT_ERR, "egress RSG ECC error limit"); 2759 NVSWITCH_REPORT_DATA(_HW_NPORT_EGRESS_MCREDSGT_ECC_LIMIT_ERR, data); 2760 2761 _nvswitch_construct_ecc_error_event_ls10(&err_event, 2762 NVSWITCH_ERR_HW_NPORT_EGRESS_MCREDSGT_ECC_LIMIT_ERR, link, NV_FALSE, 0, 2763 NV_FALSE, 1); 2764 2765 nvswitch_inforom_ecc_log_err_event(device, &err_event); 2766 } 2767 2768 
nvswitch_clear_flags(&unhandled, bit); 2769 } 2770 2771 bit = DRF_NUM(_EGRESS, _ERR_STATUS_1, _MCREDBUF_ECC_LIMIT_ERR, 1); 2772 if (nvswitch_test_flags(pending, bit)) 2773 { 2774 // Ignore LIMIT error if DBE is pending 2775 if (!(nvswitch_test_flags(report.raw_pending, 2776 DRF_NUM(_EGRESS, _ERR_STATUS_1, _MCREDBUF_ECC_DBE_ERR, 1)))) 2777 { 2778 NVSWITCH_REPORT_NONFATAL(_HW_NPORT_EGRESS_MCREDBUF_ECC_LIMIT_ERR, "egress MCRB ECC error limit"); 2779 NVSWITCH_REPORT_DATA(_HW_NPORT_EGRESS_MCREDBUF_ECC_LIMIT_ERR, data); 2780 2781 _nvswitch_construct_ecc_error_event_ls10(&err_event, 2782 NVSWITCH_ERR_HW_NPORT_EGRESS_MCREDBUF_ECC_LIMIT_ERR, link, NV_FALSE, 0, 2783 NV_FALSE, 1); 2784 2785 nvswitch_inforom_ecc_log_err_event(device, &err_event); 2786 } 2787 2788 nvswitch_clear_flags(&unhandled, bit); 2789 } 2790 2791 bit = DRF_NUM(_EGRESS, _ERR_STATUS_1, _MCRSP_RAM_HDR_ECC_LIMIT_ERR, 1); 2792 if (nvswitch_test_flags(pending, bit)) 2793 { 2794 // Ignore LIMIT error if DBE is pending 2795 if (!(nvswitch_test_flags(report.raw_pending, 2796 DRF_NUM(_EGRESS, _ERR_STATUS_1, _MCRSP_RAM_HDR_ECC_DBE_ERR, 1)))) 2797 { 2798 NVSWITCH_REPORT_NONFATAL(_HW_NPORT_EGRESS_MCRSP_RAM_HDR_ECC_LIMIT_ERR, "egress MC header ECC error limit"); 2799 NVSWITCH_REPORT_DATA(_HW_NPORT_EGRESS_MCRSP_RAM_HDR_ECC_LIMIT_ERR, data); 2800 2801 _nvswitch_construct_ecc_error_event_ls10(&err_event, 2802 NVSWITCH_ERR_HW_NPORT_EGRESS_MCRSP_RAM_HDR_ECC_LIMIT_ERR, link, NV_FALSE, 0, 2803 NV_FALSE, 1); 2804 2805 nvswitch_inforom_ecc_log_err_event(device, &err_event); 2806 } 2807 2808 nvswitch_clear_flags(&unhandled, bit); 2809 } 2810 2811 bit = DRF_NUM(_EGRESS, _ERR_STATUS_1, _NXBAR_REDUCTION_HDR_ECC_DBE_ERR, 1); 2812 if (nvswitch_test_flags(pending, bit)) 2813 { 2814 NVSWITCH_REPORT_NONFATAL(_HW_NPORT_EGRESS_NXBAR_REDUCTION_HDR_ECC_DBE_ERR, "egress reduction header ECC DBE error"); 2815 NVSWITCH_REPORT_DATA(_HW_NPORT_EGRESS_NXBAR_REDUCTION_HDR_ECC_DBE_ERR, data); 2816 nvswitch_clear_flags(&unhandled, bit); 2817 2818 
_nvswitch_construct_ecc_error_event_ls10(&err_event, 2819 NVSWITCH_ERR_HW_NPORT_EGRESS_NXBAR_REDUCTION_HDR_ECC_DBE_ERR, link, NV_FALSE, 0, 2820 NV_TRUE, 1); 2821 2822 nvswitch_inforom_ecc_log_err_event(device, &err_event); 2823 2824 // Clear associated LIMIT_ERR interrupt 2825 if (report.raw_pending & DRF_NUM(_EGRESS, _ERR_STATUS_1, _NXBAR_REDUCTION_HDR_ECC_LIMIT_ERR, 1)) 2826 { 2827 NVSWITCH_ENG_WR32(device, NPORT, , link, _EGRESS, _ERR_STATUS_1, 2828 DRF_NUM(_EGRESS, _ERR_STATUS_1, _NXBAR_REDUCTION_HDR_ECC_LIMIT_ERR, 1)); 2829 } 2830 } 2831 2832 bit = DRF_NUM(_EGRESS, _ERR_STATUS_1, _NXBAR_REDUCTION_HDR_PARITY_ERR, 1); 2833 if (nvswitch_test_flags(pending, bit)) 2834 { 2835 NVSWITCH_REPORT_NONFATAL(_HW_NPORT_EGRESS_NXBAR_REDUCTION_HDR_PARITY_ERR, "egress reduction header parity error"); 2836 NVSWITCH_REPORT_DATA(_HW_NPORT_EGRESS_NXBAR_REDUCTION_HDR_PARITY_ERR, data); 2837 nvswitch_clear_flags(&unhandled, bit); 2838 } 2839 2840 bit = DRF_NUM(_EGRESS, _ERR_STATUS_1, _NXBAR_REDUCTION_FLITTYPE_MISMATCH_ERR, 1); 2841 if (nvswitch_test_flags(pending, bit)) 2842 { 2843 NVSWITCH_REPORT_NONFATAL(_HW_NPORT_EGRESS_NXBAR_REDUCTION_FLITTYPE_MISMATCH_ERR, "egress reduction flit mismatch error"); 2844 NVSWITCH_REPORT_DATA(_HW_NPORT_EGRESS_NXBAR_REDUCTION_FLITTYPE_MISMATCH_ERR, data); 2845 nvswitch_clear_flags(&unhandled, bit); 2846 } 2847 2848 bit = DRF_NUM(_EGRESS, _ERR_STATUS_1, _MCREDBUF_ECC_DBE_ERR, 1); 2849 if (nvswitch_test_flags(pending, bit)) 2850 { 2851 NVSWITCH_REPORT_NONFATAL(_HW_NPORT_EGRESS_MCREDBUF_ECC_DBE_ERR, "egress reduction buffer ECC DBE error"); 2852 NVSWITCH_REPORT_DATA(_HW_NPORT_EGRESS_MCREDBUF_ECC_DBE_ERR, data); 2853 nvswitch_clear_flags(&unhandled, bit); 2854 2855 _nvswitch_construct_ecc_error_event_ls10(&err_event, 2856 NVSWITCH_ERR_HW_NPORT_EGRESS_MCREDBUF_ECC_DBE_ERR, link, NV_FALSE, 0, 2857 NV_TRUE, 1); 2858 2859 nvswitch_inforom_ecc_log_err_event(device, &err_event); 2860 2861 // Clear associated LIMIT_ERR interrupt 2862 if (report.raw_pending & 
DRF_NUM(_EGRESS, _ERR_STATUS_1, _MCREDBUF_ECC_LIMIT_ERR, 1)) 2863 { 2864 NVSWITCH_ENG_WR32(device, NPORT, , link, _EGRESS, _ERR_STATUS_1, 2865 DRF_NUM(_EGRESS, _ERR_STATUS_1, _MCREDBUF_ECC_LIMIT_ERR, 1)); 2866 } 2867 } 2868 2869 bit = DRF_NUM(_EGRESS, _ERR_STATUS_1, _MCRSP_CNT_ERR, 1); 2870 if (nvswitch_test_flags(pending, bit)) 2871 { 2872 _nvswitch_collect_error_info_ls10(device, link, 2873 NVSWITCH_RAW_ERROR_LOG_DATA_FLAG_EGRESS_MC_TIME | 2874 NVSWITCH_RAW_ERROR_LOG_DATA_FLAG_EGRESS_MC_MISC | 2875 NVSWITCH_RAW_ERROR_LOG_DATA_FLAG_EGRESS_MC_HDR, 2876 &data); 2877 NVSWITCH_REPORT_NONFATAL(_HW_NPORT_EGRESS_MCRSP_CNT_ERR, "egress MC response count error"); 2878 NVSWITCH_REPORT_DATA(_HW_NPORT_EGRESS_MCRSP_CNT_ERR, data); 2879 nvswitch_clear_flags(&unhandled, bit); 2880 } 2881 2882 bit = DRF_NUM(_EGRESS, _ERR_STATUS_1, _RBRSP_CNT_ERR, 1); 2883 if (nvswitch_test_flags(pending, bit)) 2884 { 2885 _nvswitch_collect_error_info_ls10(device, link, 2886 NVSWITCH_RAW_ERROR_LOG_DATA_FLAG_EGRESS_MC_TIME | 2887 NVSWITCH_RAW_ERROR_LOG_DATA_FLAG_EGRESS_MC_MISC | 2888 NVSWITCH_RAW_ERROR_LOG_DATA_FLAG_EGRESS_MC_HDR, 2889 &data); 2890 NVSWITCH_REPORT_NONFATAL(_HW_NPORT_EGRESS_RBRSP_CNT_ERR, "egress reduction response count error"); 2891 NVSWITCH_REPORT_DATA(_HW_NPORT_EGRESS_RBRSP_CNT_ERR, data); 2892 nvswitch_clear_flags(&unhandled, bit); 2893 } 2894 2895 NVSWITCH_UNHANDLED_CHECK(device, unhandled); 2896 2897 // Disable interrupts that have occurred after fatal error. 2898 // This helps prevent an interrupt storm if HW keeps triggering unnecessary stream of interrupts. 
2899 if (device->link[link].fatal_error_occurred) 2900 { 2901 NVSWITCH_ENG_WR32(device, NPORT, , link, _EGRESS, _ERR_NON_FATAL_REPORT_EN_1, 2902 report.raw_enable & ~pending); 2903 } 2904 2905 if (report.raw_first & report.mask) 2906 { 2907 NVSWITCH_ENG_WR32(device, NPORT, , link, _EGRESS, _ERR_FIRST_1, 2908 report.raw_first & report.mask); 2909 } 2910 2911 NVSWITCH_ENG_WR32(device, NPORT, , link, _EGRESS, _ERR_STATUS_1, pending); 2912 2913 // Clear all pending interrupts! 2914 NVSWITCH_ENG_WR32(device, NPORT, , link, _EGRESS, _ERR_STATUS_1, 0xFFFFFFFF); 2915 2916 if (unhandled != 0) 2917 { 2918 status = -NVL_MORE_PROCESSING_REQUIRED; 2919 } 2920 2921 return status; 2922 } 2923 2924 static NvlStatus 2925 _nvswitch_service_egress_fatal_ls10 2926 ( 2927 nvswitch_device *device, 2928 NvU32 link 2929 ) 2930 { 2931 ls10_device *chip_device = NVSWITCH_GET_CHIP_DEVICE_LS10(device); 2932 NVSWITCH_INTERRUPT_LOG_TYPE report = { 0 }; 2933 NvU32 pending, bit, contain, unhandled; 2934 NvU32 pending_0, pending_1; 2935 NVSWITCH_RAW_ERROR_LOG_TYPE data = {0, { 0 }}; 2936 NVSWITCH_RAW_ERROR_LOG_TYPE credit_data = {0, { 0 }}; 2937 NVSWITCH_RAW_ERROR_LOG_TYPE buffer_data = {0, { 0 }}; 2938 INFOROM_NVS_ECC_ERROR_EVENT err_event = {0}; 2939 NvlStatus status = NVL_SUCCESS; 2940 2941 report.raw_pending = NVSWITCH_ENG_RD32(device, NPORT, , link, _EGRESS, _ERR_STATUS_0); 2942 report.raw_enable = NVSWITCH_ENG_RD32(device, NPORT, , link, _EGRESS, _ERR_FATAL_REPORT_EN_0); 2943 report.mask = report.raw_enable & chip_device->intr_mask.egress[0].fatal; 2944 pending = report.raw_pending & report.mask; 2945 pending_0 = pending; 2946 2947 if (pending == 0) 2948 { 2949 goto _nvswitch_service_egress_fatal_ls10_err_status_1; 2950 } 2951 2952 unhandled = pending; 2953 report.raw_first = NVSWITCH_ENG_RD32(device, NPORT, , link, _EGRESS, _ERR_FIRST_0); 2954 contain = NVSWITCH_ENG_RD32(device, NPORT, , link, _EGRESS, _ERR_CONTAIN_EN_0); 2955 _nvswitch_collect_error_info_ls10(device, link, 2956 
//
// Service fatal EGRESS interrupts for the given NPORT link.
//
// Walks both EGRESS error-status banks (_ERR_STATUS_0 then _ERR_STATUS_1),
// reports each enabled+pending fatal error (with containment data where the
// error is containable), logs ECC DBE/parity events to the InfoROM, and
// write-1-clears the serviced status bits.
//
// Returns:
//   NVL_SUCCESS                    - all pending interrupts were handled
//   -NVL_NOT_FOUND                 - no enabled fatal interrupt pending
//   -NVL_MORE_PROCESSING_REQUIRED  - at least one pending bit had no handler
//
static NvlStatus
_nvswitch_service_egress_fatal_ls10
(
    nvswitch_device *device,
    NvU32 link
)
{
    ls10_device *chip_device = NVSWITCH_GET_CHIP_DEVICE_LS10(device);
    NVSWITCH_INTERRUPT_LOG_TYPE report = { 0 };
    NvU32 pending, bit, contain, unhandled;
    NvU32 pending_0, pending_1;
    NVSWITCH_RAW_ERROR_LOG_TYPE data = {0, { 0 }};
    NVSWITCH_RAW_ERROR_LOG_TYPE credit_data = {0, { 0 }};
    NVSWITCH_RAW_ERROR_LOG_TYPE buffer_data = {0, { 0 }};
    INFOROM_NVS_ECC_ERROR_EVENT err_event = {0};
    NvlStatus status = NVL_SUCCESS;

    //
    // Bank 0: _ERR_STATUS_0
    //
    report.raw_pending = NVSWITCH_ENG_RD32(device, NPORT, , link, _EGRESS, _ERR_STATUS_0);
    report.raw_enable = NVSWITCH_ENG_RD32(device, NPORT, , link, _EGRESS, _ERR_FATAL_REPORT_EN_0);
    report.mask = report.raw_enable & chip_device->intr_mask.egress[0].fatal;
    pending = report.raw_pending & report.mask;
    pending_0 = pending;

    if (pending == 0)
    {
        // Nothing pending in bank 0; bank 1 may still have work.
        goto _nvswitch_service_egress_fatal_ls10_err_status_1;
    }

    unhandled = pending;
    report.raw_first = NVSWITCH_ENG_RD32(device, NPORT, , link, _EGRESS, _ERR_FIRST_0);
    // contain is consumed by the NVSWITCH_REPORT_CONTAIN* reporting macros.
    contain = NVSWITCH_ENG_RD32(device, NPORT, , link, _EGRESS, _ERR_CONTAIN_EN_0);
    _nvswitch_collect_error_info_ls10(device, link,
        NVSWITCH_RAW_ERROR_LOG_DATA_FLAG_EGRESS_TIME |
        NVSWITCH_RAW_ERROR_LOG_DATA_FLAG_EGRESS_MISC |
        NVSWITCH_RAW_ERROR_LOG_DATA_FLAG_EGRESS_HDR,
        &data);

    bit = DRF_NUM(_EGRESS, _ERR_STATUS_0, _EGRESSBUFERR, 1);
    if (nvswitch_test_flags(pending, bit))
    {
        NVSWITCH_REPORT_CONTAIN(_HW_NPORT_EGRESS_EGRESSBUFERR, "egress crossbar overflow", NV_TRUE);
        NVSWITCH_REPORT_CONTAIN_DATA(_HW_NPORT_EGRESS_EGRESSBUFERR, data);

        // Snapshot all eight buffer pointer registers for debug of the overflow.
        buffer_data.data[0] = NVSWITCH_ENG_RD32(device, NPORT, , link, _EGRESS, _BUFFER_POINTERS0);
        buffer_data.data[1] = NVSWITCH_ENG_RD32(device, NPORT, , link, _EGRESS, _BUFFER_POINTERS1);
        buffer_data.data[2] = NVSWITCH_ENG_RD32(device, NPORT, , link, _EGRESS, _BUFFER_POINTERS2);
        buffer_data.data[3] = NVSWITCH_ENG_RD32(device, NPORT, , link, _EGRESS, _BUFFER_POINTERS3);
        buffer_data.data[4] = NVSWITCH_ENG_RD32(device, NPORT, , link, _EGRESS, _BUFFER_POINTERS4);
        buffer_data.data[5] = NVSWITCH_ENG_RD32(device, NPORT, , link, _EGRESS, _BUFFER_POINTERS5);
        buffer_data.data[6] = NVSWITCH_ENG_RD32(device, NPORT, , link, _EGRESS, _BUFFER_POINTERS6);
        buffer_data.data[7] = NVSWITCH_ENG_RD32(device, NPORT, , link, _EGRESS, _BUFFER_POINTERS7);
        NVSWITCH_REPORT_CONTAIN_DATA(_HW_NPORT_EGRESS_EGRESSBUFERR, buffer_data);
        nvswitch_clear_flags(&unhandled, bit);
    }

    bit = DRF_NUM(_EGRESS, _ERR_STATUS_0, _PKTROUTEERR, 1);
    if (nvswitch_test_flags(pending, bit))
    {
        NVSWITCH_REPORT_CONTAIN(_HW_NPORT_EGRESS_PKTROUTEERR, "egress packet route", NV_TRUE);
        NVSWITCH_REPORT_CONTAIN_DATA(_HW_NPORT_EGRESS_PKTROUTEERR, data);
        nvswitch_clear_flags(&unhandled, bit);
    }

    bit = DRF_NUM(_EGRESS, _ERR_STATUS_0, _SEQIDERR, 1);
    if (nvswitch_test_flags(pending, bit))
    {
        NVSWITCH_REPORT_CONTAIN(_HW_NPORT_EGRESS_SEQIDERR, "egress sequence ID error", NV_TRUE);
        NVSWITCH_REPORT_CONTAIN_DATA(_HW_NPORT_EGRESS_SEQIDERR, data);
        nvswitch_clear_flags(&unhandled, bit);
    }

    bit = DRF_NUM(_EGRESS, _ERR_STATUS_0, _NXBAR_HDR_ECC_DBE_ERR, 1);
    if (nvswitch_test_flags(pending, bit))
    {
        NVSWITCH_REPORT_CONTAIN(_HW_NPORT_EGRESS_NXBAR_HDR_ECC_DBE_ERR, "egress input ECC DBE error", NV_FALSE);
        NVSWITCH_REPORT_CONTAIN_DATA(_HW_NPORT_EGRESS_NXBAR_HDR_ECC_DBE_ERR, data);
        nvswitch_clear_flags(&unhandled, bit);

        // DBE is logged as an uncorrected (bUncErr=NV_TRUE) ECC event.
        _nvswitch_construct_ecc_error_event_ls10(&err_event,
            NVSWITCH_ERR_HW_NPORT_EGRESS_NXBAR_HDR_ECC_DBE_ERR, link, NV_FALSE, 0,
            NV_TRUE, 1);

        nvswitch_inforom_ecc_log_err_event(device, &err_event);

        // Clear associated LIMIT_ERR interrupt
        if (report.raw_pending & DRF_NUM(_EGRESS, _ERR_STATUS_0, _NXBAR_HDR_ECC_LIMIT_ERR, 1))
        {
            NVSWITCH_ENG_WR32(device, NPORT, , link, _EGRESS, _ERR_STATUS_0,
                DRF_NUM(_EGRESS, _ERR_STATUS_0, _NXBAR_HDR_ECC_LIMIT_ERR, 1));
        }
    }

    bit = DRF_NUM(_EGRESS, _ERR_STATUS_0, _RAM_OUT_HDR_ECC_DBE_ERR, 1);
    if (nvswitch_test_flags(pending, bit))
    {
        NvBool bAddressValid = NV_FALSE;
        NvU32 address = 0;
        NvU32 addressValid = NVSWITCH_ENG_RD32(device, NPORT, , link, _EGRESS,
            _ERR_RAM_OUT_ECC_ERROR_ADDRESS_VALID);

        // Only forward the ECC error address when HW marks it valid.
        if (FLD_TEST_DRF(_EGRESS_ERR_RAM_OUT, _ECC_ERROR_ADDRESS_VALID, _VALID, _VALID,
                         addressValid))
        {
            address = NVSWITCH_ENG_RD32(device, NPORT, , link, _EGRESS,
                _ERR_RAM_OUT_ECC_ERROR_ADDRESS);
            bAddressValid = NV_TRUE;
        }

        NVSWITCH_REPORT_CONTAIN(_HW_NPORT_EGRESS_RAM_OUT_HDR_ECC_DBE_ERR, "egress output ECC DBE error", NV_FALSE);
        NVSWITCH_REPORT_CONTAIN_DATA(_HW_NPORT_EGRESS_RAM_OUT_HDR_ECC_DBE_ERR, data);
        nvswitch_clear_flags(&unhandled, bit);

        _nvswitch_construct_ecc_error_event_ls10(&err_event,
            NVSWITCH_ERR_HW_NPORT_EGRESS_RAM_OUT_HDR_ECC_DBE_ERR, link, bAddressValid,
            address, NV_TRUE, 1);

        nvswitch_inforom_ecc_log_err_event(device, &err_event);

        // Clear associated LIMIT_ERR interrupt
        if (report.raw_pending & DRF_NUM(_EGRESS, _ERR_STATUS_0, _RAM_OUT_HDR_ECC_LIMIT_ERR, 1))
        {
            NVSWITCH_ENG_WR32(device, NPORT, , link, _EGRESS, _ERR_STATUS_0,
                DRF_NUM(_EGRESS, _ERR_STATUS_0, _RAM_OUT_HDR_ECC_LIMIT_ERR, 1));
        }
    }

    bit = DRF_NUM(_EGRESS, _ERR_STATUS_0, _NCISOCCREDITOVFL, 1);
    if (nvswitch_test_flags(pending, bit))
    {
        NVSWITCH_REPORT_CONTAIN(_HW_NPORT_EGRESS_NCISOCCREDITOVFL, "egress credit overflow", NV_FALSE);
        NVSWITCH_REPORT_CONTAIN_DATA(_HW_NPORT_EGRESS_NCISOCCREDITOVFL, data);

        // Snapshot all eight NCISOC credit registers for debug.
        credit_data.data[0] = NVSWITCH_ENG_RD32(device, NPORT, , link, _EGRESS, _NCISOC_CREDIT0);
        credit_data.data[1] = NVSWITCH_ENG_RD32(device, NPORT, , link, _EGRESS, _NCISOC_CREDIT1);
        credit_data.data[2] = NVSWITCH_ENG_RD32(device, NPORT, , link, _EGRESS, _NCISOC_CREDIT2);
        credit_data.data[3] = NVSWITCH_ENG_RD32(device, NPORT, , link, _EGRESS, _NCISOC_CREDIT3);
        credit_data.data[4] = NVSWITCH_ENG_RD32(device, NPORT, , link, _EGRESS, _NCISOC_CREDIT4);
        credit_data.data[5] = NVSWITCH_ENG_RD32(device, NPORT, , link, _EGRESS, _NCISOC_CREDIT5);
        credit_data.data[6] = NVSWITCH_ENG_RD32(device, NPORT, , link, _EGRESS, _NCISOC_CREDIT6);
        credit_data.data[7] = NVSWITCH_ENG_RD32(device, NPORT, , link, _EGRESS, _NCISOC_CREDIT7);
        NVSWITCH_REPORT_CONTAIN_DATA(_HW_NPORT_EGRESS_NCISOCCREDITOVFL, credit_data);
        nvswitch_clear_flags(&unhandled, bit);
    }

    bit = DRF_NUM(_EGRESS, _ERR_STATUS_0, _REQTGTIDMISMATCHERR, 1);
    if (nvswitch_test_flags(pending, bit))
    {
        NVSWITCH_REPORT_CONTAIN(_HW_NPORT_EGRESS_REQTGTIDMISMATCHERR, "egress destination request ID error", NV_FALSE);
        NVSWITCH_REPORT_CONTAIN_DATA(_HW_NPORT_EGRESS_REQTGTIDMISMATCHERR, data);
        nvswitch_clear_flags(&unhandled, bit);
    }

    bit = DRF_NUM(_EGRESS, _ERR_STATUS_0, _RSPREQIDMISMATCHERR, 1);
    if (nvswitch_test_flags(pending, bit))
    {
        NVSWITCH_REPORT_CONTAIN(_HW_NPORT_EGRESS_RSPREQIDMISMATCHERR, "egress destination response ID error", NV_FALSE);
        NVSWITCH_REPORT_CONTAIN_DATA(_HW_NPORT_EGRESS_RSPREQIDMISMATCHERR, data);
        nvswitch_clear_flags(&unhandled, bit);
    }

    // Note: the _URRSPERR status bit maps to the _DROPNPURRSPERR SW error id.
    bit = DRF_NUM(_EGRESS, _ERR_STATUS_0, _URRSPERR, 1);
    if (nvswitch_test_flags(pending, bit))
    {
        NVSWITCH_REPORT_CONTAIN(_HW_NPORT_EGRESS_DROPNPURRSPERR, "egress non-posted UR error", NV_FALSE);
        NVSWITCH_REPORT_CONTAIN_DATA(_HW_NPORT_EGRESS_DROPNPURRSPERR, data);
        nvswitch_clear_flags(&unhandled, bit);
    }

    bit = DRF_NUM(_EGRESS, _ERR_STATUS_0, _HWRSPERR, 1);
    if (nvswitch_test_flags(pending, bit))
    {
        NVSWITCH_REPORT_CONTAIN(_HW_NPORT_EGRESS_HWRSPERR, "egress non-posted HW error", NV_FALSE);
        NVSWITCH_REPORT_CONTAIN_DATA(_HW_NPORT_EGRESS_HWRSPERR, data);
        nvswitch_clear_flags(&unhandled, bit);
    }

    bit = DRF_NUM(_EGRESS, _ERR_STATUS_0, _NXBAR_HDR_PARITY_ERR, 1);
    if (nvswitch_test_flags(pending, bit))
    {
        NVSWITCH_REPORT_CONTAIN(_HW_NPORT_EGRESS_NXBAR_HDR_PARITY_ERR, "egress control parity error", NV_FALSE);
        NVSWITCH_REPORT_CONTAIN_DATA(_HW_NPORT_EGRESS_NXBAR_HDR_PARITY_ERR, data);
        nvswitch_clear_flags(&unhandled, bit);

        // Parity errors are logged as uncorrected ECC events as well.
        _nvswitch_construct_ecc_error_event_ls10(&err_event,
            NVSWITCH_ERR_HW_NPORT_EGRESS_NXBAR_HDR_PARITY_ERR, link, NV_FALSE, 0,
            NV_TRUE, 1);

        nvswitch_inforom_ecc_log_err_event(device, &err_event);
    }

    bit = DRF_NUM(_EGRESS, _ERR_STATUS_0, _NCISOC_CREDIT_PARITY_ERR, 1);
    if (nvswitch_test_flags(pending, bit))
    {
        NVSWITCH_REPORT_CONTAIN(_HW_NPORT_EGRESS_NCISOC_CREDIT_PARITY_ERR, "egress credit parity error", NV_FALSE);
        NVSWITCH_REPORT_CONTAIN_DATA(_HW_NPORT_EGRESS_NCISOC_CREDIT_PARITY_ERR, data);

        // Snapshot all eight NCISOC credit registers for debug.
        credit_data.data[0] = NVSWITCH_ENG_RD32(device, NPORT, , link, _EGRESS, _NCISOC_CREDIT0);
        credit_data.data[1] = NVSWITCH_ENG_RD32(device, NPORT, , link, _EGRESS, _NCISOC_CREDIT1);
        credit_data.data[2] = NVSWITCH_ENG_RD32(device, NPORT, , link, _EGRESS, _NCISOC_CREDIT2);
        credit_data.data[3] = NVSWITCH_ENG_RD32(device, NPORT, , link, _EGRESS, _NCISOC_CREDIT3);
        credit_data.data[4] = NVSWITCH_ENG_RD32(device, NPORT, , link, _EGRESS, _NCISOC_CREDIT4);
        credit_data.data[5] = NVSWITCH_ENG_RD32(device, NPORT, , link, _EGRESS, _NCISOC_CREDIT5);
        credit_data.data[6] = NVSWITCH_ENG_RD32(device, NPORT, , link, _EGRESS, _NCISOC_CREDIT6);
        credit_data.data[7] = NVSWITCH_ENG_RD32(device, NPORT, , link, _EGRESS, _NCISOC_CREDIT7);
        NVSWITCH_REPORT_CONTAIN_DATA(_HW_NPORT_EGRESS_NCISOC_CREDIT_PARITY_ERR, credit_data);
        nvswitch_clear_flags(&unhandled, bit);

        _nvswitch_construct_ecc_error_event_ls10(&err_event,
            NVSWITCH_ERR_HW_NPORT_EGRESS_NCISOC_CREDIT_PARITY_ERR, link, NV_FALSE, 0,
            NV_TRUE, 1);

        nvswitch_inforom_ecc_log_err_event(device, &err_event);
    }

    bit = DRF_NUM(_EGRESS, _ERR_STATUS_0, _NXBAR_FLITTYPE_MISMATCH_ERR, 1);
    if (nvswitch_test_flags(pending, bit))
    {
        NVSWITCH_REPORT_CONTAIN(_HW_NPORT_EGRESS_NXBAR_FLITTYPE_MISMATCH_ERR, "egress flit type mismatch", NV_FALSE);
        NVSWITCH_REPORT_CONTAIN_DATA(_HW_NPORT_EGRESS_NXBAR_FLITTYPE_MISMATCH_ERR, data);
        nvswitch_clear_flags(&unhandled, bit);
    }

    bit = DRF_NUM(_EGRESS, _ERR_STATUS_0, _CREDIT_TIME_OUT_ERR, 1);
    if (nvswitch_test_flags(pending, bit))
    {
        NVSWITCH_REPORT_CONTAIN(_HW_NPORT_EGRESS_CREDIT_TIME_OUT_ERR, "egress credit timeout", NV_FALSE);
        NVSWITCH_REPORT_CONTAIN_DATA(_HW_NPORT_EGRESS_CREDIT_TIME_OUT_ERR, data);
        nvswitch_clear_flags(&unhandled, bit);
    }

    bit = DRF_NUM(_EGRESS, _ERR_STATUS_0, _NXBAR_SIDEBAND_PD_PARITY_ERR, 1);
    if (nvswitch_test_flags(pending, bit))
    {
        NVSWITCH_REPORT_CONTAIN(_HW_NPORT_EGRESS_NXBAR_SIDEBAND_PD_PARITY_ERR, "egress crossbar SB parity", NV_FALSE);
        NVSWITCH_REPORT_CONTAIN_DATA(_HW_NPORT_EGRESS_NXBAR_SIDEBAND_PD_PARITY_ERR, data);
        nvswitch_clear_flags(&unhandled, bit);
    }

    bit = DRF_NUM(_EGRESS, _ERR_STATUS_0, _INVALIDVCSET_ERR, 1);
    if (nvswitch_test_flags(pending, bit))
    {
        NVSWITCH_REPORT_CONTAIN(_HW_NPORT_EGRESS_INVALIDVCSET_ERR, "egress invalid VC set", NV_FALSE);
        NVSWITCH_REPORT_CONTAIN_DATA(_HW_NPORT_EGRESS_INVALIDVCSET_ERR, data);
        nvswitch_clear_flags(&unhandled, bit);
    }

    NVSWITCH_UNHANDLED_CHECK(device, unhandled);

    // Disable interrupts that have occurred after fatal error.
    // This helps prevent an interrupt storm if HW keeps triggering unnecessary stream of interrupts.
    if (device->link[link].fatal_error_occurred)
    {
        if (nvswitch_is_soe_supported(device))
        {
            // Route the disable through SOE when available.
            nvswitch_soe_disable_nport_fatal_interrupts_ls10(device, link,
                report.raw_enable & ~pending, RM_SOE_CORE_NPORT_EGRESS_0_INTERRUPT);
        }
        else
        {
            NVSWITCH_ENG_WR32(device, NPORT, , link, _EGRESS, _ERR_FATAL_REPORT_EN_0,
                report.raw_enable & ~pending);
        }
    }

    if (report.raw_first & report.mask)
    {
        NVSWITCH_ENG_WR32(device, NPORT, , link, _EGRESS, _ERR_FIRST_0,
            report.raw_first & report.mask);
    }

    NVSWITCH_ENG_WR32(device, NPORT, , link, _EGRESS, _ERR_STATUS_0, pending);

    if (unhandled != 0)
    {
        status = -NVL_MORE_PROCESSING_REQUIRED;
    }

    //
    // Bank 1: _ERR_STATUS_1
    //
_nvswitch_service_egress_fatal_ls10_err_status_1:
    report.raw_pending = NVSWITCH_ENG_RD32(device, NPORT, , link, _EGRESS, _ERR_STATUS_1);
    report.raw_enable = NVSWITCH_ENG_RD32(device, NPORT, , link, _EGRESS, _ERR_FATAL_REPORT_EN_1);
    report.mask = report.raw_enable & chip_device->intr_mask.egress[1].fatal;
    pending = report.raw_pending & report.mask;
    pending_1 = pending;

    if ((pending_0 == 0) && (pending_1 == 0))
    {
        return -NVL_NOT_FOUND;
    }

    unhandled = pending;
    report.raw_first = NVSWITCH_ENG_RD32(device, NPORT, , link, _EGRESS, _ERR_FIRST_1);
    contain = NVSWITCH_ENG_RD32(device, NPORT, , link, _EGRESS, _ERR_CONTAIN_EN_1);

    bit = DRF_NUM(_EGRESS, _ERR_STATUS_1, _MCRSPCTRLSTORE_ECC_DBE_ERR, 1);
    if (nvswitch_test_flags(pending, bit))
    {
        NVSWITCH_REPORT_CONTAIN(_HW_NPORT_EGRESS_MCRSPCTRLSTORE_ECC_DBE_ERR, "egress MC response ECC DBE error", NV_FALSE);
        NVSWITCH_REPORT_CONTAIN_DATA(_HW_NPORT_EGRESS_MCRSPCTRLSTORE_ECC_DBE_ERR, data);
        nvswitch_clear_flags(&unhandled, bit);

        _nvswitch_construct_ecc_error_event_ls10(&err_event,
            NVSWITCH_ERR_HW_NPORT_EGRESS_MCRSPCTRLSTORE_ECC_DBE_ERR, link, NV_FALSE, 0,
            NV_TRUE, 1);

        nvswitch_inforom_ecc_log_err_event(device, &err_event);

        // Clear associated LIMIT_ERR interrupt
        if (report.raw_pending & DRF_NUM(_EGRESS, _ERR_STATUS_1, _MCRSPCTRLSTORE_ECC_LIMIT_ERR, 1))
        {
            NVSWITCH_ENG_WR32(device, NPORT, , link, _EGRESS, _ERR_STATUS_1,
                DRF_NUM(_EGRESS, _ERR_STATUS_1, _MCRSPCTRLSTORE_ECC_LIMIT_ERR, 1));
        }
    }

    bit = DRF_NUM(_EGRESS, _ERR_STATUS_1, _RBCTRLSTORE_ECC_DBE_ERR, 1);
    if (nvswitch_test_flags(pending, bit))
    {
        NVSWITCH_REPORT_CONTAIN(_HW_NPORT_EGRESS_RBCTRLSTORE_ECC_DBE_ERR, "egress reduction ECC DBE error", NV_FALSE);
        NVSWITCH_REPORT_CONTAIN_DATA(_HW_NPORT_EGRESS_RBCTRLSTORE_ECC_DBE_ERR, data);
        nvswitch_clear_flags(&unhandled, bit);

        _nvswitch_construct_ecc_error_event_ls10(&err_event,
            NVSWITCH_ERR_HW_NPORT_EGRESS_RBCTRLSTORE_ECC_DBE_ERR, link, NV_FALSE, 0,
            NV_TRUE, 1);

        nvswitch_inforom_ecc_log_err_event(device, &err_event);

        // Clear associated LIMIT_ERR interrupt
        if (report.raw_pending & DRF_NUM(_EGRESS, _ERR_STATUS_1, _RBCTRLSTORE_ECC_LIMIT_ERR, 1))
        {
            NVSWITCH_ENG_WR32(device, NPORT, , link, _EGRESS, _ERR_STATUS_1,
                DRF_NUM(_EGRESS, _ERR_STATUS_1, _RBCTRLSTORE_ECC_LIMIT_ERR, 1));
        }
    }

    bit = DRF_NUM(_EGRESS, _ERR_STATUS_1, _MCREDSGT_ECC_DBE_ERR, 1);
    if (nvswitch_test_flags(pending, bit))
    {
        NVSWITCH_REPORT_CONTAIN(_HW_NPORT_EGRESS_MCREDSGT_ECC_DBE_ERR, "egress MC SG ECC DBE error", NV_FALSE);
        NVSWITCH_REPORT_CONTAIN_DATA(_HW_NPORT_EGRESS_MCREDSGT_ECC_DBE_ERR, data);
        nvswitch_clear_flags(&unhandled, bit);

        _nvswitch_construct_ecc_error_event_ls10(&err_event,
            NVSWITCH_ERR_HW_NPORT_EGRESS_MCREDSGT_ECC_DBE_ERR, link, NV_FALSE, 0,
            NV_TRUE, 1);

        nvswitch_inforom_ecc_log_err_event(device, &err_event);

        // Clear associated LIMIT_ERR interrupt
        if (report.raw_pending & DRF_NUM(_EGRESS, _ERR_STATUS_1, _MCREDSGT_ECC_LIMIT_ERR, 1))
        {
            NVSWITCH_ENG_WR32(device, NPORT, , link, _EGRESS, _ERR_STATUS_1,
                DRF_NUM(_EGRESS, _ERR_STATUS_1, _MCREDSGT_ECC_LIMIT_ERR, 1));
        }
    }

    bit = DRF_NUM(_EGRESS, _ERR_STATUS_1, _MCRSP_RAM_HDR_ECC_DBE_ERR, 1);
    if (nvswitch_test_flags(pending, bit))
    {
        NVSWITCH_REPORT_CONTAIN(_HW_NPORT_EGRESS_MCRSP_RAM_HDR_ECC_DBE_ERR, "egress MC ram ECC DBE error", NV_FALSE);
        NVSWITCH_REPORT_CONTAIN_DATA(_HW_NPORT_EGRESS_MCRSP_RAM_HDR_ECC_DBE_ERR, data);
        nvswitch_clear_flags(&unhandled, bit);

        _nvswitch_construct_ecc_error_event_ls10(&err_event,
            NVSWITCH_ERR_HW_NPORT_EGRESS_MCRSP_RAM_HDR_ECC_DBE_ERR, link, NV_FALSE, 0,
            NV_TRUE, 1);

        nvswitch_inforom_ecc_log_err_event(device, &err_event);

        // Clear associated LIMIT_ERR interrupt
        if (report.raw_pending & DRF_NUM(_EGRESS, _ERR_STATUS_1, _MCRSP_RAM_HDR_ECC_LIMIT_ERR, 1))
        {
            NVSWITCH_ENG_WR32(device, NPORT, , link, _EGRESS, _ERR_STATUS_1,
                DRF_NUM(_EGRESS, _ERR_STATUS_1, _MCRSP_RAM_HDR_ECC_LIMIT_ERR, 1));
        }
    }

    NVSWITCH_UNHANDLED_CHECK(device, unhandled);

    // Disable interrupts that have occurred after fatal error.
    // This helps prevent an interrupt storm if HW keeps triggering unnecessary stream of interrupts.
    if (device->link[link].fatal_error_occurred)
    {
        if (nvswitch_is_soe_supported(device))
        {
            // Route the disable through SOE when available.
            nvswitch_soe_disable_nport_fatal_interrupts_ls10(device, link,
                report.raw_enable & ~pending, RM_SOE_CORE_NPORT_EGRESS_1_INTERRUPT);
        }
        else
        {
            NVSWITCH_ENG_WR32(device, NPORT, , link, _EGRESS, _ERR_FATAL_REPORT_EN_1,
                report.raw_enable & ~pending);
        }
    }

    if (report.raw_first & report.mask)
    {
        NVSWITCH_ENG_WR32(device, NPORT, , link, _EGRESS, _ERR_FIRST_1,
            report.raw_first & report.mask);
    }

    NVSWITCH_ENG_WR32(device, NPORT, , link, _EGRESS, _ERR_STATUS_1, pending);

    // Clear all pending interrupts!
    NVSWITCH_ENG_WR32(device, NPORT, , link, _EGRESS, _ERR_STATUS_1, 0xFFFFFFFF);

    if (unhandled != 0)
    {
        status = -NVL_MORE_PROCESSING_REQUIRED;
    }

    return status;
}
1); 3367 if (nvswitch_test_flags(pending, bit)) 3368 { 3369 // Ignore LIMIT error if DBE is pending 3370 if (!(nvswitch_test_flags(report.raw_pending, 3371 DRF_NUM(_SOURCETRACK, _ERR_STATUS_0, _CREQ_TCEN0_CRUMBSTORE_ECC_DBE_ERR, 1)))) 3372 { 3373 NvBool bAddressValid = NV_FALSE; 3374 NvU32 address = 0; 3375 NvU32 addressValid = NVSWITCH_ENG_RD32(device, NPORT, , link, _SOURCETRACK, 3376 _ERR_CREQ_TCEN0_CRUMBSTORE_ECC_ERROR_ADDRESS_VALID); 3377 3378 if (FLD_TEST_DRF(_SOURCETRACK_ERR_CREQ_TCEN0_CRUMBSTORE, _ECC_ERROR_ADDRESS_VALID, 3379 _VALID, _VALID, addressValid)) 3380 { 3381 address = NVSWITCH_ENG_RD32(device, NPORT, , link, _SOURCETRACK, 3382 _ERR_CREQ_TCEN0_CRUMBSTORE_ECC_ERROR_ADDRESS); 3383 bAddressValid = NV_TRUE; 3384 } 3385 3386 report.data[0] = NVSWITCH_ENG_RD32(device, NPORT, , link, _SOURCETRACK, 3387 _ERR_CREQ_TCEN0_CRUMBSTORE_ECC_ERROR_COUNTER); 3388 report.data[1] = NVSWITCH_ENG_RD32(device, NPORT, , link, _SOURCETRACK, 3389 _ERR_CREQ_TCEN0_CRUMBSTORE_ECC_ERROR_ADDRESS); 3390 report.data[2] = NVSWITCH_ENG_RD32(device, NPORT, , link, _SOURCETRACK, 3391 _ERR_CREQ_TCEN0_CRUMBSTORE_ECC_ERROR_ADDRESS_VALID); 3392 NVSWITCH_REPORT_NONFATAL(_HW_NPORT_SOURCETRACK_CREQ_TCEN0_CRUMBSTORE_ECC_LIMIT_ERR, 3393 "sourcetrack TCEN0 crumbstore ECC limit err"); 3394 3395 _nvswitch_construct_ecc_error_event_ls10(&err_event, 3396 NVSWITCH_ERR_HW_NPORT_SOURCETRACK_CREQ_TCEN0_CRUMBSTORE_ECC_LIMIT_ERR, link, 3397 bAddressValid, address, NV_FALSE, 1); 3398 3399 nvswitch_inforom_ecc_log_err_event(device, &err_event); 3400 } 3401 3402 nvswitch_clear_flags(&unhandled, bit); 3403 } 3404 3405 NVSWITCH_UNHANDLED_CHECK(device, unhandled); 3406 3407 // 3408 // Disable interrupts that have occurred after fatal error. 3409 // This helps prevent an interrupt storm if HW keeps triggering unnecessary stream of interrupts. 
3410 // 3411 if (device->link[link].fatal_error_occurred) 3412 { 3413 NVSWITCH_ENG_WR32(device, NPORT, , link, _SOURCETRACK, _ERR_NON_FATAL_REPORT_EN_0, 3414 report.raw_enable & ~pending); 3415 } 3416 3417 if (report.raw_first & report.mask) 3418 { 3419 NVSWITCH_ENG_WR32(device, NPORT, , link, _SOURCETRACK, _ERR_FIRST_0, 3420 report.raw_first & report.mask); 3421 } 3422 3423 NVSWITCH_ENG_WR32(device, NPORT, , link, _SOURCETRACK, _ERR_STATUS_0, pending); 3424 3425 if (unhandled != 0) 3426 { 3427 return -NVL_MORE_PROCESSING_REQUIRED; 3428 } 3429 3430 return NVL_SUCCESS; 3431 } 3432 3433 static NvlStatus 3434 _nvswitch_service_sourcetrack_fatal_ls10 3435 ( 3436 nvswitch_device *device, 3437 NvU32 link 3438 ) 3439 { 3440 ls10_device *chip_device = NVSWITCH_GET_CHIP_DEVICE_LS10(device); 3441 NVSWITCH_INTERRUPT_LOG_TYPE report = { 0 }; 3442 NvU32 pending, bit, contain, unhandled; 3443 INFOROM_NVS_ECC_ERROR_EVENT err_event = {0}; 3444 3445 report.raw_pending = NVSWITCH_ENG_RD32(device, NPORT, , link, 3446 _SOURCETRACK, _ERR_STATUS_0); 3447 report.raw_enable = NVSWITCH_ENG_RD32(device, NPORT, , link, 3448 _SOURCETRACK, _ERR_FATAL_REPORT_EN_0); 3449 report.mask = report.raw_enable & chip_device->intr_mask.sourcetrack.fatal; 3450 pending = report.raw_pending & report.mask; 3451 3452 if (pending == 0) 3453 { 3454 return -NVL_NOT_FOUND; 3455 } 3456 3457 unhandled = pending; 3458 report.raw_first = NVSWITCH_ENG_RD32(device, NPORT, , link, _SOURCETRACK, _ERR_FIRST_0); 3459 contain = NVSWITCH_ENG_RD32(device, NPORT, , link, _SOURCETRACK, _ERR_CONTAIN_EN_0); 3460 3461 bit = DRF_NUM(_SOURCETRACK, _ERR_STATUS_0, _CREQ_TCEN0_CRUMBSTORE_ECC_DBE_ERR, 1); 3462 if (nvswitch_test_flags(pending, bit)) 3463 { 3464 NvBool bAddressValid = NV_FALSE; 3465 NvU32 address = 0; 3466 NvU32 addressValid = NVSWITCH_ENG_RD32(device, NPORT, , link, _SOURCETRACK, 3467 _ERR_CREQ_TCEN0_CRUMBSTORE_ECC_ERROR_ADDRESS_VALID); 3468 3469 if (FLD_TEST_DRF(_SOURCETRACK_ERR_CREQ_TCEN0_CRUMBSTORE, 
_ECC_ERROR_ADDRESS_VALID, 3470 _VALID, _VALID, addressValid)) 3471 { 3472 address = NVSWITCH_ENG_RD32(device, NPORT, , link, _SOURCETRACK, 3473 _ERR_CREQ_TCEN0_CRUMBSTORE_ECC_ERROR_ADDRESS); 3474 bAddressValid = NV_TRUE; 3475 } 3476 3477 report.data[0] = NVSWITCH_ENG_RD32(device, NPORT, , link, _SOURCETRACK, 3478 _ERR_CREQ_TCEN0_CRUMBSTORE_ECC_ERROR_ADDRESS); 3479 report.data[1] = NVSWITCH_ENG_RD32(device, NPORT, , link, _SOURCETRACK, 3480 _ERR_CREQ_TCEN0_CRUMBSTORE_ECC_ERROR_ADDRESS_VALID); 3481 NVSWITCH_REPORT_CONTAIN(_HW_NPORT_SOURCETRACK_CREQ_TCEN0_CRUMBSTORE_ECC_DBE_ERR, 3482 "sourcetrack TCEN0 crumbstore DBE", NV_FALSE); 3483 nvswitch_clear_flags(&unhandled, bit); 3484 3485 _nvswitch_construct_ecc_error_event_ls10(&err_event, 3486 NVSWITCH_ERR_HW_NPORT_SOURCETRACK_CREQ_TCEN0_CRUMBSTORE_ECC_DBE_ERR, 3487 link, bAddressValid, address, NV_TRUE, 1); 3488 3489 nvswitch_inforom_ecc_log_err_event(device, &err_event); 3490 3491 // Clear associated LIMIT_ERR interrupt 3492 if (report.raw_pending & DRF_NUM(_SOURCETRACK, _ERR_STATUS_0, _CREQ_TCEN0_CRUMBSTORE_ECC_LIMIT_ERR, 1)) 3493 { 3494 NVSWITCH_ENG_WR32(device, NPORT, , link, _SOURCETRACK, _ERR_STATUS_0, 3495 DRF_NUM(_SOURCETRACK, _ERR_STATUS_0, _CREQ_TCEN0_CRUMBSTORE_ECC_LIMIT_ERR, 1)); 3496 } 3497 } 3498 3499 bit = DRF_NUM(_SOURCETRACK, _ERR_STATUS_0, _DUP_CREQ_TCEN0_TAG_ERR, 1); 3500 if (nvswitch_test_flags(pending, bit)) 3501 { 3502 NVSWITCH_REPORT_CONTAIN(_HW_NPORT_SOURCETRACK_DUP_CREQ_TCEN0_TAG_ERR, 3503 "sourcetrack duplicate CREQ", NV_FALSE); 3504 nvswitch_clear_flags(&unhandled, bit); 3505 } 3506 3507 bit = DRF_NUM(_SOURCETRACK, _ERR_STATUS_0, _INVALID_TCEN0_RSP_ERR, 1); 3508 if (nvswitch_test_flags(pending, bit)) 3509 { 3510 NVSWITCH_REPORT_CONTAIN(_HW_NPORT_SOURCETRACK_INVALID_TCEN0_RSP_ERR, 3511 "sourcetrack invalid TCEN0 CREQ", NV_FALSE); 3512 nvswitch_clear_flags(&unhandled, bit); 3513 } 3514 3515 bit = DRF_NUM(_SOURCETRACK, _ERR_STATUS_0, _INVALID_TCEN1_RSP_ERR, 1); 3516 if 
(nvswitch_test_flags(pending, bit)) 3517 { 3518 NVSWITCH_REPORT_CONTAIN(_HW_NPORT_SOURCETRACK_INVALID_TCEN1_RSP_ERR, 3519 "sourcetrack invalid TCEN1 CREQ", NV_FALSE); 3520 nvswitch_clear_flags(&unhandled, bit); 3521 } 3522 3523 bit = DRF_NUM(_SOURCETRACK, _ERR_STATUS_0, _SOURCETRACK_TIME_OUT_ERR, 1); 3524 if (nvswitch_test_flags(pending, bit)) 3525 { 3526 NVSWITCH_REPORT_CONTAIN(_HW_NPORT_SOURCETRACK_SOURCETRACK_TIME_OUT_ERR, 3527 "sourcetrack timeout error", NV_FALSE); 3528 nvswitch_clear_flags(&unhandled, bit); 3529 } 3530 3531 NVSWITCH_UNHANDLED_CHECK(device, unhandled); 3532 3533 // 3534 // Disable interrupts that have occurred after fatal error. 3535 // This helps prevent an interrupt storm if HW keeps triggering unnecessary stream of interrupts. 3536 // 3537 if (device->link[link].fatal_error_occurred) 3538 { 3539 if (nvswitch_is_soe_supported(device)) 3540 { 3541 nvswitch_soe_disable_nport_fatal_interrupts_ls10(device, link, 3542 report.raw_enable & ~pending, RM_SOE_CORE_NPORT_SOURCETRACK_INTERRUPT); 3543 } 3544 else 3545 { 3546 NVSWITCH_ENG_WR32(device, NPORT, , link, _SOURCETRACK, _ERR_FATAL_REPORT_EN_0, 3547 report.raw_enable & ~pending); 3548 } 3549 } 3550 3551 if (report.raw_first & report.mask) 3552 { 3553 NVSWITCH_ENG_WR32(device, NPORT, , link, _SOURCETRACK, _ERR_FIRST_0, 3554 report.raw_first & report.mask); 3555 } 3556 3557 NVSWITCH_ENG_WR32(device, NPORT, , link, _SOURCETRACK, _ERR_STATUS_0, pending); 3558 3559 if (unhandled != 0) 3560 { 3561 return -NVL_MORE_PROCESSING_REQUIRED; 3562 } 3563 3564 return NVL_SUCCESS; 3565 3566 } 3567 3568 // 3569 // Multicast Tstate 3570 // 3571 3572 static NvlStatus 3573 _nvswitch_service_multicast_nonfatal_ls10 3574 ( 3575 nvswitch_device *device, 3576 NvU32 link 3577 ) 3578 { 3579 ls10_device *chip_device = NVSWITCH_GET_CHIP_DEVICE_LS10(device); 3580 NVSWITCH_INTERRUPT_LOG_TYPE report = { 0 }; 3581 NvU32 pending, bit, unhandled; 3582 NVSWITCH_RAW_ERROR_LOG_TYPE data = {0, { 0 }}; 3583 INFOROM_NVS_ECC_ERROR_EVENT 
err_event = {0}; 3584 3585 report.raw_pending = NVSWITCH_ENG_RD32(device, NPORT, , link, _MULTICASTTSTATE, _ERR_STATUS_0); 3586 report.raw_enable = NVSWITCH_ENG_RD32(device, NPORT, , link, _MULTICASTTSTATE, _ERR_NON_FATAL_REPORT_EN_0); 3587 report.mask = report.raw_enable & chip_device->intr_mask.mc_tstate.nonfatal; 3588 pending = report.raw_pending & report.mask; 3589 3590 if (pending == 0) 3591 { 3592 return -NVL_NOT_FOUND; 3593 } 3594 3595 unhandled = pending; 3596 report.raw_first = NVSWITCH_ENG_RD32(device, NPORT, , link, _MULTICASTTSTATE, _ERR_FIRST_0); 3597 _nvswitch_collect_error_info_ls10(device, link, 3598 NVSWITCH_RAW_ERROR_LOG_DATA_FLAG_MC_TIME, 3599 &data); 3600 3601 bit = DRF_NUM(_MULTICASTTSTATE, _ERR_STATUS_0, _TAGPOOL_ECC_LIMIT_ERR, 1); 3602 if (nvswitch_test_flags(pending, bit)) 3603 { 3604 // Ignore LIMIT error if DBE is pending 3605 if(!(nvswitch_test_flags(report.raw_pending, 3606 DRF_NUM(_MULTICASTTSTATE, _ERR_STATUS_0, _TAGPOOL_ECC_DBE_ERR, 1)))) 3607 { 3608 NvBool bAddressValid = NV_FALSE; 3609 NvU32 address = 0; 3610 NvU32 addressValid = NVSWITCH_ENG_RD32(device, NPORT, , link, _MULTICASTTSTATE, 3611 _ERR_TAGPOOL_ECC_ERROR_ADDRESS_VALID); 3612 3613 if (FLD_TEST_DRF(_MULTICASTTSTATE_ERR_TAGPOOL, _ECC_ERROR_ADDRESS_VALID, _VALID, _VALID, 3614 addressValid)) 3615 { 3616 address = NVSWITCH_ENG_RD32(device, NPORT, , link, _MULTICASTTSTATE, 3617 _ERR_TAGPOOL_ECC_ERROR_ADDRESS); 3618 bAddressValid = NV_TRUE; 3619 } 3620 3621 report.data[1] = NVSWITCH_ENG_RD32(device, NPORT, , link, _MULTICASTTSTATE, _ERR_TAGPOOL_ECC_ERROR_COUNTER); 3622 NVSWITCH_ENG_WR32(device, NPORT, , link, _MULTICASTTSTATE, _ERR_TAGPOOL_ECC_ERROR_COUNTER, 3623 DRF_DEF(_MULTICASTTSTATE, _ERR_TAGPOOL_ECC_ERROR_COUNTER, _ERROR_COUNT, _INIT)); 3624 NVSWITCH_REPORT_NONFATAL(_HW_NPORT_MULTICASTTSTATE_TAGPOOL_ECC_LIMIT_ERR, "MC TS tag store single-bit threshold"); 3625 NVSWITCH_REPORT_DATA(_HW_NPORT_MULTICASTTSTATE_TAGPOOL_ECC_LIMIT_ERR, data); 3626 3627 
_nvswitch_construct_ecc_error_event_ls10(&err_event, 3628 NVSWITCH_ERR_HW_NPORT_MULTICASTTSTATE_TAGPOOL_ECC_LIMIT_ERR, link, 3629 bAddressValid, address, NV_FALSE, 1); 3630 3631 nvswitch_inforom_ecc_log_err_event(device, &err_event); 3632 } 3633 3634 nvswitch_clear_flags(&unhandled, bit); 3635 } 3636 3637 bit = DRF_NUM(_MULTICASTTSTATE, _ERR_STATUS_0, _CRUMBSTORE_ECC_LIMIT_ERR, 1); 3638 if (nvswitch_test_flags(pending, bit)) 3639 { 3640 // Ignore LIMIT error if DBE is pending 3641 if(!(nvswitch_test_flags(report.raw_pending, 3642 DRF_NUM(_MULTICASTTSTATE, _ERR_STATUS_0, _CRUMBSTORE_ECC_DBE_ERR, 1)))) 3643 { 3644 NvBool bAddressValid = NV_FALSE; 3645 NvU32 address = 0; 3646 NvU32 addressValid = NVSWITCH_ENG_RD32(device, NPORT, , link, _MULTICASTTSTATE, 3647 _ERR_CRUMBSTORE_ECC_ERROR_ADDRESS_VALID); 3648 3649 if (FLD_TEST_DRF(_MULTICASTTSTATE_ERR_CRUMBSTORE, _ECC_ERROR_ADDRESS_VALID, _VALID, _VALID, 3650 addressValid)) 3651 { 3652 address = NVSWITCH_ENG_RD32(device, NPORT, , link, _MULTICASTTSTATE, 3653 _ERR_CRUMBSTORE_ECC_ERROR_ADDRESS); 3654 bAddressValid = NV_TRUE; 3655 } 3656 3657 report.data[1] = NVSWITCH_ENG_RD32(device, NPORT, , link, _MULTICASTTSTATE, _ERR_CRUMBSTORE_ECC_ERROR_COUNTER); 3658 NVSWITCH_ENG_WR32(device, NPORT, , link, _MULTICASTTSTATE, _ERR_CRUMBSTORE_ECC_ERROR_COUNTER, 3659 DRF_DEF(_MULTICASTTSTATE, _ERR_CRUMBSTORE_ECC_ERROR_COUNTER, _ERROR_COUNT, _INIT)); 3660 NVSWITCH_REPORT_NONFATAL(_HW_NPORT_MULTICASTTSTATE_CRUMBSTORE_ECC_LIMIT_ERR, "MC TS crumbstore single-bit threshold"); 3661 NVSWITCH_REPORT_DATA(_HW_NPORT_MULTICASTTSTATE_CRUMBSTORE_ECC_LIMIT_ERR, data); 3662 3663 _nvswitch_construct_ecc_error_event_ls10(&err_event, 3664 NVSWITCH_ERR_HW_NPORT_MULTICASTTSTATE_CRUMBSTORE_ECC_LIMIT_ERR, link, 3665 bAddressValid, address, NV_FALSE, 1); 3666 3667 nvswitch_inforom_ecc_log_err_event(device, &err_event); 3668 } 3669 3670 nvswitch_clear_flags(&unhandled, bit); 3671 } 3672 3673 bit = DRF_NUM(_MULTICASTTSTATE, _ERR_STATUS_0, _CRUMBSTORE_MCTO_ERR, 
1); 3674 if (nvswitch_test_flags(pending, bit)) 3675 { 3676 NVSWITCH_REPORT_NONFATAL(_HW_NPORT_MULTICASTTSTATE_CRUMBSTORE_MCTO_ERR, "MC TS crumbstore MCTO"); 3677 _nvswitch_collect_error_info_ls10(device, link, 3678 NVSWITCH_RAW_ERROR_LOG_DATA_FLAG_EGRESS_TIME | 3679 NVSWITCH_RAW_ERROR_LOG_DATA_FLAG_EGRESS_MISC | 3680 NVSWITCH_RAW_ERROR_LOG_DATA_FLAG_EGRESS_HDR, 3681 &data); 3682 NVSWITCH_REPORT_DATA(_HW_NPORT_MULTICASTTSTATE_CRUMBSTORE_MCTO_ERR, data); 3683 3684 nvswitch_clear_flags(&unhandled, bit); 3685 } 3686 3687 NVSWITCH_UNHANDLED_CHECK(device, unhandled); 3688 3689 // Disable interrupts that have occurred after fatal error. 3690 // This helps prevent an interrupt storm if HW keeps triggering unnecessary stream of interrupts. 3691 if (device->link[link].fatal_error_occurred) 3692 { 3693 NVSWITCH_ENG_WR32(device, NPORT, , link, _MULTICASTTSTATE, _ERR_NON_FATAL_REPORT_EN_0, 3694 report.raw_enable & ~pending); 3695 } 3696 3697 if (report.raw_first & report.mask) 3698 { 3699 NVSWITCH_ENG_WR32(device, NPORT, , link, _MULTICASTTSTATE, _ERR_FIRST_0, 3700 report.raw_first & report.mask); 3701 } 3702 3703 NVSWITCH_ENG_WR32(device, NPORT, , link, _MULTICASTTSTATE, _ERR_STATUS_0, pending); 3704 3705 if (unhandled != 0) 3706 { 3707 return -NVL_MORE_PROCESSING_REQUIRED; 3708 } 3709 3710 return NVL_SUCCESS; 3711 } 3712 3713 static NvlStatus 3714 _nvswitch_service_multicast_fatal_ls10 3715 ( 3716 nvswitch_device *device, 3717 NvU32 link 3718 ) 3719 { 3720 ls10_device *chip_device = NVSWITCH_GET_CHIP_DEVICE_LS10(device); 3721 NVSWITCH_INTERRUPT_LOG_TYPE report = { 0 }; 3722 NvU32 pending, bit, contain, unhandled; 3723 NVSWITCH_RAW_ERROR_LOG_TYPE data = {0, { 0 }}; 3724 INFOROM_NVS_ECC_ERROR_EVENT err_event = {0}; 3725 3726 report.raw_pending = NVSWITCH_ENG_RD32(device, NPORT, , link, _MULTICASTTSTATE, _ERR_STATUS_0); 3727 report.raw_enable = NVSWITCH_ENG_RD32(device, NPORT, , link, _MULTICASTTSTATE, _ERR_FATAL_REPORT_EN_0); 3728 report.mask = report.raw_enable & 
chip_device->intr_mask.mc_tstate.fatal; 3729 pending = report.raw_pending & report.mask; 3730 3731 if (pending == 0) 3732 { 3733 return -NVL_NOT_FOUND; 3734 } 3735 3736 unhandled = pending; 3737 report.raw_first = NVSWITCH_ENG_RD32(device, NPORT, , link, _MULTICASTTSTATE, _ERR_FIRST_0); 3738 contain = NVSWITCH_ENG_RD32(device, NPORT, , link, _MULTICASTTSTATE, _ERR_CONTAIN_EN_0); 3739 _nvswitch_collect_error_info_ls10(device, link, 3740 NVSWITCH_RAW_ERROR_LOG_DATA_FLAG_MC_TIME, 3741 &data); 3742 3743 bit = DRF_NUM(_MULTICASTTSTATE, _ERR_STATUS_0, _TAGPOOL_ECC_DBE_ERR, 1); 3744 if (nvswitch_test_flags(pending, bit)) 3745 { 3746 NvBool bAddressValid = NV_FALSE; 3747 NvU32 address = 0; 3748 NvU32 addressValid = NVSWITCH_ENG_RD32(device, NPORT, , link, _MULTICASTTSTATE, 3749 _ERR_TAGPOOL_ECC_ERROR_ADDRESS_VALID); 3750 3751 if (FLD_TEST_DRF(_MULTICASTTSTATE_ERR_TAGPOOL, _ECC_ERROR_ADDRESS_VALID, _VALID, _VALID, 3752 addressValid)) 3753 { 3754 address = NVSWITCH_ENG_RD32(device, NPORT, , link, _MULTICASTTSTATE, 3755 _ERR_TAGPOOL_ECC_ERROR_ADDRESS); 3756 bAddressValid = NV_TRUE; 3757 } 3758 3759 report.data[1] = NVSWITCH_ENG_RD32(device, NPORT, , link, _MULTICASTTSTATE, _ERR_TAGPOOL_ECC_ERROR_COUNTER); 3760 NVSWITCH_ENG_WR32(device, NPORT, , link, _MULTICASTTSTATE, _ERR_TAGPOOL_ECC_ERROR_COUNTER, 3761 DRF_DEF(_MULTICASTTSTATE, _ERR_TAGPOOL_ECC_ERROR_COUNTER, _ERROR_COUNT, _INIT)); 3762 NVSWITCH_REPORT_CONTAIN(_HW_NPORT_MULTICASTTSTATE_TAGPOOL_ECC_DBE_ERR, "MC TS tag store fatal ECC", NV_FALSE); 3763 NVSWITCH_REPORT_CONTAIN_DATA(_HW_NPORT_MULTICASTTSTATE_TAGPOOL_ECC_DBE_ERR, data); 3764 nvswitch_clear_flags(&unhandled, bit); 3765 3766 _nvswitch_construct_ecc_error_event_ls10(&err_event, 3767 NVSWITCH_ERR_HW_NPORT_MULTICASTTSTATE_TAGPOOL_ECC_DBE_ERR, link, bAddressValid, 3768 address, NV_TRUE, 1); 3769 3770 nvswitch_inforom_ecc_log_err_event(device, &err_event); 3771 3772 // Clear associated LIMIT_ERR interrupt 3773 if (report.raw_pending & DRF_NUM(_MULTICASTTSTATE, 
_ERR_STATUS_0, _TAGPOOL_ECC_LIMIT_ERR, 1)) 3774 { 3775 NVSWITCH_ENG_WR32(device, NPORT, , link, _MULTICASTTSTATE, _ERR_STATUS_0, 3776 DRF_NUM(_MULTICASTTSTATE, _ERR_STATUS_0, _TAGPOOL_ECC_LIMIT_ERR, 1)); 3777 } 3778 } 3779 3780 bit = DRF_NUM(_MULTICASTTSTATE, _ERR_STATUS_0, _CRUMBSTORE_ECC_DBE_ERR, 1); 3781 if (nvswitch_test_flags(pending, bit)) 3782 { 3783 NvBool bAddressValid = NV_FALSE; 3784 NvU32 address = 0; 3785 NvU32 addressValid = NVSWITCH_ENG_RD32(device, NPORT, , link, _MULTICASTTSTATE, 3786 _ERR_CRUMBSTORE_ECC_ERROR_ADDRESS_VALID); 3787 3788 if (FLD_TEST_DRF(_MULTICASTTSTATE_ERR_CRUMBSTORE, _ECC_ERROR_ADDRESS_VALID, _VALID, _VALID, 3789 addressValid)) 3790 { 3791 address = NVSWITCH_ENG_RD32(device, NPORT, , link, _MULTICASTTSTATE, 3792 _ERR_CRUMBSTORE_ECC_ERROR_ADDRESS); 3793 bAddressValid = NV_TRUE; 3794 } 3795 3796 report.data[1] = NVSWITCH_ENG_RD32(device, NPORT, , link, _MULTICASTTSTATE, _ERR_CRUMBSTORE_ECC_ERROR_COUNTER); 3797 NVSWITCH_ENG_WR32(device, NPORT, , link, _MULTICASTTSTATE, _ERR_CRUMBSTORE_ECC_ERROR_COUNTER, 3798 DRF_DEF(_MULTICASTTSTATE, _ERR_CRUMBSTORE_ECC_ERROR_COUNTER, _ERROR_COUNT, _INIT)); 3799 NVSWITCH_REPORT_CONTAIN(_HW_NPORT_MULTICASTTSTATE_CRUMBSTORE_ECC_DBE_ERR, "MC TS crumbstore fatal ECC", NV_FALSE); 3800 NVSWITCH_REPORT_CONTAIN_DATA(_HW_NPORT_MULTICASTTSTATE_CRUMBSTORE_ECC_DBE_ERR, data); 3801 nvswitch_clear_flags(&unhandled, bit); 3802 3803 _nvswitch_construct_ecc_error_event_ls10(&err_event, 3804 NVSWITCH_ERR_HW_NPORT_MULTICASTTSTATE_CRUMBSTORE_ECC_DBE_ERR, link, bAddressValid, 3805 address, NV_TRUE, 1); 3806 3807 nvswitch_inforom_ecc_log_err_event(device, &err_event); 3808 3809 // Clear associated LIMIT_ERR interrupt 3810 if (report.raw_pending & DRF_NUM(_MULTICASTTSTATE, _ERR_STATUS_0, _CRUMBSTORE_ECC_LIMIT_ERR, 1)) 3811 { 3812 NVSWITCH_ENG_WR32(device, NPORT, , link, _MULTICASTTSTATE, _ERR_STATUS_0, 3813 DRF_NUM(_MULTICASTTSTATE, _ERR_STATUS_0, _CRUMBSTORE_ECC_LIMIT_ERR, 1)); 3814 } 3815 } 3816 3817 bit = 
DRF_NUM(_MULTICASTTSTATE, _ERR_STATUS_0, _CRUMBSTORE_BUF_OVERWRITE_ERR, 1); 3818 if (nvswitch_test_flags(pending, bit)) 3819 { 3820 NVSWITCH_REPORT_CONTAIN(_HW_NPORT_MULTICASTTSTATE_CRUMBSTORE_BUF_OVERWRITE_ERR, "MC crumbstore overwrite", NV_FALSE); 3821 NVSWITCH_REPORT_CONTAIN_DATA(_HW_NPORT_MULTICASTTSTATE_CRUMBSTORE_BUF_OVERWRITE_ERR, data); 3822 nvswitch_clear_flags(&unhandled, bit); 3823 } 3824 3825 NVSWITCH_UNHANDLED_CHECK(device, unhandled); 3826 3827 // Disable interrupts that have occurred after fatal error. 3828 // This helps prevent an interrupt storm if HW keeps triggering unnecessary stream of interrupts. 3829 if (device->link[link].fatal_error_occurred) 3830 { 3831 if (nvswitch_is_soe_supported(device)) 3832 { 3833 nvswitch_soe_disable_nport_fatal_interrupts_ls10(device, link, 3834 report.raw_enable & ~pending, RM_SOE_CORE_NPORT_MULTICAST_INTERRUPT); 3835 } 3836 else 3837 { 3838 NVSWITCH_ENG_WR32(device, NPORT, , link, _MULTICASTTSTATE, _ERR_FATAL_REPORT_EN_0, 3839 report.raw_enable & ~pending); 3840 } 3841 } 3842 3843 if (report.raw_first & report.mask) 3844 { 3845 NVSWITCH_ENG_WR32(device, NPORT, , link, _MULTICASTTSTATE, _ERR_FIRST_0, 3846 report.raw_first & report.mask); 3847 } 3848 3849 NVSWITCH_ENG_WR32(device, NPORT, , link, _MULTICASTTSTATE, _ERR_STATUS_0, pending); 3850 3851 if (unhandled != 0) 3852 { 3853 return -NVL_MORE_PROCESSING_REQUIRED; 3854 } 3855 3856 return NVL_SUCCESS; 3857 } 3858 3859 // 3860 // Reduction Tstate 3861 // 3862 3863 static NvlStatus 3864 _nvswitch_service_reduction_nonfatal_ls10 3865 ( 3866 nvswitch_device *device, 3867 NvU32 link 3868 ) 3869 { 3870 ls10_device *chip_device = NVSWITCH_GET_CHIP_DEVICE_LS10(device); 3871 NVSWITCH_INTERRUPT_LOG_TYPE report = { 0 }; 3872 NvU32 pending, bit, unhandled; 3873 NVSWITCH_RAW_ERROR_LOG_TYPE data = {0, { 0 }}; 3874 INFOROM_NVS_ECC_ERROR_EVENT err_event = {0}; 3875 3876 report.raw_pending = NVSWITCH_ENG_RD32(device, NPORT, , link, _REDUCTIONTSTATE, _ERR_STATUS_0); 3877 
report.raw_enable = NVSWITCH_ENG_RD32(device, NPORT, , link, _REDUCTIONTSTATE, _ERR_NON_FATAL_REPORT_EN_0); 3878 report.mask = report.raw_enable & chip_device->intr_mask.mc_tstate.nonfatal; 3879 pending = report.raw_pending & report.mask; 3880 3881 if (pending == 0) 3882 { 3883 return -NVL_NOT_FOUND; 3884 } 3885 3886 unhandled = pending; 3887 report.raw_first = NVSWITCH_ENG_RD32(device, NPORT, , link, _REDUCTIONTSTATE, _ERR_FIRST_0); 3888 _nvswitch_collect_error_info_ls10(device, link, 3889 NVSWITCH_RAW_ERROR_LOG_DATA_FLAG_RED_TIME, 3890 &data); 3891 3892 bit = DRF_NUM(_REDUCTIONTSTATE, _ERR_STATUS_0, _TAGPOOL_ECC_LIMIT_ERR, 1); 3893 if (nvswitch_test_flags(pending, bit)) 3894 { 3895 // Ignore LIMIT error if DBE is pending 3896 if(!(nvswitch_test_flags(report.raw_pending, 3897 DRF_NUM(_REDUCTIONTSTATE, _ERR_STATUS_0, _TAGPOOL_ECC_DBE_ERR, 1)))) 3898 { 3899 NvBool bAddressValid = NV_FALSE; 3900 NvU32 address = 0; 3901 NvU32 addressValid = NVSWITCH_ENG_RD32(device, NPORT, , link, _REDUCTIONTSTATE, 3902 _ERR_TAGPOOL_ECC_ERROR_ADDRESS_VALID); 3903 3904 if (FLD_TEST_DRF(_REDUCTIONTSTATE_ERR_TAGPOOL, _ECC_ERROR_ADDRESS_VALID, _VALID, _VALID, 3905 addressValid)) 3906 { 3907 address = NVSWITCH_ENG_RD32(device, NPORT, , link, _REDUCTIONTSTATE, 3908 _ERR_TAGPOOL_ECC_ERROR_ADDRESS); 3909 bAddressValid = NV_TRUE; 3910 } 3911 3912 report.data[1] = NVSWITCH_ENG_RD32(device, NPORT, , link, _REDUCTIONTSTATE, _ERR_TAGPOOL_ECC_ERROR_COUNTER); 3913 NVSWITCH_ENG_WR32(device, NPORT, , link, _REDUCTIONTSTATE, _ERR_TAGPOOL_ECC_ERROR_COUNTER, 3914 DRF_DEF(_REDUCTIONTSTATE, _ERR_TAGPOOL_ECC_ERROR_COUNTER, _ERROR_COUNT, _INIT)); 3915 NVSWITCH_REPORT_NONFATAL(_HW_NPORT_REDUCTIONTSTATE_TAGPOOL_ECC_LIMIT_ERR, "Red TS tag store single-bit threshold"); 3916 NVSWITCH_REPORT_DATA(_HW_NPORT_REDUCTIONTSTATE_TAGPOOL_ECC_LIMIT_ERR, data); 3917 3918 _nvswitch_construct_ecc_error_event_ls10(&err_event, 3919 NVSWITCH_ERR_HW_NPORT_REDUCTIONTSTATE_TAGPOOL_ECC_LIMIT_ERR, link, 3920 bAddressValid, address, 
NV_FALSE, 1); 3921 3922 nvswitch_inforom_ecc_log_err_event(device, &err_event); 3923 } 3924 3925 nvswitch_clear_flags(&unhandled, bit); 3926 } 3927 3928 bit = DRF_NUM(_REDUCTIONTSTATE, _ERR_STATUS_0, _CRUMBSTORE_ECC_LIMIT_ERR, 1); 3929 if (nvswitch_test_flags(pending, bit)) 3930 { 3931 // Ignore LIMIT error if DBE is pending 3932 if(!(nvswitch_test_flags(report.raw_pending, 3933 DRF_NUM(_REDUCTIONTSTATE, _ERR_STATUS_0, _CRUMBSTORE_ECC_DBE_ERR, 1)))) 3934 { 3935 NvBool bAddressValid = NV_FALSE; 3936 NvU32 address = 0; 3937 NvU32 addressValid = NVSWITCH_ENG_RD32(device, NPORT, , link, _REDUCTIONTSTATE, 3938 _ERR_CRUMBSTORE_ECC_ERROR_ADDRESS_VALID); 3939 3940 if (FLD_TEST_DRF(_REDUCTIONTSTATE_ERR_CRUMBSTORE, _ECC_ERROR_ADDRESS_VALID, _VALID, _VALID, 3941 addressValid)) 3942 { 3943 address = NVSWITCH_ENG_RD32(device, NPORT, , link, _REDUCTIONTSTATE, 3944 _ERR_CRUMBSTORE_ECC_ERROR_ADDRESS); 3945 bAddressValid = NV_TRUE; 3946 } 3947 3948 report.data[1] = NVSWITCH_ENG_RD32(device, NPORT, , link, _REDUCTIONTSTATE, _ERR_CRUMBSTORE_ECC_ERROR_COUNTER); 3949 NVSWITCH_ENG_WR32(device, NPORT, , link, _REDUCTIONTSTATE, _ERR_CRUMBSTORE_ECC_ERROR_COUNTER, 3950 DRF_DEF(_REDUCTIONTSTATE, _ERR_CRUMBSTORE_ECC_ERROR_COUNTER, _ERROR_COUNT, _INIT)); 3951 NVSWITCH_REPORT_NONFATAL(_HW_NPORT_REDUCTIONTSTATE_CRUMBSTORE_ECC_LIMIT_ERR, "Red TS crumbstore single-bit threshold"); 3952 NVSWITCH_REPORT_DATA(_HW_NPORT_REDUCTIONTSTATE_CRUMBSTORE_ECC_LIMIT_ERR, data); 3953 3954 _nvswitch_construct_ecc_error_event_ls10(&err_event, 3955 NVSWITCH_ERR_HW_NPORT_REDUCTIONTSTATE_CRUMBSTORE_ECC_LIMIT_ERR, link, 3956 bAddressValid, address, NV_FALSE, 1); 3957 3958 nvswitch_inforom_ecc_log_err_event(device, &err_event); 3959 } 3960 3961 nvswitch_clear_flags(&unhandled, bit); 3962 } 3963 3964 bit = DRF_NUM(_REDUCTIONTSTATE, _ERR_STATUS_0, _CRUMBSTORE_RTO_ERR, 1); 3965 if (nvswitch_test_flags(pending, bit)) 3966 { 3967 NVSWITCH_REPORT_NONFATAL(_HW_NPORT_REDUCTIONTSTATE_CRUMBSTORE_RTO_ERR, "Red TS crumbstore 
RTO"); 3968 NVSWITCH_REPORT_DATA(_HW_NPORT_REDUCTIONTSTATE_CRUMBSTORE_RTO_ERR, data); 3969 3970 nvswitch_clear_flags(&unhandled, bit); 3971 } 3972 3973 NVSWITCH_UNHANDLED_CHECK(device, unhandled); 3974 3975 // Disable interrupts that have occurred after fatal error. 3976 // This helps prevent an interrupt storm if HW keeps triggering unnecessary stream of interrupts. 3977 if (device->link[link].fatal_error_occurred) 3978 { 3979 NVSWITCH_ENG_WR32(device, NPORT, , link, _REDUCTIONTSTATE, _ERR_NON_FATAL_REPORT_EN_0, 3980 report.raw_enable & ~pending); 3981 } 3982 3983 if (report.raw_first & report.mask) 3984 { 3985 NVSWITCH_ENG_WR32(device, NPORT, , link, _REDUCTIONTSTATE, _ERR_FIRST_0, 3986 report.raw_first & report.mask); 3987 } 3988 3989 NVSWITCH_ENG_WR32(device, NPORT, , link, _REDUCTIONTSTATE, _ERR_STATUS_0, pending); 3990 3991 if (unhandled != 0) 3992 { 3993 return -NVL_MORE_PROCESSING_REQUIRED; 3994 } 3995 3996 return NVL_SUCCESS; 3997 } 3998 3999 static NvlStatus 4000 _nvswitch_service_reduction_fatal_ls10 4001 ( 4002 nvswitch_device *device, 4003 NvU32 link 4004 ) 4005 { 4006 ls10_device *chip_device = NVSWITCH_GET_CHIP_DEVICE_LS10(device); 4007 NVSWITCH_INTERRUPT_LOG_TYPE report = { 0 }; 4008 NvU32 pending, bit, contain, unhandled; 4009 NVSWITCH_RAW_ERROR_LOG_TYPE data = {0, { 0 }}; 4010 INFOROM_NVS_ECC_ERROR_EVENT err_event = {0}; 4011 4012 report.raw_pending = NVSWITCH_ENG_RD32(device, NPORT, , link, _REDUCTIONTSTATE, _ERR_STATUS_0); 4013 report.raw_enable = NVSWITCH_ENG_RD32(device, NPORT, , link, _REDUCTIONTSTATE, _ERR_FATAL_REPORT_EN_0); 4014 report.mask = report.raw_enable & chip_device->intr_mask.mc_tstate.fatal; 4015 pending = report.raw_pending & report.mask; 4016 4017 if (pending == 0) 4018 { 4019 return -NVL_NOT_FOUND; 4020 } 4021 4022 unhandled = pending; 4023 report.raw_first = NVSWITCH_ENG_RD32(device, NPORT, , link, _REDUCTIONTSTATE, _ERR_FIRST_0); 4024 contain = NVSWITCH_ENG_RD32(device, NPORT, , link, _REDUCTIONTSTATE, _ERR_CONTAIN_EN_0); 
    //
    // NOTE(review): continuation of the REDUCTIONTSTATE (Reduction TS) fatal
    // error handler whose head is above this span. `pending`, `unhandled`,
    // `report`, `data`, and `err_event` are locals of that enclosing function.
    //
    _nvswitch_collect_error_info_ls10(device, link,
        NVSWITCH_RAW_ERROR_LOG_DATA_FLAG_RED_TIME,
        &data);

    // Tagpool double-bit ECC error: capture the failing address (if HW latched
    // one), snapshot and reset the ECC error counter, then report containment.
    bit = DRF_NUM(_REDUCTIONTSTATE, _ERR_STATUS_0, _TAGPOOL_ECC_DBE_ERR, 1);
    if (nvswitch_test_flags(pending, bit))
    {
        NvBool bAddressValid = NV_FALSE;
        NvU32 address = 0;
        NvU32 addressValid = NVSWITCH_ENG_RD32(device, NPORT, , link, _REDUCTIONTSTATE,
            _ERR_TAGPOOL_ECC_ERROR_ADDRESS_VALID);

        if (FLD_TEST_DRF(_REDUCTIONTSTATE_ERR_TAGPOOL, _ECC_ERROR_ADDRESS_VALID, _VALID, _VALID,
            addressValid))
        {
            address = NVSWITCH_ENG_RD32(device, NPORT, , link, _REDUCTIONTSTATE,
                _ERR_TAGPOOL_ECC_ERROR_ADDRESS);
            bAddressValid = NV_TRUE;
        }

        // Snapshot the error counter for the report, then reset it to _INIT.
        report.data[1] = NVSWITCH_ENG_RD32(device, NPORT, , link, _REDUCTIONTSTATE, _ERR_TAGPOOL_ECC_ERROR_COUNTER);
        NVSWITCH_ENG_WR32(device, NPORT, , link, _REDUCTIONTSTATE, _ERR_TAGPOOL_ECC_ERROR_COUNTER,
            DRF_DEF(_REDUCTIONTSTATE, _ERR_TAGPOOL_ECC_ERROR_COUNTER, _ERROR_COUNT, _INIT));
        NVSWITCH_REPORT_CONTAIN(_HW_NPORT_REDUCTIONTSTATE_TAGPOOL_ECC_DBE_ERR, "Red TS tag store fatal ECC", NV_FALSE);
        _nvswitch_collect_error_info_ls10(device, link,
            NVSWITCH_RAW_ERROR_LOG_DATA_FLAG_EGRESS_TIME |
            NVSWITCH_RAW_ERROR_LOG_DATA_FLAG_EGRESS_MISC |
            NVSWITCH_RAW_ERROR_LOG_DATA_FLAG_EGRESS_HDR,
            &data);
        NVSWITCH_REPORT_CONTAIN_DATA(_HW_NPORT_REDUCTIONTSTATE_TAGPOOL_ECC_DBE_ERR, data);
        nvswitch_clear_flags(&unhandled, bit);

        // Log the DBE to the InfoROM ECC store (uncorrectable => NV_TRUE).
        _nvswitch_construct_ecc_error_event_ls10(&err_event,
            NVSWITCH_ERR_HW_NPORT_REDUCTIONTSTATE_TAGPOOL_ECC_DBE_ERR, link, bAddressValid,
            address, NV_TRUE, 1);

        nvswitch_inforom_ecc_log_err_event(device, &err_event);

        // Clear associated LIMIT_ERR interrupt
        if (report.raw_pending & DRF_NUM(_REDUCTIONTSTATE, _ERR_STATUS_0, _TAGPOOL_ECC_LIMIT_ERR, 1))
        {
            NVSWITCH_ENG_WR32(device, NPORT, , link, _REDUCTIONTSTATE, _ERR_STATUS_0,
                DRF_NUM(_REDUCTIONTSTATE,
_ERR_STATUS_0, _TAGPOOL_ECC_LIMIT_ERR, 1));
        }
    }

    // Crumbstore double-bit ECC error: same address-capture / counter-reset /
    // containment-report sequence as the tagpool DBE above.
    bit = DRF_NUM(_REDUCTIONTSTATE, _ERR_STATUS_0, _CRUMBSTORE_ECC_DBE_ERR, 1);
    if (nvswitch_test_flags(pending, bit))
    {
        NvBool bAddressValid = NV_FALSE;
        NvU32 address = 0;
        NvU32 addressValid = NVSWITCH_ENG_RD32(device, NPORT, , link, _REDUCTIONTSTATE,
            _ERR_CRUMBSTORE_ECC_ERROR_ADDRESS_VALID);

        if (FLD_TEST_DRF(_REDUCTIONTSTATE_ERR_CRUMBSTORE, _ECC_ERROR_ADDRESS_VALID, _VALID, _VALID,
            addressValid))
        {
            address = NVSWITCH_ENG_RD32(device, NPORT, , link, _REDUCTIONTSTATE,
                _ERR_CRUMBSTORE_ECC_ERROR_ADDRESS);
            bAddressValid = NV_TRUE;
        }

        report.data[1] = NVSWITCH_ENG_RD32(device, NPORT, , link, _REDUCTIONTSTATE, _ERR_CRUMBSTORE_ECC_ERROR_COUNTER);
        NVSWITCH_ENG_WR32(device, NPORT, , link, _REDUCTIONTSTATE, _ERR_CRUMBSTORE_ECC_ERROR_COUNTER,
            DRF_DEF(_REDUCTIONTSTATE, _ERR_CRUMBSTORE_ECC_ERROR_COUNTER, _ERROR_COUNT, _INIT));
        NVSWITCH_REPORT_CONTAIN(_HW_NPORT_REDUCTIONTSTATE_CRUMBSTORE_ECC_DBE_ERR, "Red TS crumbstore fatal ECC", NV_FALSE);
        _nvswitch_collect_error_info_ls10(device, link,
            NVSWITCH_RAW_ERROR_LOG_DATA_FLAG_INGRESS_TIME |
            NVSWITCH_RAW_ERROR_LOG_DATA_FLAG_INGRESS_MISC |
            NVSWITCH_RAW_ERROR_LOG_DATA_FLAG_INGRESS_HDR,
            &data);
        NVSWITCH_REPORT_CONTAIN_DATA(_HW_NPORT_REDUCTIONTSTATE_CRUMBSTORE_ECC_DBE_ERR, data);
        nvswitch_clear_flags(&unhandled, bit);

        _nvswitch_construct_ecc_error_event_ls10(&err_event,
            NVSWITCH_ERR_HW_NPORT_REDUCTIONTSTATE_CRUMBSTORE_ECC_DBE_ERR, link, bAddressValid,
            address, NV_TRUE, 1);

        nvswitch_inforom_ecc_log_err_event(device, &err_event);

        // Clear associated LIMIT_ERR interrupt
        if (report.raw_pending & DRF_NUM(_REDUCTIONTSTATE, _ERR_STATUS_0, _CRUMBSTORE_ECC_LIMIT_ERR, 1))
        {
            NVSWITCH_ENG_WR32(device, NPORT, , link, _REDUCTIONTSTATE, _ERR_STATUS_0,
                DRF_NUM(_REDUCTIONTSTATE, _ERR_STATUS_0,
_CRUMBSTORE_ECC_LIMIT_ERR, 1));
        }
    }

    // Crumbstore buffer-overwrite: report containment only (no ECC event).
    bit = DRF_NUM(_REDUCTIONTSTATE, _ERR_STATUS_0, _CRUMBSTORE_BUF_OVERWRITE_ERR, 1);
    if (nvswitch_test_flags(pending, bit))
    {
        NVSWITCH_REPORT_CONTAIN(_HW_NPORT_REDUCTIONTSTATE_CRUMBSTORE_BUF_OVERWRITE_ERR, "Red crumbstore overwrite", NV_FALSE);
        _nvswitch_collect_error_info_ls10(device, link,
            NVSWITCH_RAW_ERROR_LOG_DATA_FLAG_INGRESS_TIME |
            NVSWITCH_RAW_ERROR_LOG_DATA_FLAG_INGRESS_MISC |
            NVSWITCH_RAW_ERROR_LOG_DATA_FLAG_INGRESS_HDR,
            &data);
        NVSWITCH_REPORT_CONTAIN_DATA(_HW_NPORT_REDUCTIONTSTATE_CRUMBSTORE_BUF_OVERWRITE_ERR, data);
        nvswitch_clear_flags(&unhandled, bit);
    }

    NVSWITCH_UNHANDLED_CHECK(device, unhandled);

    // Disable interrupts that have occurred after fatal error.
    // This helps prevent an interrupt storm if HW keeps triggering unnecessary stream of interrupts.
    if (device->link[link].fatal_error_occurred)
    {
        if (nvswitch_is_soe_supported(device))
        {
            // Delegate the disable to SOE firmware when it is running.
            nvswitch_soe_disable_nport_fatal_interrupts_ls10(device, link,
                report.raw_enable & ~pending, RM_SOE_CORE_NPORT_REDUCTION_INTERRUPT);
        }
        else
        {
            NVSWITCH_ENG_WR32(device, NPORT, , link, _REDUCTIONTSTATE, _ERR_FATAL_REPORT_EN_0,
                report.raw_enable & ~pending);
        }
    }

    // Clear the FIRST (first-error latch) bits we serviced, then ack STATUS.
    if (report.raw_first & report.mask)
    {
        NVSWITCH_ENG_WR32(device, NPORT, , link, _REDUCTIONTSTATE, _ERR_FIRST_0,
            report.raw_first & report.mask);
    }

    NVSWITCH_ENG_WR32(device, NPORT, , link, _REDUCTIONTSTATE, _ERR_STATUS_0, pending);

    if (unhandled != 0)
    {
        return -NVL_MORE_PROCESSING_REQUIRED;
    }

    return NVL_SUCCESS;
}

//
// Service all fatal NPORT sub-unit interrupts for one link.
// Returns NVL_SUCCESS if at least one sub-unit serviced an interrupt;
// -NVL_MORE_PROCESSING_REQUIRED only when every sub-unit reported failure
// (NOTE(review): a single success masks failures in the other sub-units —
// presumably intentional "anything handled" semantics; confirm).
//
static NvlStatus
_nvswitch_service_nport_fatal_ls10
(
    nvswitch_device *device,
    NvU32 link
)
{
    NvlStatus status[7];

    status[0] = _nvswitch_service_route_fatal_ls10(device, link);
    status[1] = _nvswitch_service_ingress_fatal_ls10(device, link);
    status[2] = _nvswitch_service_egress_fatal_ls10(device, link);
    status[3] = _nvswitch_service_tstate_fatal_ls10(device, link);
    status[4] = _nvswitch_service_sourcetrack_fatal_ls10(device, link);
    status[5] = _nvswitch_service_multicast_fatal_ls10(device, link);
    status[6] = _nvswitch_service_reduction_fatal_ls10(device, link);

    if ((status[0] != NVL_SUCCESS) &&
        (status[1] != NVL_SUCCESS) &&
        (status[2] != NVL_SUCCESS) &&
        (status[3] != NVL_SUCCESS) &&
        (status[4] != NVL_SUCCESS) &&
        (status[5] != NVL_SUCCESS) &&
        (status[6] != NVL_SUCCESS))
    {
        return -NVL_MORE_PROCESSING_REQUIRED;
    }

    return NVL_SUCCESS;
}

//
// Service fatal interrupts for one NPG (NPORT group): read the group-level
// interrupt status, and for each of the 4 DEV slots with a pending FATAL
// bit, map it to its link and dispatch the per-NPORT fatal handler.
//
static NvlStatus
_nvswitch_service_npg_fatal_ls10
(
    nvswitch_device *device,
    NvU32 npg
)
{
    NvU32 pending, mask, bit, unhandled;
    NvU32 nport;
    NvU32 link;

    pending = NVSWITCH_ENG_RD32(device, NPG, , npg, _NPG, _NPG_INTERRUPT_STATUS);

    if (pending == 0)
    {
        return -NVL_NOT_FOUND;
    }

    mask =
        DRF_DEF(_NPG, _NPG_INTERRUPT_STATUS, _DEV0_INT_STATUS, _FATAL) |
        DRF_DEF(_NPG, _NPG_INTERRUPT_STATUS, _DEV1_INT_STATUS, _FATAL) |
        DRF_DEF(_NPG, _NPG_INTERRUPT_STATUS, _DEV2_INT_STATUS, _FATAL) |
        DRF_DEF(_NPG, _NPG_INTERRUPT_STATUS, _DEV3_INT_STATUS, _FATAL);
    pending &= mask;
    unhandled = pending;

    for (nport = 0; nport < NVSWITCH_NPORT_PER_NPG_LS10; nport++)
    {
        // Select the DEVn FATAL status bit for this NPORT slot.
        switch (nport)
        {
            case 0:
                bit = DRF_DEF(_NPG, _NPG_INTERRUPT_STATUS, _DEV0_INT_STATUS, _FATAL);
                break;
            case 1:
                bit = DRF_DEF(_NPG, _NPG_INTERRUPT_STATUS, _DEV1_INT_STATUS, _FATAL);
                break;
            case 2:
                bit = DRF_DEF(_NPG, _NPG_INTERRUPT_STATUS, _DEV2_INT_STATUS, _FATAL);
                break;
            case 3:
                bit = DRF_DEF(_NPG, _NPG_INTERRUPT_STATUS, _DEV3_INT_STATUS, _FATAL);
                break;
        }
        if (nvswitch_test_flags(pending, bit))
        {
            link = NPORT_TO_LINK_LS10(device, npg, nport);
            if (NVSWITCH_ENG_IS_VALID(device, NPORT, link))
            {
                if (_nvswitch_service_nport_fatal_ls10(device, link) == NVL_SUCCESS)
                {
                    nvswitch_clear_flags(&unhandled, bit);
                }
            }
        }
    }

    NVSWITCH_UNHANDLED_CHECK(device, unhandled);

    if (unhandled != 0)
    {
        return -NVL_MORE_PROCESSING_REQUIRED;
    }

    return NVL_SUCCESS;
}

//
// Nonfatal counterpart of _nvswitch_service_nport_fatal_ls10: dispatch every
// NPORT sub-unit's nonfatal handler; success if any sub-unit handled work.
//
static NvlStatus
_nvswitch_service_nport_nonfatal_ls10
(
    nvswitch_device *device,
    NvU32 link
)
{
    NvlStatus status[7];

    status[0] = _nvswitch_service_route_nonfatal_ls10(device, link);
    status[1] = _nvswitch_service_ingress_nonfatal_ls10(device, link);
    status[2] = _nvswitch_service_egress_nonfatal_ls10(device, link);
    status[3] = _nvswitch_service_tstate_nonfatal_ls10(device, link);
    status[4] = _nvswitch_service_sourcetrack_nonfatal_ls10(device, link);
    status[5] = _nvswitch_service_multicast_nonfatal_ls10(device, link);
    status[6] = _nvswitch_service_reduction_nonfatal_ls10(device, link);

    if ((status[0] != NVL_SUCCESS) &&
        (status[1] != NVL_SUCCESS) &&
        (status[2] != NVL_SUCCESS) &&
        (status[3] != NVL_SUCCESS) &&
        (status[4] != NVL_SUCCESS) &&
        (status[5] != NVL_SUCCESS) &&
        (status[6] != NVL_SUCCESS))
    {
        return -NVL_MORE_PROCESSING_REQUIRED;
    }

    return NVL_SUCCESS;
}

//
// Nonfatal counterpart of _nvswitch_service_npg_fatal_ls10: walk the 4 DEV
// slots of the NPG looking for _NONFATAL status and dispatch per-NPORT.
//
static NvlStatus
_nvswitch_service_npg_nonfatal_ls10
(
    nvswitch_device *device,
    NvU32 npg
)
{
    NvU32 pending, mask, bit, unhandled;
    NvU32 nport;
    NvU32 link;

    pending = NVSWITCH_ENG_RD32(device, NPG, , npg, _NPG, _NPG_INTERRUPT_STATUS);

    if (pending == 0)
    {
        return -NVL_NOT_FOUND;
    }

    mask =
        DRF_DEF(_NPG, _NPG_INTERRUPT_STATUS, _DEV0_INT_STATUS, _NONFATAL) |
        DRF_DEF(_NPG, _NPG_INTERRUPT_STATUS, _DEV1_INT_STATUS, _NONFATAL) |
        DRF_DEF(_NPG, _NPG_INTERRUPT_STATUS, _DEV2_INT_STATUS, _NONFATAL) |
        DRF_DEF(_NPG, _NPG_INTERRUPT_STATUS, _DEV3_INT_STATUS, _NONFATAL);
    pending &= mask;
    unhandled = pending;

    for (nport = 0; nport < NVSWITCH_NPORT_PER_NPG_LS10; nport++)
    {
        switch (nport)
        {
            case 0:
                bit = DRF_DEF(_NPG, _NPG_INTERRUPT_STATUS, _DEV0_INT_STATUS, _NONFATAL);
                break;
            case 1:
                bit = DRF_DEF(_NPG, _NPG_INTERRUPT_STATUS, _DEV1_INT_STATUS, _NONFATAL);
                break;
            case 2:
                bit = DRF_DEF(_NPG, _NPG_INTERRUPT_STATUS, _DEV2_INT_STATUS, _NONFATAL);
                break;
            case 3:
                bit = DRF_DEF(_NPG, _NPG_INTERRUPT_STATUS, _DEV3_INT_STATUS, _NONFATAL);
                break;
        }
        if (nvswitch_test_flags(pending, bit))
        {
            link = NPORT_TO_LINK_LS10(device, npg, nport);
            if (NVSWITCH_ENG_IS_VALID(device, NPORT, link))
            {
                if (_nvswitch_service_nport_nonfatal_ls10(device, link) == NVL_SUCCESS)
                {
                    nvswitch_clear_flags(&unhandled, bit);
                }
            }
        }
    }

    NVSWITCH_UNHANDLED_CHECK(device, unhandled);

    if (unhandled != 0)
    {
        return -NVL_MORE_PROCESSING_REQUIRED;
    }

    return NVL_SUCCESS;
}

//
// Service fatal NVLDL (NVLink data-link) interrupts for every enabled link of
// one NVLIPT instance present in intrLinkMask. Links in reset or with RX/TX
// clocks gated are skipped (their registers are inaccessible). Links whose
// per-link handler succeeds are accumulated into a runtime-error mask that is
// forwarded to SMBPBI (body continues past this span).
//
static NvlStatus
_nvswitch_service_nvldl_fatal_ls10
(
    nvswitch_device *device,
    NvU32 nvlipt_instance,
    NvU64 intrLinkMask
)
{
    NvU64 enabledLinkMask, localLinkMask, localIntrLinkMask, runtimeErrorMask = 0;
    NvU32 i;
    nvlink_link *link;
    NvU32 clocksMask = NVSWITCH_PER_LINK_CLOCK_SET(RXCLK) | NVSWITCH_PER_LINK_CLOCK_SET(TXCLK);
    NvlStatus status = -NVL_MORE_PROCESSING_REQUIRED;
    NVSWITCH_LINK_TRAINING_ERROR_INFO linkTrainingErrorInfo = { 0 };
    NVSWITCH_LINK_RUNTIME_ERROR_INFO linkRuntimeErrorInfo = { 0 };

    //
    // The passed in interruptLinkMask should contain a link that is part of the
    // given nvlipt instance
    //
    enabledLinkMask = nvswitch_get_enabled_link_mask(device);
    localLinkMask = NVSWITCH_NVLIPT_GET_LOCAL_LINK_MASK64_LS10(nvlipt_instance);
    localIntrLinkMask = localLinkMask & intrLinkMask & enabledLinkMask;

    if (localIntrLinkMask == 0)
    {
        NVSWITCH_PRINT(device, ERROR, "%s: Bad link mask provided for link interrupt servicing!\n", __FUNCTION__);
        NVSWITCH_ASSERT(0);
        return -NVL_BAD_ARGS;
    }

    FOR_EACH_INDEX_IN_MASK(64, i, localIntrLinkMask)
    {
        link = nvswitch_get_link(device, i);
        if (link == NULL)
        {
            // An interrupt on an invalid link should never occur
            NVSWITCH_ASSERT(link != NULL);
            continue;
        }

        if (NVSWITCH_GET_LINK_ENG_INST(device, i, NVLIPT) != nvlipt_instance)
        {
            NVSWITCH_ASSERT(0);
            break;
        }

        // Skip links whose DL registers are unreachable (in reset/clocks off).
        if (nvswitch_is_link_in_reset(device, link) ||
            !nvswitch_are_link_clocks_on_ls10(device, link, clocksMask))
        {
            continue;
        }

        if (device->hal.nvswitch_service_nvldl_fatal_link(device, nvlipt_instance, i) == NVL_SUCCESS)
        {
            runtimeErrorMask |= NVBIT64(i);
            status = NVL_SUCCESS;
        }
    }
    FOR_EACH_INDEX_IN_MASK_END;

    linkTrainingErrorInfo.isValid = NV_FALSE;
    linkRuntimeErrorInfo.isValid = NV_TRUE;
    linkRuntimeErrorInfo.mask0 = runtimeErrorMask;

    // Check runtimeErrorMask is non-zero before consuming it further.
    if ((runtimeErrorMask != 0) &&
        (nvswitch_smbpbi_set_link_error_info(device,
            &linkTrainingErrorInfo, &linkRuntimeErrorInfo) != NVL_SUCCESS))
    {
        // SMBPBI notification failure is logged but does not fail servicing.
        NVSWITCH_PRINT(device, ERROR,
                       "%s: NVLDL[0x%x, 0x%llx]: Unable to send Runtime Error bitmask: 0x%llx,\n",
                       __FUNCTION__,
                       nvlipt_instance, localIntrLinkMask,
                       runtimeErrorMask);
    }

    return status;
}

//
// Service fatal NVLTLC TX_SYS interrupts for one link: decode each enabled
// pending _ERR_STATUS_0 bit, report it, and log selected errors to the
// InfoROM NVLink error store.
//
static NvlStatus
_nvswitch_service_nvltlc_tx_sys_fatal_ls10
(
    nvswitch_device *device,
    NvU32 nvlipt_instance,
    NvU32 link
)
{
    NvU32 pending, bit, unhandled;
    NVSWITCH_INTERRUPT_LOG_TYPE report = { 0 };
    INFOROM_NVLINK_ERROR_EVENT error_event = { 0 };

    report.raw_pending = NVSWITCH_LINK_RD32_LS10(device, link, NVLTLC, _NVLTLC_TX_SYS, _ERR_STATUS_0);
    report.raw_enable = NVSWITCH_LINK_RD32_LS10(device, link, NVLTLC, _NVLTLC_TX_SYS, _ERR_FATAL_REPORT_EN_0);
    report.mask = report.raw_enable;
    pending = report.raw_pending & report.mask;

    error_event.nvliptInstance = (NvU8) nvlipt_instance;
    error_event.localLinkIdx = (NvU8) NVSWITCH_NVLIPT_GET_LOCAL_LINK_ID_LS10(link);

    if (pending == 0)
    {
        return -NVL_NOT_FOUND;
    }

    unhandled = pending;
    report.raw_first = NVSWITCH_LINK_RD32_LS10(device, link, NVLTLC, _NVLTLC_TX_SYS, _ERR_FIRST_0);

    bit = DRF_NUM(_NVLTLC_TX_SYS, _ERR_STATUS_0, _NCISOC_PARITY_ERR, 1);
    if (nvswitch_test_flags(pending, bit))
    {
        NVSWITCH_REPORT_FATAL(_HW_NVLTLC_TX_SYS_NCISOC_PARITY_ERR, "NCISOC Parity Error", NV_FALSE);
        nvswitch_clear_flags(&unhandled, bit);
        {
            error_event.error = INFOROM_NVLINK_TLC_TX_NCISOC_PARITY_ERR_FATAL;
            nvswitch_inforom_nvlink_log_error_event(device, &error_event);
        }
    }

    bit = DRF_NUM(_NVLTLC_TX_SYS, _ERR_STATUS_0, _NCISOC_HDR_ECC_DBE_ERR, 1);
    if (nvswitch_test_flags(pending, bit))
    {
        NVSWITCH_REPORT_FATAL(_HW_NVLTLC_TX_SYS_NCISOC_HDR_ECC_DBE_ERR, "NCISOC HDR ECC DBE Error", NV_FALSE);
        nvswitch_clear_flags(&unhandled, bit);
        {
            error_event.error = INFOROM_NVLINK_TLC_TX_NCISOC_HDR_ECC_DBE_FATAL;
            nvswitch_inforom_nvlink_log_error_event(device, &error_event);
        }
    }

    // NOTE(review): the following errors are reported but not logged to the
    // InfoROM — presumably no matching INFOROM_NVLINK event exists; confirm.
    bit = DRF_NUM(_NVLTLC_TX_SYS, _ERR_STATUS_0, _NCISOC_DAT_ECC_DBE_ERR, 1);
    if (nvswitch_test_flags(pending, bit))
    {
        NVSWITCH_REPORT_FATAL(_HW_NVLTLC_TX_SYS_NCISOC_DAT_ECC_DBE_ERR, "NCISOC DAT ECC DBE Error", NV_FALSE);
        nvswitch_clear_flags(&unhandled, bit);
    }

    bit = DRF_NUM(_NVLTLC_TX_SYS, _ERR_STATUS_0, _NCISOC_ECC_LIMIT_ERR, 1);
    if (nvswitch_test_flags(pending, bit))
    {
        NVSWITCH_REPORT_FATAL(_HW_NVLTLC_TX_SYS_NCISOC_ECC_LIMIT_ERR, "NCISOC ECC Limit Error", NV_FALSE);
        nvswitch_clear_flags(&unhandled, bit);
    }

    bit = DRF_NUM(_NVLTLC_TX_SYS, _ERR_STATUS_0, _TXPOISONDET, 1);
    if (nvswitch_test_flags(pending, bit))
    {
        NVSWITCH_REPORT_FATAL(_HW_NVLTLC_TXPOISONDET, "Poison Error", NV_FALSE);
        nvswitch_clear_flags(&unhandled, bit);
    }

    bit = DRF_NUM(_NVLTLC_TX_SYS, _ERR_STATUS_0, _TXRSPSTATUS_HW_ERR, 1);
    if (nvswitch_test_flags(pending, bit))
    {
        NVSWITCH_REPORT_FATAL(_HW_NVLTLC_TX_SYS_TXRSPSTATUS_HW_ERR, "TX Response Status HW Error", NV_FALSE);
        nvswitch_clear_flags(&unhandled, bit);
    }

    bit = DRF_NUM(_NVLTLC_TX_SYS, _ERR_STATUS_0, _TXRSPSTATUS_UR_ERR, 1);
    if (nvswitch_test_flags(pending, bit))
    {
        NVSWITCH_REPORT_FATAL(_HW_NVLTLC_TX_SYS_TXRSPSTATUS_UR_ERR, "TX Response Status UR Error", NV_FALSE);
        nvswitch_clear_flags(&unhandled, bit);
    }

    bit = DRF_NUM(_NVLTLC_TX_SYS, _ERR_STATUS_0, _TXRSPSTATUS_PRIV_ERR, 1);
    if (nvswitch_test_flags(pending, bit))
    {
        NVSWITCH_REPORT_FATAL(_HW_NVLTLC_TX_SYS_TXRSPSTATUS_PRIV_ERR, "TX Response Status PRIV Error", NV_FALSE);
        nvswitch_clear_flags(&unhandled, bit);
    }

    // Clear the serviced FIRST (first-error latch) bits.
    if (report.raw_first & report.mask)
    {
        NVSWITCH_LINK_WR32_LS10(device, link, NVLTLC, _NVLTLC_TX_SYS, _ERR_FIRST_0,
            report.raw_first & report.mask);
    }

    NVSWITCH_UNHANDLED_CHECK(device, unhandled);

    // Disable interrupts that have occurred after fatal error.
    if (device->link[link].fatal_error_occurred)
    {
        NVSWITCH_LINK_WR32_LS10(device, link, NVLTLC, _NVLTLC_TX_SYS, _ERR_FATAL_REPORT_EN_0,
            report.raw_enable & ~pending);
    }

    // Acknowledge (write-1-to-clear) everything we serviced.
    NVSWITCH_LINK_WR32_LS10(device, link, NVLTLC, _NVLTLC_TX_SYS, _ERR_STATUS_0, pending);

    if (unhandled != 0)
    {
        NVSWITCH_PRINT(device, WARN,
                "%s: Unhandled NVLTLC_TX_SYS interrupts, link: %d pending: 0x%x enabled: 0x%x.\n",
                 __FUNCTION__, link, pending, report.raw_enable);
        return -NVL_MORE_PROCESSING_REQUIRED;
    }

    return NVL_SUCCESS;
}

//
// Service fatal NVLTLC RX_SYS interrupts for one link. Same structure as the
// TX_SYS handler; DBE RAM errors are additionally logged to the InfoROM.
//
static NvlStatus
_nvswitch_service_nvltlc_rx_sys_fatal_ls10
(
    nvswitch_device *device,
    NvU32 nvlipt_instance,
    NvU32 link
)
{
    NvU32 pending, bit, unhandled;
    NVSWITCH_INTERRUPT_LOG_TYPE report = { 0 };
    INFOROM_NVLINK_ERROR_EVENT error_event = { 0 };

    report.raw_pending = NVSWITCH_LINK_RD32_LS10(device, link, NVLTLC, _NVLTLC_RX_SYS, _ERR_STATUS_0);
    report.raw_enable = NVSWITCH_LINK_RD32_LS10(device, link, NVLTLC, _NVLTLC_RX_SYS, _ERR_FATAL_REPORT_EN_0);
    report.mask = report.raw_enable;
    pending = report.raw_pending & report.mask;

    error_event.nvliptInstance = (NvU8) nvlipt_instance;
    error_event.localLinkIdx = (NvU8) NVSWITCH_NVLIPT_GET_LOCAL_LINK_ID_LS10(link);

    if (pending == 0)
    {
        return -NVL_NOT_FOUND;
    }

    unhandled = pending;
    report.raw_first = NVSWITCH_LINK_RD32_LS10(device, link, NVLTLC, _NVLTLC_RX_SYS, _ERR_FIRST_0);

    bit = DRF_NUM(_NVLTLC_RX_SYS, _ERR_STATUS_0, _NCISOC_PARITY_ERR, 1);
    if (nvswitch_test_flags(pending, bit))
    {
        NVSWITCH_REPORT_FATAL(_HW_NVLTLC_RX_SYS_NCISOC_PARITY_ERR, "NCISOC Parity Error", NV_FALSE);
        nvswitch_clear_flags(&unhandled, bit);
    }

    bit = DRF_NUM(_NVLTLC_RX_SYS, _ERR_STATUS_0, _HDR_RAM_ECC_DBE_ERR, 1);
    if (nvswitch_test_flags(pending, bit))
    {
        NVSWITCH_REPORT_FATAL(_HW_NVLTLC_RX_SYS_HDR_RAM_ECC_DBE_ERR, "HDR RAM ECC DBE Error", NV_FALSE);
        nvswitch_clear_flags(&unhandled, bit);
        {
            // TODO 3014908 log these in the NVL object until we have ECC object support
            error_event.error = INFOROM_NVLINK_TLC_RX_HDR_RAM_ECC_DBE_FATAL;
            nvswitch_inforom_nvlink_log_error_event(device, &error_event);
        }
    }

    bit = DRF_NUM(_NVLTLC_RX_SYS, _ERR_STATUS_0, _HDR_RAM_ECC_LIMIT_ERR, 1);
    if (nvswitch_test_flags(pending, bit))
    {
        NVSWITCH_REPORT_FATAL(_HW_NVLTLC_RX_SYS_HDR_RAM_ECC_LIMIT_ERR, "HDR RAM ECC Limit Error", NV_FALSE);
        nvswitch_clear_flags(&unhandled, bit);
    }

    bit = DRF_NUM(_NVLTLC_RX_SYS, _ERR_STATUS_0, _DAT0_RAM_ECC_DBE_ERR, 1);
    if (nvswitch_test_flags(pending, bit))
    {
        NVSWITCH_REPORT_FATAL(_HW_NVLTLC_RX_SYS_DAT0_RAM_ECC_DBE_ERR, "DAT0 RAM ECC DBE Error", NV_FALSE);
        nvswitch_clear_flags(&unhandled, bit);
        {
            // TODO 3014908 log these in the NVL object until we have ECC object support
            error_event.error = INFOROM_NVLINK_TLC_RX_DAT0_RAM_ECC_DBE_FATAL;
            nvswitch_inforom_nvlink_log_error_event(device, &error_event);
        }
    }

    bit = DRF_NUM(_NVLTLC_RX_SYS, _ERR_STATUS_0, _DAT0_RAM_ECC_LIMIT_ERR, 1);
    if (nvswitch_test_flags(pending, bit))
    {
        NVSWITCH_REPORT_FATAL(_HW_NVLTLC_RX_SYS_DAT0_RAM_ECC_LIMIT_ERR, "DAT0 RAM ECC Limit Error", NV_FALSE);
        nvswitch_clear_flags(&unhandled, bit);
    }

    bit = DRF_NUM(_NVLTLC_RX_SYS, _ERR_STATUS_0, _DAT1_RAM_ECC_DBE_ERR, 1);
    if (nvswitch_test_flags(pending, bit))
    {
        NVSWITCH_REPORT_FATAL(_HW_NVLTLC_RX_SYS_DAT1_RAM_ECC_DBE_ERR, "DAT1 RAM ECC DBE Error", NV_FALSE);
        nvswitch_clear_flags(&unhandled, bit);
        {
            // TODO 3014908 log these in the NVL object until we have ECC object support
            error_event.error = INFOROM_NVLINK_TLC_RX_DAT1_RAM_ECC_DBE_FATAL;
            nvswitch_inforom_nvlink_log_error_event(device, &error_event);
        }
    }

    bit = DRF_NUM(_NVLTLC_RX_SYS, _ERR_STATUS_0, _DAT1_RAM_ECC_LIMIT_ERR, 1);
    if (nvswitch_test_flags(pending, bit))
    {
        NVSWITCH_REPORT_FATAL(_HW_NVLTLC_RX_SYS_DAT1_RAM_ECC_LIMIT_ERR, "DAT1 RAM ECC Limit Error", NV_FALSE);
        nvswitch_clear_flags(&unhandled, bit);
    }

    if (report.raw_first & report.mask)
    {
        NVSWITCH_LINK_WR32_LS10(device, link, NVLTLC, _NVLTLC_RX_SYS, _ERR_FIRST_0,
            report.raw_first & report.mask);
    }

    NVSWITCH_UNHANDLED_CHECK(device, unhandled);

    // Disable interrupts that have occurred after fatal error.
    if (device->link[link].fatal_error_occurred)
    {
        NVSWITCH_LINK_WR32_LS10(device, link, NVLTLC, _NVLTLC_RX_SYS, _ERR_FATAL_REPORT_EN_0,
            report.raw_enable & ~pending);
    }

    NVSWITCH_LINK_WR32_LS10(device, link, NVLTLC, _NVLTLC_RX_SYS, _ERR_STATUS_0, pending);

    if (unhandled != 0)
    {
        NVSWITCH_PRINT(device, WARN,
                "%s: Unhandled NVLTLC_RX_SYS interrupts, link: %d pending: 0x%x enabled: 0x%x.\n",
                 __FUNCTION__, link, pending, report.raw_enable);
        return -NVL_MORE_PROCESSING_REQUIRED;
    }

    return NVL_SUCCESS;
}

//
// Service fatal NVLTLC TX_LNK _ERR_STATUS_0 interrupts for one link.
//
static NvlStatus
_nvswitch_service_nvltlc_tx_lnk_fatal_0_ls10
(
    nvswitch_device *device,
    NvU32 nvlipt_instance,
    NvU32 link
)
{
    NvU32 pending, bit, unhandled;
    NVSWITCH_INTERRUPT_LOG_TYPE report = { 0 };
    INFOROM_NVLINK_ERROR_EVENT error_event = { 0 };

    report.raw_pending = NVSWITCH_LINK_RD32_LS10(device, link, NVLTLC, _NVLTLC_TX_LNK, _ERR_STATUS_0);
    report.raw_enable = NVSWITCH_LINK_RD32_LS10(device, link, NVLTLC, _NVLTLC_TX_LNK, _ERR_FATAL_REPORT_EN_0);
    report.mask = report.raw_enable;
    pending = report.raw_pending & report.mask;

    error_event.nvliptInstance = (NvU8) nvlipt_instance;
    error_event.localLinkIdx = (NvU8) NVSWITCH_NVLIPT_GET_LOCAL_LINK_ID_LS10(link);

    if (pending == 0)
    {
        return -NVL_NOT_FOUND;
    }

    unhandled = pending;
    report.raw_first = NVSWITCH_LINK_RD32_LS10(device, link, NVLTLC, _NVLTLC_TX_LNK, _ERR_FIRST_0);

    bit = DRF_NUM(_NVLTLC_TX_LNK, _ERR_STATUS_0, _TXDLCREDITPARITYERR, 1);
    if (nvswitch_test_flags(pending, bit))
    {
        NVSWITCH_REPORT_FATAL(_HW_NVLTLC_TXDLCREDITPARITYERR, "TX DL Credit Parity Error", NV_FALSE);
        nvswitch_clear_flags(&unhandled, bit);
        {
            error_event.error = INFOROM_NVLINK_TLC_TX_DL_CREDIT_PARITY_ERR_FATAL;
            nvswitch_inforom_nvlink_log_error_event(device, &error_event);
        }
    }

    bit = DRF_NUM(_NVLTLC_TX_LNK, _ERR_STATUS_0, _CREQ_RAM_HDR_ECC_DBE_ERR, 1);
    if (nvswitch_test_flags(pending, bit))
    {
        NVSWITCH_REPORT_FATAL(_HW_NVLTLC_TX_LNK_CREQ_RAM_HDR_ECC_DBE_ERR, "CREQ RAM HDR ECC DBE Error", NV_FALSE);
        nvswitch_clear_flags(&unhandled, bit);
    }

    bit = DRF_NUM(_NVLTLC_TX_LNK, _ERR_STATUS_0, _RSP_RAM_HDR_ECC_DBE_ERR, 1);
    if (nvswitch_test_flags(pending, bit))
    {
        NVSWITCH_REPORT_FATAL(_HW_NVLTLC_TX_LNK_RSP_RAM_HDR_ECC_DBE_ERR, "Response RAM HDR ECC DBE Error", NV_FALSE);
        nvswitch_clear_flags(&unhandled, bit);
    }

    bit = DRF_NUM(_NVLTLC_TX_LNK, _ERR_STATUS_0, _COM_RAM_HDR_ECC_DBE_ERR, 1);
    if (nvswitch_test_flags(pending, bit))
    {
        NVSWITCH_REPORT_FATAL(_HW_NVLTLC_TX_LNK_COM_RAM_HDR_ECC_DBE_ERR, "COM RAM HDR ECC DBE Error", NV_FALSE);
        nvswitch_clear_flags(&unhandled, bit);
    }

    bit = DRF_NUM(_NVLTLC_TX_LNK, _ERR_STATUS_0, _RSP1_RAM_HDR_ECC_DBE_ERR, 1);
    if (nvswitch_test_flags(pending, bit))
    {
        NVSWITCH_REPORT_FATAL(_HW_NVLTLC_TX_LNK_RSP1_RAM_HDR_ECC_DBE_ERR, "RSP1 RAM HDR ECC DBE Error", NV_FALSE);
        nvswitch_clear_flags(&unhandled, bit);
    }

    bit = DRF_NUM(_NVLTLC_TX_LNK, _ERR_STATUS_0, _RSP1_RAM_DAT_ECC_DBE_ERR, 1);
    if (nvswitch_test_flags(pending, bit))
    {
        NVSWITCH_REPORT_FATAL(_HW_NVLTLC_TX_LNK_RSP1_RAM_DAT_ECC_DBE_ERR, "RSP1 RAM DAT ECC DBE Error", NV_FALSE);
        nvswitch_clear_flags(&unhandled, bit);
        {
            // TODO 3014908 log these in the NVL object until we have ECC object support
            error_event.error = INFOROM_NVLINK_TLC_TX_RSP1_DAT_RAM_ECC_DBE_FATAL;
            nvswitch_inforom_nvlink_log_error_event(device, &error_event);
        }
    }

    NVSWITCH_UNHANDLED_CHECK(device, unhandled);

    // Disable interrupts that have occurred after fatal error.
    if (device->link[link].fatal_error_occurred)
    {
        NVSWITCH_LINK_WR32_LS10(device, link, NVLTLC, _NVLTLC_TX_LNK, _ERR_FATAL_REPORT_EN_0,
            report.raw_enable & ~pending);
    }

    if (report.raw_first & report.mask)
    {
        NVSWITCH_LINK_WR32_LS10(device, link, NVLTLC, _NVLTLC_TX_LNK, _ERR_FIRST_0,
            report.raw_first & report.mask);
    }
    NVSWITCH_LINK_WR32_LS10(device, link, NVLTLC, _NVLTLC_TX_LNK, _ERR_STATUS_0, pending);

    if (unhandled != 0)
    {
        NVSWITCH_PRINT(device, WARN,
                "%s: Unhandled NVLTLC_TX_LNK _0 interrupts, link: %d pending: 0x%x enabled: 0x%x.\n",
                 __FUNCTION__, link, pending, report.raw_enable);
        return -NVL_MORE_PROCESSING_REQUIRED;
    }

    return NVL_SUCCESS;
}

//
// Service fatal NVLTLC RX_LNK _ERR_STATUS_0 interrupts for one link
// (body continues past this span).
//
static NvlStatus
_nvswitch_service_nvltlc_rx_lnk_fatal_0_ls10
(
    nvswitch_device *device,
    NvU32 nvlipt_instance,
    NvU32 link
)
{
    NvU32 pending, bit, unhandled;
    NVSWITCH_INTERRUPT_LOG_TYPE report = { 0 };
    INFOROM_NVLINK_ERROR_EVENT error_event = { 0 };

    report.raw_pending = NVSWITCH_LINK_RD32_LS10(device, link, NVLTLC, _NVLTLC_RX_LNK, _ERR_STATUS_0);
    report.raw_enable = NVSWITCH_LINK_RD32_LS10(device, link, NVLTLC, _NVLTLC_RX_LNK, _ERR_FATAL_REPORT_EN_0);
    report.mask = report.raw_enable;
    pending = report.raw_pending & report.mask;

    error_event.nvliptInstance = (NvU8) nvlipt_instance;
    error_event.localLinkIdx = (NvU8) NVSWITCH_NVLIPT_GET_LOCAL_LINK_ID_LS10(link);

    if (pending == 0)
    {
        return -NVL_NOT_FOUND;
    }

    unhandled = pending;
    report.raw_first = NVSWITCH_LINK_RD32_LS10(device, link, NVLTLC, _NVLTLC_RX_LNK, _ERR_FIRST_0);

    bit = DRF_NUM(_NVLTLC_RX_LNK, _ERR_STATUS_0, _RXDLHDRPARITYERR, 1);
    if (nvswitch_test_flags(pending, bit))
    {
        NVSWITCH_REPORT_FATAL(_HW_NVLTLC_RXDLHDRPARITYERR, "RX DL HDR Parity Error", NV_FALSE);
        nvswitch_clear_flags(&unhandled, bit);
        {
            error_event.error = INFOROM_NVLINK_TLC_RX_DL_HDR_PARITY_ERR_FATAL;
            nvswitch_inforom_nvlink_log_error_event(device, &error_event);
        }
    }

    bit = DRF_NUM(_NVLTLC_RX_LNK, _ERR_STATUS_0, _RXDLDATAPARITYERR, 1);
    if (nvswitch_test_flags(pending, bit))
    {
        NVSWITCH_REPORT_FATAL(_HW_NVLTLC_RXDLDATAPARITYERR, "RX DL Data Parity Error", NV_FALSE);
        nvswitch_clear_flags(&unhandled, bit);
        {
            error_event.error = INFOROM_NVLINK_TLC_RX_DL_DATA_PARITY_ERR_FATAL;
            nvswitch_inforom_nvlink_log_error_event(device, &error_event);
        }
    }

    bit = DRF_NUM(_NVLTLC_RX_LNK, _ERR_STATUS_0, _RXDLCTRLPARITYERR, 1);
    if (nvswitch_test_flags(pending, bit))
    {
        NVSWITCH_REPORT_FATAL(_HW_NVLTLC_RXDLCTRLPARITYERR, "RX DL Ctrl Parity Error", NV_FALSE);
        nvswitch_clear_flags(&unhandled, bit);
        {
            error_event.error = INFOROM_NVLINK_TLC_RX_DL_CTRL_PARITY_ERR_FATAL;
            nvswitch_inforom_nvlink_log_error_event(device, &error_event);
        }
    }

    bit = DRF_NUM(_NVLTLC_RX_LNK, _ERR_STATUS_0, _RXPKTLENERR, 1);
    if (nvswitch_test_flags(pending, bit))
    {
        NVSWITCH_REPORT_FATAL(_HW_NVLTLC_RXPKTLENERR, "RX Packet Length Error", NV_FALSE);
        nvswitch_clear_flags(&unhandled, bit);
        {
            error_event.error = INFOROM_NVLINK_TLC_RX_PKTLEN_ERR_FATAL;
            nvswitch_inforom_nvlink_log_error_event(device, &error_event);
        }
    }

    bit = DRF_NUM(_NVLTLC_RX_LNK, _ERR_STATUS_0, _RSVCACHEATTRPROBEREQERR, 1);
    if (nvswitch_test_flags(pending, bit))
    {
        NVSWITCH_REPORT_FATAL(_HW_NVLTLC_RSVCACHEATTRPROBEREQERR, "RSV Packet Status Error", NV_FALSE);
        nvswitch_clear_flags(&unhandled, bit);
        {
            error_event.error = INFOROM_NVLINK_TLC_RX_RSVD_CACHE_ATTR_PROBE_REQ_ERR_FATAL;
            nvswitch_inforom_nvlink_log_error_event(device, &error_event);
        }
    }

    bit = DRF_NUM(_NVLTLC_RX_LNK, _ERR_STATUS_0, _RSVCACHEATTRPROBERSPERR, 1);
    if (nvswitch_test_flags(pending, bit))
    {
        NVSWITCH_REPORT_FATAL(_HW_NVLTLC_RSVCACHEATTRPROBERSPERR, "RSV CacheAttr Probe Rsp Error", NV_FALSE);
        nvswitch_clear_flags(&unhandled, bit);
        {
            error_event.error = INFOROM_NVLINK_TLC_RX_RSVD_CACHE_ATTR_PROBE_RSP_ERR_FATAL;
            nvswitch_inforom_nvlink_log_error_event(device, &error_event);
        }
    }

    bit = DRF_NUM(_NVLTLC_RX_LNK, _ERR_STATUS_0, _DATLENGTRMWREQMAXERR, 1);
    if (nvswitch_test_flags(pending, bit))
    {
        NVSWITCH_REPORT_FATAL(_HW_NVLTLC_DATLENGTRMWREQMAXERR, "Data Length RMW Req Max Error", NV_FALSE);
        nvswitch_clear_flags(&unhandled, bit);
        {
            error_event.error = INFOROM_NVLINK_TLC_RX_DATLEN_GT_RMW_REQ_MAX_ERR_FATAL;
            nvswitch_inforom_nvlink_log_error_event(device, &error_event);
        }
    }

    bit = DRF_NUM(_NVLTLC_RX_LNK, _ERR_STATUS_0, _DATLENLTATRRSPMINERR, 1);
    if (nvswitch_test_flags(pending, bit))
    {
        NVSWITCH_REPORT_FATAL(_HW_NVLTLC_DATLENLTATRRSPMINERR, "Data Len Lt ATR RSP Min Error", NV_FALSE);
        nvswitch_clear_flags(&unhandled, bit);
        {
            error_event.error = INFOROM_NVLINK_TLC_RX_DATLEN_LT_ATR_RSP_MIN_ERR_FATAL;
            nvswitch_inforom_nvlink_log_error_event(device, &error_event);
        }
    }

    bit = DRF_NUM(_NVLTLC_RX_LNK, _ERR_STATUS_0, _INVALIDCACHEATTRPOERR, 1);
    if (nvswitch_test_flags(pending, bit))
    {
        NVSWITCH_REPORT_FATAL(_HW_NVLTLC_INVALIDCACHEATTRPOERR, "Invalid Cache Attr PO Error", NV_FALSE);
        nvswitch_clear_flags(&unhandled, bit);
        {
            error_event.error = INFOROM_NVLINK_TLC_RX_INVALID_PO_FOR_CACHE_ATTR_FATAL;
            nvswitch_inforom_nvlink_log_error_event(device, &error_event);
        }
    }

    // NOTE(review): the two RXRSPSTATUS events below log *_NONFATAL InfoROM
    // enums from a fatal handler — presumably the InfoROM schema only defines
    // nonfatal variants for these; confirm before "fixing".
    bit = DRF_NUM(_NVLTLC_RX_LNK, _ERR_STATUS_0, _RXRSPSTATUS_HW_ERR, 1);
    if (nvswitch_test_flags(pending, bit))
    {
        NVSWITCH_REPORT_FATAL(_HW_NVLTLC_RX_LNK_RXRSPSTATUS_HW_ERR, "RX Rsp Status HW Error", NV_FALSE);
        nvswitch_clear_flags(&unhandled, bit);
        {
            error_event.error = INFOROM_NVLINK_TLC_RX_RSP_STATUS_HW_ERR_NONFATAL;
            nvswitch_inforom_nvlink_log_error_event(device, &error_event);
        }
    }

    bit = DRF_NUM(_NVLTLC_RX_LNK, _ERR_STATUS_0, _RXRSPSTATUS_UR_ERR, 1);
    if (nvswitch_test_flags(pending, bit))
    {
        NVSWITCH_REPORT_FATAL(_HW_NVLTLC_RX_LNK_RXRSPSTATUS_UR_ERR, "RX Rsp Status UR Error", NV_FALSE);
        nvswitch_clear_flags(&unhandled, bit);
        {
            error_event.error = INFOROM_NVLINK_TLC_RX_RSP_STATUS_UR_ERR_NONFATAL;
            nvswitch_inforom_nvlink_log_error_event(device, &error_event);
        }
    }

    bit = DRF_NUM(_NVLTLC_RX_LNK, _ERR_STATUS_0, _INVALID_COLLAPSED_RESPONSE_ERR, 1);
    if (nvswitch_test_flags(pending, bit))
    {
        NVSWITCH_REPORT_FATAL(_HW_NVLTLC_RX_LNK_INVALID_COLLAPSED_RESPONSE_ERR, "Invalid Collapsed Response Error", NV_FALSE);
        nvswitch_clear_flags(&unhandled, bit);
        {
            error_event.error = INFOROM_NVLINK_TLC_RX_INVALID_COLLAPSED_RESPONSE_FATAL;
            nvswitch_inforom_nvlink_log_error_event(device, &error_event);
        }
    }

NVSWITCH_UNHANDLED_CHECK(device, unhandled); 4941 4942 // Disable interrupts that have occurred after fatal error. 4943 if (device->link[link].fatal_error_occurred) 4944 { 4945 NVSWITCH_LINK_WR32_LS10(device, link, NVLTLC, _NVLTLC_RX_LNK, _ERR_FATAL_REPORT_EN_0, 4946 report.raw_enable & ~pending); 4947 } 4948 4949 if (report.raw_first & report.mask) 4950 { 4951 NVSWITCH_LINK_WR32_LS10(device, link, NVLTLC, _NVLTLC_RX_LNK, _ERR_FIRST_0, 4952 report.raw_first & report.mask); 4953 } 4954 NVSWITCH_LINK_WR32_LS10(device, link, NVLTLC, _NVLTLC_RX_LNK, _ERR_STATUS_0, pending); 4955 4956 if (unhandled != 0) 4957 { 4958 NVSWITCH_PRINT(device, WARN, 4959 "%s: Unhandled NVLTLC_RX_LNK _0 interrupts, link: %d pending: 0x%x enabled: 0x%x.\n", 4960 __FUNCTION__, link, pending, report.raw_enable); 4961 return -NVL_MORE_PROCESSING_REQUIRED; 4962 } 4963 4964 return NVL_SUCCESS; 4965 } 4966 4967 static NvlStatus 4968 _nvswitch_service_nvltlc_rx_lnk_fatal_1_ls10 4969 ( 4970 nvswitch_device *device, 4971 NvU32 nvlipt_instance, 4972 NvU32 link 4973 ) 4974 { 4975 NvU32 pending, bit, unhandled; 4976 NVSWITCH_INTERRUPT_LOG_TYPE report = { 0 }; 4977 INFOROM_NVLINK_ERROR_EVENT error_event = { 0 }; 4978 NvU32 injected; 4979 4980 report.raw_pending = NVSWITCH_LINK_RD32_LS10(device, link, NVLTLC, _NVLTLC_RX_LNK, _ERR_STATUS_1); 4981 report.raw_enable = NVSWITCH_LINK_RD32_LS10(device, link, NVLTLC, _NVLTLC_RX_LNK, _ERR_FATAL_REPORT_EN_1); 4982 report.mask = report.raw_enable; 4983 pending = report.raw_pending & report.mask; 4984 4985 error_event.nvliptInstance = (NvU8) nvlipt_instance; 4986 error_event.localLinkIdx = (NvU8) NVSWITCH_NVLIPT_GET_LOCAL_LINK_ID_LS10(link); 4987 4988 if (pending == 0) 4989 { 4990 return -NVL_NOT_FOUND; 4991 } 4992 4993 unhandled = pending; 4994 report.raw_first = NVSWITCH_LINK_RD32_LS10(device, link, NVLTLC, _NVLTLC_RX_LNK, _ERR_FIRST_1); 4995 injected = NVSWITCH_LINK_RD32_LS10(device, link, NVLTLC, _NVLTLC_RX_LNK, _ERR_REPORT_INJECT_1); 4996 4997 bit = 
DRF_NUM(_NVLTLC_RX_LNK, _ERR_STATUS_1, _RXHDROVFERR, 1); 4998 if (nvswitch_test_flags(pending, bit)) 4999 { 5000 NVSWITCH_REPORT_FATAL(_HW_NVLTLC_RXHDROVFERR, "RX HDR OVF Error", NV_FALSE); 5001 nvswitch_clear_flags(&unhandled, bit); 5002 5003 if (FLD_TEST_DRF_NUM(_NVLTLC, _RX_LNK_ERR_REPORT_INJECT_1, _RXHDROVFERR, 0x0, injected)) 5004 { 5005 error_event.error = INFOROM_NVLINK_TLC_RX_HDR_OVERFLOW_FATAL; 5006 nvswitch_inforom_nvlink_log_error_event(device, &error_event); 5007 } 5008 } 5009 5010 bit = DRF_NUM(_NVLTLC_RX_LNK, _ERR_STATUS_1, _RXDATAOVFERR, 1); 5011 if (nvswitch_test_flags(pending, bit)) 5012 { 5013 NVSWITCH_REPORT_FATAL(_HW_NVLTLC_RXDATAOVFERR, "RX Data OVF Error", NV_FALSE); 5014 nvswitch_clear_flags(&unhandled, bit); 5015 5016 if (FLD_TEST_DRF_NUM(_NVLTLC, _RX_LNK_ERR_REPORT_INJECT_1, _RXDATAOVFERR, 0x0, injected)) 5017 { 5018 error_event.error = INFOROM_NVLINK_TLC_RX_DATA_OVERFLOW_FATAL; 5019 nvswitch_inforom_nvlink_log_error_event(device, &error_event); 5020 } 5021 } 5022 5023 bit = DRF_NUM(_NVLTLC_RX_LNK, _ERR_STATUS_1, _STOMPDETERR, 1); 5024 if (nvswitch_test_flags(pending, bit)) 5025 { 5026 NVSWITCH_REPORT_FATAL(_HW_NVLTLC_STOMPDETERR, "Stomp Det Error", NV_FALSE); 5027 nvswitch_clear_flags(&unhandled, bit); 5028 5029 if (FLD_TEST_DRF_NUM(_NVLTLC, _RX_LNK_ERR_REPORT_INJECT_1, _STOMPDETERR, 0x0, injected)) 5030 { 5031 error_event.error = INFOROM_NVLINK_TLC_RX_STOMP_DETECTED_FATAL; 5032 nvswitch_inforom_nvlink_log_error_event(device, &error_event); 5033 } 5034 } 5035 5036 bit = DRF_NUM(_NVLTLC_RX_LNK, _ERR_STATUS_1, _RXPOISONERR, 1); 5037 if (nvswitch_test_flags(pending, bit)) 5038 { 5039 NVSWITCH_REPORT_FATAL(_HW_NVLTLC_RXPOISONERR, "RX Poison Error", NV_FALSE); 5040 nvswitch_clear_flags(&unhandled, bit); 5041 } 5042 5043 NVSWITCH_UNHANDLED_CHECK(device, unhandled); 5044 5045 // Disable interrupts that have occurred after fatal error. 
    if (device->link[link].fatal_error_occurred)
    {
        // A fatal error is already recorded on this link: mask the bits that
        // just fired so they cannot storm.
        NVSWITCH_LINK_WR32_LS10(device, link, NVLTLC, _NVLTLC_RX_LNK, _ERR_FATAL_REPORT_EN_1,
            report.raw_enable & ~pending);
    }

    // Acknowledge the latched first-error bits and the serviced status bits.
    if (report.raw_first & report.mask)
    {
        NVSWITCH_LINK_WR32_LS10(device, link, NVLTLC, _NVLTLC_RX_LNK, _ERR_FIRST_1,
            report.raw_first & report.mask);
    }
    NVSWITCH_LINK_WR32_LS10(device, link, NVLTLC, _NVLTLC_RX_LNK, _ERR_STATUS_1, pending);

    if (unhandled != 0)
    {
        NVSWITCH_PRINT(device, WARN,
            "%s: Unhandled NVLTLC_RX_LNK _1 interrupts, link: %d pending: 0x%x enabled: 0x%x.\n",
            __FUNCTION__, link, pending, report.raw_enable);
        return -NVL_MORE_PROCESSING_REQUIRED;
    }

    return NVL_SUCCESS;
}

//
// Fan out NVLTLC fatal interrupt servicing to every enabled link of the given
// NVLIPT instance that appears in intrLinkMask. Links that are in reset or
// whose NCISOC clock is off are skipped (their registers are inaccessible).
//
// Returns NVL_SUCCESS if any per-unit handler serviced an interrupt,
// -NVL_BAD_ARGS for an empty/invalid mask, otherwise
// -NVL_MORE_PROCESSING_REQUIRED.
//
NvlStatus
_nvswitch_service_nvltlc_fatal_ls10
(
    nvswitch_device *device,
    NvU32 nvlipt_instance,
    NvU64 intrLinkMask
)
{
    NvU64 enabledLinkMask, localLinkMask, localIntrLinkMask;
    NvU32 i;
    nvlink_link *link;
    NvlStatus status = -NVL_MORE_PROCESSING_REQUIRED;

    //
    // The passed in interruptLinkMask should contain a link that is part of the
    // given nvlipt instance
    //
    enabledLinkMask = nvswitch_get_enabled_link_mask(device);
    localLinkMask = NVSWITCH_NVLIPT_GET_LOCAL_LINK_MASK64_LS10(nvlipt_instance);
    localIntrLinkMask = localLinkMask & intrLinkMask & enabledLinkMask;

    if (localIntrLinkMask == 0)
    {
        NVSWITCH_PRINT(device, ERROR, "%s: Bad link mask provided for link interrupt servicing!\n", __FUNCTION__);
        NVSWITCH_ASSERT(0);
        return -NVL_BAD_ARGS;
    }

    FOR_EACH_INDEX_IN_MASK(64, i, localIntrLinkMask)
    {
        link = nvswitch_get_link(device, i);
        if (link == NULL)
        {
            // An interrupt on an invalid link should never occur
            NVSWITCH_ASSERT(link != NULL);
            continue;
        }

        if (NVSWITCH_GET_LINK_ENG_INST(device, i, NVLIPT) != nvlipt_instance)
        {
            NVSWITCH_ASSERT(0);
            break;
        }

        //
        // If link is in reset or NCISOC clock is off then
        // don't need to check the link for NVLTLC errors
        // as the IP's registers are off
        //
        if (nvswitch_is_link_in_reset(device, link) ||
            !nvswitch_are_link_clocks_on_ls10(device, link,NVSWITCH_PER_LINK_CLOCK_SET(NCISOCCLK)))
        {
            continue;
        }

        // Overall status is success if any of the per-unit handlers serviced
        // an interrupt on any link.
        if (_nvswitch_service_nvltlc_tx_sys_fatal_ls10(device, nvlipt_instance, i) == NVL_SUCCESS)
        {
            status = NVL_SUCCESS;
        }

        if (_nvswitch_service_nvltlc_rx_sys_fatal_ls10(device, nvlipt_instance, i) == NVL_SUCCESS)
        {
            status = NVL_SUCCESS;
        }

        if (_nvswitch_service_nvltlc_tx_lnk_fatal_0_ls10(device, nvlipt_instance, i) == NVL_SUCCESS)
        {
            status = NVL_SUCCESS;
        }

        if (_nvswitch_service_nvltlc_rx_lnk_fatal_0_ls10(device, nvlipt_instance, i) == NVL_SUCCESS)
        {
            status = NVL_SUCCESS;
        }

        if (_nvswitch_service_nvltlc_rx_lnk_fatal_1_ls10(device, nvlipt_instance, i) == NVL_SUCCESS)
        {
            status = NVL_SUCCESS;
        }
    }
    FOR_EACH_INDEX_IN_MASK_END;

    return status;
}

//
// Service NVLIPT_COMMON fatal interrupts for one NVLIPT instance. Only the
// _CLKCTL_ILLEGAL_REQUEST bit is serviced here (see report.mask below); it is
// reported as a containment event against every valid link of the instance
// and logged once to the InfoROM.
//
static NvlStatus
_nvswitch_service_nvlipt_common_fatal_ls10
(
    nvswitch_device *device,
    NvU32 instance
)
{
    NVSWITCH_INTERRUPT_LOG_TYPE report = { 0 };
    NvU32 pending, bit, contain, unhandled;
    NvU32 link, local_link_idx;
    INFOROM_NVLINK_ERROR_EVENT error_event = { 0 };

    report.raw_pending = NVSWITCH_ENG_RD32(device, NVLIPT, , instance, _NVLIPT_COMMON, _ERR_STATUS_0);
    report.raw_enable = NVSWITCH_ENG_RD32(device, NVLIPT, , instance, _NVLIPT_COMMON, _ERR_FATAL_REPORT_EN_0);
    // Restrict servicing to the CLKCTL_ILLEGAL_REQUEST bit.
    report.mask = report.raw_enable & (DRF_NUM(_NVLIPT_COMMON, _ERR_STATUS_0, _CLKCTL_ILLEGAL_REQUEST, 1));

    pending = report.raw_pending & report.mask;
    if (pending == 0)
    {
        return -NVL_NOT_FOUND;
    }

    error_event.nvliptInstance = (NvU8) instance;

    unhandled = pending;
    report.raw_first = NVSWITCH_ENG_RD32(device, NVLIPT, , instance, _NVLIPT_COMMON, _ERR_FIRST_0);
    contain = NVSWITCH_ENG_RD32(device, NVLIPT, , instance, _NVLIPT_COMMON, _ERR_CONTAIN_EN_0);

    bit = DRF_NUM(_NVLIPT_COMMON, _ERR_STATUS_0, _CLKCTL_ILLEGAL_REQUEST, 1);
    if (nvswitch_test_flags(pending, bit))
    {
        // Report the containment event against every valid link of this instance.
        for (local_link_idx = 0; local_link_idx < NVSWITCH_LINKS_PER_NVLIPT_LS10; local_link_idx++)
        {
            link = (instance * NVSWITCH_LINKS_PER_NVLIPT_LS10) + local_link_idx;
            if (nvswitch_is_link_valid(device, link))
            {
                NVSWITCH_REPORT_CONTAIN(_HW_NVLIPT_CLKCTL_ILLEGAL_REQUEST, "CLKCTL_ILLEGAL_REQUEST", NV_FALSE);
            }
        }

        nvswitch_clear_flags(&unhandled, bit);
        {
            error_event.error = INFOROM_NVLINK_NVLIPT_CLKCTL_ILLEGAL_REQUEST_FATAL;
            nvswitch_inforom_nvlink_log_error_event(device, &error_event);
        }
    }

    NVSWITCH_UNHANDLED_CHECK(device, unhandled);

    // Disable interrupts that have occurred after fatal error.
    // One write covers the whole instance, so break after the first valid
    // link that has recorded a fatal error.
    for (local_link_idx = 0; local_link_idx < NVSWITCH_LINKS_PER_NVLIPT_LS10; local_link_idx++)
    {
        link = (instance * NVSWITCH_LINKS_PER_NVLIPT_LS10) + local_link_idx;
        if (nvswitch_is_link_valid(device, link) &&
            (device->link[link].fatal_error_occurred))
        {
            NVSWITCH_ENG_WR32(device, NVLIPT, , instance, _NVLIPT_COMMON, _ERR_FATAL_REPORT_EN_0,
                report.raw_enable & ~pending);
            break;
        }
    }

    // clear the interrupts
    if (report.raw_first & report.mask)
    {
        NVSWITCH_ENG_WR32(device, NVLIPT, , instance, _NVLIPT_COMMON, _ERR_FIRST_0,
            report.raw_first & report.mask);
    }
    NVSWITCH_ENG_WR32(device, NVLIPT, , instance, _NVLIPT_COMMON, _ERR_STATUS_0, pending);

    if (unhandled != 0)
    {
        NVSWITCH_PRINT(device, WARN,
            "%s: Unhandled NVLIPT_COMMON FATAL interrupts, pending: 0x%x enabled: 0x%x.\n",
            __FUNCTION__, pending, report.raw_enable);
        return -NVL_MORE_PROCESSING_REQUIRED;
    }

    return NVL_SUCCESS;
}

//
// Service fatal interrupts for one NXBAR tile: report every pending error
// selected by the SW interrupt mask, then mask and acknowledge the serviced
// bits.
//
static NvlStatus
_nvswitch_service_nxbar_tile_ls10
(
    nvswitch_device *device,
    NvU32 tile
)
{
    ls10_device *chip_device = NVSWITCH_GET_CHIP_DEVICE_LS10(device);
    NvU32 pending, bit, unhandled;
    NVSWITCH_INTERRUPT_LOG_TYPE report = { 0 };
    // Alias of the tile index; presumably the NVSWITCH_REPORT_FATAL macro
    // references a local named `link` -- TODO confirm before removing.
    NvU32 link = tile;

    report.raw_pending = NVSWITCH_TILE_RD32(device, tile, _NXBAR_TILE, _ERR_STATUS);
    report.raw_enable = NVSWITCH_TILE_RD32(device, tile, _NXBAR_TILE, _ERR_FATAL_INTR_EN);
    report.mask = chip_device->intr_mask.tile.fatal;
    pending = report.raw_pending & report.mask;

    if (pending == 0)
    {
        return -NVL_NOT_FOUND;
    }

    unhandled = pending;
    report.raw_first = NVSWITCH_TILE_RD32(device, tile, _NXBAR_TILE, _ERR_FIRST);

    bit = DRF_NUM(_NXBAR_TILE, _ERR_STATUS, _INGRESS_BUFFER_OVERFLOW, 1);
    if (nvswitch_test_flags(pending, bit))
    {
NVSWITCH_REPORT_FATAL(_HW_NXBAR_TILE_INGRESS_BUFFER_OVERFLOW, "ingress SRC-VC buffer overflow", NV_TRUE); 5265 nvswitch_clear_flags(&unhandled, bit); 5266 } 5267 5268 bit = DRF_NUM(_NXBAR_TILE, _ERR_STATUS, _INGRESS_BUFFER_UNDERFLOW, 1); 5269 if (nvswitch_test_flags(pending, bit)) 5270 { 5271 NVSWITCH_REPORT_FATAL(_HW_NXBAR_TILE_INGRESS_BUFFER_UNDERFLOW, "ingress SRC-VC buffer underflow", NV_TRUE); 5272 nvswitch_clear_flags(&unhandled, bit); 5273 } 5274 5275 bit = DRF_NUM(_NXBAR_TILE, _ERR_STATUS, _EGRESS_CREDIT_OVERFLOW, 1); 5276 if (nvswitch_test_flags(pending, bit)) 5277 { 5278 NVSWITCH_REPORT_FATAL(_HW_NXBAR_TILE_EGRESS_CREDIT_OVERFLOW, "egress DST-VC credit overflow", NV_TRUE); 5279 nvswitch_clear_flags(&unhandled, bit); 5280 } 5281 5282 bit = DRF_NUM(_NXBAR_TILE, _ERR_STATUS, _EGRESS_CREDIT_UNDERFLOW, 1); 5283 if (nvswitch_test_flags(pending, bit)) 5284 { 5285 NVSWITCH_REPORT_FATAL(_HW_NXBAR_TILE_EGRESS_CREDIT_UNDERFLOW, "egress DST-VC credit underflow", NV_TRUE); 5286 nvswitch_clear_flags(&unhandled, bit); 5287 } 5288 5289 bit = DRF_NUM(_NXBAR_TILE, _ERR_STATUS, _INGRESS_NON_BURSTY_PKT, 1); 5290 if (nvswitch_test_flags(pending, bit)) 5291 { 5292 NVSWITCH_REPORT_FATAL(_HW_NXBAR_TILE_INGRESS_NON_BURSTY_PKT, "ingress packet burst error", NV_TRUE); 5293 nvswitch_clear_flags(&unhandled, bit); 5294 } 5295 5296 bit = DRF_NUM(_NXBAR_TILE, _ERR_STATUS, _INGRESS_NON_STICKY_PKT, 1); 5297 if (nvswitch_test_flags(pending, bit)) 5298 { 5299 NVSWITCH_REPORT_FATAL(_HW_NXBAR_TILE_INGRESS_NON_STICKY_PKT, "ingress packet sticky error", NV_TRUE); 5300 nvswitch_clear_flags(&unhandled, bit); 5301 } 5302 5303 bit = DRF_NUM(_NXBAR_TILE, _ERR_STATUS, _INGRESS_BURST_GT_9_DATA_VC, 1); 5304 if (nvswitch_test_flags(pending, bit)) 5305 { 5306 NVSWITCH_REPORT_FATAL(_HW_NXBAR_TILE_INGRESS_BURST_GT_9_DATA_VC, "possible bubbles at ingress", NV_TRUE); 5307 nvswitch_clear_flags(&unhandled, bit); 5308 } 5309 5310 bit = DRF_NUM(_NXBAR_TILE, _ERR_STATUS, _INGRESS_PKT_INVALID_DST, 1); 5311 if 
(nvswitch_test_flags(pending, bit)) 5312 { 5313 NVSWITCH_REPORT_FATAL(_HW_NXBAR_TILE_INGRESS_PKT_INVALID_DST, "ingress packet invalid dst error", NV_TRUE); 5314 nvswitch_clear_flags(&unhandled, bit); 5315 } 5316 5317 bit = DRF_NUM(_NXBAR_TILE, _ERR_STATUS, _INGRESS_PKT_PARITY_ERROR, 1); 5318 if (nvswitch_test_flags(pending, bit)) 5319 { 5320 NVSWITCH_REPORT_FATAL(_HW_NXBAR_TILE_INGRESS_PKT_PARITY_ERROR, "ingress packet parity error", NV_TRUE); 5321 nvswitch_clear_flags(&unhandled, bit); 5322 } 5323 5324 if (report.raw_first & report.mask) 5325 { 5326 NVSWITCH_TILE_WR32(device, tile, _NXBAR_TILE, _ERR_FIRST, 5327 report.raw_first & report.mask); 5328 } 5329 5330 NVSWITCH_UNHANDLED_CHECK(device, unhandled); 5331 5332 // Disable interrupts that have occurred after fatal error. 5333 // This helps prevent an interrupt storm if HW keeps triggering unnecessary stream of interrupts. 5334 NVSWITCH_TILE_WR32(device, tile, _NXBAR_TILE, _ERR_FATAL_INTR_EN, 5335 report.raw_enable & ~pending); 5336 5337 NVSWITCH_TILE_WR32(device, link, _NXBAR_TILE, _ERR_STATUS, pending); 5338 5339 if (unhandled != 0) 5340 { 5341 return -NVL_MORE_PROCESSING_REQUIRED; 5342 } 5343 5344 return NVL_SUCCESS; 5345 } 5346 5347 static NvlStatus 5348 _nvswitch_service_nxbar_tileout_ls10 5349 ( 5350 nvswitch_device *device, 5351 NvU32 tileout 5352 ) 5353 { 5354 ls10_device *chip_device = NVSWITCH_GET_CHIP_DEVICE_LS10(device); 5355 NvU32 pending, bit, unhandled; 5356 NVSWITCH_INTERRUPT_LOG_TYPE report = { 0 }; 5357 NvU32 link = tileout; 5358 5359 report.raw_pending = NVSWITCH_TILEOUT_RD32(device, tileout, _NXBAR_TILEOUT, _ERR_STATUS); 5360 report.raw_enable = NVSWITCH_TILEOUT_RD32(device, tileout, _NXBAR_TILEOUT, _ERR_FATAL_INTR_EN); 5361 report.mask = chip_device->intr_mask.tileout.fatal; 5362 pending = report.raw_pending & report.mask; 5363 5364 if (pending == 0) 5365 { 5366 return -NVL_NOT_FOUND; 5367 } 5368 5369 unhandled = pending; 5370 report.raw_first = NVSWITCH_TILEOUT_RD32(device, tileout, 
_NXBAR_TILEOUT, _ERR_FIRST); 5371 5372 bit = DRF_NUM(_NXBAR_TILEOUT, _ERR_STATUS, _INGRESS_BUFFER_OVERFLOW, 1); 5373 if (nvswitch_test_flags(pending, bit)) 5374 { 5375 NVSWITCH_REPORT_FATAL(_HW_NXBAR_TILEOUT_INGRESS_BUFFER_OVERFLOW, "ingress SRC-VC buffer overflow", NV_TRUE); 5376 nvswitch_clear_flags(&unhandled, bit); 5377 } 5378 5379 bit = DRF_NUM(_NXBAR_TILEOUT, _ERR_STATUS, _INGRESS_BUFFER_UNDERFLOW, 1); 5380 if (nvswitch_test_flags(pending, bit)) 5381 { 5382 NVSWITCH_REPORT_FATAL(_HW_NXBAR_TILEOUT_INGRESS_BUFFER_UNDERFLOW, "ingress SRC-VC buffer underflow", NV_TRUE); 5383 nvswitch_clear_flags(&unhandled, bit); 5384 } 5385 5386 bit = DRF_NUM(_NXBAR_TILEOUT, _ERR_STATUS, _EGRESS_CREDIT_OVERFLOW, 1); 5387 if (nvswitch_test_flags(pending, bit)) 5388 { 5389 NVSWITCH_REPORT_FATAL(_HW_NXBAR_TILEOUT_EGRESS_CREDIT_OVERFLOW, "egress DST-VC credit overflow", NV_TRUE); 5390 nvswitch_clear_flags(&unhandled, bit); 5391 } 5392 5393 bit = DRF_NUM(_NXBAR_TILEOUT, _ERR_STATUS, _EGRESS_CREDIT_UNDERFLOW, 1); 5394 if (nvswitch_test_flags(pending, bit)) 5395 { 5396 NVSWITCH_REPORT_FATAL(_HW_NXBAR_TILEOUT_EGRESS_CREDIT_UNDERFLOW, "egress DST-VC credit underflow", NV_TRUE); 5397 nvswitch_clear_flags(&unhandled, bit); 5398 } 5399 5400 bit = DRF_NUM(_NXBAR_TILEOUT, _ERR_STATUS, _INGRESS_NON_BURSTY_PKT, 1); 5401 if (nvswitch_test_flags(pending, bit)) 5402 { 5403 NVSWITCH_REPORT_FATAL(_HW_NXBAR_TILEOUT_INGRESS_NON_BURSTY_PKT, "ingress packet burst error", NV_TRUE); 5404 nvswitch_clear_flags(&unhandled, bit); 5405 } 5406 5407 bit = DRF_NUM(_NXBAR_TILEOUT, _ERR_STATUS, _INGRESS_NON_STICKY_PKT, 1); 5408 if (nvswitch_test_flags(pending, bit)) 5409 { 5410 NVSWITCH_REPORT_FATAL(_HW_NXBAR_TILEOUT_INGRESS_NON_STICKY_PKT, "ingress packet sticky error", NV_TRUE); 5411 nvswitch_clear_flags(&unhandled, bit); 5412 } 5413 5414 bit = DRF_NUM(_NXBAR_TILEOUT, _ERR_STATUS, _INGRESS_BURST_GT_9_DATA_VC, 1); 5415 if (nvswitch_test_flags(pending, bit)) 5416 { 5417 
        NVSWITCH_REPORT_FATAL(_HW_NXBAR_TILEOUT_INGRESS_BURST_GT_9_DATA_VC, "possible bubbles at ingress", NV_TRUE);
        nvswitch_clear_flags(&unhandled, bit);
    }

    bit = DRF_NUM(_NXBAR_TILEOUT, _ERR_STATUS, _EGRESS_CDT_PARITY_ERROR, 1);
    if (nvswitch_test_flags(pending, bit))
    {
        // NOTE(review): message text says "ingress" but the status bit is
        // _EGRESS_CDT_PARITY_ERROR -- possibly a copy/paste slip; confirm
        // against the register manual before changing the log text.
        NVSWITCH_REPORT_FATAL(_HW_NXBAR_TILEOUT_EGRESS_CDT_PARITY_ERROR, "ingress credit parity error", NV_TRUE);
        nvswitch_clear_flags(&unhandled, bit);
    }

    // Acknowledge the latched first-error bits.
    if (report.raw_first & report.mask)
    {
        NVSWITCH_TILEOUT_WR32(device, tileout, _NXBAR_TILEOUT, _ERR_FIRST,
            report.raw_first & report.mask);
    }

    NVSWITCH_UNHANDLED_CHECK(device, unhandled);

    // Disable interrupts that have occurred after fatal error.
    // This helps prevent an interrupt storm if HW keeps triggering unnecessary stream of interrupts.
    NVSWITCH_TILEOUT_WR32(device, tileout, _NXBAR_TILEOUT, _ERR_FATAL_INTR_EN,
        report.raw_enable & ~pending);

    NVSWITCH_TILEOUT_WR32(device, tileout, _NXBAR_TILEOUT, _ERR_STATUS, pending);

    if (unhandled != 0)
    {
        return -NVL_MORE_PROCESSING_REQUIRED;
    }

    return NVL_SUCCESS;
}

//
// Service fatal interrupts for one NXBAR instance by dispatching each
// asserted TILE/TILEOUT bit of _TCP_ERROR_STATUS to the matching per-tile
// service routine.
//
static NvlStatus
_nvswitch_service_nxbar_fatal_ls10
(
    nvswitch_device *device,
    NvU32 nxbar
)
{
    NvU32 pending, bit, unhandled;
    NvU32 tile_idx;
    NvU32 tile, tileout;

    pending = NVSWITCH_ENG_RD32(device, NXBAR, , nxbar, _NXBAR, _TCP_ERROR_STATUS);
    if (pending == 0)
    {
        return -NVL_NOT_FOUND;
    }

    unhandled = pending;

    // _TILE0 is the base bit position; shifting selects each tile's bit.
    for (tile = 0; tile < NUM_NXBAR_TILES_PER_TC_LS10; tile++)
    {
        bit = DRF_NUM(_NXBAR, _TCP_ERROR_STATUS, _TILE0, 1) << tile;
        if (nvswitch_test_flags(pending, bit))
        {
            tile_idx = TILE_INDEX_LS10(device, nxbar, tile);
            if (NVSWITCH_ENG_VALID_LS10(device, TILE, tile_idx))
            {
                if (_nvswitch_service_nxbar_tile_ls10(device, tile_idx) == NVL_SUCCESS)
                {
                    nvswitch_clear_flags(&unhandled, bit);
                }
            }
        }
    }

    for (tileout = 0; tileout < NUM_NXBAR_TILEOUTS_PER_TC_LS10; tileout++)
    {
        bit = DRF_NUM(_NXBAR, _TCP_ERROR_STATUS, _TILEOUT0, 1) << tileout;
        if (nvswitch_test_flags(pending, bit))
        {
            tile_idx = TILE_INDEX_LS10(device, nxbar, tileout);
            if (NVSWITCH_ENG_VALID_LS10(device, TILEOUT, tile_idx))
            {
                if (_nvswitch_service_nxbar_tileout_ls10(device, tile_idx) == NVL_SUCCESS)
                {
                    nvswitch_clear_flags(&unhandled, bit);
                }
            }
        }
    }

    // TODO: Perform hot_reset to recover NXBAR

    NVSWITCH_UNHANDLED_CHECK(device, unhandled);


    if (unhandled != 0)
    {
        return -NVL_MORE_PROCESSING_REQUIRED;
    }

    return NVL_SUCCESS;
}

//
// Emit (report + InfoROM-log) the deferred fatal NVLDL interrupts cached for
// this link in deferredLinkErrors. Only LTSSM fault-up/down bits are handled.
//
static void
_nvswitch_emit_link_errors_nvldl_fatal_link_ls10
(
    nvswitch_device *device,
    NvU32 nvlipt_instance,
    NvU32 link
)
{
    ls10_device *chip_device = NVSWITCH_GET_CHIP_DEVICE_LS10(device);
    NVSWITCH_INTERRUPT_LOG_TYPE report = { 0 };
    NvU32 pending, bit;
    INFOROM_NVLINK_ERROR_EVENT error_event;

    // Only enabled link errors are deferred
    pending = chip_device->deferredLinkErrors[link].data.fatalIntrMask.dl;
    report.raw_pending = pending;
    report.raw_enable = pending;
    report.mask = report.raw_enable;

    error_event.nvliptInstance = (NvU8) nvlipt_instance;
    error_event.localLinkIdx = (NvU8) NVSWITCH_NVLIPT_GET_LOCAL_LINK_ID_LS10(link);

    bit = DRF_NUM(_NVLDL_TOP, _INTR, _LTSSM_FAULT_UP, 1);
    if (nvswitch_test_flags(pending, bit))
    {
        NVSWITCH_REPORT_FATAL(_HW_DLPL_LTSSM_FAULT_UP, "LTSSM Fault Up", NV_FALSE);
        error_event.error = INFOROM_NVLINK_DL_LTSSM_FAULT_UP_FATAL;
        nvswitch_inforom_nvlink_log_error_event(device, &error_event);
    }

    bit = DRF_NUM(_NVLDL_TOP, _INTR, _LTSSM_FAULT_DOWN, 1);
    if (nvswitch_test_flags(pending, bit))
    {
5548 NVSWITCH_REPORT_FATAL(_HW_DLPL_LTSSM_FAULT_DOWN, "LTSSM Fault Down", NV_FALSE); 5549 } 5550 } 5551 5552 static void 5553 _nvswitch_dump_minion_ali_debug_registers_ls10 5554 ( 5555 nvswitch_device *device, 5556 NvU32 link 5557 ) 5558 { 5559 NVSWITCH_MINION_ALI_DEBUG_REGISTERS params; 5560 nvlink_link *nvlink = nvswitch_get_link(device, link); 5561 5562 if ((nvlink != NULL) && 5563 (nvswitch_minion_get_ali_debug_registers_ls10(device, nvlink, ¶ms) == NVL_SUCCESS)) 5564 { 5565 NVSWITCH_PRINT(device, ERROR, 5566 "%s: Minion error on link #%d!:\n" 5567 "Minion DLSTAT MN00 = 0x%x\n" 5568 "Minion DLSTAT UC01 = 0x%x\n" 5569 "Minion DLSTAT UC01 = 0x%x\n", 5570 __FUNCTION__, link, 5571 params.dlstatMn00, params.dlstatUc01, params.dlstatLinkIntr); 5572 } 5573 } 5574 5575 static void 5576 _nvswitch_emit_link_errors_minion_fatal_ls10 5577 ( 5578 nvswitch_device *device, 5579 NvU32 nvlipt_instance, 5580 NvU32 link 5581 ) 5582 { 5583 ls10_device *chip_device = NVSWITCH_GET_CHIP_DEVICE_LS10(device); 5584 NVSWITCH_INTERRUPT_LOG_TYPE report = { 0 }; 5585 NvU32 regData; 5586 NvU32 enabledLinks; 5587 NvU32 localLinkIdx = NVSWITCH_NVLIPT_GET_LOCAL_LINK_ID_LS10(link); 5588 NvU32 bit = BIT(localLinkIdx); 5589 5590 if (!chip_device->deferredLinkErrors[link].data.fatalIntrMask.minionLinkIntr.bPending) 5591 { 5592 return; 5593 } 5594 5595 // Grab the cached interrupt data 5596 regData = chip_device->deferredLinkErrors[link].data.fatalIntrMask.minionLinkIntr.regData; 5597 5598 // get all possible interrupting links associated with this minion 5599 report.raw_enable = link; 5600 report.raw_pending = report.raw_enable; 5601 report.mask = report.raw_enable; 5602 report.data[0] = regData; 5603 5604 switch(DRF_VAL(_MINION, _NVLINK_LINK_INTR, _CODE, regData)) 5605 { 5606 case NV_MINION_NVLINK_LINK_INTR_CODE_NA: 5607 NVSWITCH_REPORT_FATAL(_HW_MINION_FATAL_LINK_INTR, "Minion Link NA interrupt", NV_FALSE); 5608 break; 5609 case NV_MINION_NVLINK_LINK_INTR_CODE_DLCMDFAULT: 5610 
NVSWITCH_REPORT_FATAL(_HW_MINION_FATAL_LINK_INTR, "Minion Link DLCMDFAULT interrupt", NV_FALSE); 5611 break; 5612 case NV_MINION_NVLINK_LINK_INTR_CODE_NOINIT: 5613 NVSWITCH_REPORT_FATAL(_HW_MINION_FATAL_LINK_INTR, "Minion Link NOINIT interrupt", NV_FALSE); 5614 break; 5615 case NV_MINION_NVLINK_LINK_INTR_CODE_LOCAL_CONFIG_ERR: 5616 NVSWITCH_REPORT_FATAL(_HW_MINION_FATAL_LINK_INTR, "Minion Link Local-Config-Error interrupt", NV_FALSE); 5617 break; 5618 case NV_MINION_NVLINK_LINK_INTR_CODE_NEGOTIATION_CONFIG_ERR: 5619 NVSWITCH_REPORT_FATAL(_HW_MINION_FATAL_LINK_INTR, "Minion Link Negotiation Config Err Interrupt", NV_FALSE); 5620 break; 5621 case NV_MINION_NVLINK_LINK_INTR_CODE_BADINIT: 5622 NVSWITCH_REPORT_FATAL(_HW_MINION_FATAL_LINK_INTR, "Minion Link BADINIT interrupt", NV_FALSE); 5623 break; 5624 case NV_MINION_NVLINK_LINK_INTR_CODE_PMFAIL: 5625 NVSWITCH_REPORT_FATAL(_HW_MINION_FATAL_LINK_INTR, "Minion Link PMFAIL interrupt", NV_FALSE); 5626 break; 5627 default: 5628 NVSWITCH_REPORT_FATAL(_HW_MINION_FATAL_LINK_INTR, "Minion Interrupt code unknown", NV_FALSE); 5629 } 5630 5631 // Fatal error was hit so disable the interrupt 5632 regData = NVSWITCH_MINION_RD32_LS10(device, nvlipt_instance, _MINION, _MINION_INTR_STALL_EN); 5633 enabledLinks = DRF_VAL(_MINION, _MINION_INTR_STALL_EN, _LINK, regData); 5634 enabledLinks &= ~bit; 5635 regData = DRF_NUM(_MINION, _MINION_INTR_STALL_EN, _LINK, enabledLinks); 5636 NVSWITCH_MINION_LINK_WR32_LS10(device, link, _MINION, _MINION_INTR_STALL_EN, regData); 5637 5638 _nvswitch_dump_minion_ali_debug_registers_ls10(device, link); 5639 } 5640 5641 static void 5642 _nvswitch_emit_link_errors_minion_nonfatal_ls10 5643 ( 5644 nvswitch_device *device, 5645 NvU32 nvlipt_instance, 5646 NvU32 link 5647 ) 5648 { 5649 ls10_device *chip_device = NVSWITCH_GET_CHIP_DEVICE_LS10(device); 5650 NVSWITCH_INTERRUPT_LOG_TYPE report = { 0 }; 5651 NvU32 regData; 5652 NvU32 localLinkIdx = NVSWITCH_NVLIPT_GET_LOCAL_LINK_ID_LS10(link); 5653 NvU32 bit = 
BIT(localLinkIdx); 5654 5655 if (!chip_device->deferredLinkErrors[link].data.nonFatalIntrMask.minionLinkIntr.bPending) 5656 { 5657 return; 5658 } 5659 5660 // read in the enaled minion interrupts on this minion 5661 regData = NVSWITCH_MINION_RD32_LS10(device, nvlipt_instance, _MINION, _MINION_INTR_STALL_EN); 5662 5663 // Grab the cached interrupt data 5664 regData = chip_device->deferredLinkErrors[link].data.nonFatalIntrMask.minionLinkIntr.regData; 5665 5666 // get all possible interrupting links associated with this minion 5667 report.raw_enable = link; 5668 report.raw_pending = report.raw_enable; 5669 report.mask = report.raw_enable; 5670 report.data[0] = regData; 5671 5672 switch(DRF_VAL(_MINION, _NVLINK_LINK_INTR, _CODE, regData)) 5673 { 5674 case NV_MINION_NVLINK_LINK_INTR_CODE_DLREQ: 5675 NVSWITCH_REPORT_NONFATAL(_HW_MINION_NONFATAL, "Minion Link DLREQ interrupt"); 5676 break; 5677 case NV_MINION_NVLINK_LINK_INTR_CODE_PMDISABLED: 5678 NVSWITCH_REPORT_NONFATAL(_HW_MINION_NONFATAL, "Minion Link PMDISABLED interrupt"); 5679 break; 5680 case NV_MINION_NVLINK_LINK_INTR_CODE_DLCMDFAULT: 5681 NVSWITCH_REPORT_FATAL(_HW_MINION_FATAL_LINK_INTR, "Minion Link DLCMDFAULT interrupt", NV_FALSE); 5682 break; 5683 case NV_MINION_NVLINK_LINK_INTR_CODE_TLREQ: 5684 NVSWITCH_REPORT_NONFATAL(_HW_MINION_NONFATAL, "Minion Link TLREQ interrupt"); 5685 break; 5686 } 5687 5688 _nvswitch_dump_minion_ali_debug_registers_ls10(device, link); 5689 } 5690 5691 static void 5692 _nvswitch_emit_link_errors_nvldl_nonfatal_link_ls10 5693 ( 5694 nvswitch_device *device, 5695 NvU32 link 5696 ) 5697 { 5698 ls10_device *chip_device = NVSWITCH_GET_CHIP_DEVICE_LS10(device); 5699 NVSWITCH_INTERRUPT_LOG_TYPE report = { 0 }; 5700 NvU32 pending, bit, reg; 5701 5702 // Only enabled link errors are deffered 5703 pending = chip_device->deferredLinkErrors[link].data.nonFatalIntrMask.dl; 5704 report.raw_pending = pending; 5705 report.raw_enable = pending; 5706 report.mask = report.raw_enable; 5707 5708 bit = 
DRF_NUM(_NVLDL_TOP, _INTR, _RX_SHORT_ERROR_RATE, 1); 5709 if (nvswitch_test_flags(pending, bit)) 5710 { 5711 // Disable further interrupts 5712 nvlink_link *nvlink = nvswitch_get_link(device, link); 5713 if (nvlink == NULL) 5714 { 5715 // If we get here, it is a bug. Disable interrupt and assert. 5716 reg = NVSWITCH_LINK_RD32_LS10(device, link, NVLDL, _NVLDL_TOP, _INTR_NONSTALL_EN); 5717 reg = FLD_SET_DRF_NUM(_NVLDL_TOP, _INTR_NONSTALL_EN, _RX_SHORT_ERROR_RATE, 0, reg); 5718 NVSWITCH_LINK_WR32_LS10(device, link, NVLDL, _NVLDL_TOP, _INTR_NONSTALL_EN, reg); 5719 NVSWITCH_ASSERT(nvlink != NULL); 5720 } 5721 else 5722 { 5723 nvlink->errorThreshold.bInterruptTrigerred = NV_TRUE; 5724 nvswitch_configure_error_rate_threshold_interrupt_ls10(nvlink, NV_FALSE); 5725 NVSWITCH_REPORT_NONFATAL(_HW_DLPL_RX_SHORT_ERROR_RATE, "RX Short Error Rate"); 5726 } 5727 } 5728 5729 bit = DRF_NUM(_NVLDL_TOP, _INTR, _RX_CRC_COUNTER, 1); 5730 if (nvswitch_test_flags(pending, bit)) 5731 { 5732 NVSWITCH_REPORT_NONFATAL(_HW_DLPL_RX_CRC_COUNTER, "RX CRC Error Rate"); 5733 } 5734 } 5735 5736 static void 5737 _nvswitch_emit_link_errors_nvltlc_rx_lnk_nonfatal_1_ls10 5738 ( 5739 nvswitch_device *device, 5740 NvU32 nvlipt_instance, 5741 NvU32 link 5742 ) 5743 { 5744 ls10_device *chip_device = NVSWITCH_GET_CHIP_DEVICE_LS10(device); 5745 NVSWITCH_INTERRUPT_LOG_TYPE report = { 0 }; 5746 NvU32 pending, bit; 5747 INFOROM_NVLINK_ERROR_EVENT error_event; 5748 NvU32 injected; 5749 5750 // Only enabled link errors are deffered 5751 pending = chip_device->deferredLinkErrors[link].data.nonFatalIntrMask.tlcRx1; 5752 injected = chip_device->deferredLinkErrors[link].data.nonFatalIntrMask.tlcRx1Injected; 5753 report.raw_pending = pending; 5754 report.raw_enable = pending; 5755 report.mask = report.raw_enable; 5756 5757 error_event.nvliptInstance = (NvU8) nvlipt_instance; 5758 error_event.localLinkIdx = (NvU8) NVSWITCH_NVLIPT_GET_LOCAL_LINK_ID_LS10(link); 5759 5760 5761 bit = DRF_NUM(_NVLTLC_RX_LNK, _ERR_STATUS_1, 
_HEARTBEAT_TIMEOUT_ERR, 1); 5762 if (nvswitch_test_flags(pending, bit)) 5763 { 5764 NVSWITCH_REPORT_NONFATAL(_HW_NVLTLC_RX_LNK_AN1_HEARTBEAT_TIMEOUT_ERR, "AN1 Heartbeat Timeout Error"); 5765 5766 if (FLD_TEST_DRF_NUM(_NVLTLC_RX_LNK, _ERR_REPORT_INJECT_1, _HEARTBEAT_TIMEOUT_ERR, 0x0, injected)) 5767 { 5768 error_event.error = INFOROM_NVLINK_TLC_RX_AN1_HEARTBEAT_TIMEOUT_NONFATAL; 5769 nvswitch_inforom_nvlink_log_error_event(device, &error_event); 5770 } 5771 } 5772 } 5773 5774 static void 5775 _nvswitch_emit_link_errors_nvlipt_lnk_nonfatal_ls10 5776 ( 5777 nvswitch_device *device, 5778 NvU32 nvlipt_instance, 5779 NvU32 link 5780 ) 5781 { 5782 ls10_device *chip_device = NVSWITCH_GET_CHIP_DEVICE_LS10(device); 5783 NVSWITCH_INTERRUPT_LOG_TYPE report = { 0 }; 5784 NvU32 pending, bit; 5785 INFOROM_NVLINK_ERROR_EVENT error_event; 5786 5787 // Only enabled link errors are deffered 5788 pending = chip_device->deferredLinkErrors[link].data.nonFatalIntrMask.liptLnk; 5789 report.raw_pending = pending; 5790 report.raw_enable = pending; 5791 report.mask = report.raw_enable; 5792 5793 error_event.nvliptInstance = (NvU8) nvlipt_instance; 5794 error_event.localLinkIdx = (NvU8) NVSWITCH_NVLIPT_GET_LOCAL_LINK_ID_LS10(link); 5795 5796 bit = DRF_NUM(_NVLIPT_LNK, _ERR_STATUS_0, _FAILEDMINIONREQUEST, 1); 5797 if (nvswitch_test_flags(pending, bit)) 5798 { 5799 NVSWITCH_REPORT_NONFATAL(_HW_NVLIPT_LNK_FAILEDMINIONREQUEST, "_FAILEDMINIONREQUEST"); 5800 5801 { 5802 error_event.error = INFOROM_NVLINK_NVLIPT_FAILED_MINION_REQUEST_NONFATAL; 5803 nvswitch_inforom_nvlink_log_error_event(device, &error_event); 5804 } 5805 } 5806 } 5807 5808 static void 5809 _nvswitch_emit_deferred_link_errors_ls10 5810 ( 5811 nvswitch_device *device, 5812 NvU32 nvlipt_instance, 5813 NvU32 link 5814 ) 5815 { 5816 _nvswitch_emit_link_errors_nvldl_fatal_link_ls10(device, nvlipt_instance, link); 5817 _nvswitch_emit_link_errors_nvldl_nonfatal_link_ls10(device, link); 5818 
    _nvswitch_emit_link_errors_nvltlc_rx_lnk_nonfatal_1_ls10(device, nvlipt_instance, link);
    _nvswitch_emit_link_errors_nvlipt_lnk_nonfatal_ls10(device, nvlipt_instance, link);
    _nvswitch_emit_link_errors_minion_fatal_ls10(device, nvlipt_instance, link);
    _nvswitch_emit_link_errors_minion_nonfatal_ls10(device, nvlipt_instance, link);

}

//
// Reset the cached deferred-error data for this link to all-zero.
//
static void
_nvswitch_clear_deferred_link_errors_ls10
(
    nvswitch_device *device,
    NvU32 link
)
{
    ls10_device *chip_device = NVSWITCH_GET_CHIP_DEVICE_LS10(device);
    NVLINK_LINK_ERROR_REPORTING_DATA *pLinkErrorsData;

    pLinkErrorsData = &chip_device->deferredLinkErrors[link].data;

    nvswitch_os_memset(pLinkErrorsData, 0, sizeof(NVLINK_LINK_ERROR_REPORTING_DATA));
}

//
// Deferred-task callback: decide whether a link that went down has retrained.
// If it came back up since the last retrain, the cached errors are dropped
// (simply return); if not enough time has passed (or CCI asks for it), the
// check is re-deferred; otherwise the cached errors are emitted and cleared.
//
// Takes ownership of fn_args (a NVSWITCH_DEFERRED_ERROR_REPORTING_ARGS*) and
// frees it.
//
static void
_nvswitch_deferred_link_state_check_ls10
(
    nvswitch_device *device,
    void *fn_args
)
{
    NVSWITCH_DEFERRED_ERROR_REPORTING_ARGS *pErrorReportParams =
        (NVSWITCH_DEFERRED_ERROR_REPORTING_ARGS*)fn_args;
    NvU32 nvlipt_instance = pErrorReportParams->nvlipt_instance;
    NvU32 link = pErrorReportParams->link;
    ls10_device *chip_device;
    NvU64 lastLinkUpTime;
    NvU64 lastRetrainTime;
    NvU64 current_time = nvswitch_os_get_platform_time();
    NvBool bRedeferLinkStateCheck;

    chip_device = NVSWITCH_GET_CHIP_DEVICE_LS10(device);
    lastLinkUpTime = chip_device->deferredLinkErrors[link].state.lastLinkUpTime;
    lastRetrainTime = chip_device->deferredLinkErrors[link].state.lastRetrainTime;
    // Sanity Check
    NVSWITCH_ASSERT(nvswitch_is_link_valid(device, link));

    // The args were heap-allocated by the task creator; free them now that
    // the fields have been copied out.
    nvswitch_os_free(pErrorReportParams);
    pErrorReportParams = NULL;
    chip_device->deferredLinkErrors[link].state.bLinkStateCallBackEnabled = NV_FALSE;
    bRedeferLinkStateCheck = NV_FALSE;

    // Ask CCI if link state check should be further deferred
    if (cciIsLinkManaged(device, link) && !cciReportLinkErrors(device, link))
    {
        bRedeferLinkStateCheck = NV_TRUE;
    }

    // Link came up after last retrain
    if (lastLinkUpTime >= lastRetrainTime)
    {
        return;
    }

    //
    // If the last time this link was up was before the last
    // reset_and_drain execution and not enough time has past since the last
    // retrain then schedule another callback.
    //
    if (lastLinkUpTime < lastRetrainTime)
    {
        if ((current_time - lastRetrainTime) < NVSWITCH_DEFERRED_LINK_STATE_CHECK_INTERVAL_NS)
        {
            bRedeferLinkStateCheck = NV_TRUE;
        }
    }

    if (bRedeferLinkStateCheck)
    {
        nvswitch_create_deferred_link_state_check_task_ls10(device, nvlipt_instance, link);
        return;
    }

    //
    // Otherwise, the link hasn't retrained within the timeout so emit the
    // deferred errors.
    //
    _nvswitch_emit_deferred_link_errors_ls10(device, nvlipt_instance, link);
    _nvswitch_clear_deferred_link_errors_ls10(device, link);
}

//
// Schedule a one-shot deferred link-state check for this link, if one is not
// already pending. On allocation/scheduling failure the cached errors are
// emitted immediately instead of being deferred.
//
void
nvswitch_create_deferred_link_state_check_task_ls10
(
    nvswitch_device *device,
    NvU32 nvlipt_instance,
    NvU32 link
)
{
    ls10_device *chip_device = NVSWITCH_GET_CHIP_DEVICE_LS10(device);
    NVSWITCH_DEFERRED_ERROR_REPORTING_ARGS *pErrorReportParams;
    NvlStatus status;

    if (chip_device->deferredLinkErrors[link].state.bLinkStateCallBackEnabled)
    {
        return;
    }

    status = NVL_ERR_GENERIC;
    pErrorReportParams = nvswitch_os_malloc(sizeof(NVSWITCH_DEFERRED_ERROR_REPORTING_ARGS));
    if(pErrorReportParams != NULL)
    {
        pErrorReportParams->nvlipt_instance = nvlipt_instance;
        pErrorReportParams->link = link;

        // On success the task owns pErrorReportParams; the callback frees it.
        status = nvswitch_task_create_args(device, (void*)pErrorReportParams,
                                           &_nvswitch_deferred_link_state_check_ls10,
                                           NVSWITCH_DEFERRED_LINK_STATE_CHECK_INTERVAL_NS,
                                           NVSWITCH_TASK_TYPE_FLAGS_RUN_ONCE |
                                           NVSWITCH_TASK_TYPE_FLAGS_VOID_PTR_ARGS);
    }
    if (status == NVL_SUCCESS)
    {
        chip_device->deferredLinkErrors[link].state.bLinkStateCallBackEnabled = NV_TRUE;
    }
    else
    {
        // Could not defer (allocation or task creation failed): report the
        // cached errors immediately rather than dropping them. free(NULL) is
        // a no-op on the malloc-failure path.
        NVSWITCH_PRINT(device, ERROR,
            "%s: Failed to allocate memory. Cannot defer link state check.\n",
            __FUNCTION__);
        _nvswitch_emit_deferred_link_errors_ls10(device, nvlipt_instance, link);
        _nvswitch_clear_deferred_link_errors_ls10(device, link);
        nvswitch_os_free(pErrorReportParams);
    }
}

//
// Deferred-task callback: if no LTSSM fault was recorded since the errors
// were deferred, emit and clear the cached link errors. A recorded fault
// means a retrain plus state-check task is already underway, so just exit.
//
// Takes ownership of fn_args (a NVSWITCH_DEFERRED_ERROR_REPORTING_ARGS*) and
// frees it.
//
static void
_nvswitch_deferred_link_errors_check_ls10
(
    nvswitch_device *device,
    void *fn_args
)
{
    NVSWITCH_DEFERRED_ERROR_REPORTING_ARGS *pErrorReportParams =
        (NVSWITCH_DEFERRED_ERROR_REPORTING_ARGS*)fn_args;
    NvU32 nvlipt_instance = pErrorReportParams->nvlipt_instance;
    NvU32 link = pErrorReportParams->link;
    ls10_device *chip_device;
    NvU32 pending;

    nvswitch_os_free(pErrorReportParams);
    pErrorReportParams = NULL;

    chip_device = NVSWITCH_GET_CHIP_DEVICE_LS10(device);
    chip_device->deferredLinkErrors[link].state.bLinkErrorsCallBackEnabled = NV_FALSE;

    pending = chip_device->deferredLinkErrors[link].data.fatalIntrMask.dl;

    // A link fault was observed which means we also did the retrain and
    // scheduled a state check task. We can exit.
    if (FLD_TEST_DRF_NUM(_NVLDL_TOP, _INTR, _LTSSM_FAULT_UP, 1U, pending))
        return;

    if (FLD_TEST_DRF_NUM(_NVLDL_TOP, _INTR, _LTSSM_FAULT_DOWN, 1U, pending))
        return;

    //
    // No link fault, emit the deferred errors.
    // It is assumed that this callback runs long before a link could have been
    // retrained and hit errors again.
5987 // 5988 _nvswitch_emit_deferred_link_errors_ls10(device, nvlipt_instance, link); 5989 _nvswitch_clear_deferred_link_errors_ls10(device, link); 5990 } 5991 5992 static void 5993 _nvswitch_create_deferred_link_errors_task_ls10 5994 ( 5995 nvswitch_device *device, 5996 NvU32 nvlipt_instance, 5997 NvU32 link 5998 ) 5999 { 6000 ls10_device *chip_device = NVSWITCH_GET_CHIP_DEVICE_LS10(device); 6001 NVSWITCH_DEFERRED_ERROR_REPORTING_ARGS *pErrorReportParams; 6002 NvlStatus status; 6003 6004 if (chip_device->deferredLinkErrors[link].state.bLinkErrorsCallBackEnabled) 6005 { 6006 return; 6007 } 6008 6009 status = NVL_ERR_GENERIC; 6010 pErrorReportParams = nvswitch_os_malloc(sizeof(NVSWITCH_DEFERRED_ERROR_REPORTING_ARGS)); 6011 if(pErrorReportParams != NULL) 6012 { 6013 pErrorReportParams->nvlipt_instance = nvlipt_instance; 6014 pErrorReportParams->link = link; 6015 6016 status = nvswitch_task_create_args(device, (void*)pErrorReportParams, 6017 &_nvswitch_deferred_link_errors_check_ls10, 6018 NVSWITCH_DEFERRED_FAULT_UP_CHECK_INTERVAL_NS, 6019 NVSWITCH_TASK_TYPE_FLAGS_RUN_ONCE | 6020 NVSWITCH_TASK_TYPE_FLAGS_VOID_PTR_ARGS); 6021 } 6022 6023 if (status == NVL_SUCCESS) 6024 { 6025 chip_device->deferredLinkErrors[link].state.bLinkErrorsCallBackEnabled = NV_TRUE; 6026 } 6027 else 6028 { 6029 NVSWITCH_PRINT(device, ERROR, 6030 "%s: Failed to create task. 
Cannot defer link error check.\n", 6031 __FUNCTION__); 6032 _nvswitch_emit_deferred_link_errors_ls10(device, nvlipt_instance, link); 6033 _nvswitch_clear_deferred_link_errors_ls10(device, link); 6034 nvswitch_os_free(pErrorReportParams); 6035 } 6036 } 6037 6038 static NvlStatus 6039 _nvswitch_service_nvldl_nonfatal_link_ls10 6040 ( 6041 nvswitch_device *device, 6042 NvU32 nvlipt_instance, 6043 NvU32 link 6044 ) 6045 { 6046 ls10_device *chip_device = NVSWITCH_GET_CHIP_DEVICE_LS10(device); 6047 NvU32 pending, bit, unhandled; 6048 NVSWITCH_INTERRUPT_LOG_TYPE report = { 0 }; 6049 6050 report.raw_pending = NVSWITCH_LINK_RD32(device, link, NVLDL, _NVLDL_TOP, _INTR); 6051 report.raw_enable = NVSWITCH_LINK_RD32(device, link, NVLDL, _NVLDL_TOP, _INTR_NONSTALL_EN); 6052 report.mask = report.raw_enable; 6053 pending = report.raw_pending & report.mask; 6054 6055 if (pending == 0) 6056 { 6057 return -NVL_NOT_FOUND; 6058 } 6059 6060 unhandled = pending; 6061 6062 bit = DRF_NUM(_NVLDL_TOP, _INTR, _TX_REPLAY, 1); 6063 if (nvswitch_test_flags(pending, bit)) 6064 { 6065 NVSWITCH_REPORT_NONFATAL(_HW_DLPL_TX_REPLAY, "TX Replay Error"); 6066 nvswitch_clear_flags(&unhandled, bit); 6067 } 6068 6069 bit = DRF_NUM(_NVLDL_TOP, _INTR, _TX_RECOVERY_SHORT, 1); 6070 if (nvswitch_test_flags(pending, bit)) 6071 { 6072 NVSWITCH_REPORT_NONFATAL(_HW_DLPL_TX_RECOVERY_SHORT, "TX Recovery Short"); 6073 nvswitch_clear_flags(&unhandled, bit); 6074 } 6075 6076 bit = DRF_NUM(_NVLDL_TOP, _INTR, _RX_SHORT_ERROR_RATE, 1); 6077 if (nvswitch_test_flags(pending, bit)) 6078 { 6079 chip_device->deferredLinkErrors[link].data.nonFatalIntrMask.dl |= bit; 6080 _nvswitch_create_deferred_link_errors_task_ls10(device, nvlipt_instance, link); 6081 nvswitch_clear_flags(&unhandled, bit); 6082 } 6083 6084 bit = DRF_NUM(_NVLDL_TOP, _INTR, _RX_LONG_ERROR_RATE, 1); 6085 if (nvswitch_test_flags(pending, bit)) 6086 { 6087 NVSWITCH_REPORT_NONFATAL(_HW_DLPL_RX_LONG_ERROR_RATE, "RX Long Error Rate"); 6088 
nvswitch_clear_flags(&unhandled, bit);
    }

    bit = DRF_NUM(_NVLDL_TOP, _INTR, _RX_ILA_TRIGGER, 1);
    if (nvswitch_test_flags(pending, bit))
    {
        NVSWITCH_REPORT_NONFATAL(_HW_DLPL_RX_ILA_TRIGGER, "RX ILA Trigger");
        nvswitch_clear_flags(&unhandled, bit);
    }

    bit = DRF_NUM(_NVLDL_TOP, _INTR, _RX_CRC_COUNTER, 1);
    if (nvswitch_test_flags(pending, bit))
    {
        // Deferred like RX_SHORT_ERROR_RATE above.
        chip_device->deferredLinkErrors[link].data.nonFatalIntrMask.dl |= bit;
        _nvswitch_create_deferred_link_errors_task_ls10(device, nvlipt_instance, link);
        nvswitch_clear_flags(&unhandled, bit);

        //
        // Mask CRC counter after first occurrance - otherwise, this interrupt
        // will continue to fire once the CRC counter has hit the threshold
        // See Bug 3341528
        //
        report.raw_enable = report.raw_enable & (~bit);
        NVSWITCH_LINK_WR32(device, link, NVLDL, _NVLDL_TOP, _INTR_NONSTALL_EN,
            report.raw_enable);
    }

    NVSWITCH_UNHANDLED_CHECK(device, unhandled);

    // Acknowledge everything we observed (write-1-to-clear status register).
    NVSWITCH_LINK_WR32(device, link, NVLDL, _NVLDL_TOP, _INTR, pending);

    if (unhandled != 0)
    {
        NVSWITCH_PRINT(device, WARN,
            "%s: Unhandled NVLDL nonfatal interrupts, link: %d pending: 0x%x enabled: 0x%x.\n",
            __FUNCTION__, link, pending, report.raw_enable);
        return -NVL_MORE_PROCESSING_REQUIRED;
    }

    return NVL_SUCCESS;
}

//
// Fan out NVLDL nonfatal servicing to every interrupting link of this
// NVLIPT instance.
//
// intrLinkMask is a global (device-wide) link mask; it is intersected with
// the instance-local mask and the enabled-link mask.  Links in reset or
// with RXCLK/TXCLK off are skipped because the DL registers are
// inaccessible there.  Returns -NVL_NOT_FOUND if no per-link handler ran
// successfully, otherwise the last failing status or NVL_SUCCESS.
//
static NvlStatus
_nvswitch_service_nvldl_nonfatal_ls10
(
    nvswitch_device *device,
    NvU32 nvlipt_instance,
    NvU64 intrLinkMask
)
{
    NvU64 localLinkMask, enabledLinkMask, localIntrLinkMask;
    NvU32 i;
    nvlink_link *link;
    NvlStatus status;
    NvlStatus return_status = -NVL_NOT_FOUND;
    NvU32 clocksMask = NVSWITCH_PER_LINK_CLOCK_SET(RXCLK) | NVSWITCH_PER_LINK_CLOCK_SET(TXCLK);

    //
    // The passed in interruptLinkMask should contain a link that is part of the
    // given nvlipt instance
    //
    localLinkMask = NVSWITCH_NVLIPT_GET_LOCAL_LINK_MASK64_LS10(nvlipt_instance);
    enabledLinkMask = nvswitch_get_enabled_link_mask(device);
    localIntrLinkMask = localLinkMask & intrLinkMask & enabledLinkMask;

    if (localIntrLinkMask == 0)
    {
        NVSWITCH_PRINT(device, ERROR, "%s: Bad link mask provided for link interrupt servicing!\n", __FUNCTION__);
        NVSWITCH_ASSERT(0);
        return -NVL_BAD_ARGS;
    }

    FOR_EACH_INDEX_IN_MASK(64, i, localIntrLinkMask)
    {
        link = nvswitch_get_link(device, i);
        if (link == NULL)
        {
            // An interrupt on an invalid link should never occur
            NVSWITCH_ASSERT(link != NULL);
            continue;
        }

        if (NVSWITCH_GET_LINK_ENG_INST(device, i, NVLIPT) != nvlipt_instance)
        {
            NVSWITCH_ASSERT(0);
            break;
        }

        if (nvswitch_is_link_in_reset(device, link) ||
            !nvswitch_are_link_clocks_on_ls10(device, link, clocksMask))
        {
            continue;
        }

        status = _nvswitch_service_nvldl_nonfatal_link_ls10(device, nvlipt_instance, i);
        if (status != NVL_SUCCESS)
        {
            return_status = status;
        }
    }
    FOR_EACH_INDEX_IN_MASK_END;

    return return_status;
}

//
// Service NVLTLC RX link nonfatal (_ERR_STATUS_0) interrupts for one link
// and log recognized errors to the InfoROM NVLink error object.
//
static NvlStatus
_nvswitch_service_nvltlc_rx_lnk_nonfatal_0_ls10
(
    nvswitch_device *device,
    NvU32 nvlipt_instance,
    NvU32 link
)
{
    NvU32 pending, bit, unhandled;
    NVSWITCH_INTERRUPT_LOG_TYPE report = { 0 };
    INFOROM_NVLINK_ERROR_EVENT error_event;

    report.raw_pending = NVSWITCH_LINK_RD32(device, link, NVLTLC, _NVLTLC_RX_LNK, _ERR_STATUS_0);
    report.raw_enable = NVSWITCH_LINK_RD32(device, link, NVLTLC, _NVLTLC_RX_LNK, _ERR_NON_FATAL_REPORT_EN_0);
    report.mask = report.raw_enable;

    error_event.nvliptInstance = (NvU8) nvlipt_instance;
    error_event.localLinkIdx = (NvU8) NVSWITCH_NVLIPT_GET_LOCAL_LINK_ID_LS10(link);

    pending = report.raw_pending & report.mask;
    if (pending == 0)
    {
        return -NVL_NOT_FOUND;
    }

    unhandled = pending;
    report.raw_first = NVSWITCH_LINK_RD32(device, link, NVLTLC, _NVLTLC_RX_LNK, _ERR_FIRST_0);

    bit = DRF_NUM(_NVLTLC_RX_LNK, _ERR_STATUS_0, _RXRSPSTATUS_PRIV_ERR, 1);
    if (nvswitch_test_flags(pending, bit))
    {
        NVSWITCH_REPORT_NONFATAL(_HW_NVLTLC_RX_LNK_RXRSPSTATUS_PRIV_ERR, "RX Rsp Status PRIV Error");
        nvswitch_clear_flags(&unhandled, bit);
        {
            error_event.error = INFOROM_NVLINK_TLC_RX_RSP_STATUS_PRIV_ERR_NONFATAL;
            nvswitch_inforom_nvlink_log_error_event(device, &error_event);
        }
    }

    NVSWITCH_UNHANDLED_CHECK(device, unhandled);

    // Clear the FIRST latch before acknowledging the status bits (W1C).
    if (report.raw_first & report.mask)
    {
        NVSWITCH_LINK_WR32(device, link, NVLTLC, _NVLTLC_RX_LNK, _ERR_FIRST_0,
            report.raw_first & report.mask);
    }
    NVSWITCH_LINK_WR32(device, link, NVLTLC, _NVLTLC_RX_LNK, _ERR_STATUS_0, pending);

    if (unhandled != 0)
    {
        NVSWITCH_PRINT(device, WARN,
            "%s: Unhandled NVLTLC_RX_LNK _0 interrupts, link: %d pending: 0x%x enabled: 0x%x.\n",
            __FUNCTION__, link, pending, report.raw_enable);
        return -NVL_MORE_PROCESSING_REQUIRED;
    }

    return NVL_SUCCESS;
}

//
// Service NVLTLC TX link nonfatal (_ERR_STATUS_0) interrupts for one link:
// per-RAM ECC DBE and ECC-limit errors.  DBEs are additionally logged to
// the InfoROM.
//
static NvlStatus
_nvswitch_service_nvltlc_tx_lnk_nonfatal_0_ls10
(
    nvswitch_device *device,
    NvU32 nvlipt_instance,
    NvU32 link
)
{
    NvU32 pending, bit, unhandled;
    NVSWITCH_INTERRUPT_LOG_TYPE report = { 0 };
    INFOROM_NVLINK_ERROR_EVENT error_event;

    report.raw_pending = NVSWITCH_LINK_RD32(device, link, NVLTLC, _NVLTLC_TX_LNK, _ERR_STATUS_0);
    report.raw_enable = NVSWITCH_LINK_RD32(device, link, NVLTLC, _NVLTLC_TX_LNK, _ERR_NON_FATAL_REPORT_EN_0);
    report.mask = report.raw_enable;
    pending = report.raw_pending & report.mask;

    error_event.nvliptInstance = (NvU8) nvlipt_instance;
    error_event.localLinkIdx = (NvU8) NVSWITCH_NVLIPT_GET_LOCAL_LINK_ID_LS10(link);

    if (pending == 0)
    
{
        return -NVL_NOT_FOUND;
    }

    unhandled = pending;
    report.raw_first = NVSWITCH_LINK_RD32(device, link, NVLTLC, _NVLTLC_TX_LNK, _ERR_FIRST_0);

    bit = DRF_NUM(_NVLTLC_TX_LNK, _ERR_STATUS_0, _CREQ_RAM_DAT_ECC_DBE_ERR, 1);
    if (nvswitch_test_flags(pending, bit))
    {
        NVSWITCH_REPORT_NONFATAL(_HW_NVLTLC_TX_LNK_CREQ_RAM_DAT_ECC_DBE_ERR, "CREQ RAM DAT ECC DBE Error");
        nvswitch_clear_flags(&unhandled, bit);
        {
            // TODO 3014908 log these in the NVL object until we have ECC object support
            error_event.error = INFOROM_NVLINK_TLC_TX_CREQ_DAT_RAM_ECC_DBE_NONFATAL;
            nvswitch_inforom_nvlink_log_error_event(device, &error_event);
        }
    }

    bit = DRF_NUM(_NVLTLC_TX_LNK, _ERR_STATUS_0, _CREQ_RAM_ECC_LIMIT_ERR, 1);
    if (nvswitch_test_flags(pending, bit))
    {
        NVSWITCH_REPORT_NONFATAL(_HW_NVLTLC_TX_LNK_CREQ_RAM_ECC_LIMIT_ERR, "CREQ RAM DAT ECC Limit Error");
        nvswitch_clear_flags(&unhandled, bit);
    }

    bit = DRF_NUM(_NVLTLC_TX_LNK, _ERR_STATUS_0, _RSP_RAM_DAT_ECC_DBE_ERR, 1);
    if (nvswitch_test_flags(pending, bit))
    {
        NVSWITCH_REPORT_NONFATAL(_HW_NVLTLC_TX_LNK_RSP_RAM_DAT_ECC_DBE_ERR, "Response RAM DAT ECC DBE Error");
        nvswitch_clear_flags(&unhandled, bit);
    }

    bit = DRF_NUM(_NVLTLC_TX_LNK, _ERR_STATUS_0, _RSP_RAM_ECC_LIMIT_ERR, 1);
    if (nvswitch_test_flags(pending, bit))
    {
        NVSWITCH_REPORT_NONFATAL(_HW_NVLTLC_TX_LNK_RSP_RAM_ECC_LIMIT_ERR, "Response RAM ECC Limit Error");
        nvswitch_clear_flags(&unhandled, bit);
    }

    bit = DRF_NUM(_NVLTLC_TX_LNK, _ERR_STATUS_0, _COM_RAM_DAT_ECC_DBE_ERR, 1);
    if (nvswitch_test_flags(pending, bit))
    {
        NVSWITCH_REPORT_NONFATAL(_HW_NVLTLC_TX_LNK_COM_RAM_DAT_ECC_DBE_ERR, "COM RAM DAT ECC DBE Error");
        nvswitch_clear_flags(&unhandled, bit);
        {
            // TODO 3014908 log these in the NVL object until we have ECC object support
            error_event.error = INFOROM_NVLINK_TLC_TX_COM_DAT_RAM_ECC_DBE_NONFATAL;
            nvswitch_inforom_nvlink_log_error_event(device, &error_event);
        }
    }

    bit = DRF_NUM(_NVLTLC_TX_LNK, _ERR_STATUS_0, _COM_RAM_ECC_LIMIT_ERR, 1);
    if (nvswitch_test_flags(pending, bit))
    {
        NVSWITCH_REPORT_NONFATAL(_HW_NVLTLC_TX_LNK_COM_RAM_ECC_LIMIT_ERR, "COM RAM ECC Limit Error");
        nvswitch_clear_flags(&unhandled, bit);
    }

    bit = DRF_NUM(_NVLTLC_TX_LNK, _ERR_STATUS_0, _RSP1_RAM_ECC_LIMIT_ERR, 1);
    if (nvswitch_test_flags(pending, bit))
    {
        NVSWITCH_REPORT_NONFATAL(_HW_NVLTLC_TX_LNK_RSP1_RAM_ECC_LIMIT_ERR, "RSP1 RAM ECC Limit Error");
        nvswitch_clear_flags(&unhandled, bit);
    }

    NVSWITCH_UNHANDLED_CHECK(device, unhandled);

    // Disable interrupts that have occurred after fatal error.
    if (device->link[link].fatal_error_occurred)
    {
        NVSWITCH_LINK_WR32(device, link, NVLTLC, _NVLTLC_TX_LNK, _ERR_NON_FATAL_REPORT_EN_0,
            report.raw_enable & ~pending);
    }

    // Clear the FIRST latch before acknowledging the status bits (W1C).
    if (report.raw_first & report.mask)
    {
        NVSWITCH_LINK_WR32(device, link, NVLTLC, _NVLTLC_TX_LNK, _ERR_FIRST_0,
            report.raw_first & report.mask);
    }
    NVSWITCH_LINK_WR32(device, link, NVLTLC, _NVLTLC_TX_LNK, _ERR_STATUS_0, pending);

    if (unhandled != 0)
    {
        NVSWITCH_PRINT(device, WARN,
            "%s: Unhandled NVLTLC_TX_LNK _0 interrupts, link: %d pending: 0x%x enabled: 0x%x.\n",
            __FUNCTION__, link, pending, report.raw_enable);
        return -NVL_MORE_PROCESSING_REQUIRED;
    }

    return NVL_SUCCESS;
}

//
// Service NVLTLC RX link nonfatal (_ERR_STATUS_1) interrupts for one link.
//
// HEARTBEAT_TIMEOUT handling is deferred to the link-errors task, and —
// unless the error was injected for testing — HBTO reporting is masked off
// to avoid an interrupt storm while the peer GPU is in reset_and_drain
// (WAR for bug 200627368).
//
static NvlStatus
_nvswitch_service_nvltlc_rx_lnk_nonfatal_1_ls10
(
    nvswitch_device *device,
    NvU32 nvlipt_instance,
    NvU32 link
)
{
    ls10_device *chip_device = NVSWITCH_GET_CHIP_DEVICE_LS10(device);
    NvU32 pending, bit, unhandled;
    NVSWITCH_INTERRUPT_LOG_TYPE report = { 0 };
    NvU32 injected;

    report.raw_pending = NVSWITCH_LINK_RD32(device, link, NVLTLC, _NVLTLC_RX_LNK, _ERR_STATUS_1);
    report.raw_enable = NVSWITCH_LINK_RD32(device, link, NVLTLC, _NVLTLC_RX_LNK, _ERR_NON_FATAL_REPORT_EN_1);
    report.mask = report.raw_enable;
    pending = report.raw_pending & report.mask;

    if (pending == 0)
    {
        return -NVL_NOT_FOUND;
    }

    unhandled = pending;
    report.raw_first = NVSWITCH_LINK_RD32(device, link, NVLTLC, _NVLTLC_RX_LNK, _ERR_FIRST_1);
    injected = NVSWITCH_LINK_RD32(device, link, NVLTLC, _NVLTLC_RX_LNK, _ERR_REPORT_INJECT_1);

    bit = DRF_NUM(_NVLTLC_RX_LNK, _ERR_STATUS_1, _HEARTBEAT_TIMEOUT_ERR, 1);
    if (nvswitch_test_flags(pending, bit))
    {
        // Defer reporting; also remember whether this occurrence was injected.
        chip_device->deferredLinkErrors[link].data.nonFatalIntrMask.tlcRx1 |= bit;
        chip_device->deferredLinkErrors[link].data.nonFatalIntrMask.tlcRx1Injected |= injected;
        _nvswitch_create_deferred_link_errors_task_ls10(device, nvlipt_instance, link);

        if (FLD_TEST_DRF_NUM(_NVLTLC_RX_LNK, _ERR_REPORT_INJECT_1, _HEARTBEAT_TIMEOUT_ERR, 0x0, injected))
        {
            //
            // WAR Bug 200627368: Mask off HBTO to avoid a storm
            // During the start of reset_and_drain, all links on the GPU
            // will go into contain, causing HBTO on other switch links connected
            // to that GPU. For the switch side, these interrupts are not fatal,
            // but until we get to reset_and_drain for this link, HBTO will continue
            // to fire repeatedly. After reset_and_drain, HBTO will be re-enabled
            // by MINION after links are trained.
            //
            report.raw_enable = report.raw_enable & (~bit);
            NVSWITCH_LINK_WR32(device, link, NVLTLC, _NVLTLC_RX_LNK, _ERR_NON_FATAL_REPORT_EN_1,
                report.raw_enable);
        }
        nvswitch_clear_flags(&unhandled, bit);
    }

    NVSWITCH_UNHANDLED_CHECK(device, unhandled);

    // Disable interrupts that have occurred after fatal error.
    if (device->link[link].fatal_error_occurred)
    {
        NVSWITCH_LINK_WR32(device, link, NVLTLC, _NVLTLC_RX_LNK, _ERR_NON_FATAL_REPORT_EN_1,
            report.raw_enable & (~pending));
    }

    // Clear the FIRST latch before acknowledging the status bits (W1C).
    if (report.raw_first & report.mask)
    {
        NVSWITCH_LINK_WR32(device, link, NVLTLC, _NVLTLC_RX_LNK, _ERR_FIRST_1,
            report.raw_first & report.mask);
    }
    NVSWITCH_LINK_WR32(device, link, NVLTLC, _NVLTLC_RX_LNK, _ERR_STATUS_1, pending);

    if (unhandled != 0)
    {
        NVSWITCH_PRINT(device, WARN,
            "%s: Unhandled NVLTLC_RX_LNK _1 interrupts, link: %d pending: 0x%x enabled: 0x%x.\n",
            __FUNCTION__, link, pending, report.raw_enable);
        return -NVL_MORE_PROCESSING_REQUIRED;
    }

    return NVL_SUCCESS;
}

//
// Service NVLTLC TX link nonfatal (_ERR_STATUS_1) interrupts for one link:
// the eight AN1 per-VC timeout errors (VC0..VC7).  Each is reported and
// logged to the InfoROM; the eight branches below are intentionally
// identical apart from the VC index.
//
static NvlStatus
_nvswitch_service_nvltlc_tx_lnk_nonfatal_1_ls10
(
    nvswitch_device *device,
    NvU32 nvlipt_instance,
    NvU32 link
)
{
    NvU32 pending, bit, unhandled;
    NVSWITCH_INTERRUPT_LOG_TYPE report = { 0 };
    INFOROM_NVLINK_ERROR_EVENT error_event = { 0 };

    report.raw_pending = NVSWITCH_LINK_RD32(device, link, NVLTLC, _NVLTLC_TX_LNK, _ERR_STATUS_1);
    report.raw_enable = NVSWITCH_LINK_RD32(device, link, NVLTLC, _NVLTLC_TX_LNK, _ERR_NON_FATAL_REPORT_EN_1);
    report.mask = report.raw_enable;
    pending = report.raw_pending & report.mask;

    error_event.nvliptInstance = (NvU8) nvlipt_instance;
    error_event.localLinkIdx = (NvU8) NVSWITCH_NVLIPT_GET_LOCAL_LINK_ID_LS10(link);

    if (pending == 0)
    {
        return -NVL_NOT_FOUND;
    }

    unhandled = pending;
    report.raw_first = NVSWITCH_LINK_RD32(device, link, NVLTLC, _NVLTLC_TX_LNK, _ERR_FIRST_1);

    bit = DRF_NUM(_NVLTLC_TX_LNK, _ERR_STATUS_1, _TIMEOUT_VC0, 1);
    if (nvswitch_test_flags(pending, bit))
    {
        NVSWITCH_REPORT_NONFATAL(_HW_NVLTLC_TX_LNK_AN1_TIMEOUT_VC0, "AN1 Timeout VC0");
        nvswitch_clear_flags(&unhandled, bit);
        {
            error_event.error = INFOROM_NVLINK_TLC_TX_AN1_TIMEOUT_VC0_NONFATAL;
            nvswitch_inforom_nvlink_log_error_event(device, &error_event);
        }
    }

    bit = DRF_NUM(_NVLTLC_TX_LNK, _ERR_STATUS_1, _TIMEOUT_VC1, 1);
    if (nvswitch_test_flags(pending, bit))
    {
        NVSWITCH_REPORT_NONFATAL(_HW_NVLTLC_TX_LNK_AN1_TIMEOUT_VC1, "AN1 Timeout VC1");
        nvswitch_clear_flags(&unhandled, bit);
        {
            error_event.error = INFOROM_NVLINK_TLC_TX_AN1_TIMEOUT_VC1_NONFATAL;
            nvswitch_inforom_nvlink_log_error_event(device, &error_event);
        }
    }

    bit = DRF_NUM(_NVLTLC_TX_LNK, _ERR_STATUS_1, _TIMEOUT_VC2, 1);
    if (nvswitch_test_flags(pending, bit))
    {
        NVSWITCH_REPORT_NONFATAL(_HW_NVLTLC_TX_LNK_AN1_TIMEOUT_VC2, "AN1 Timeout VC2");
        nvswitch_clear_flags(&unhandled, bit);
        {
            error_event.error = INFOROM_NVLINK_TLC_TX_AN1_TIMEOUT_VC2_NONFATAL;
            nvswitch_inforom_nvlink_log_error_event(device, &error_event);
        }
    }

    bit = DRF_NUM(_NVLTLC_TX_LNK, _ERR_STATUS_1, _TIMEOUT_VC3, 1);
    if (nvswitch_test_flags(pending, bit))
    {
        NVSWITCH_REPORT_NONFATAL(_HW_NVLTLC_TX_LNK_AN1_TIMEOUT_VC3, "AN1 Timeout VC3");
        nvswitch_clear_flags(&unhandled, bit);
        {
            error_event.error = INFOROM_NVLINK_TLC_TX_AN1_TIMEOUT_VC3_NONFATAL;
            nvswitch_inforom_nvlink_log_error_event(device, &error_event);
        }
    }

    bit = DRF_NUM(_NVLTLC_TX_LNK, _ERR_STATUS_1, _TIMEOUT_VC4, 1);
    if (nvswitch_test_flags(pending, bit))
    {
        NVSWITCH_REPORT_NONFATAL(_HW_NVLTLC_TX_LNK_AN1_TIMEOUT_VC4, "AN1 Timeout VC4");
        nvswitch_clear_flags(&unhandled, bit);
        {
            error_event.error = INFOROM_NVLINK_TLC_TX_AN1_TIMEOUT_VC4_NONFATAL;
            nvswitch_inforom_nvlink_log_error_event(device, &error_event);
        }
    }

    bit = DRF_NUM(_NVLTLC_TX_LNK, _ERR_STATUS_1, _TIMEOUT_VC5, 1);
    if (nvswitch_test_flags(pending, bit))
    {
        NVSWITCH_REPORT_NONFATAL(_HW_NVLTLC_TX_LNK_AN1_TIMEOUT_VC5, "AN1 Timeout VC5");
        nvswitch_clear_flags(&unhandled, bit);
        {
            error_event.error = INFOROM_NVLINK_TLC_TX_AN1_TIMEOUT_VC5_NONFATAL;
            nvswitch_inforom_nvlink_log_error_event(device, &error_event);
        }
    }

    bit = DRF_NUM(_NVLTLC_TX_LNK, _ERR_STATUS_1, _TIMEOUT_VC6, 1);
    if (nvswitch_test_flags(pending, bit))
    {
        NVSWITCH_REPORT_NONFATAL(_HW_NVLTLC_TX_LNK_AN1_TIMEOUT_VC6, "AN1 Timeout VC6");
        nvswitch_clear_flags(&unhandled, bit);
        {
            error_event.error = INFOROM_NVLINK_TLC_TX_AN1_TIMEOUT_VC6_NONFATAL;
            nvswitch_inforom_nvlink_log_error_event(device, &error_event);
        }
    }

    bit = DRF_NUM(_NVLTLC_TX_LNK, _ERR_STATUS_1, _TIMEOUT_VC7, 1);
    if (nvswitch_test_flags(pending, bit))
    {
        NVSWITCH_REPORT_NONFATAL(_HW_NVLTLC_TX_LNK_AN1_TIMEOUT_VC7, "AN1 Timeout VC7");
        nvswitch_clear_flags(&unhandled, bit);
        {
            error_event.error = INFOROM_NVLINK_TLC_TX_AN1_TIMEOUT_VC7_NONFATAL;
            nvswitch_inforom_nvlink_log_error_event(device, &error_event);
        }
    }

    NVSWITCH_UNHANDLED_CHECK(device, unhandled);

    // Disable interrupts that have occurred after fatal error.
    if (device->link[link].fatal_error_occurred)
    {
        NVSWITCH_LINK_WR32(device, link, NVLTLC, _NVLTLC_TX_LNK, _ERR_NON_FATAL_REPORT_EN_1,
            report.raw_enable & ~pending);
    }

    // Clear the FIRST latch before acknowledging the status bits (W1C).
    if (report.raw_first & report.mask)
    {
        NVSWITCH_LINK_WR32(device, link, NVLTLC, _NVLTLC_TX_LNK, _ERR_FIRST_1,
            report.raw_first & report.mask);
    }
    NVSWITCH_LINK_WR32(device, link, NVLTLC, _NVLTLC_TX_LNK, _ERR_STATUS_1, pending);

    if (unhandled != 0)
    {
        NVSWITCH_PRINT(device, WARN,
            "%s: Unhandled NVLTLC_TX_LNK _1 interrupts, link: %d pending: 0x%x enabled: 0x%x.\n",
            __FUNCTION__, link, pending, report.raw_enable);
        return -NVL_MORE_PROCESSING_REQUIRED;
    }

    return NVL_SUCCESS;
}

//
// Fan out NVLTLC nonfatal servicing (RX/TX, _0/_1 status banks) to every
// interrupting link of this NVLIPT instance.  Links in reset or with the
// NCISOC clock off are skipped because the TLC registers are inaccessible.
//
static NvlStatus
_nvswitch_service_nvltlc_nonfatal_ls10
(
    nvswitch_device *device,
    NvU32 nvlipt_instance,
    NvU64 intrLinkMask
)
{
    NvU64 localLinkMask, enabledLinkMask, localIntrLinkMask;
    NvU32 i;
    nvlink_link *link;
    NvlStatus status;
    NvlStatus return_status = NVL_SUCCESS;

    localLinkMask = NVSWITCH_NVLIPT_GET_LOCAL_LINK_MASK64_LS10(nvlipt_instance);
    enabledLinkMask = nvswitch_get_enabled_link_mask(device);
    localIntrLinkMask = localLinkMask & intrLinkMask & enabledLinkMask;

    if (localIntrLinkMask == 0)
    {
        NVSWITCH_PRINT(device, ERROR, "%s: Bad link mask provided for link interrupt servicing!\n", __FUNCTION__);
        NVSWITCH_ASSERT(0);
        return -NVL_BAD_ARGS;
    }

    FOR_EACH_INDEX_IN_MASK(64, i, localIntrLinkMask)
    {
        link = nvswitch_get_link(device, i);
        if (link == NULL)
        {
            // An interrupt on an invalid link should never occur
            NVSWITCH_ASSERT(link != NULL);
            continue;
        }

        if (NVSWITCH_GET_LINK_ENG_INST(device, i, NVLIPT) != nvlipt_instance)
        {
            NVSWITCH_ASSERT(0);
            break;
        }

        //
        // If link is in reset or NCISOC clock is off then
        // 
don't need to check the link for NVLTLC errors
        // as the IP's registers are off
        //
        if (nvswitch_is_link_in_reset(device, link) ||
            !nvswitch_are_link_clocks_on_ls10(device, link, NVSWITCH_PER_LINK_CLOCK_SET(NCISOCCLK)))
        {
            continue;
        }

        status = _nvswitch_service_nvltlc_rx_lnk_nonfatal_0_ls10(device, nvlipt_instance, i);
        if (status != NVL_SUCCESS)
        {
            return_status = status;
        }

        status = _nvswitch_service_nvltlc_tx_lnk_nonfatal_0_ls10(device, nvlipt_instance, i);
        if (status != NVL_SUCCESS)
        {
            return_status = status;
        }

        status = _nvswitch_service_nvltlc_rx_lnk_nonfatal_1_ls10(device, nvlipt_instance, i);
        if (status != NVL_SUCCESS)
        {
            return_status = status;
        }

        status = _nvswitch_service_nvltlc_tx_lnk_nonfatal_1_ls10(device, nvlipt_instance, i);
        if (status != NVL_SUCCESS)
        {
            return_status = status;
        }
    }
    FOR_EACH_INDEX_IN_MASK_END;

    return return_status;
}

//
// Service NVLIPT per-link INTR_STATUS (link-state-change) interrupts.
//
// On LINKSTATEREQUESTREADYSET: query the DL link mode; if the link is in HS
// (up), finish the post-training tasks (enable per-link DL interrupts,
// release buffer_ready), clear cached deferred errors, and stamp
// lastLinkUpTime; if the link is in FAULT, re-arm the DL interrupts so the
// underlying errors can be serviced.
//
static NvlStatus
_nvswitch_service_nvlipt_lnk_status_ls10
(
    nvswitch_device *device,
    NvU32 nvlipt_instance,
    NvU32 link_id
)
{
    NvU32 pending, enabled, unhandled, bit;
    NvU64 mode;
    nvlink_link *link;
    ls10_device *chip_device;

    link = nvswitch_get_link(device, link_id);
    chip_device = NVSWITCH_GET_CHIP_DEVICE_LS10(device);

    if (link == NULL)
    {
        return -NVL_BAD_ARGS;
    }

    pending = NVSWITCH_LINK_RD32(device, link_id, NVLIPT_LNK, _NVLIPT_LNK, _INTR_STATUS);
    enabled = NVSWITCH_LINK_RD32(device, link_id, NVLIPT_LNK, _NVLIPT_LNK, _INTR_INT1_EN);
    pending &= enabled;
    unhandled = pending;

    bit = DRF_NUM(_NVLIPT_LNK, _INTR_STATUS, _LINKSTATEREQUESTREADYSET, 1);
    if (nvswitch_test_flags(pending, bit))
    {
        // NOTE(review): redundant re-fetch — 'link' was already resolved and
        // NULL-checked above.
        link = nvswitch_get_link(device, link_id);
        if (link == NULL)
        {
            // If we get here, it's a bug. Assert, then let callers detect unhandled IRQ.
            NVSWITCH_ASSERT(link != NULL);
        }

        nvswitch_clear_flags(&unhandled, bit);
        if(nvswitch_corelib_get_dl_link_mode_ls10(link, &mode) != NVL_SUCCESS)
        {
            NVSWITCH_PRINT(device, ERROR, "%s: nvlipt_lnk_status: Failed to check link mode! LinkId %d\n",
                __FUNCTION__, link_id);
        }
        else if(mode == NVLINK_LINKSTATE_HS)
        {
            NVSWITCH_PRINT(device, INFO, "%s: nvlipt_lnk_status: Link is up!. LinkId %d\n",
                __FUNCTION__, link_id);

            //
            // When a link comes up ensure that we finish off the post-training tasks:
            // -- enabling per-link DL interrupts
            // -- releasing buffer_ready on the link
            //
            nvswitch_corelib_training_complete_ls10(link);
            nvswitch_init_buffer_ready(device, link, NV_TRUE);
            link->bRxDetected = NV_TRUE;

            //
            // Clear out any cached interrupts for the link and update the last link up timestamp
            //
            _nvswitch_clear_deferred_link_errors_ls10(device, link_id);
            chip_device->deferredLinkErrors[link_id].state.lastLinkUpTime = nvswitch_os_get_platform_time();
        }
        else if (mode == NVLINK_LINKSTATE_FAULT)
        {
            //
            // If we are here then a previous state transition caused
            // the link to FAULT as there is no TL Link state requests
            // that explicitly transitions a link to fault. If that is the
            // case, set the DL interrupts so any errors can be handled
            //
            nvswitch_set_dlpl_interrupts_ls10(link);
        }
    }

    NVSWITCH_UNHANDLED_CHECK(device, unhandled);
    NVSWITCH_LINK_WR32(device, link_id, NVLIPT_LNK, _NVLIPT_LNK, _INTR_STATUS, pending);

    if (unhandled != 0)
    {
        NVSWITCH_PRINT(device, WARN,
            "%s: Unhandled NVLIPT_LNK STATUS interrupts, pending: 0x%x enabled: 0x%x.\n",
            __FUNCTION__, pending, enabled);
        return -NVL_MORE_PROCESSING_REQUIRED;
    }

    return NVL_SUCCESS;
}

//
// Service NVLIPT per-link nonfatal (_ERR_STATUS_0) interrupts for one link.
// Most errors are reported and logged to the InfoROM; FAILEDMINIONREQUEST
// is deferred because (per HW) it always trails a DL fault.
//
static NvlStatus
_nvswitch_service_nvlipt_lnk_nonfatal_ls10
(
    nvswitch_device *device,
    NvU32 nvlipt_instance,
    NvU32 link
)
{
    ls10_device *chip_device = NVSWITCH_GET_CHIP_DEVICE_LS10(device);
    NVSWITCH_INTERRUPT_LOG_TYPE report = { 0 };
    NvU32 pending, bit, unhandled;
    INFOROM_NVLINK_ERROR_EVENT error_event = { 0 };

    report.raw_pending = NVSWITCH_LINK_RD32(device, link, NVLIPT_LNK, _NVLIPT_LNK, _ERR_STATUS_0);
    report.raw_enable = NVSWITCH_LINK_RD32(device, link, NVLIPT_LNK, _NVLIPT_LNK, _ERR_NON_FATAL_REPORT_EN_0);
    report.mask = report.raw_enable;

    error_event.nvliptInstance = (NvU8) nvlipt_instance;
    error_event.localLinkIdx = (NvU8) NVSWITCH_NVLIPT_GET_LOCAL_LINK_ID_LS10(link);

    pending = report.raw_pending & report.mask;
    if (pending == 0)
    {
        return -NVL_NOT_FOUND;
    }

    unhandled = pending;
    report.raw_first = NVSWITCH_LINK_RD32(device, link, NVLIPT_LNK, _NVLIPT_LNK, _ERR_FIRST_0);

    bit = DRF_NUM(_NVLIPT_LNK, _ERR_STATUS_0, _ILLEGALLINKSTATEREQUEST, 1);
    if (nvswitch_test_flags(pending, bit))
    {
        NVSWITCH_REPORT_NONFATAL(_HW_NVLIPT_LNK_ILLEGALLINKSTATEREQUEST, "_HW_NVLIPT_LNK_ILLEGALLINKSTATEREQUEST");
        nvswitch_clear_flags(&unhandled, bit);
        {
            error_event.error = INFOROM_NVLINK_NVLIPT_ILLEGAL_LINK_STATE_REQUEST_NONFATAL;
            nvswitch_inforom_nvlink_log_error_event(device, &error_event);
        }
    }

    bit = DRF_NUM(_NVLIPT_LNK, _ERR_STATUS_0, _FAILEDMINIONREQUEST, 1);
    if (nvswitch_test_flags(pending, bit))
    {
        //
        // based off of HW's assertion. FAILEDMINIONREQUEST always trails a DL fault. So no need to
        // do reset_and_drain here
        //
        chip_device->deferredLinkErrors[link].data.nonFatalIntrMask.liptLnk |= bit;
        _nvswitch_create_deferred_link_errors_task_ls10(device, nvlipt_instance, link);
        nvswitch_clear_flags(&unhandled, bit);
    }

    bit = DRF_NUM(_NVLIPT_LNK, _ERR_STATUS_0, _RESERVEDREQUESTVALUE, 1);
    if (nvswitch_test_flags(pending, bit))
    {
        NVSWITCH_REPORT_NONFATAL(_HW_NVLIPT_LNK_RESERVEDREQUESTVALUE, "_RESERVEDREQUESTVALUE");
        nvswitch_clear_flags(&unhandled, bit);
        {
            error_event.error = INFOROM_NVLINK_NVLIPT_RESERVED_REQUEST_VALUE_NONFATAL;
            nvswitch_inforom_nvlink_log_error_event(device, &error_event);
        }
    }

    bit = DRF_NUM(_NVLIPT_LNK, _ERR_STATUS_0, _LINKSTATEWRITEWHILEBUSY, 1);
    if (nvswitch_test_flags(pending, bit))
    {
        NVSWITCH_REPORT_NONFATAL(_HW_NVLIPT_LNK_LINKSTATEWRITEWHILEBUSY, "_LINKSTATEWRITEWHILEBUSY");
        nvswitch_clear_flags(&unhandled, bit);
        {
            error_event.error = INFOROM_NVLINK_NVLIPT_LINK_STATE_WRITE_WHILE_BUSY_NONFATAL;
            nvswitch_inforom_nvlink_log_error_event(device, &error_event);
        }
    }

    bit = DRF_NUM(_NVLIPT_LNK, _ERR_STATUS_0, _LINK_STATE_REQUEST_TIMEOUT, 1);
    if (nvswitch_test_flags(pending, bit))
    {
        NVSWITCH_REPORT_NONFATAL(_HW_NVLIPT_LNK_LINK_STATE_REQUEST_TIMEOUT, "_LINK_STATE_REQUEST_TIMEOUT");
        nvswitch_clear_flags(&unhandled, bit);
        {
            error_event.error = INFOROM_NVLINK_NVLIPT_LINK_STATE_REQUEST_TIMEOUT_NONFATAL;
            nvswitch_inforom_nvlink_log_error_event(device, &error_event);
        }
    }

    bit = DRF_NUM(_NVLIPT_LNK, _ERR_STATUS_0, _WRITE_TO_LOCKED_SYSTEM_REG_ERR, 1);
    if (nvswitch_test_flags(pending, bit))
    {
        NVSWITCH_REPORT_NONFATAL(_HW_NVLIPT_LNK_WRITE_TO_LOCKED_SYSTEM_REG_ERR, "_WRITE_TO_LOCKED_SYSTEM_REG_ERR");
        nvswitch_clear_flags(&unhandled, bit);
        {
            error_event.error = INFOROM_NVLINK_NVLIPT_WRITE_TO_LOCKED_SYSTEM_REG_NONFATAL;
            nvswitch_inforom_nvlink_log_error_event(device, &error_event);
        }
    }

    NVSWITCH_UNHANDLED_CHECK(device, unhandled);

    // Clear the FIRST latch before acknowledging the status bits (W1C).
    if (report.raw_first & report.mask)
    {
        NVSWITCH_LINK_WR32(device, link, NVLIPT_LNK, _NVLIPT_LNK, _ERR_FIRST_0,
            report.raw_first & report.mask);
    }
    NVSWITCH_LINK_WR32(device, link, NVLIPT_LNK, _NVLIPT_LNK, _ERR_STATUS_0, pending);

    if (unhandled != 0)
    {
        NVSWITCH_PRINT(device, WARN,
            "%s: Unhandled NVLIPT_LNK NON_FATAL interrupts, pending: 0x%x enabled: 0x%x.\n",
            __FUNCTION__, pending, report.raw_enable);
        return -NVL_MORE_PROCESSING_REQUIRED;
    }

    return NVL_SUCCESS;
}

//
// Dispatch NVLIPT link nonfatal servicing for one NVLIPT instance: first
// poll each candidate link's _ERR_STATUS_0 and _INTR_STATUS to build two
// masks, then service status-change interrupts before error interrupts.
//
static NvlStatus
_nvswitch_service_nvlipt_link_nonfatal_ls10
(
    nvswitch_device *device,
    NvU32 instance,
    NvU64 intrLinkMask
)
{
    NvU32 i, intrLink;
    NvU64 localLinkMask, enabledLinkMask, localIntrLinkMask;
    NvU64 interruptingLinks = 0;
    NvU64 lnkStatusChangeLinks = 0;
    NvlStatus status = NVL_SUCCESS;
    NvlStatus retStatus = NVL_SUCCESS;

    //
    // The passed in interruptLinkMask should contain a link that is part of the
    // given nvlipt instance
    //
    localLinkMask = NVSWITCH_NVLIPT_GET_LOCAL_LINK_MASK64_LS10(instance);
    enabledLinkMask = nvswitch_get_enabled_link_mask(device);
    localIntrLinkMask = localLinkMask & intrLinkMask & enabledLinkMask;

    if (localIntrLinkMask == 0)
    {
        NVSWITCH_PRINT(device, ERROR, "%s: Bad link mask provided for link interrupt servicing!\n", __FUNCTION__);
        NVSWITCH_ASSERT(0);
        return -NVL_BAD_ARGS;
    }


    
FOR_EACH_INDEX_IN_MASK(64, i, localIntrLinkMask) 6906 { 6907 if (NVSWITCH_GET_LINK_ENG_INST(device, i, NVLIPT) != instance) 6908 { 6909 NVSWITCH_ASSERT(0); 6910 break; 6911 } 6912 6913 intrLink = NVSWITCH_LINK_RD32(device, i, NVLIPT_LNK, _NVLIPT_LNK, _ERR_STATUS_0); 6914 6915 if(intrLink) 6916 { 6917 interruptingLinks |= NVBIT64(i); 6918 } 6919 6920 intrLink = NVSWITCH_LINK_RD32(device, i, NVLIPT_LNK, _NVLIPT_LNK, _INTR_STATUS); 6921 6922 if(intrLink) 6923 { 6924 lnkStatusChangeLinks |= NVBIT64(i); 6925 } 6926 } 6927 FOR_EACH_INDEX_IN_MASK_END; 6928 6929 6930 FOR_EACH_INDEX_IN_MASK(64, i, lnkStatusChangeLinks) 6931 { 6932 6933 if(_nvswitch_service_nvlipt_lnk_status_ls10(device, instance, i) != NVL_SUCCESS) 6934 { 6935 NVSWITCH_PRINT(device, WARN, "%s: Could not process nvlipt link status interrupt. Continuing. LinkId %d\n", 6936 __FUNCTION__, i); 6937 } 6938 } 6939 FOR_EACH_INDEX_IN_MASK_END; 6940 6941 FOR_EACH_INDEX_IN_MASK(64, i, interruptingLinks) 6942 { 6943 6944 status = _nvswitch_service_nvlipt_lnk_nonfatal_ls10(device, instance, i); 6945 if (status != NVL_SUCCESS && status != -NVL_NOT_FOUND) 6946 { 6947 retStatus = -NVL_MORE_PROCESSING_REQUIRED; 6948 } 6949 } 6950 FOR_EACH_INDEX_IN_MASK_END; 6951 6952 return retStatus; 6953 } 6954 6955 6956 NvlStatus 6957 _nvswitch_service_minion_fatal_ls10 6958 ( 6959 nvswitch_device *device, 6960 NvU32 instance 6961 ) 6962 { 6963 NvU32 pending, bit, unhandled, mask; 6964 6965 pending = NVSWITCH_MINION_RD32_LS10(device, instance, _MINION, _MINION_INTR); 6966 mask = NVSWITCH_MINION_RD32_LS10(device, instance, _MINION, _MINION_INTR_STALL_EN); 6967 6968 // Don't consider MINION Link interrupts in this handler 6969 mask &= ~(DRF_NUM(_MINION, _MINION_INTR_STALL_EN, _LINK, NV_MINION_MINION_INTR_STALL_EN_LINK_ENABLE_ALL)); 6970 6971 pending &= mask; 6972 6973 if (pending == 0) 6974 { 6975 return -NVL_NOT_FOUND; 6976 } 6977 6978 unhandled = pending; 6979 6980 bit = DRF_NUM(_MINION, _MINION_INTR, _FALCON_STALL, 0x1); 6981 if 
 (nvswitch_test_flags(pending, bit))
    {
        if (nvswitch_minion_service_falcon_interrupts_ls10(device, instance) == NVL_SUCCESS)
        {
            nvswitch_clear_flags(&unhandled, bit);
        }
    }

    bit = DRF_NUM(_MINION, _MINION_INTR, _NONFATAL, 0x1);
    if (nvswitch_test_flags(pending, bit))
    {
        NVSWITCH_PRINT(device, ERROR, "%s: servicing minion nonfatal interrupt\n",
            __FUNCTION__);
        NVSWITCH_MINION_WR32_LS10(device, instance, _MINION, _MINION_INTR, bit);
        nvswitch_clear_flags(&unhandled, bit);
    }

    bit = DRF_NUM(_MINION, _MINION_INTR, _FATAL, 0x1);
    if (nvswitch_test_flags(pending, bit))
    {
        NVSWITCH_PRINT(device, ERROR, "%s: servicing minion fatal interrupt\n",
            __FUNCTION__);
        NVSWITCH_MINION_WR32_LS10(device, instance, _MINION, _MINION_INTR, bit);
        nvswitch_clear_flags(&unhandled, bit);
    }

    NVSWITCH_UNHANDLED_CHECK(device, unhandled);

    if (unhandled != 0)
    {
        return -NVL_MORE_PROCESSING_REQUIRED;
    }

    return NVL_SUCCESS;
}

//
// Service non-fatal interrupts for one NVLW instance: read the set of
// interrupting links from _TOP_INTR_1_STATUS and dispatch to the NVLIPT,
// NVLDL and NVLTLC non-fatal handlers.
//
static NvlStatus
_nvswitch_service_nvlw_nonfatal_ls10
(
    nvswitch_device *device,
    NvU32 instance
)
{
    NvlStatus status[3];
    NvU32 reg;
    NvU64 intrLinkMask = 0;

    reg = NVSWITCH_ENG_RD32_LS10(device, NVLW, instance, _NVLW, _TOP_INTR_1_STATUS);
    intrLinkMask = DRF_VAL(_NVLW, _TOP_INTR_1_STATUS, _LINK, reg);

    //
    // Shift the mask of interrupting links from the local to the
    // NVLW instance to a global mask
    //
    intrLinkMask = intrLinkMask << (NVSWITCH_LINKS_PER_NVLW_LS10*instance);

    // If there is no pending link interrupts then there is nothing to service
    if (intrLinkMask == 0)
    {
        return NVL_SUCCESS;
    }

    status[0] = _nvswitch_service_nvlipt_link_nonfatal_ls10(device, instance, intrLinkMask);
    status[1] = _nvswitch_service_nvldl_nonfatal_ls10(device, instance, intrLinkMask);
    status[2] = _nvswitch_service_nvltlc_nonfatal_ls10(device, instance, intrLinkMask);

    // Fail only if every handler failed; -NVL_NOT_FOUND (nothing pending) counts as success
    if ((status[0] != NVL_SUCCESS) && (status[0] != -NVL_NOT_FOUND) &&
        (status[1] != NVL_SUCCESS) && (status[1] != -NVL_NOT_FOUND) &&
        (status[2] != NVL_SUCCESS) && (status[2] != -NVL_NOT_FOUND))
    {
        return -NVL_MORE_PROCESSING_REQUIRED;
    }

    return NVL_SUCCESS;
}

// Forward SOE fatal interrupts to the SOE HAL.
static NvlStatus
_nvswitch_service_soe_fatal_ls10
(
    nvswitch_device *device
)
{
    // We only support 1 SOE as of LS10.
    if (soeService_HAL(device, (PSOE)device->pSoe) != NVL_SUCCESS)
    {
        return -NVL_MORE_PROCESSING_REQUIRED;
    }

    return NVL_SUCCESS;
}

//
// Service fatal NVLIPT interrupts for a single link: report each pending
// error bit, log it to the InfoROM, then clear the first-error latch and
// the status bits.
//
static NvlStatus
_nvswitch_service_nvlipt_lnk_fatal_ls10
(
    nvswitch_device *device,
    NvU32 nvlipt_instance,
    NvU32 link
)
{
    NVSWITCH_INTERRUPT_LOG_TYPE report = { 0 };
    NvU32 pending, bit, unhandled;
    INFOROM_NVLINK_ERROR_EVENT error_event = { 0 };

    report.raw_pending = NVSWITCH_LINK_RD32(device, link, NVLIPT_LNK, _NVLIPT_LNK, _ERR_STATUS_0);
    report.raw_enable = NVSWITCH_LINK_RD32(device, link, NVLIPT_LNK, _NVLIPT_LNK, _ERR_FATAL_REPORT_EN_0);
    report.mask = report.raw_enable;

    pending = report.raw_pending & report.mask;
    if (pending == 0)
    {
        return -NVL_NOT_FOUND;
    }

    error_event.nvliptInstance = (NvU8) nvlipt_instance;
    error_event.localLinkIdx = (NvU8) NVSWITCH_NVLIPT_GET_LOCAL_LINK_ID_LS10(link);

    unhandled = pending;
    report.raw_first = NVSWITCH_LINK_RD32(device, link, NVLIPT_LNK, _NVLIPT_LNK, _ERR_FIRST_0);

    bit = DRF_NUM(_NVLIPT_LNK, _ERR_STATUS_0, _SLEEPWHILEACTIVELINK, 1);
    if (nvswitch_test_flags(pending, bit))
    {
        NVSWITCH_REPORT_FATAL(_HW_NVLIPT_LNK_SLEEPWHILEACTIVELINK, "No non-empty link is detected", NV_FALSE);
        nvswitch_clear_flags(&unhandled, bit);
        {
            error_event.error =
INFOROM_NVLINK_NVLIPT_SLEEP_WHILE_ACTIVE_LINK_FATAL;
            nvswitch_inforom_nvlink_log_error_event(device, &error_event);
        }
    }

    bit = DRF_NUM(_NVLIPT_LNK, _ERR_STATUS_0, _RSTSEQ_PHYCTL_TIMEOUT, 1);
    if (nvswitch_test_flags(pending, bit))
    {
        NVSWITCH_REPORT_FATAL(_HW_NVLIPT_LNK_RSTSEQ_PHYCTL_TIMEOUT, "Reset sequencer timed out waiting for a handshake from PHYCTL", NV_FALSE);
        nvswitch_clear_flags(&unhandled, bit);
        {
            error_event.error = INFOROM_NVLINK_NVLIPT_RSTSEQ_PHYCTL_TIMEOUT_FATAL;
            nvswitch_inforom_nvlink_log_error_event(device, &error_event);
        }
    }

    bit = DRF_NUM(_NVLIPT_LNK, _ERR_STATUS_0, _RSTSEQ_CLKCTL_TIMEOUT, 1);
    if (nvswitch_test_flags(pending, bit))
    {
        NVSWITCH_REPORT_FATAL(_HW_NVLIPT_LNK_RSTSEQ_CLKCTL_TIMEOUT, "Reset sequencer timed out waiting for a handshake from CLKCTL", NV_FALSE);
        nvswitch_clear_flags(&unhandled, bit);
        {
            error_event.error = INFOROM_NVLINK_NVLIPT_RSTSEQ_CLKCTL_TIMEOUT_FATAL;
            nvswitch_inforom_nvlink_log_error_event(device, &error_event);
        }
    }

    NVSWITCH_UNHANDLED_CHECK(device, unhandled);

    // Disable interrupts that have occurred after fatal error.
    if (device->link[link].fatal_error_occurred)
    {
        NVSWITCH_LINK_WR32(device, link, NVLIPT_LNK, _NVLIPT_LNK, _ERR_FATAL_REPORT_EN_0,
                report.raw_enable & ~pending);
    }

    // clear interrupts
    if (report.raw_first & report.mask)
    {
        NVSWITCH_LINK_WR32(device, link, NVLIPT_LNK, _NVLIPT_LNK, _ERR_FIRST_0,
                report.raw_first & report.mask);
    }
    NVSWITCH_LINK_WR32(device, link, NVLIPT_LNK, _NVLIPT_LNK, _ERR_STATUS_0, pending);

    if (unhandled != 0)
    {
        NVSWITCH_PRINT(device, WARN,
                "%s: Unhandled NVLIPT_LNK FATAL interrupts, pending: 0x%x enabled: 0x%x.\n",
                 __FUNCTION__, pending, report.raw_enable);
        return -NVL_MORE_PROCESSING_REQUIRED;
    }

    return NVL_SUCCESS;
}

//
// Service fatal NVLIPT link interrupts for every enabled link of the given
// NVLIPT instance that is present in intrLinkMask.
//
static NvlStatus
_nvswitch_service_nvlipt_link_fatal_ls10
(
    nvswitch_device *device,
    NvU32 instance,
    NvU64 intrLinkMask
)
{
    NvU32 i, intrLink;
    NvU64 localLinkMask, enabledLinkMask, localIntrLinkMask;
    NvlStatus status = NVL_SUCCESS;

    //
    // The passed in interruptLinkMask should contain a link that is part of the
    // given nvlipt instance
    //
    localLinkMask = NVSWITCH_NVLIPT_GET_LOCAL_LINK_MASK64_LS10(instance);
    enabledLinkMask = nvswitch_get_enabled_link_mask(device);
    localIntrLinkMask = localLinkMask & intrLinkMask & enabledLinkMask;

    if (localIntrLinkMask == 0)
    {
        NVSWITCH_PRINT(device, ERROR, "%s: Bad link mask provided for link interrupt servicing!\n", __FUNCTION__);
        NVSWITCH_ASSERT(0);
        return -NVL_BAD_ARGS;
    }

    // read in error status of current link
    FOR_EACH_INDEX_IN_MASK(64, i, localIntrLinkMask)
    {
        intrLink = NVSWITCH_LINK_RD32(device, i, NVLIPT_LNK, _NVLIPT_LNK, _ERR_STATUS_0);
        if (intrLink != 0)
        {
            if( _nvswitch_service_nvlipt_lnk_fatal_ls10(device, instance, i) != NVL_SUCCESS)
            {
                status = -NVL_MORE_PROCESSING_REQUIRED;
            }
        }
    }
    FOR_EACH_INDEX_IN_MASK_END;

    return status;
}

//
// Service fatal interrupts for one NVLW instance: MINION link and falcon
// interrupts, NVLIPT common interrupts, and (only when links are actually
// interrupting) the NVLDL, NVLTLC and NVLIPT per-link fatal handlers.
//
static NvlStatus
_nvswitch_service_nvlw_fatal_ls10
(
    nvswitch_device *device,
    NvU32 instance
)
{
    NvlStatus status[6];
    NvU64 intrLinkMask = 0;
    NvU32 reg;

    reg = NVSWITCH_ENG_RD32_LS10(device, NVLW, instance, _NVLW, _TOP_INTR_0_STATUS);
    intrLinkMask = DRF_VAL(_NVLW, _TOP_INTR_0_STATUS, _LINK, reg);

    //
    // Shift the mask of interrupting links from the local to the
    // NVLW instance to a global mask
    //
    intrLinkMask = intrLinkMask << (NVSWITCH_LINKS_PER_NVLW_LS10*instance);

    status[0] = device->hal.nvswitch_service_minion_link(device, instance);
    status[1] = _nvswitch_service_minion_fatal_ls10(device, instance);
    status[2] = _nvswitch_service_nvlipt_common_fatal_ls10(device, instance);

    //
    // If there is a pending link interrupt on this nvlw instance then service
    // those interrupts in the handlers below. Otherwise, mark the statuses
    // as success as there is nothing to service
    //
    if (intrLinkMask != 0)
    {
        status[3] = _nvswitch_service_nvldl_fatal_ls10(device, instance, intrLinkMask);
        status[4] = _nvswitch_service_nvltlc_fatal_ls10(device, instance, intrLinkMask);
        status[5] = _nvswitch_service_nvlipt_link_fatal_ls10(device, instance, intrLinkMask);
    }
    else
    {
        status[3] = NVL_SUCCESS;
        status[4] = NVL_SUCCESS;
        status[5] = NVL_SUCCESS;
    }

    // Fail only if every handler failed; -NVL_NOT_FOUND (nothing pending) counts as success
    if (status[0] != NVL_SUCCESS && status[0] != -NVL_NOT_FOUND &&
        status[1] != NVL_SUCCESS && status[1] != -NVL_NOT_FOUND &&
        status[2] != NVL_SUCCESS && status[2] != -NVL_NOT_FOUND &&
        status[3] != NVL_SUCCESS && status[3] != -NVL_NOT_FOUND &&
        status[4] != NVL_SUCCESS && status[4] != -NVL_NOT_FOUND &&
        status[5] != NVL_SUCCESS && status[5] != -NVL_NOT_FOUND)
    {
        return -NVL_MORE_PROCESSING_REQUIRED;
    }

    return NVL_SUCCESS;
}

/*
 * @Brief : Enable top level HW interrupts.
7262 * 7263 * @Description : 7264 * 7265 * @param[in] device operate on this device 7266 */ 7267 void 7268 nvswitch_lib_enable_interrupts_ls10 7269 ( 7270 nvswitch_device *device 7271 ) 7272 { 7273 NVSWITCH_ENG_WR32(device, GIN, , 0, _CTRL, _CPU_INTR_LEAF_EN_SET(NV_CTRL_CPU_INTR_NPG_FATAL_IDX), 0xFFFF); 7274 NVSWITCH_ENG_WR32(device, GIN, , 0, _CTRL, _CPU_INTR_LEAF_EN_SET(NV_CTRL_CPU_INTR_NPG_NON_FATAL_IDX), 0xFFFF); 7275 NVSWITCH_ENG_WR32(device, GIN, , 0, _CTRL, _CPU_INTR_LEAF_EN_SET(NV_CTRL_CPU_INTR_NPG_CORRECTABLE_IDX), 0); 7276 7277 NVSWITCH_ENG_WR32(device, GIN, , 0, _CTRL, _CPU_INTR_LEAF_EN_SET(NV_CTRL_CPU_INTR_NVLW_FATAL_IDX), 0xFFFF); 7278 NVSWITCH_ENG_WR32(device, GIN, , 0, _CTRL, _CPU_INTR_LEAF_EN_SET(NV_CTRL_CPU_INTR_NVLW_NON_FATAL_IDX), 0xFFFF); 7279 NVSWITCH_ENG_WR32(device, GIN, , 0, _CTRL, _CPU_INTR_LEAF_EN_SET(NV_CTRL_CPU_INTR_NVLW_CORRECTABLE_IDX), 0); 7280 7281 NVSWITCH_ENG_WR32(device, GIN, , 0, _CTRL, _CPU_INTR_LEAF_EN_SET(NV_CTRL_CPU_INTR_NXBAR_FATAL_IDX), 0x7); 7282 7283 NVSWITCH_ENG_WR32(device, GIN, , 0, _CTRL, _CPU_INTR_LEAF_EN_CLEAR(NV_CTRL_CPU_INTR_UNITS_IDX), 0xFFFFFFFF); 7284 NVSWITCH_ENG_WR32(device, GIN, , 0, _CTRL, _CPU_INTR_LEAF_EN_SET(NV_CTRL_CPU_INTR_UNITS_IDX), 7285 DRF_NUM(_CTRL, _CPU_INTR_UNITS, _PMGR_HOST, 1) | 7286 DRF_NUM(_CTRL, _CPU_INTR_UNITS, _PTIMER, 1) | 7287 DRF_NUM(_CTRL, _CPU_INTR_UNITS, _PTIMER_ALARM, 1) | 7288 DRF_NUM(_CTRL, _CPU_INTR_UNITS, _SEC0_INTR0_0, 1) | 7289 DRF_NUM(_CTRL, _CPU_INTR_UNITS, _XTL_CPU, 1) | 7290 DRF_NUM(_CTRL, _CPU_INTR_UNITS, _XAL_EP, 1) | 7291 DRF_NUM(_CTRL, _CPU_INTR_UNITS, _PRIV_RING, 1)); 7292 7293 NVSWITCH_ENG_WR32(device, GIN, , 0, _CTRL, _CPU_INTR_TOP_EN_SET(0), 0xFFFFFFFF); 7294 } 7295 7296 /* 7297 * @Brief : Disable top level HW interrupts. 
7298 * 7299 * @Description : 7300 * 7301 * @param[in] device operate on this device 7302 */ 7303 void 7304 nvswitch_lib_disable_interrupts_ls10 7305 ( 7306 nvswitch_device *device 7307 ) 7308 { 7309 NVSWITCH_ENG_WR32(device, GIN, , 0, _CTRL, _CPU_INTR_LEAF_EN_CLEAR(NV_CTRL_CPU_INTR_NPG_FATAL_IDX), 0xFFFF); 7310 NVSWITCH_ENG_WR32(device, GIN, , 0, _CTRL, _CPU_INTR_LEAF_EN_CLEAR(NV_CTRL_CPU_INTR_NPG_NON_FATAL_IDX), 0xFFFF); 7311 NVSWITCH_ENG_WR32(device, GIN, , 0, _CTRL, _CPU_INTR_LEAF_EN_CLEAR(NV_CTRL_CPU_INTR_NPG_CORRECTABLE_IDX), 0); 7312 7313 NVSWITCH_ENG_WR32(device, GIN, , 0, _CTRL, _CPU_INTR_LEAF_EN_CLEAR(NV_CTRL_CPU_INTR_NVLW_FATAL_IDX), 0xFFFF); 7314 NVSWITCH_ENG_WR32(device, GIN, , 0, _CTRL, _CPU_INTR_LEAF_EN_CLEAR(NV_CTRL_CPU_INTR_NVLW_NON_FATAL_IDX), 0xFFFF); 7315 NVSWITCH_ENG_WR32(device, GIN, , 0, _CTRL, _CPU_INTR_LEAF_EN_CLEAR(NV_CTRL_CPU_INTR_NVLW_CORRECTABLE_IDX), 0); 7316 7317 NVSWITCH_ENG_WR32(device, GIN, , 0, _CTRL, _CPU_INTR_LEAF_EN_CLEAR(NV_CTRL_CPU_INTR_NXBAR_FATAL_IDX), 0x7); 7318 7319 NVSWITCH_ENG_WR32(device, GIN, , 0, _CTRL, _CPU_INTR_LEAF_EN_CLEAR(NV_CTRL_CPU_INTR_UNITS_IDX), 7320 DRF_NUM(_CTRL, _CPU_INTR_UNITS, _PMGR_HOST, 1) | 7321 DRF_NUM(_CTRL, _CPU_INTR_UNITS, _PTIMER, 1) | 7322 DRF_NUM(_CTRL, _CPU_INTR_UNITS, _PTIMER_ALARM, 1) | 7323 DRF_NUM(_CTRL, _CPU_INTR_UNITS, _SEC0_INTR0_0, 1) | 7324 DRF_NUM(_CTRL, _CPU_INTR_UNITS, _XTL_CPU, 1) | 7325 DRF_NUM(_CTRL, _CPU_INTR_UNITS, _XAL_EP, 1) | 7326 DRF_NUM(_CTRL, _CPU_INTR_UNITS, _PRIV_RING, 1)); 7327 7328 NVSWITCH_ENG_WR32(device, GIN, , 0, _CTRL, _CPU_INTR_TOP_EN_CLEAR(0), 0xFFFFFFFF); 7329 } 7330 7331 // 7332 // Check if there are interrupts pending. 7333 // 7334 // On silicon/emulation we only use MSIs which are not shared, so this 7335 // function does not need to be called. 
//
NvlStatus
nvswitch_lib_check_interrupts_ls10
(
    nvswitch_device *device
)
{
    NvlStatus retval = NVL_SUCCESS;
    NvU32 val;

    // A nonzero TOP value means at least one leaf interrupt is pending
    val = NVSWITCH_ENG_RD32(device, GIN, , 0, _CTRL, _CPU_INTR_TOP(0));
    if (DRF_NUM(_CTRL, _CPU_INTR_TOP, _VALUE, val) != 0)
    {
        retval = -NVL_MORE_PROCESSING_REQUIRED;
    }

    return retval;
}

static void
_nvswitch_retrigger_engine_intr_ls10
(
    nvswitch_device *device
)
{

    // re-trigger engine to gin interrupts for CPR and NPG on the FATAL and NONFATAL trees
    NVSWITCH_BCAST_WR32_LS10(device, CPR, _CPR_SYS, _INTR_RETRIGGER(0), 1);
    NVSWITCH_BCAST_WR32_LS10(device, CPR, _CPR_SYS, _INTR_RETRIGGER(1), 1);

    NVSWITCH_BCAST_WR32_LS10(device, NPG, _NPG, _INTR_RETRIGGER(0), 1);
    NVSWITCH_BCAST_WR32_LS10(device, NPG, _NPG, _INTR_RETRIGGER(1), 1);
}

//
// Service any pending NVLW fatal leaf interrupts across all NVLW instances
// (this is the path through which MINION link interrupts are reached).
//
void
nvswitch_service_minion_all_links_ls10
(
    nvswitch_device *device
)
{
    NvU32 val, i;

    // Check NVLW
    val = NVSWITCH_ENG_RD32(device, GIN, , 0, _CTRL, _CPU_INTR_NVLW_FATAL);
    val = DRF_NUM(_CTRL, _CPU_INTR_NVLW_FATAL, _MASK, val);
    if (val != 0)
    {
        // Clear the leaf before dispatching the per-instance handlers
        NVSWITCH_ENG_WR32(device, GIN, , 0, _CTRL,
            _CPU_INTR_LEAF(NV_CTRL_CPU_INTR_NVLW_FATAL_IDX), val);

        for (i = 0; i < DRF_SIZE(NV_CTRL_CPU_INTR_NVLW_FATAL_MASK); i++)
        {
            if (val & NVBIT(i))
                (void)_nvswitch_service_nvlw_fatal_ls10(device, i);
        }
    }
}

//
// Service interrupt and re-enable interrupts. Interrupts should be disabled when
// this is called.
//
NvlStatus
nvswitch_lib_service_interrupts_ls10
(
    nvswitch_device *device
)
{
    NvlStatus status = NVL_SUCCESS;
    NvlStatus return_status = NVL_SUCCESS;
    NvU32 val;
    NvU32 i;

    //
    // Interrupt handler steps:
    // 1. Read Leaf interrupt
    // 2. Clear leaf interrupt
    // 3. Run leaf specific interrupt handler
    //
    val = NVSWITCH_ENG_RD32(device, GIN, , 0, _CTRL, _CPU_INTR_NVLW_NON_FATAL);
    val = DRF_NUM(_CTRL, _CPU_INTR_NVLW_NON_FATAL, _MASK, val);
    if (val != 0)
    {
        NVSWITCH_PRINT(device, INFO, "%s: NVLW NON_FATAL interrupts pending = 0x%x\n",
            __FUNCTION__, val);
        NVSWITCH_ENG_WR32(device, GIN, , 0, _CTRL, _CPU_INTR_LEAF(NV_CTRL_CPU_INTR_NVLW_NON_FATAL_IDX), val);
        for (i = 0; i < DRF_SIZE(NV_CTRL_CPU_INTR_NVLW_NON_FATAL_MASK); i++)
        {
            if (val & NVBIT(i))
            {
                status = _nvswitch_service_nvlw_nonfatal_ls10(device, i);
                if (status != NVL_SUCCESS)
                {
                    NVSWITCH_PRINT(device, INFO, "%s: NVLW[%d] NON_FATAL interrupt handling status = %d\n",
                        __FUNCTION__, i, status);
                    return_status = status;
                }
            }
        }
    }

    val = NVSWITCH_ENG_RD32(device, GIN, , 0, _CTRL, _CPU_INTR_NVLW_FATAL);
    val = DRF_NUM(_CTRL, _CPU_INTR_NVLW_FATAL, _MASK, val);
    if (val != 0)
    {
        NVSWITCH_PRINT(device, INFO, "%s: NVLW FATAL interrupts pending = 0x%x\n",
            __FUNCTION__, val);

        NVSWITCH_ENG_WR32(device, GIN, , 0, _CTRL, _CPU_INTR_LEAF(NV_CTRL_CPU_INTR_NVLW_FATAL_IDX), val);

        for (i = 0; i < DRF_SIZE(NV_CTRL_CPU_INTR_NVLW_FATAL_MASK); i++)
        {
            if (val & NVBIT(i))
            {
                status = _nvswitch_service_nvlw_fatal_ls10(device, i);
                if (status != NVL_SUCCESS)
                {
                    NVSWITCH_PRINT(device, INFO, "%s: NVLW[%d] FATAL interrupt handling status = %d\n",
                        __FUNCTION__, i, status);
                    return_status = status;
                }
            }
        }
    }

    val = NVSWITCH_ENG_RD32(device, GIN, , 0, _CTRL, _CPU_INTR_NVLW_CORRECTABLE);
    val = DRF_NUM(_CTRL, _CPU_INTR_NVLW_CORRECTABLE, _MASK, val);
    if (val != 0)
    {
        // No correctable handler is wired up; report and flag for reprocessing
        NVSWITCH_PRINT(device, ERROR, "%s: NVLW CORRECTABLE interrupts pending = 0x%x\n",
            __FUNCTION__, val);
        return_status = -NVL_MORE_PROCESSING_REQUIRED;
    }

    // Check NPG
    val =
 NVSWITCH_ENG_RD32(device, GIN, , 0, _CTRL, _CPU_INTR_NPG_FATAL);
    val = DRF_NUM(_CTRL, _CPU_INTR_NPG_FATAL, _MASK, val);
    if (val != 0)
    {
        NVSWITCH_PRINT(device, INFO, "%s: NPG FATAL interrupts pending = 0x%x\n",
            __FUNCTION__, val);
        NVSWITCH_ENG_WR32(device, GIN, , 0, _CTRL, _CPU_INTR_LEAF(NV_CTRL_CPU_INTR_NPG_FATAL_IDX), val);
        for (i = 0; i < DRF_SIZE(NV_CTRL_CPU_INTR_NPG_FATAL_MASK); i++)
        {
            if (val & NVBIT(i))
            {
                status = _nvswitch_service_npg_fatal_ls10(device, i);
                if (status != NVL_SUCCESS)
                {
                    NVSWITCH_PRINT(device, INFO, "%s: NPG[%d] FATAL interrupt handling status = %d\n",
                        __FUNCTION__, i, status);
                    return_status = status;
                }
            }
        }
    }

    val = NVSWITCH_ENG_RD32(device, GIN, , 0, _CTRL, _CPU_INTR_NPG_NON_FATAL);
    val = DRF_NUM(_CTRL, _CPU_INTR_NPG_NON_FATAL, _MASK, val);
    if (val != 0)
    {
        NVSWITCH_PRINT(device, INFO, "%s: NPG NON_FATAL interrupts pending = 0x%x\n",
            __FUNCTION__, val);
        NVSWITCH_ENG_WR32(device, GIN, , 0, _CTRL, _CPU_INTR_LEAF(NV_CTRL_CPU_INTR_NPG_NON_FATAL_IDX), val);
        for (i = 0; i < DRF_SIZE(NV_CTRL_CPU_INTR_NPG_NON_FATAL_MASK); i++)
        {
            if (val & NVBIT(i))
            {
                status = _nvswitch_service_npg_nonfatal_ls10(device, i);
                if (status != NVL_SUCCESS)
                {
                    NVSWITCH_PRINT(device, INFO, "%s: NPG[%d] NON_FATAL interrupt handling status = %d\n",
                        __FUNCTION__, i, status);
                    return_status = status;
                }
            }
        }
    }

    val = NVSWITCH_ENG_RD32(device, GIN, , 0, _CTRL, _CPU_INTR_NPG_CORRECTABLE);
    val = DRF_NUM(_CTRL, _CPU_INTR_NPG_CORRECTABLE, _MASK, val);
    if (val != 0)
    {
        // No correctable handler is wired up; report and flag for reprocessing
        NVSWITCH_PRINT(device, ERROR, "%s: NPG CORRECTABLE interrupts pending = 0x%x\n",
            __FUNCTION__, val);
        return_status = -NVL_MORE_PROCESSING_REQUIRED;
    }

    // Check NXBAR
    val = NVSWITCH_ENG_RD32(device, GIN, , 0, _CTRL, _CPU_INTR_NXBAR_FATAL);
    val = DRF_NUM(_CTRL, _CPU_INTR_NXBAR_FATAL, _MASK, val);
    if (val != 0)
    {
        NVSWITCH_PRINT(device, INFO, "%s: NXBAR FATAL interrupts pending = 0x%x\n",
            __FUNCTION__, val);
        NVSWITCH_ENG_WR32(device, GIN, , 0, _CTRL, _CPU_INTR_LEAF(NV_CTRL_CPU_INTR_NXBAR_FATAL_IDX), val);
        for (i = 0; i < DRF_SIZE(NV_CTRL_CPU_INTR_NXBAR_FATAL_MASK); i++)
        {
            if (val & NVBIT(i))
            {
                status = _nvswitch_service_nxbar_fatal_ls10(device, i);
                if (status != NVL_SUCCESS)
                {
                    NVSWITCH_PRINT(device, INFO, "%s: NXBAR[%d] FATAL interrupt handling status = %d\n",
                        __FUNCTION__, i, status);
                    return_status = status;
                }
            }
        }
    }

    // Check UNITS
    val = NVSWITCH_ENG_RD32(device, GIN, , 0, _CTRL, _CPU_INTR_UNITS);
    if (val != 0)
    {
        NVSWITCH_PRINT(device, MMIO, "%s: UNIT interrupts pending = 0x%x\n",
            __FUNCTION__, val);

        NVSWITCH_ENG_WR32(device, GIN, , 0, _CTRL, _CPU_INTR_LEAF(NV_CTRL_CPU_INTR_UNITS_IDX), val);
        if (FLD_TEST_DRF_NUM(_CTRL, _CPU_INTR_UNITS, _PMGR_HOST, 1, val))
        {
            NVSWITCH_PRINT(device, ERROR, "%s: _PMGR_HOST interrupt pending\n",
                __FUNCTION__);
            return_status = -NVL_MORE_PROCESSING_REQUIRED;
        }
        if (FLD_TEST_DRF_NUM(_CTRL, _CPU_INTR_UNITS, _PTIMER, 1, val))
        {
            NVSWITCH_PRINT(device, ERROR, "%s: _PTIMER interrupt pending\n",
                __FUNCTION__);
            return_status = -NVL_MORE_PROCESSING_REQUIRED;
        }
        if (FLD_TEST_DRF_NUM(_CTRL, _CPU_INTR_UNITS, _PTIMER_ALARM, 1, val))
        {
            NVSWITCH_PRINT(device, ERROR, "%s: _PTIMER_ALARM interrupt pending\n",
                __FUNCTION__);
            return_status = -NVL_MORE_PROCESSING_REQUIRED;
        }
        if (FLD_TEST_DRF_NUM(_CTRL, _CPU_INTR_UNITS, _XTL_CPU, 1, val))
        {
            NVSWITCH_PRINT(device, ERROR, "%s: _XTL_CPU interrupt pending\n",
                __FUNCTION__);
            return_status = -NVL_MORE_PROCESSING_REQUIRED;
        }
        if (FLD_TEST_DRF_NUM(_CTRL, _CPU_INTR_UNITS, _XAL_EP, 1, val))
        {
NVSWITCH_PRINT(device, ERROR, "%s: _XAL_EP interrupt pending\n",
                __FUNCTION__);
            return_status = -NVL_MORE_PROCESSING_REQUIRED;
        }
        if (FLD_TEST_DRF_NUM(_CTRL, _CPU_INTR_UNITS, _PRIV_RING, 1, val))
        {
            status = _nvswitch_service_priv_ring_ls10(device);
            if (status != NVL_SUCCESS)
            {
                NVSWITCH_PRINT(device, ERROR, "%s: Problem handling PRI errors\n",
                    __FUNCTION__);
                return_status = status;
            }
        }
        // Skip SOE servicing on RTL sim / fmodel platforms
        if (!IS_RTLSIM(device) && !IS_FMODEL(device))
        {
            if (FLD_TEST_DRF_NUM(_CTRL, _CPU_INTR_UNITS, _SEC0_INTR0_0, 1, val))
            {
                status = _nvswitch_service_soe_fatal_ls10(device);
                if (status != NVL_SUCCESS)
                {
                    NVSWITCH_PRINT(device, ERROR, "%s: Problem servicing SOE",
                        __FUNCTION__);
                    return_status = status;
                }
            }
        }
    }

    // step 4 -- retrigger engine interrupts
    _nvswitch_retrigger_engine_intr_ls10(device);

    // step 5 -- retrigger top level GIN interrupts
    NVSWITCH_ENG_WR32(device, GIN, , 0, _CTRL, _CPU_INTR_TOP_EN_CLEAR(0), 0xFFFFFFFF);
    NVSWITCH_ENG_WR32(device, GIN, , 0, _CTRL, _CPU_INTR_TOP_EN_SET(0), 0xFFFFFFFF);

    return return_status;
}

/*
 * Initialize interrupt tree HW for all units.
 *
 * Init and servicing both depend on bits matching across STATUS/MASK
 * and IErr STATUS/LOG/REPORT/CONTAIN registers.
 */
void
nvswitch_initialize_interrupt_tree_ls10
(
    nvswitch_device *device
)
{
    NvU64 link_mask = nvswitch_get_enabled_link_mask(device);
    NvU32 i, val;

    // NPG/NPORT
    _nvswitch_initialize_nport_interrupts_ls10(device);

    // NXBAR
    _nvswitch_initialize_nxbar_interrupts_ls10(device);

    // Enable all categories of the per-link NVLW _LINK_INTR_0 tree for enabled links
    FOR_EACH_INDEX_IN_MASK(64, i, link_mask)
    {
        val = NVSWITCH_LINK_RD32(device, i,
                  NVLW, _NVLW, _LINK_INTR_0_MASK(i));
        val = FLD_SET_DRF(_NVLW, _LINK_INTR_0_MASK, _FATAL, _ENABLE, val);
        val = FLD_SET_DRF(_NVLW, _LINK_INTR_0_MASK, _NONFATAL, _ENABLE, val);
        val = FLD_SET_DRF(_NVLW, _LINK_INTR_0_MASK, _CORRECTABLE, _ENABLE, val);
        val = FLD_SET_DRF(_NVLW, _LINK_INTR_0_MASK, _INTR0, _ENABLE, val);
        val = FLD_SET_DRF(_NVLW, _LINK_INTR_0_MASK, _INTR1, _ENABLE, val);
        NVSWITCH_LINK_WR32(device, i, NVLW, _NVLW, _LINK_INTR_0_MASK(i), val);
    }
    FOR_EACH_INDEX_IN_MASK_END;

    // Same for the _LINK_INTR_1 tree
    FOR_EACH_INDEX_IN_MASK(64, i, link_mask)
    {
        val = NVSWITCH_LINK_RD32(device, i,
                  NVLW, _NVLW, _LINK_INTR_1_MASK(i));
        val = FLD_SET_DRF(_NVLW, _LINK_INTR_1_MASK, _FATAL, _ENABLE, val);
        val = FLD_SET_DRF(_NVLW, _LINK_INTR_1_MASK, _NONFATAL, _ENABLE, val);
        val = FLD_SET_DRF(_NVLW, _LINK_INTR_1_MASK, _CORRECTABLE, _ENABLE, val);
        val = FLD_SET_DRF(_NVLW, _LINK_INTR_1_MASK, _INTR0, _ENABLE, val);
        val = FLD_SET_DRF(_NVLW, _LINK_INTR_1_MASK, _INTR1, _ENABLE, val);
        NVSWITCH_LINK_WR32(device, i, NVLW, _NVLW, _LINK_INTR_1_MASK(i), val);
    }
    FOR_EACH_INDEX_IN_MASK_END;

    // NVLIPT
    _nvswitch_initialize_nvlipt_interrupts_ls10(device);
}

//
// Service Nvswitch NVLDL Fatal interrupts
//
NvlStatus
nvswitch_service_nvldl_fatal_link_ls10
(
    nvswitch_device *device,
    NvU32 nvlipt_instance,
    NvU32 link
)
{
    ls10_device *chip_device = NVSWITCH_GET_CHIP_DEVICE_LS10(device);
    NvU32 pending,
 bit, unhandled;
    NvU32 dlDeferredIntrLinkMask = 0;
    NvBool bRequireResetAndDrain = NV_FALSE;

    NVSWITCH_INTERRUPT_LOG_TYPE report = { 0 };
    INFOROM_NVLINK_ERROR_EVENT error_event;

    report.raw_pending = NVSWITCH_LINK_RD32(device, link, NVLDL, _NVLDL_TOP, _INTR);
    report.raw_enable = NVSWITCH_LINK_RD32(device, link, NVLDL, _NVLDL_TOP, _INTR_STALL_EN);
    report.mask = report.raw_enable;
    pending = report.raw_pending & report.mask;

    error_event.nvliptInstance = (NvU8) nvlipt_instance;
    error_event.localLinkIdx = (NvU8) NVSWITCH_NVLIPT_GET_LOCAL_LINK_ID_LS10(link);

    if (pending == 0)
    {
        return -NVL_NOT_FOUND;
    }

    unhandled = pending;

    bit = DRF_NUM(_NVLDL_TOP, _INTR, _TX_FAULT_RAM, 1);
    if (nvswitch_test_flags(pending, bit))
    {
        NVSWITCH_REPORT_FATAL(_HW_DLPL_TX_FAULT_RAM, "TX Fault Ram", NV_FALSE);
        nvswitch_clear_flags(&unhandled, bit);
        error_event.error = INFOROM_NVLINK_DL_TX_FAULT_RAM_FATAL;
        nvswitch_inforom_nvlink_log_error_event(device, &error_event);
    }

    bit = DRF_NUM(_NVLDL_TOP, _INTR, _TX_FAULT_INTERFACE, 1);
    if (nvswitch_test_flags(pending, bit))
    {
        NVSWITCH_REPORT_FATAL(_HW_DLPL_TX_FAULT_INTERFACE, "TX Fault Interface", NV_FALSE);
        nvswitch_clear_flags(&unhandled, bit);
        error_event.error = INFOROM_NVLINK_DL_TX_FAULT_INTERFACE_FATAL;
        nvswitch_inforom_nvlink_log_error_event(device, &error_event);
    }

    bit = DRF_NUM(_NVLDL_TOP, _INTR, _TX_FAULT_SUBLINK_CHANGE, 1);
    if (nvswitch_test_flags(pending, bit))
    {
        NVSWITCH_REPORT_FATAL(_HW_DLPL_TX_FAULT_SUBLINK_CHANGE, "TX Fault Sublink Change", NV_FALSE);
        nvswitch_clear_flags(&unhandled, bit);
        error_event.error = INFOROM_NVLINK_DL_TX_FAULT_SUBLINK_CHANGE_FATAL;
        nvswitch_inforom_nvlink_log_error_event(device, &error_event);
    }

    bit = DRF_NUM(_NVLDL_TOP, _INTR, _RX_FAULT_SUBLINK_CHANGE, 1);
    if (nvswitch_test_flags(pending, bit))
    {
        NVSWITCH_REPORT_FATAL(_HW_DLPL_RX_FAULT_SUBLINK_CHANGE, "RX Fault Sublink Change", NV_FALSE);
        nvswitch_clear_flags(&unhandled, bit);
        error_event.error = INFOROM_NVLINK_DL_RX_FAULT_SUBLINK_CHANGE_FATAL;
        nvswitch_inforom_nvlink_log_error_event(device, &error_event);
    }

    bit = DRF_NUM(_NVLDL_TOP, _INTR, _RX_FAULT_DL_PROTOCOL, 1);
    if (nvswitch_test_flags(pending, bit))
    {
        NVSWITCH_REPORT_FATAL(_HW_DLPL_RX_FAULT_DL_PROTOCOL, "RX Fault DL Protocol", NV_FALSE);
        nvswitch_clear_flags(&unhandled, bit);
        error_event.error = INFOROM_NVLINK_DL_RX_FAULT_DL_PROTOCOL_FATAL;
        nvswitch_inforom_nvlink_log_error_event(device, &error_event);
    }

    bit = DRF_NUM(_NVLDL_TOP, _INTR, _LTSSM_PROTOCOL, 1);
    if (nvswitch_test_flags(pending, bit))
    {
        NVSWITCH_REPORT_FATAL(_HW_DLPL_LTSSM_PROTOCOL, "LTSSM Protocol Error", NV_FALSE);
        nvswitch_clear_flags(&unhandled, bit);

        // TODO 2827793 this should be logged to the InfoROM as fatal
    }

    bit = DRF_NUM(_NVLDL_TOP, _INTR, _PHY_A, 1);
    if (nvswitch_test_flags(pending, bit))
    {
        NVSWITCH_REPORT_FATAL(_HW_DLPL_PHY_A, "PHY_A Error", NV_FALSE);
        nvswitch_clear_flags(&unhandled, bit);
        error_event.error = INFOROM_NVLINK_DL_PHY_A_FATAL;
        nvswitch_inforom_nvlink_log_error_event(device, &error_event);
    }

    bit = DRF_NUM(_NVLDL_TOP, _INTR, _TX_PL_ERROR, 1);
    if (nvswitch_test_flags(pending, bit))
    {
        NVSWITCH_REPORT_FATAL(_HW_DLPL_TX_PL_ERROR, "TX_PL Error", NV_FALSE);
        nvswitch_clear_flags(&unhandled, bit);
        error_event.error = INFOROM_NVLINK_DL_TX_PL_ERROR_FATAL;
        nvswitch_inforom_nvlink_log_error_event(device, &error_event);
    }

    bit = DRF_NUM(_NVLDL_TOP, _INTR, _RX_PL_ERROR, 1);
    if (nvswitch_test_flags(pending, bit))
    {
        NVSWITCH_REPORT_FATAL(_HW_DLPL_RX_PL_ERROR, "RX_PL Error", NV_FALSE);
 nvswitch_clear_flags(&unhandled, bit);
        error_event.error = INFOROM_NVLINK_DL_RX_PL_ERROR_FATAL;
        nvswitch_inforom_nvlink_log_error_event(device, &error_event);
    }

    //
    // Note: LTSSM_FAULT_{UP/DOWN} must be the last interrupt serviced in the NVLDL
    // Fatal tree. The last step of handling this interrupt is going into the
    // reset_and_drain flow for the given link which will shutdown and reset
    // the link. The reset portion will also wipe away any link state including
    // pending DL interrupts. In order to log all error before wiping that state,
    // service all other interrupts before this one
    //
    bit = DRF_NUM(_NVLDL_TOP, _INTR, _LTSSM_FAULT_DOWN, 1);
    if (nvswitch_test_flags(pending, bit))
    {
        nvswitch_record_port_event(device, &(device->log_PORT_EVENTS), link, NVSWITCH_PORT_EVENT_TYPE_DOWN);
        if (nvswitch_lib_notify_client_events(device,
                NVSWITCH_DEVICE_EVENT_PORT_DOWN) != NVL_SUCCESS)
        {
            NVSWITCH_PRINT(device, ERROR, "%s: Failed to notify PORT_DOWN event\n",
                __FUNCTION__);
        }
        dlDeferredIntrLinkMask |= bit;

        //
        // Disable LTSSM FAULT DOWN, NPG, and NVLW interrupts to avoid interrupt storm. The interrupts
        // will be re-enabled in reset and drain
        //
        report.raw_enable = FLD_SET_DRF(_NVLDL_TOP, _INTR_STALL_EN, _LTSSM_FAULT_DOWN, _DISABLE, report.raw_enable);
        NVSWITCH_LINK_WR32(device, link, NVLDL, _NVLDL_TOP, _INTR_STALL_EN, report.raw_enable);
        nvswitch_link_disable_interrupts_ls10(device, link);

        if (device->bModeContinuousALI)
        {
            //
            // Since reset and drain will reset the link, including clearing
            // pending interrupts, skip the clear write below. There are cases
            // where link clocks will not be on after reset and drain so there
            // maybe PRI errors on writing to the register
            //
            // CCI will perform reset and drain
            if (!cciIsLinkManaged(device, link))
            {
                bRequireResetAndDrain = NV_TRUE;
            }
        }
        nvswitch_clear_flags(&unhandled, bit);
    }

    bit = DRF_NUM(_NVLDL_TOP, _INTR, _LTSSM_FAULT_UP, 1);
    if (nvswitch_test_flags(pending, bit))
    {
        nvswitch_record_port_event(device, &(device->log_PORT_EVENTS), link, NVSWITCH_PORT_EVENT_TYPE_DOWN);
        if (nvswitch_lib_notify_client_events(device,
                NVSWITCH_DEVICE_EVENT_PORT_DOWN) != NVL_SUCCESS)
        {
            NVSWITCH_PRINT(device, ERROR, "%s: Failed to notify PORT_DOWN event\n",
                __FUNCTION__);
        }
        dlDeferredIntrLinkMask |= bit;

        //
        // Disable LTSSM FAULT UP, NPG, and NVLW link interrupts to avoid interrupt storm. The interrupts
        // will be re-enabled in reset and drain
        //
        report.raw_enable = FLD_SET_DRF(_NVLDL_TOP, _INTR_STALL_EN, _LTSSM_FAULT_UP, _DISABLE, report.raw_enable);
        NVSWITCH_LINK_WR32(device, link, NVLDL, _NVLDL_TOP, _INTR_STALL_EN, report.raw_enable);
        nvswitch_link_disable_interrupts_ls10(device, link);

        if (device->bModeContinuousALI)
        {
            //
            // Since reset and drain will reset the link, including clearing
            // pending interrupts, skip the clear write below. There are cases
            // where link clocks will not be on after reset and drain so there
            // maybe PRI errors on writing to the register
            //
            // CCI will perform reset and drain
            if (!cciIsLinkManaged(device, link))
            {
                bRequireResetAndDrain = NV_TRUE;
            }
        }
        nvswitch_clear_flags(&unhandled, bit);
    }

    if (bRequireResetAndDrain)
    {
        device->hal.nvswitch_reset_and_drain_links(device, NVBIT64(link), NV_FALSE);
    }

    // Record deferred DL fatal bits and schedule a deferred link-state check
    chip_device->deferredLinkErrors[link].data.fatalIntrMask.dl |= dlDeferredIntrLinkMask;
    if (dlDeferredIntrLinkMask)
    {
        nvswitch_create_deferred_link_state_check_task_ls10(device, nvlipt_instance, link);
    }

    NVSWITCH_UNHANDLED_CHECK(device, unhandled);

    // Disable interrupts that have occurred after fatal error.
    if (device->link[link].fatal_error_occurred)
    {
        NVSWITCH_LINK_WR32(device, link, NVLDL, _NVLDL_TOP, _INTR_STALL_EN,
            report.raw_enable & ~pending);
    }

    // Skip the status clear when reset and drain already wiped link state (see above)
    if (!bRequireResetAndDrain)
    {
        NVSWITCH_LINK_WR32(device, link, NVLDL, _NVLDL_TOP, _INTR, pending);
    }

    if (unhandled != 0)
    {
        NVSWITCH_PRINT(device, WARN,
            "%s: Unhandled NVLDL fatal interrupts, link: %d pending: 0x%x enabled: 0x%x.\n",
             __FUNCTION__, link, pending, report.raw_enable);
        return -NVL_MORE_PROCESSING_REQUIRED;
    }

    return NVL_SUCCESS;
}

NvlStatus
nvswitch_service_minion_link_ls10
(
    nvswitch_device *device,
    NvU32 instance
)
{
    NVSWITCH_INTERRUPT_LOG_TYPE report = { 0 };
    NvU32 pending, unhandled, minionIntr, linkIntr, reg, enabledLinks, bit;
    NvU32 localLinkIdx, link;
    MINION_LINK_INTR minionLinkIntr = { 0 };
    ls10_device *chip_device = NVSWITCH_GET_CHIP_DEVICE_LS10(device);

    //
    // _MINION_MINION_INTR shows all interrupts currently at the host on this minion
    // Note: _MINIO_MINION_INTR is not used to clear link specific interrupts
    //
    minionIntr = NVSWITCH_MINION_RD32_LS10(device, instance, _MINION, _MINION_INTR);

    // get all possible interrupting links associated with this minion
    report.raw_pending = DRF_VAL(_MINION, _MINION_INTR, _LINK, minionIntr);

    // read in the enabled minion interrupts on this minion
    reg = NVSWITCH_MINION_RD32_LS10(device, instance, _MINION, _MINION_INTR_STALL_EN);

    // get the links with enabled interrupts on this minion
    enabledLinks = DRF_VAL(_MINION, _MINION_INTR_STALL_EN, _LINK, reg);

    report.raw_enable = enabledLinks;
    report.mask = report.raw_enable;

    // pending bit field contains interrupting links after being filtered
    pending = report.raw_pending & report.mask;

    if (pending == 0)
    {
        return -NVL_NOT_FOUND;
    }

    unhandled = pending;

    minionLinkIntr.bPending = NV_TRUE;

    FOR_EACH_INDEX_IN_MASK(32, localLinkIdx, pending)
    {
        link = (instance * NVSWITCH_LINKS_PER_NVLIPT_LS10) + localLinkIdx;
        bit = NVBIT(localLinkIdx);

        // read in the interrupt register for the given link
        linkIntr = NVSWITCH_MINION_LINK_RD32_LS10(device, link, _MINION, _NVLINK_LINK_INTR(localLinkIdx));
        minionLinkIntr.regData = linkIntr;

        // _STATE must be set for _CODE to be valid
        if (!DRF_VAL(_MINION, _NVLINK_LINK_INTR, _STATE, linkIntr))
        {
            continue;
        }

        NVSWITCH_PRINT(device, INFO,
            "%s: link[%d] {%d, %d} linkIntr = 0x%x\n",
             __FUNCTION__, link, instance, localLinkIdx, linkIntr);

        //
        // _MINION_INTR_LINK is a read-only register field for the host
        // Host must write 1 to _NVLINK_LINK_INTR_STATE to clear the interrupt on the link
        //
        reg = DRF_NUM(_MINION, _NVLINK_LINK_INTR, _STATE, 1);
        NVSWITCH_MINION_WR32_LS10(device, instance, _MINION, _NVLINK_LINK_INTR(localLinkIdx), reg);

        report.data[0] = linkIntr;

        switch(DRF_VAL(_MINION, _NVLINK_LINK_INTR, _CODE, linkIntr))
        {
            // Fatal link error codes: defer for the deferred-link-errors task
            case NV_MINION_NVLINK_LINK_INTR_CODE_NA:
            case NV_MINION_NVLINK_LINK_INTR_CODE_DLCMDFAULT:
            case NV_MINION_NVLINK_LINK_INTR_CODE_LOCAL_CONFIG_ERR:
            case NV_MINION_NVLINK_LINK_INTR_CODE_NEGOTIATION_CONFIG_ERR:
            case NV_MINION_NVLINK_LINK_INTR_CODE_BADINIT:
            case NV_MINION_NVLINK_LINK_INTR_CODE_PMFAIL:
            case NV_MINION_NVLINK_LINK_INTR_CODE_NOINIT:
                chip_device->deferredLinkErrors[link].data.fatalIntrMask.minionLinkIntr =
                    minionLinkIntr;
                _nvswitch_create_deferred_link_errors_task_ls10(device, instance, link);
                break;
            case NV_MINION_NVLINK_LINK_INTR_CODE_SWREQ:
                NVSWITCH_PRINT(device, INFO,
                      "%s: Received MINION Link SW Generate interrupt on MINION %d : link %d.\n",
                      __FUNCTION__, instance, link);
                break;
            // Non-fatal link error codes: defer for the deferred-link-errors task
            case NV_MINION_NVLINK_LINK_INTR_CODE_DLREQ:
            case NV_MINION_NVLINK_LINK_INTR_CODE_PMDISABLED:
            case NV_MINION_NVLINK_LINK_INTR_CODE_TLREQ:
                chip_device->deferredLinkErrors[link].data.nonFatalIntrMask.minionLinkIntr =
                    minionLinkIntr;
                _nvswitch_create_deferred_link_errors_task_ls10(device, instance, link);
            // NOTE(review): no break above — DLREQ/PMDISABLED/TLREQ fall through
            // into the NOTIFY print below; confirm this fallthrough is intentional
            case NV_MINION_NVLINK_LINK_INTR_CODE_NOTIFY:
                NVSWITCH_PRINT(device, INFO,
                      "%s: Received MINION NOTIFY interrupt on MINION %d : link %d.\n",
                      __FUNCTION__, instance, link);
                break;
            case NV_MINION_NVLINK_LINK_INTR_CODE_INBAND_BUFFER_AVAILABLE:
            {
                NVSWITCH_PRINT(device, INFO,
                    "Received INBAND_BUFFER_AVAILABLE interrupt on MINION %d,\n", instance);
                nvswitch_minion_receive_inband_data_ls10(device, link);
                break;
            }
            default:
                NVSWITCH_REPORT_FATAL(_HW_MINION_FATAL_LINK_INTR, "Minion Interrupt code unknown", NV_FALSE);
        }
        nvswitch_clear_flags(&unhandled, bit);
    }
    FOR_EACH_INDEX_IN_MASK_END;

    NVSWITCH_UNHANDLED_CHECK(device, unhandled);

    if (unhandled != 0)
    {
        return -NVL_MORE_PROCESSING_REQUIRED;
    }

    return
NVL_SUCCESS; 8026 } 8027