1 /* 2 * SPDX-FileCopyrightText: Copyright (c) 2017-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 3 * SPDX-License-Identifier: MIT 4 * 5 * Permission is hereby granted, free of charge, to any person obtaining a 6 * copy of this software and associated documentation files (the "Software"), 7 * to deal in the Software without restriction, including without limitation 8 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 9 * and/or sell copies of the Software, and to permit persons to whom the 10 * Software is furnished to do so, subject to the following conditions: 11 * 12 * The above copyright notice and this permission notice shall be included in 13 * all copies or substantial portions of the Software. 14 * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 21 * DEALINGS IN THE SOFTWARE. 22 */ 23 24 #include "core/core.h" 25 #include "kernel/gpu/intr/intr.h" 26 #include "gpu/gpu.h" 27 #include "kernel/gpu/intr/engine_idx.h" 28 #include "gpu/bif/kernel_bif.h" 29 #include "objtmr.h" 30 #include "gpu/uvm/uvm.h" 31 #include "os/os.h" 32 #include "vgpu/vgpu_events.h" 33 #include "vgpu/rpc.h" 34 #include "gpu/mmu/kern_gmmu.h" 35 #include "libraries/nvport/nvport.h" 36 #include "gpu/disp/kern_disp.h" 37 38 #include "published/turing/tu102/dev_ctrl.h" 39 #include "published/turing/tu102/dev_vm.h" 40 #include "published/turing/tu102/dev_vm_addendum.h" 41 /*! 42 * @brief Get the base interrupt vector to use when indexing engine nonstall 43 * interrupts 44 * 45 * @param[in] pGpu OBJGPU pointer 46 * @param[in] pIntr Intr pointer 47 * 48 * @returns the base interrupt vector for engine nonstall interrupts 49 */ 50 NvU32 51 intrGetNonStallBaseVector_TU102 52 ( 53 OBJGPU *pGpu, 54 Intr *pIntr 55 ) 56 { 57 NvU32 base = 0; 58 59 if (!IS_VIRTUAL(pGpu)) 60 { 61 base = GPU_REG_RD32(pGpu, NV_CTRL_LEGACY_ENGINE_NONSTALL_INTR_BASE_VECTORID); 62 } 63 else 64 { 65 NV_STATUS status = NV_OK; 66 NV_RM_RPC_VGPU_PF_REG_READ32(pGpu, NV_CTRL_LEGACY_ENGINE_NONSTALL_INTR_BASE_VECTORID, &base, status); 67 } 68 return base; 69 } 70 71 // 72 // Static interface functions 73 // 74 static NvU32 _intrGetUvmLeafMask_TU102(OBJGPU *, Intr *); 75 static void _intrEnableStall_TU102(OBJGPU *, Intr *, THREAD_STATE_NODE *pThreadState); 76 static void _intrDisableStall_TU102(OBJGPU *, Intr *, THREAD_STATE_NODE *pThreadState); 77 static void _intrClearLeafEnables_TU102(OBJGPU *pGpu, Intr *pIntr); 78 79 // Compile time asserts to make sure we don't write beyond the leaf register array 80 81 ct_assert(NV_CPU_INTR_STALL_SUBTREE_START < NV_VIRTUAL_FUNCTION_PRIV_CPU_INTR_LEAF__SIZE_1); 82 ct_assert(NV_CPU_INTR_STALL_SUBTREE_LAST < NV_VIRTUAL_FUNCTION_PRIV_CPU_INTR_LEAF__SIZE_1); 83 ct_assert(NV_CPU_INTR_STALL_SUBTREE_START < NV_VIRTUAL_FUNCTION_PRIV_CPU_INTR_LEAF_EN_SET__SIZE_1); 84 ct_assert(NV_CPU_INTR_STALL_SUBTREE_LAST < NV_VIRTUAL_FUNCTION_PRIV_CPU_INTR_LEAF_EN_SET__SIZE_1); 85 ct_assert(NV_CPU_INTR_STALL_SUBTREE_START < NV_VIRTUAL_FUNCTION_PRIV_CPU_INTR_LEAF_EN_CLEAR__SIZE_1); 86 ct_assert(NV_CPU_INTR_STALL_SUBTREE_LAST < NV_VIRTUAL_FUNCTION_PRIV_CPU_INTR_LEAF_EN_CLEAR__SIZE_1); 87 88 ct_assert(NV_VIRTUAL_FUNCTION_PRIV_CPU_INTR_LEAF__SIZE_1 == NV_VIRTUAL_FUNCTION_PRIV_CPU_INTR_LEAF_EN_SET__SIZE_1); 89 ct_assert(NV_VIRTUAL_FUNCTION_PRIV_CPU_INTR_LEAF__SIZE_1 == NV_VIRTUAL_FUNCTION_PRIV_CPU_INTR_LEAF_EN_CLEAR__SIZE_1); 90 91 // 92 // Few design issues and intentions stated upfront: 93 // Q: Why are interrupts being enabled/ disabled here instead of their respective HALs? 94 // A: The intent is to keep the "alternate tree" (nonstall tree) invisible from the rest of RM. 95 // 96 // Q: Then how does a HAL register its interrupts on this alternate tree? 97 // A: It does not. The alternate tree is an aberration of nature meant to service **non stall interrupts** 98 // without using locking. 99 // 100 // Q: If the alternate tree does not respect locks taken by osAcquireRmSema then how do we prevent 101 // race conditions? 102 // A: We dont!! The plan here is to *manually* inspect every piece of code that gets executed on the ISR/DPC 103 // for this tree and make sure concurrent actions from elsewhere do not lead us in an inconsistent state. 104 // In future before adding code to this tree, **carefully inspect it yourself**. 105 // 106 // A final note, if and when RM gets fine grained locks in the main interrupt tree, it might be worthwhile 107 // getting rid of this. More code is more complexity!! 108 // 109 NV_STATUS 110 intrStateLoad_TU102 111 ( 112 OBJGPU *pGpu, 113 Intr *pIntr, 114 NvU32 flags 115 ) 116 { 117 NV_STATUS status = NV_OK; 118 InterruptTable *pIntrTable; 119 InterruptTableIter iter; 120 121 NV_ASSERT_OK_OR_RETURN(intrGetInterruptTable_HAL(pGpu, pIntr, &pIntrTable)); 122 123 // 124 // Make sure all leaf nodes are disabled before we enable them. Older drivers 125 // and versions of mods leave them enabled. Bug 3299004. 126 // 127 _intrClearLeafEnables_TU102(pGpu, pIntr); 128 129 // 130 // Enable interrupts either in legacy NV_PMC_INTR tree or new NV_CTRL tree 131 // as per the MC interrupt vector table. 132 // 133 // We have to make an exception for the TMR engine though, since for now, 134 // it reports into both PMC and dev_ctrl. We need the PTIMER alarm in 135 // PMC, which is the only place where it reports, and we need it in 136 // dev_ctrl for the countdown/callback timer, which we use in the PF 137 // and all the VFs 138 // 139 pGpu->pmcRmOwnsIntrMask = INTERRUPT_MASK_DISABLED; 140 for (iter = vectIterAll(pIntrTable); vectIterNext(&iter);) 141 { 142 INTR_TABLE_ENTRY *pEntry = iter.pValue; 143 if (pEntry->pmcIntrMask != NV_PMC_INTR_INVALID_MASK) 144 { 145 pGpu->pmcRmOwnsIntrMask |= pEntry->pmcIntrMask; 146 147 if (pEntry->mcEngine != MC_ENGINE_IDX_TMR) 148 continue; 149 } 150 151 if (pEntry->intrVector != NV_INTR_VECTOR_INVALID) 152 { 153 intrEnableLeaf_HAL(pGpu, pIntr, pEntry->intrVector); 154 } 155 156 if ((pEntry->intrVectorNonStall != NV_INTR_VECTOR_INVALID) 157 ) 158 { 159 intrEnableLeaf_HAL(pGpu, pIntr, pEntry->intrVectorNonStall); 160 } 161 } 162 163 status = intrCacheIntrFields_HAL(pGpu, pIntr); 164 if (status != NV_OK) 165 { 166 goto exit; 167 } 168 169 exit: 170 if (pIntr->getProperty(pIntr, PDB_PROP_INTR_ENABLE_DETAILED_LOGS)) 171 { 172 intrDumpState_HAL(pGpu, pIntr); 173 } 174 175 return status; 176 } 177 178 NV_STATUS 179 intrStateUnload_TU102 180 ( 181 OBJGPU *pGpu, 182 Intr *pIntr, 183 NvU32 flags 184 ) 185 { 186 // Disable all interrupts since we're unloading 187 188 intrWriteRegTopEnClear_HAL(pGpu, pIntr, 0, 0xFFFFFFFF, NULL); 189 intrWriteRegTopEnClear_HAL(pGpu, pIntr, 1, 0xFFFFFFFF, NULL); 190 191 _intrClearLeafEnables_TU102(pGpu, pIntr); 192 193 return NV_OK; 194 } 195 196 /*! 197 * @brief Cache few Intr fields for ease of use in interrupt or RPC context. 198 * 199 * @param[in] pGpu OBJGPU pointer 200 * @param[in] pIntr Intr pointer 201 */ 202 NV_STATUS 203 intrCacheIntrFields_TU102 204 ( 205 OBJGPU *pGpu, 206 Intr *pIntr 207 ) 208 { 209 NV_STATUS status = NV_OK; 210 211 { 212 NV2080_INTR_CATEGORY_SUBTREE_MAP uvmShared; 213 NV_ASSERT_OK_OR_RETURN( 214 intrGetSubtreeRange(pIntr, 215 NV2080_INTR_CATEGORY_UVM_SHARED, 216 &uvmShared)); 217 // 218 // Assert to make sure we have only one client shared subtree. 219 // The below code assumes that. 220 // 221 NV_ASSERT_OR_RETURN(uvmShared.subtreeStart == uvmShared.subtreeEnd, 222 NV_ERR_INVALID_STATE); 223 224 // Now cache the leaf enable mask for the subtree shared with the client 225 NvU32 leafEnHi = intrReadRegLeafEnSet_HAL(pGpu, pIntr, 226 NV_CTRL_INTR_SUBTREE_TO_LEAF_IDX_START(uvmShared.subtreeStart), 227 NULL); 228 NvU32 leafEnLo = intrReadRegLeafEnSet_HAL(pGpu, pIntr, 229 NV_CTRL_INTR_SUBTREE_TO_LEAF_IDX_END(uvmShared.subtreeStart), 230 NULL); 231 232 pIntr->uvmSharedCpuLeafEn = ((NvU64)(leafEnHi) << 32) | leafEnLo; 233 pIntr->uvmSharedCpuLeafEnDisableMask = 234 intrGetUvmSharedLeafEnDisableMask_HAL(pGpu, pIntr); 235 } 236 237 // 238 // Cache the CPU_INTR_TOP_EN mask to clear when disabling stall 239 // interrupts (other interrupts are either not disabled or disabled 240 // selectively at leaf level) 241 // 242 pIntr->intrTopEnMask |= intrGetIntrTopLockedMask(pGpu, pIntr); 243 244 OBJDISP *pDisp = GPU_GET_DISP(pGpu); 245 246 // Cache client owned, shared interrupt, and display vectors for ease of use later 247 pIntr->accessCntrIntrVector = intrGetVectorFromEngineId(pGpu, pIntr, MC_ENGINE_IDX_ACCESS_CNTR, NV_FALSE); 248 if (!gpuIsCCFeatureEnabled(pGpu) || !gpuIsGspOwnedFaultBuffersEnabled(pGpu)) 249 { 250 pIntr->replayableFaultIntrVector = intrGetVectorFromEngineId(pGpu, pIntr, MC_ENGINE_IDX_REPLAYABLE_FAULT, NV_FALSE); 251 } 252 else 253 { 254 pIntr->replayableFaultIntrVector = intrGetVectorFromEngineId(pGpu, pIntr, MC_ENGINE_IDX_REPLAYABLE_FAULT_CPU, NV_FALSE); 255 } 256 if (pDisp != NULL) 257 { 258 pIntr->displayIntrVector = intrGetVectorFromEngineId(pGpu, pIntr, MC_ENGINE_IDX_DISP, NV_FALSE); 259 } 260 else 261 { 262 pIntr->displayIntrVector = NV_INTR_VECTOR_INVALID; 263 } 264 265 // 266 // Ensure that both UVM vectors are in the same leaf register (check right 267 // now so we don't have to check later in latency critical paths where this 268 // is assumed to be true) 269 // 270 if (pIntr->replayableFaultIntrVector != NV_INTR_VECTOR_INVALID && pIntr->accessCntrIntrVector != NV_INTR_VECTOR_INVALID) 271 { 272 if (NV_CTRL_INTR_GPU_VECTOR_TO_LEAF_REG(pIntr->replayableFaultIntrVector) != NV_CTRL_INTR_GPU_VECTOR_TO_LEAF_REG(pIntr->accessCntrIntrVector)) 273 { 274 NV_PRINTF(LEVEL_ERROR, "UVM interrupt vectors for replayable fault 0x%x " 275 "and access counter 0x%x are in different CPU_INTR_LEAF registers\n", 276 pIntr->replayableFaultIntrVector, pIntr->accessCntrIntrVector); 277 DBG_BREAKPOINT(); 278 status = NV_ERR_GENERIC; 279 goto exit; 280 } 281 } 282 283 { 284 // 285 // Now ensure that they're in the expected subtree (check right now so 286 // we don't have to check later in latency critical paths where this is 287 // assumed to be true) 288 // 289 NV2080_INTR_CATEGORY_SUBTREE_MAP uvmOwned; 290 NvU32 accessCntrSubtree = NV_CTRL_INTR_GPU_VECTOR_TO_SUBTREE( 291 pIntr->accessCntrIntrVector); 292 NV_ASSERT_OK_OR_RETURN( 293 intrGetSubtreeRange(pIntr, 294 NV2080_INTR_CATEGORY_UVM_OWNED, 295 &uvmOwned)); 296 if (!(uvmOwned.subtreeStart <= accessCntrSubtree && 297 accessCntrSubtree <= uvmOwned.subtreeEnd)) 298 { 299 NV_PRINTF(LEVEL_ERROR, 300 "UVM owned interrupt vector for access counter is in an unexpected subtree\n" 301 "Expected range = [0x%x, 0x%x], actual = 0x%x\n", 302 uvmOwned.subtreeStart, uvmOwned.subtreeEnd, accessCntrSubtree); 303 DBG_BREAKPOINT(); 304 status = NV_ERR_GENERIC; 305 goto exit; 306 } 307 } 308 309 exit: 310 311 return status; 312 } 313 314 /*! 315 * @brief Get the base interrupt vector to use when indexing engine stall 316 * interrupts 317 * 318 * @param[in] pGpu OBJGPU pointer 319 * @param[in] pIntr Intr pointer 320 * 321 * @returns the base interrupt vector for engine stall interrupts 322 */ 323 NvU32 324 intrGetStallBaseVector_TU102 325 ( 326 OBJGPU *pGpu, 327 Intr *pIntr 328 ) 329 { 330 NvU32 base = GPU_REG_RD32(pGpu, NV_CTRL_LEGACY_ENGINE_STALL_INTR_BASE_VECTORID); 331 return base; 332 } 333 334 /*! 335 * @brief Enable a given interrupt vector in dev_ctrl at leaf level 336 * 337 * @param[in] pGpu OBJGPU pointer 338 * @param[in] pIntr Intr pointer 339 * @param[in] intrVector nonstall interrupt vector to enable 340 */ 341 void 342 intrEnableLeaf_TU102 343 ( 344 OBJGPU *pGpu, 345 Intr *pIntr, 346 NvU32 intrVector 347 ) 348 { 349 NvU32 reg = NV_CTRL_INTR_GPU_VECTOR_TO_LEAF_REG(intrVector); 350 NvU32 leafBit = NV_CTRL_INTR_GPU_VECTOR_TO_LEAF_BIT(intrVector); 351 NvU32 intrLeafEnSetSize = intrGetLeafSize_HAL(pGpu, pIntr); 352 353 if (reg >= intrLeafEnSetSize) 354 { 355 NV_PRINTF(LEVEL_ERROR, "Exceeding the range of INTR leaf registers. " 356 "intrVector = 0x%x, Reg = 0x%x\n", intrVector, reg); 357 NV_ASSERT(0); 358 return; 359 } 360 361 intrWriteRegLeafEnSet_HAL(pGpu, pIntr, reg, NVBIT(leafBit), NULL); 362 } 363 364 /*! 365 * @brief Disable a given interrupt vector in dev_ctrl at leaf level 366 * 367 * @param[in] pGpu OBJGPU pointer 368 * @param[in] pIntr Intr pointer 369 * @param[in] intrVector nonstall interrupt vector to enable 370 */ 371 void 372 intrDisableLeaf_TU102 373 ( 374 OBJGPU *pGpu, 375 Intr *pIntr, 376 NvU32 intrVector 377 ) 378 { 379 NvU32 reg = NV_CTRL_INTR_GPU_VECTOR_TO_LEAF_REG(intrVector); 380 NvU32 leafBit = NV_CTRL_INTR_GPU_VECTOR_TO_LEAF_BIT(intrVector); 381 NvU32 intrLeafEnClearSize = intrGetLeafSize_HAL(pGpu, pIntr); 382 383 if (reg >= intrLeafEnClearSize) 384 { 385 NV_PRINTF(LEVEL_ERROR, "Exceeding the range of INTR leaf registers. " 386 "intrVector = 0x%x, Reg = 0x%x\n", intrVector, reg); 387 NV_ASSERT(0); 388 return; 389 } 390 391 intrWriteRegLeafEnClear_HAL(pGpu, pIntr, reg, NVBIT(leafBit), NULL); 392 } 393 394 /*! 395 * @brief Disable/Enable stall interrupts in dev_ctrl 396 * 397 * @param[in] pGpu OBJGPU pointer 398 * @param[in] pIntr Intr pointer 399 * @param[in] pThreadState thread state node pointer 400 */ 401 void 402 intrSetStall_TU102 403 ( 404 OBJGPU *pGpu, 405 Intr *pIntr, 406 NvU32 intrType, 407 THREAD_STATE_NODE *pThreadState 408 ) 409 { 410 // dev_ctrl tree is not used for legacy-vGPU 411 if (IS_VIRTUAL_WITHOUT_SRIOV(pGpu)) 412 { 413 return; 414 } 415 416 if (intrType == INTERRUPT_TYPE_DISABLED) 417 { 418 _intrDisableStall_TU102(pGpu, pIntr, pThreadState); 419 } 420 else 421 { 422 _intrEnableStall_TU102(pGpu, pIntr, pThreadState); 423 } 424 } 425 426 /*! 427 * @brief Clear all interrupt leaf nodes 428 * 429 * @param[in] pGpu OBJGPU pointer 430 * @param[in] pIntr Intr pointer 431 */ 432 static void _intrClearLeafEnables_TU102 433 ( 434 OBJGPU *pGpu, 435 Intr *pIntr 436 ) 437 { 438 NvU32 i; 439 NvU32 intrLeafSize = intrGetLeafSize_HAL(pGpu, pIntr); 440 441 for (i = 0; i < intrLeafSize; i++) 442 { 443 intrWriteRegLeafEnClear_HAL(pGpu, pIntr, i, 0xFFFFFFFF, NULL); 444 } 445 } 446 447 /*! 448 * @brief Enable all stall interrupts in dev_ctrl 449 * 450 * @param[in] pGpu OBJGPU pointer 451 * @param[in] pIntr Intr pointer 452 * @param[in] pThreadState thread state node pointer 453 */ 454 static void 455 _intrEnableStall_TU102 456 ( 457 OBJGPU *pGpu, 458 Intr *pIntr, 459 THREAD_STATE_NODE *pThreadState 460 ) 461 { 462 NvU32 idx; 463 464 // 465 // 1. Enable the UVM interrupts that RM currently owns at INTR_LEAF 466 // level. 467 // 468 NvU32 val = _intrGetUvmLeafMask_TU102(pGpu, pIntr); 469 idx = NV_CTRL_INTR_GPU_VECTOR_TO_LEAF_REG(pIntr->accessCntrIntrVector); 470 if (val != 0) 471 { 472 intrWriteRegLeafEnSet_HAL(pGpu, pIntr, idx, val, pThreadState); 473 } 474 475 // 476 // 2. Enable all interrupts in the client shared subtree at INTR_LEAF 477 // level, based on the cached value. 478 // 479 480 { 481 NV2080_INTR_CATEGORY_SUBTREE_MAP uvmShared; 482 NV_ASSERT_OK(intrGetSubtreeRange(pIntr, 483 NV2080_INTR_CATEGORY_UVM_SHARED, 484 &uvmShared)); 485 // 486 // Assert to make sure we have only one client shared subtree. 487 // The below code assumes that. 488 // 489 NV_ASSERT(uvmShared.subtreeStart == uvmShared.subtreeEnd); 490 idx = uvmShared.subtreeStart; 491 } 492 493 if (NvU64_HI32(pIntr->uvmSharedCpuLeafEn) != 0) 494 { 495 intrWriteRegLeafEnSet_HAL(pGpu, pIntr, 496 NV_CTRL_INTR_SUBTREE_TO_LEAF_IDX_START(idx), 497 NvU64_HI32(pIntr->uvmSharedCpuLeafEn), 498 pThreadState); 499 } 500 if (NvU64_LO32(pIntr->uvmSharedCpuLeafEn) != 0) 501 { 502 intrWriteRegLeafEnSet_HAL(pGpu, pIntr, 503 NV_CTRL_INTR_SUBTREE_TO_LEAF_IDX_END(idx), 504 NvU64_LO32(pIntr->uvmSharedCpuLeafEn), 505 pThreadState); 506 } 507 508 // We use the assumption that 1 == ENABLE below 509 ct_assert(NV_VIRTUAL_FUNCTION_PRIV_CPU_INTR_TOP_EN_SET_SUBTREE_ENABLE == 1); 510 511 { 512 // 513 // 3. Enable all interrupt subtrees (except nonstall) at top level. 514 // Nonstall enablement is handled by a different function. 515 // 516 NvU64 mask = NV_U64_MAX; 517 518 mask &= ~intrGetIntrTopNonStallMask_HAL(pGpu, pIntr); 519 520 if (NvU64_LO32(mask) != 0) 521 { 522 intrWriteRegTopEnSet_HAL(pGpu, pIntr, 523 0, 524 NvU64_LO32(mask), 525 pThreadState); 526 } 527 if (NvU64_HI32(mask) != 0) 528 { 529 intrWriteRegTopEnSet_HAL(pGpu, pIntr, 530 1, 531 NvU64_HI32(mask), 532 pThreadState); 533 } 534 } 535 } 536 537 /*! 538 * @brief Disable all stall interrupts in dev_ctrl 539 * 540 * @param[in] pGpu OBJGPU pointer 541 * @param[in] pIntr Intr pointer 542 * @param[in] pThreadState thread state node pointer 543 */ 544 static void 545 _intrDisableStall_TU102 546 ( 547 OBJGPU *pGpu, 548 Intr *pIntr, 549 THREAD_STATE_NODE *pThreadState 550 ) 551 { 552 NvU32 idx; 553 554 NvU32 val; 555 556 // 1. Disable the UVM interrupts that RM currently owns at INTR_LEAF level 557 idx = NV_CTRL_INTR_GPU_VECTOR_TO_LEAF_REG(pIntr->accessCntrIntrVector); 558 val = _intrGetUvmLeafMask_TU102(pGpu, pIntr); 559 if (val != 0) 560 { 561 intrWriteRegLeafEnClear_HAL(pGpu, pIntr, idx, val, pThreadState); 562 } 563 564 // 565 // 2. Disable all interrupts in the client shared subtree at INTR_LEAF 566 // level, except the ones that can be handled outside the GPU lock. 567 // 568 569 { 570 NV2080_INTR_CATEGORY_SUBTREE_MAP uvmShared; 571 NV_ASSERT_OK(intrGetSubtreeRange(pIntr, 572 NV2080_INTR_CATEGORY_UVM_SHARED, 573 &uvmShared)); 574 // 575 // Assert to make sure we have only one client shared subtree. 576 // The below code assumes that. 577 // 578 NV_ASSERT(uvmShared.subtreeStart == uvmShared.subtreeEnd); 579 idx = uvmShared.subtreeStart; 580 } 581 582 if (!gpuIsStateLoaded(pGpu)) 583 { 584 // 585 // If GPU state load has not finished, there is nothing we can or want to 586 // do here, since our cached state of interrupt vectors isn't valid yet 587 // anyway. 588 // 589 intrWriteRegLeafEnClear_HAL(pGpu, pIntr, 590 NV_CTRL_INTR_SUBTREE_TO_LEAF_IDX_START(idx), 591 0xFFFFFFFF, pThreadState); 592 intrWriteRegLeafEnClear_HAL(pGpu, pIntr, 593 NV_CTRL_INTR_SUBTREE_TO_LEAF_IDX_END(idx), 594 0xFFFFFFFF, pThreadState); 595 } 596 else 597 { 598 if ((NvU64_HI32(pIntr->uvmSharedCpuLeafEnDisableMask) != 0) && 599 (NvU64_HI32(pIntr->uvmSharedCpuLeafEn) != 0)) 600 { 601 // 602 // Only write to the register is both the enable mask and the 603 // disable mask is non-zero. If there are no interrupts we're 604 // interested in handling in one of the leafs, the enable mask will 605 // be zero and the disable mask will be all 0xFs. There's no point 606 // writing the register in that case since interrupts are already 607 // not enabled. Using the cached value helps us avoid a register 608 // read in latency critical paths. 609 // 610 intrWriteRegLeafEnClear_HAL(pGpu, pIntr, 611 NV_CTRL_INTR_SUBTREE_TO_LEAF_IDX_START(idx), 612 NvU64_HI32(pIntr->uvmSharedCpuLeafEnDisableMask), 613 pThreadState); 614 } 615 if ((NvU64_LO32(pIntr->uvmSharedCpuLeafEnDisableMask) != 0) && 616 (NvU64_LO32(pIntr->uvmSharedCpuLeafEn) != 0)) 617 { 618 // 619 // Only write to the register is both the enable mask and the 620 // disable mask is non-zero. If there are no interrupts we're 621 // interested in handling in one of the leafs, the enable mask will 622 // be zero and the disable mask will be all 0xFs. There's no point 623 // writing the register in that case since interrupts are already 624 // not enabled. Using the cached value helps us avoid a register 625 // read in latency critical paths. 626 // 627 intrWriteRegLeafEnClear_HAL(pGpu, pIntr, 628 NV_CTRL_INTR_SUBTREE_TO_LEAF_IDX_END(idx), 629 NvU64_LO32(pIntr->uvmSharedCpuLeafEnDisableMask), 630 pThreadState); 631 } 632 } 633 634 // 635 // 3. Disable some interrupt subtrees at top level (information about which 636 // ones to disable is cached in pIntr->intrTopEnMask) 637 // 638 if (NvU64_LO32(pIntr->intrTopEnMask) != 0) 639 { 640 intrWriteRegTopEnClear_HAL(pGpu, pIntr, 641 0, 642 NvU64_LO32(pIntr->intrTopEnMask), 643 pThreadState); 644 } 645 if (NvU64_HI32(pIntr->intrTopEnMask) != 0) 646 { 647 intrWriteRegTopEnClear_HAL(pGpu, pIntr, 648 1, 649 NvU64_HI32(pIntr->intrTopEnMask), 650 pThreadState); 651 } 652 } 653 654 /*! 655 * @brief Clears a given interrupt vector at the dev_ctrl LEAF level 656 * 657 * @param[in] pGpu OBJGPU pointer 658 * @param[in] pIntr Intr pointer 659 * @param[in] intrVector interrupt vector to clear 660 * @param[in] pThreadState thread state node pointer 661 */ 662 void 663 intrClearLeafVector_TU102 664 ( 665 OBJGPU *pGpu, 666 Intr *pIntr, 667 NvU32 intrVector, 668 THREAD_STATE_NODE *pThreadState 669 ) 670 { 671 NvU32 reg = NV_CTRL_INTR_GPU_VECTOR_TO_LEAF_REG(intrVector); 672 NvU32 bit = NV_CTRL_INTR_GPU_VECTOR_TO_LEAF_BIT(intrVector); 673 674 intrWriteRegLeaf_HAL(pGpu, pIntr, reg, NVBIT(bit), pThreadState); 675 } 676 677 /*! 678 * @brief Checks if the given interrupt vector is pending at the dev_ctrl LEAF level 679 * 680 * @param[in] pGpu OBJGPU pointer 681 * @param[in] pIntr Intr pointer 682 * @param[in] intrVector interrupt vector to check 683 * @param[in] pThreadState thread state node pointer 684 */ 685 NvBool 686 intrIsVectorPending_TU102 687 ( 688 OBJGPU *pGpu, 689 Intr *pIntr, 690 NvU32 intrVector, 691 THREAD_STATE_NODE *pThreadState 692 ) 693 { 694 NvU32 reg = NV_CTRL_INTR_GPU_VECTOR_TO_LEAF_REG(intrVector); 695 NvU32 bit = NV_CTRL_INTR_GPU_VECTOR_TO_LEAF_BIT(intrVector); 696 NvU32 val = intrReadRegLeaf_HAL(pGpu, pIntr, reg, pThreadState); 697 698 if (val & NVBIT(bit)) 699 return NV_TRUE; 700 return NV_FALSE; 701 } 702 703 /*! 704 * @brief Returns the INTR_LEAF mask for RM owned client interrupts. 705 * 706 * NOTE: Must be called after @intrStateLoad_TU102. This code assumes that the 707 * client owned interrupts are in the same leaf register. We would have checked 708 * whether that assumption is true in @intrStateLoad_TU102 and if it was 709 * violated, we'd have failed state load. 710 */ 711 static NvU32 712 _intrGetUvmLeafMask_TU102 713 ( 714 OBJGPU *pGpu, 715 Intr *pIntr 716 ) 717 { 718 NvU32 val = 0; 719 KernelGmmu *pKernelGmmu = GPU_GET_KERNEL_GMMU(pGpu); 720 if (pKernelGmmu != NULL) 721 { 722 NvBool bRmOwnsReplayableFault = !!(pKernelGmmu->uvmSharedIntrRmOwnsMask & RM_UVM_SHARED_INTR_MASK_MMU_REPLAYABLE_FAULT_NOTIFY); 723 NvBool bRmOwnsAccessCntr = !!(pKernelGmmu->uvmSharedIntrRmOwnsMask & RM_UVM_SHARED_INTR_MASK_HUB_ACCESS_COUNTER_NOTIFY); 724 725 if (bRmOwnsReplayableFault) 726 { 727 val |= NVBIT(NV_CTRL_INTR_GPU_VECTOR_TO_LEAF_BIT(pIntr->replayableFaultIntrVector)); 728 } 729 if (bRmOwnsAccessCntr) 730 { 731 val |= NVBIT(NV_CTRL_INTR_GPU_VECTOR_TO_LEAF_BIT(pIntr->accessCntrIntrVector)); 732 } 733 } 734 return val; 735 } 736 737 /*! 738 * @brief Returns a 64 bit mask, where all the bits set to 0 are the ones we 739 * intend to leave enabled in the client shared subtree even when we disable 740 * interrupts (for example, when we take the GPU lock). 741 * 742 * The non-replayable fault interrupt is shared with the client, and in the 743 * top half of the interrupt handler, as such, we only copy fault packets from 744 * the HW buffer to the appropriate SW buffers. 745 * The fifo non-stall interrupt is used for runlist events, which also does not 746 * need to be blocked by the GPU lock (existing codepaths already ascertain that 747 * this is safe, so we're maintaining that behavior in NV_CTRL). 748 */ 749 NvU64 750 intrGetUvmSharedLeafEnDisableMask_TU102 751 ( 752 OBJGPU *pGpu, 753 Intr *pIntr 754 ) 755 { 756 NvU32 intrVectorNonReplayableFault; 757 NvU32 intrVectorFifoNonstall = NV_INTR_VECTOR_INVALID; 758 NvU64 mask = 0; 759 NV2080_INTR_CATEGORY_SUBTREE_MAP uvmShared; 760 761 // GSP RM services both MMU non-replayable fault and FIFO interrupts 762 if (IS_GSP_CLIENT(pGpu)) 763 { 764 return ~mask; 765 } 766 767 intrVectorNonReplayableFault = intrGetVectorFromEngineId(pGpu, pIntr, MC_ENGINE_IDX_NON_REPLAYABLE_FAULT, NV_FALSE); 768 769 if (!IS_VIRTUAL(pGpu)) 770 { 771 intrVectorFifoNonstall = intrGetVectorFromEngineId(pGpu, pIntr, MC_ENGINE_IDX_FIFO, NV_TRUE); 772 } 773 774 if (intrVectorFifoNonstall != NV_INTR_VECTOR_INVALID) 775 { 776 // Ascertain that they're in the same subtree and same leaf 777 NV_ASSERT(NV_CTRL_INTR_GPU_VECTOR_TO_SUBTREE(intrVectorNonReplayableFault) == 778 NV_CTRL_INTR_GPU_VECTOR_TO_SUBTREE(intrVectorFifoNonstall)); 779 NV_ASSERT(NV_CTRL_INTR_GPU_VECTOR_TO_LEAF_REG(intrVectorNonReplayableFault) == 780 NV_CTRL_INTR_GPU_VECTOR_TO_LEAF_REG(intrVectorFifoNonstall)); 781 } 782 783 NV_ASSERT_OK(intrGetSubtreeRange(pIntr, 784 NV2080_INTR_CATEGORY_UVM_SHARED, 785 &uvmShared)); 786 // 787 // Ascertain that we only have 1 client subtree (we assume 788 // this since we cache only 64 bits). 789 // 790 NV_ASSERT(uvmShared.subtreeStart == uvmShared.subtreeEnd); 791 792 // 793 // Ascertain that we only have 2 subtrees as this is what we currently 794 // support by only caching 64 bits 795 // 796 NV_ASSERT( 797 (NV_CTRL_INTR_SUBTREE_TO_LEAF_IDX_END(uvmShared.subtreeEnd) - 1) == 798 NV_CTRL_INTR_SUBTREE_TO_LEAF_IDX_START(uvmShared.subtreeStart)); 799 800 801 // Ascertain that they're in the first leaf 802 NV_ASSERT( 803 NV_CTRL_INTR_GPU_VECTOR_TO_LEAF_REG(intrVectorNonReplayableFault) == 804 NV_CTRL_INTR_SUBTREE_TO_LEAF_IDX_START(uvmShared.subtreeStart)); 805 806 mask |= NVBIT64(NV_CTRL_INTR_GPU_VECTOR_TO_LEAF_BIT(intrVectorNonReplayableFault)); 807 808 if (intrVectorFifoNonstall != NV_INTR_VECTOR_INVALID) 809 { 810 mask |= NVBIT64(NV_CTRL_INTR_GPU_VECTOR_TO_LEAF_BIT(intrVectorFifoNonstall)); 811 } 812 813 mask <<= 32; 814 815 return ~mask; 816 } 817 818 /*! 819 * @brief Gets list of engines with pending stalling interrupts as per the interrupt trees 820 * 821 * @param[in] pGpu 822 * @param[in] pIntr 823 * @param[out] pEngines List of engines that have pending stall interrupts 824 * @param[in] pThreadState 825 * 826 * @return NV_OK if the list of engines that have pending stall interrupts was retrieved 827 */ 828 NV_STATUS 829 intrGetPendingStallEngines_TU102 830 ( 831 OBJGPU *pGpu, 832 Intr *pIntr, 833 MC_ENGINE_BITVECTOR *pEngines, 834 THREAD_STATE_NODE *pThreadState 835 ) 836 { 837 KernelGmmu *pKernelGmmu = GPU_GET_KERNEL_GMMU(pGpu); 838 InterruptTable *pIntrTable; 839 InterruptTableIter iter; 840 NvU64 sanityCheckSubtreeMask = 0; 841 NvU32 numIntrLeaves = intrGetNumLeaves_HAL(pGpu, pIntr); 842 NV_ASSERT(numIntrLeaves <= NV_MAX_INTR_LEAVES); 843 NvU32 intrLeafValues[NV_MAX_INTR_LEAVES]; 844 845 sanityCheckSubtreeMask = intrGetIntrTopLegacyStallMask(pIntr); 846 847 portMemSet(intrLeafValues, 0, numIntrLeaves * sizeof(NvU32)); 848 bitVectorClrAll(pEngines); 849 850 // dev_ctrl tree is not used for legacy-vGPU 851 if (IS_VIRTUAL_WITHOUT_SRIOV(pGpu)) 852 { 853 return NV_OK; 854 } 855 856 NV_ASSERT_OK_OR_RETURN(intrGetLeafStatus_HAL(pGpu, pIntr, intrLeafValues, pThreadState)); 857 NV_ASSERT_OK_OR_RETURN(intrGetInterruptTable_HAL(pGpu, pIntr, &pIntrTable)); 858 859 for (iter = vectIterAll(pIntrTable); vectIterNext(&iter);) 860 { 861 INTR_TABLE_ENTRY *pEntry = iter.pValue; 862 NvU32 intrVector = pEntry->intrVector; 863 NvU32 leaf; 864 NvU32 leafIndex; 865 NvU32 leafBit; 866 867 // Check if this engine has a valid stalling interrupt vector in the new tree 868 if (intrVector == NV_INTR_VECTOR_INVALID) 869 { 870 continue; 871 } 872 873 leafIndex = NV_CTRL_INTR_GPU_VECTOR_TO_LEAF_REG(intrVector); 874 leafBit = NV_CTRL_INTR_GPU_VECTOR_TO_LEAF_BIT(intrVector); 875 876 // 877 // The leafIndex must be within the stall tree. Try to catch this on 878 // pre-release drivers. Don't need this on release drivers since this 879 // is only to catch issues during code development. Should never happen 880 // in practice 881 // 882 883 if ((sanityCheckSubtreeMask & 884 NVBIT64(NV_CTRL_INTR_LEAF_IDX_TO_SUBTREE(leafIndex))) == 0) 885 { 886 NV_PRINTF(LEVEL_ERROR, 887 "MC_ENGINE_IDX %u has invalid stall intr vector %u\n", 888 pEntry->mcEngine, 889 intrVector); 890 DBG_BREAKPOINT(); 891 continue; 892 } 893 // 894 // Check if interrupt is pending. We skip checking if it is enabled in 895 // the leaf register since we mess around with the leaf enables in 896 // the interrupt disable path and will need special casing to handle it 897 // In the transition period from NV_PMC to NV_CTRL, the interrupt vector 898 // for engines that haven't yet switched would be INVALID, so we'd never 899 // get here anyway. 900 // 901 leaf = intrLeafValues[leafIndex] & NVBIT(leafBit); 902 903 if (leaf == 0) 904 { 905 continue; 906 } 907 908 // Add engine to bitvector 909 bitVectorSet(pEngines, pEntry->mcEngine); 910 } 911 912 if (pKernelGmmu != NULL) 913 { 914 NvBool bRmOwnsReplayableFault = !!(pKernelGmmu->uvmSharedIntrRmOwnsMask & RM_UVM_SHARED_INTR_MASK_MMU_REPLAYABLE_FAULT_NOTIFY); 915 NvBool bRmOwnsAccessCntr = !!(pKernelGmmu->uvmSharedIntrRmOwnsMask & RM_UVM_SHARED_INTR_MASK_HUB_ACCESS_COUNTER_NOTIFY); 916 917 // 918 // Add non replayable fault engine if there is something in the shadow buffer, 919 // as the interrupt itself is cleared earlier. 920 // 921 if (portAtomicOrS32(kgmmuGetFatalFaultIntrPendingState(pKernelGmmu, GPU_GFID_PF), 0)) 922 { 923 bitVectorSet(pEngines, MC_ENGINE_IDX_NON_REPLAYABLE_FAULT); 924 } 925 926 // If AM doesn't own either UVM interrupt, remove it from the pending mask 927 if (!bRmOwnsReplayableFault) 928 { 929 bitVectorClr(pEngines, MC_ENGINE_IDX_REPLAYABLE_FAULT); 930 } 931 932 if (!bRmOwnsAccessCntr) 933 { 934 bitVectorClr(pEngines, MC_ENGINE_IDX_ACCESS_CNTR); 935 } 936 } 937 938 return NV_OK; 939 } 940 941 /*! 942 * @brief Checks and services MMU non=replayable fault interrupts that may not 943 * have been queued as DPC if we didn't get the GPU lock in the top half. 944 * 945 * If the MMU non-replayable fault interrupt was the only interrupt pending and 946 * we were unable to get the GPU lock in the top half, a DPC would not have 947 * been scheduled, but the non-replayable fault interrupt packet(s) would have 948 * been copied into the SW buffers. Try to do the bottom-half servicing of 949 * interrupts that could have been cleared in the top half. 950 * 951 * @param[in] pGpu OBJGPU pointer 952 * @param[in] pIntr Intr pointer 953 * @param[in] pThreadState THREAD_STATE_NODE pointer 954 */ 955 NV_STATUS 956 intrCheckAndServiceNonReplayableFault_TU102 957 ( 958 OBJGPU *pGpu, 959 Intr *pIntr, 960 THREAD_STATE_NODE *pThreadState 961 ) 962 { 963 NV_STATUS status = NV_OK; 964 return status; 965 } 966 967 /*! 968 * @brief Retrigger interrupts by toggling enables of those subtrees not 969 * toggled at top level in GPU lock acquire/release. Subtrees that are toggled 970 * at top level will be implicitly re-armed when the GPU lock is released. 971 * 972 * @param[in] pGpu GPU Object 973 * @param[in] pIntr Intr Object 974 */ 975 void 976 intrRetriggerTopLevel_TU102 977 ( 978 OBJGPU *pGpu, 979 Intr *pIntr 980 ) 981 { 982 NvU64 mask = 0; 983 984 // We use the assumption that 1 == DISABLE below 985 ct_assert(NV_VIRTUAL_FUNCTION_PRIV_CPU_INTR_TOP_EN_CLEAR_SUBTREE_DISABLE == 1); 986 987 // 988 // Toggle the top level interrupt enables for all interrupts whose top 989 // level enables are not toggled during RM lock acquire/release. 990 // 991 if (pGpu->getProperty(pGpu, PDB_PROP_GPU_ALTERNATE_TREE_HANDLE_LOCKLESS) && 992 pGpu->getProperty(pGpu, PDB_PROP_GPU_ALTERNATE_TREE_ENABLED)) 993 { 994 // 995 // 1. If the alternate tree (nonstall tree) is handled "lockless", it 996 // is not disabled during RM lock acquire, so needs re-arming. 997 // 998 mask |= intrGetIntrTopNonStallMask_HAL(pGpu, pIntr); 999 } 1000 1001 // 2. UVM-owned interrupt tree (never disabled at top level) 1002 mask |= intrGetIntrTopCategoryMask(pIntr, NV2080_INTR_CATEGORY_UVM_OWNED); 1003 1004 // 3. UVM/RM shared interrupt tree (never disabled at top level) 1005 mask |= intrGetIntrTopCategoryMask(pIntr, NV2080_INTR_CATEGORY_UVM_SHARED); 1006 1007 // 1008 // Bypass GPU_REG_WR32 that requires the GPU lock to be held (for some 1009 // register filters) by using the OS interface directly. 1010 // 1011 // Clear all first, then set 1012 // 1013 if (NvU64_LO32(mask) != 0 && 1014 0 < NV_VIRTUAL_FUNCTION_PRIV_CPU_INTR_TOP_EN_CLEAR__SIZE_1) 1015 { 1016 osGpuWriteReg032(pGpu, 1017 GPU_GET_VREG_OFFSET(pGpu, 1018 NV_VIRTUAL_FUNCTION_PRIV_CPU_INTR_TOP_EN_CLEAR(0)), 1019 NvU64_LO32(mask)); 1020 } 1021 if (NvU64_HI32(mask) != 0 && 1022 1 < NV_VIRTUAL_FUNCTION_PRIV_CPU_INTR_TOP_EN_CLEAR__SIZE_1) 1023 { 1024 osGpuWriteReg032(pGpu, 1025 GPU_GET_VREG_OFFSET(pGpu, 1026 NV_VIRTUAL_FUNCTION_PRIV_CPU_INTR_TOP_EN_CLEAR(1)), 1027 NvU64_HI32(mask)); 1028 } 1029 if (NvU64_LO32(mask) != 0 && 1030 0 < NV_VIRTUAL_FUNCTION_PRIV_CPU_INTR_TOP_EN_SET__SIZE_1) 1031 { 1032 osGpuWriteReg032(pGpu, 1033 GPU_GET_VREG_OFFSET(pGpu, 1034 NV_VIRTUAL_FUNCTION_PRIV_CPU_INTR_TOP_EN_SET(0)), 1035 NvU64_LO32(mask)); 1036 } 1037 if (NvU64_HI32(mask) != 0 && 1038 1 < NV_VIRTUAL_FUNCTION_PRIV_CPU_INTR_TOP_EN_SET__SIZE_1) 1039 { 1040 osGpuWriteReg032(pGpu, 1041 GPU_GET_VREG_OFFSET(pGpu, 1042 NV_VIRTUAL_FUNCTION_PRIV_CPU_INTR_TOP_EN_SET(1)), 1043 NvU64_HI32(mask)); 1044 } 1045 } 1046 1047 /*! 1048 * @brief read all leaf interrupt registers into an array 1049 * 1050 * @param[in] pGpu OBJGPU pointer 1051 * @param[in] pIntr Intr pointer 1052 * @param[out] pLeafVals array that leaf values will be returned in. 1053 * assumes that it is sufficiently large 1054 */ 1055 NV_STATUS 1056 intrGetLeafStatus_TU102 1057 ( 1058 OBJGPU *pGpu, 1059 Intr *pIntr, 1060 NvU32 *pLeafVals, 1061 THREAD_STATE_NODE *pThreadState 1062 ) 1063 { 1064 NvU32 subtreeIndex; 1065 NvU32 leafIndex; 1066 1067 FOR_EACH_INDEX_IN_MASK(64, subtreeIndex, 1068 intrGetIntrTopLegacyStallMask(pIntr)) 1069 { 1070 leafIndex = NV_CTRL_INTR_SUBTREE_TO_LEAF_IDX_START(subtreeIndex); 1071 if (pIntr->getProperty(pIntr, PDB_PROP_INTR_READ_ONLY_EVEN_NUMBERED_INTR_LEAF_REGS)) 1072 { 1073 // 1074 // Since we know that on Turing, only one leaf per subtree has valid 1075 // interrupts, optimize to only read those leaf registers. 1076 // 1077 pLeafVals[leafIndex] = intrReadRegLeaf_HAL(pGpu, pIntr, leafIndex, pThreadState); 1078 } 1079 else 1080 { 1081 for (; leafIndex <= NV_CTRL_INTR_SUBTREE_TO_LEAF_IDX_END(subtreeIndex); leafIndex++) 1082 { 1083 pLeafVals[leafIndex] = intrReadRegLeaf_HAL(pGpu, pIntr, leafIndex, pThreadState); 1084 } 1085 } 1086 } FOR_EACH_INDEX_IN_MASK_END 1087 1088 return NV_OK; 1089 } 1090 1091 /*! 1092 * @brief Returns a bitfield with only MC_ENGINE_IDX_DISP set if it's pending 1093 * On Turing+, there are multiple stall interrupt registers, and reading them 1094 * all in the top half would be expensive. To saitsfy bug 3220319, only find out 1095 * if display interrupt is pending. Fix this in bug 3279300 1096 * 1097 * @param[in] pGpu 1098 * @param[in] pMc 1099 * @param[out] pEngines List of engines that have pending stall interrupts 1100 * @param[in] pThreadState 1101 * 1102 * @return NV_OK if the list of engines that have pending stall interrupts was retrieved 1103 */ 1104 NV_STATUS 1105 intrGetPendingDisplayIntr_TU102 1106 ( 1107 OBJGPU *pGpu, 1108 Intr *pIntr, 1109 PMC_ENGINE_BITVECTOR pEngines, 1110 THREAD_STATE_NODE *pThreadState 1111 ) 1112 { 1113 KernelDisplay *pKernelDisplay = GPU_GET_KERNEL_DISPLAY(pGpu); 1114 1115 bitVectorClrAll(pEngines); 1116 1117 if (IS_GPU_GC6_STATE_ENTERED(pGpu)) 1118 { 1119 return NV_ERR_GPU_NOT_FULL_POWER; 1120 } 1121 1122 if (!API_GPU_ATTACHED_SANITY_CHECK(pGpu)) 1123 { 1124 return NV_ERR_GPU_IS_LOST; 1125 } 1126 1127 if (pKernelDisplay != NULL && kdispGetDeferredVblankHeadMask(pKernelDisplay)) 1128 { 1129 // Deferred vblank is pending which we need to handle 1130 bitVectorSet(pEngines, MC_ENGINE_IDX_DISP); 1131 // Nothing else to set here, return early 1132 return NV_OK; 1133 } 1134 1135 if (pIntr->displayIntrVector == NV_INTR_VECTOR_INVALID) 1136 { 1137 return NV_OK; 1138 } 1139 else if (intrIsVectorPending_TU102(pGpu, pIntr, pIntr->displayIntrVector, pThreadState)) 1140 { 1141 bitVectorSet(pEngines, MC_ENGINE_IDX_DISP); 1142 } 1143 1144 return NV_OK; 1145 } 1146 1147 1148 /** 1149 * @brief Enable or disable the display interrupt. 1150 * This implements the missing functionality of PDB_PROP_INTR_USE_INTR_MASK_FOR_LOCKING 1151 * for Turing+: The ability to leave display interrrupts unmasked while the GPU lock is held 1152 * The PMC_INTR_MASK HW registers were deprecated in Pascal, but the Pascal-Volta interrupt 1153 * code still emulates them in SW. The Turing+ code did not implement any of the masking code, 1154 * but as seen in bug 3152190, the ability to leave the display interupt unmasked is still 1155 * needed. The ability to unmask the interrupts to enable them to show up in interrupt registers 1156 * is not needed, so this call is not needed at callsites that just do that 1157 * (_intrEnterCriticalSection / _intrExitCriticalSection) 1158 * This whole interrupts code mess needs refactored - bug 3279300 1159 * 1160 * @param[in] pGpu 1161 * @param[in] pIntr 1162 * @param[in] bEnable 1163 * @param[in] pThreadState - Needed for bypassing register filters in unlocked top half 1164 * 1165 */ 1166 void 1167 intrSetDisplayInterruptEnable_TU102 1168 ( 1169 OBJGPU *pGpu, 1170 Intr *pIntr, 1171 NvBool bEnable, 1172 THREAD_STATE_NODE *pThreadState 1173 ) 1174 { 1175 if (pIntr->displayIntrVector == NV_INTR_VECTOR_INVALID) 1176 { 1177 return; 1178 } 1179 1180 NvU32 reg = NV_CTRL_INTR_GPU_VECTOR_TO_LEAF_REG(pIntr->displayIntrVector); 1181 NvU32 leafBit = NV_CTRL_INTR_GPU_VECTOR_TO_LEAF_BIT(pIntr->displayIntrVector); 1182 1183 if (bEnable) 1184 { 1185 intrWriteRegLeafEnSet_HAL(pGpu, pIntr, reg, NVBIT(leafBit), pThreadState); 1186 } 1187 else 1188 { 1189 intrWriteRegLeafEnClear_HAL(pGpu, pIntr, reg, NVBIT(leafBit), pThreadState); 1190 } 1191 } 1192 1193 /*! 1194 * @brief Dumps interrupt state (registers, vector table) for debugging purpose. 1195 * 1196 * @param[in] pGpu OBJGPU pointer 1197 * @param[in] pIntr Intr pointer 1198 */ 1199 void 1200 intrDumpState_TU102 1201 ( 1202 OBJGPU *pGpu, 1203 Intr *pIntr 1204 ) 1205 { 1206 InterruptTable *pIntrTable; 1207 InterruptTableIter iter; 1208 NvU32 i; 1209 NvU32 intrLeafSize = intrGetLeafSize_HAL(pGpu, pIntr); 1210 1211 NV_PRINTF(LEVEL_INFO, "Interrupt registers:\n"); 1212 for (i = 0; i < NV_VIRTUAL_FUNCTION_PRIV_CPU_INTR_TOP__SIZE_1; i++) 1213 { 1214 NV_PRINTF(LEVEL_INFO, "INTR_TOP_EN_SET(%u)=0x%x\n", i, 1215 intrReadRegTopEnSet_HAL(pGpu, pIntr, i, NULL)); 1216 } 1217 1218 for (i = 0; i < intrLeafSize; i++) 1219 { 1220 NV_PRINTF(LEVEL_INFO, "INTR_LEAF_EN_SET(%u)=0x%x\n", i, 1221 intrReadRegLeafEnSet_HAL(pGpu, pIntr, i, NULL)); 1222 } 1223 1224 NV_PRINTF(LEVEL_INFO, "MC Interrupt table:\n"); 1225 intrGetInterruptTable_HAL(pGpu, pIntr, &pIntrTable); 1226 1227 for (i = 0, iter = vectIterAll(pIntrTable); vectIterNext(&iter); i++) 1228 { 1229 INTR_TABLE_ENTRY *pEntry = iter.pValue; 1230 PORT_UNREFERENCED_VARIABLE(pEntry); 1231 1232 NV_PRINTF(LEVEL_INFO, 1233 "%2u: mcEngineIdx=%-4u intrVector=%-10u intrVectorNonStall=%-10u\n", 1234 i, 1235 pEntry->mcEngine, 1236 pEntry->intrVector, 1237 pEntry->intrVectorNonStall); 1238 } 1239 } 1240 1241 1242 NV_STATUS 1243 intrInitSubtreeMap_TU102 1244 ( 1245 OBJGPU *pGpu, 1246 Intr *pIntr 1247 ) 1248 { 1249 NV2080_INTR_CATEGORY_SUBTREE_MAP *pCategoryEngine = 1250 &pIntr->subtreeMap[NV2080_INTR_CATEGORY_ESCHED_DRIVEN_ENGINE]; 1251 pCategoryEngine->subtreeStart = NV_CPU_INTR_STALL_SUBTREE_START; 1252 pCategoryEngine->subtreeEnd = NV_CPU_INTR_STALL_SUBTREE_LAST; 1253 1254 NV2080_INTR_CATEGORY_SUBTREE_MAP *pCategoryEngineNotification = 1255 &pIntr->subtreeMap[NV2080_INTR_CATEGORY_ESCHED_DRIVEN_ENGINE_NOTIFICATION]; 1256 pCategoryEngineNotification->subtreeStart = NV_VIRTUAL_FUNCTION_PRIV_CPU_INTR_TOP_SUBTREE(0); 1257 pCategoryEngineNotification->subtreeEnd = NV_VIRTUAL_FUNCTION_PRIV_CPU_INTR_TOP_SUBTREE(0); 1258 1259 NV2080_INTR_CATEGORY_SUBTREE_MAP *pCategoryUvmOwned = 1260 &pIntr->subtreeMap[NV2080_INTR_CATEGORY_UVM_OWNED]; 1261 pCategoryUvmOwned->subtreeStart = NV_CPU_INTR_UVM_SUBTREE_START; 1262 pCategoryUvmOwned->subtreeEnd = NV_CPU_INTR_UVM_SUBTREE_LAST; 1263 1264 NV2080_INTR_CATEGORY_SUBTREE_MAP *pCategoryUvmShared = 1265 &pIntr->subtreeMap[NV2080_INTR_CATEGORY_UVM_SHARED]; 1266 pCategoryUvmShared->subtreeStart = NV_CPU_INTR_UVM_SHARED_SUBTREE_START; 1267 pCategoryUvmShared->subtreeEnd = NV_CPU_INTR_UVM_SHARED_SUBTREE_LAST; 1268 1269 return NV_OK; 1270 } 1271 1272 1273 /*! 1274 * @brief Gets the number of leaf registers used 1275 */ 1276 NvU32 1277 intrGetNumLeaves_TU102(OBJGPU *pGpu, Intr *pIntr) 1278 { 1279 ct_assert((NV_CTRL_INTR_SUBTREE_TO_LEAF_IDX_END(NV_CPU_INTR_STALL_SUBTREE_LAST) + 1) <= NV_MAX_INTR_LEAVES); 1280 return (NV_CTRL_INTR_SUBTREE_TO_LEAF_IDX_END(NV_CPU_INTR_STALL_SUBTREE_LAST) + 1); 1281 } 1282 1283 /*! 1284 * @brief Gets the value of VIRTUAL_FUNCTION_PRIV_CPU_INTR_LEAF__SIZE_1 1285 */ 1286 NvU32 1287 intrGetLeafSize_TU102(OBJGPU *pGpu, Intr *pIntr) 1288 { 1289 return NV_VIRTUAL_FUNCTION_PRIV_CPU_INTR_LEAF__SIZE_1; 1290 } 1291 1292 1293 NvU64 1294 intrGetIntrTopNonStallMask_TU102 1295 ( 1296 OBJGPU *pGpu, 1297 Intr *pIntr 1298 ) 1299 { 1300 // TODO Bug 3823562 Remove these asserts 1301 // Compile-time assert against the highest set bit that will be returned 1302 #define NV_CPU_INTR_NOSTALL_SUBTREE_HIGHEST NV_VIRTUAL_FUNCTION_PRIV_CPU_INTR_TOP_SUBTREE(0) 1303 1304 ct_assert(NV_CPU_INTR_NOSTALL_SUBTREE_HIGHEST < NV_VIRTUAL_FUNCTION_PRIV_CPU_INTR_LEAF__SIZE_1); 1305 ct_assert(NV_CPU_INTR_NOSTALL_SUBTREE_HIGHEST < NV_VIRTUAL_FUNCTION_PRIV_CPU_INTR_LEAF_EN_SET__SIZE_1); 1306 ct_assert(NV_CPU_INTR_NOSTALL_SUBTREE_HIGHEST < NV_VIRTUAL_FUNCTION_PRIV_CPU_INTR_LEAF_EN_CLEAR__SIZE_1); 1307 1308 NvU64 mask = intrGetIntrTopCategoryMask(pIntr, 1309 NV2080_INTR_CATEGORY_ESCHED_DRIVEN_ENGINE_NOTIFICATION); 1310 1311 // 1312 // On all Ampere+ that use this TU102 HAL, Esched notification interrupts 1313 // are also included in this if PDB_PROP_GPU_SWRL_GRANULAR_LOCKING is set. 1314 // 1315 1316 // Sanity check that Intr.subtreeMap is initialized 1317 NV_ASSERT(mask != 0); 1318 return mask; 1319 } 1320 1321 1322 /*! 1323 * @brief Decode the interrupt mode for SW to use 1324 * 1325 * @param[in] pIntr Intr Object 1326 * @param[in] intrEn the enable value to decode 1327 * 1328 * @returns the value of the decoded interrupt 1329 * 1330 */ 1331 NvU32 1332 intrDecodeStallIntrEn_TU102 1333 ( 1334 OBJGPU *pGpu, 1335 Intr *pIntr, 1336 NvU32 intrEn 1337 ) 1338 { 1339 // mask with interrupts that RM owns 1340 if (pGpu->pmcRmOwnsIntrMask != 0) 1341 { 1342 intrEn &= pGpu->pmcRmOwnsIntrMask; 1343 } 1344 1345 switch (intrEn) 1346 { 1347 case INTERRUPT_MASK_DISABLED: 1348 return INTERRUPT_TYPE_DISABLED; 1349 case INTERRUPT_MASK_HARDWARE: 1350 return INTERRUPT_TYPE_HARDWARE; 1351 case INTERRUPT_MASK_SOFTWARE: 1352 return INTERRUPT_TYPE_SOFTWARE; 1353 default: 1354 return INTERRUPT_TYPE_MULTI; 1355 } 1356 } 1357