1 /*
2  * SPDX-FileCopyrightText: Copyright (c) 2017-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
3  * SPDX-License-Identifier: MIT
4  *
5  * Permission is hereby granted, free of charge, to any person obtaining a
6  * copy of this software and associated documentation files (the "Software"),
7  * to deal in the Software without restriction, including without limitation
8  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
9  * and/or sell copies of the Software, and to permit persons to whom the
10  * Software is furnished to do so, subject to the following conditions:
11  *
12  * The above copyright notice and this permission notice shall be included in
13  * all copies or substantial portions of the Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
21  * DEALINGS IN THE SOFTWARE.
22  */
23 
24 #include "core/core.h"
25 #include "kernel/gpu/intr/intr.h"
26 #include "gpu/gpu.h"
27 #include "kernel/gpu/intr/engine_idx.h"
28 #include "gpu/bif/kernel_bif.h"
29 #include "objtmr.h"
30 #include "gpu/uvm/uvm.h"
31 #include "os/os.h"
32 #include "vgpu/vgpu_events.h"
33 #include "vgpu/rpc.h"
34 #include "gpu/mmu/kern_gmmu.h"
35 #include "libraries/nvport/nvport.h"
36 #include "gpu/disp/kern_disp.h"
37 
38 #include "published/turing/tu102/dev_ctrl.h"
39 #include "published/turing/tu102/dev_vm.h"
40 #include "published/turing/tu102/dev_vm_addendum.h"
41 /*!
42  * @brief Get the base interrupt vector to use when indexing engine nonstall
43  *        interrupts
44  *
45  * @param[in]   pGpu    OBJGPU pointer
46  * @param[in]   pIntr   Intr pointer
47  *
48  * @returns  the base interrupt vector for engine nonstall interrupts
49  */
50 NvU32
51 intrGetNonStallBaseVector_TU102
52 (
53     OBJGPU     *pGpu,
54     Intr       *pIntr
55 )
56 {
57     NvU32 base = 0;
58 
59     if (!IS_VIRTUAL(pGpu))
60     {
61         base = GPU_REG_RD32(pGpu, NV_CTRL_LEGACY_ENGINE_NONSTALL_INTR_BASE_VECTORID);
62     }
63     else
64     {
65         NV_STATUS status = NV_OK;
66         NV_RM_RPC_VGPU_PF_REG_READ32(pGpu, NV_CTRL_LEGACY_ENGINE_NONSTALL_INTR_BASE_VECTORID, &base, status);
67     }
68     return base;
69 }
70 
71 //
72 // Static interface functions
73 //
74 static NvU32     _intrGetUvmLeafMask_TU102(OBJGPU *, Intr *);
75 static void      _intrEnableStall_TU102(OBJGPU *, Intr *, THREAD_STATE_NODE *pThreadState);
76 static void      _intrDisableStall_TU102(OBJGPU *, Intr *, THREAD_STATE_NODE *pThreadState);
77 static void      _intrClearLeafEnables_TU102(OBJGPU *pGpu, Intr *pIntr);
78 
79 // Compile time asserts to make sure we don't write beyond the leaf register array
80 
81 ct_assert(NV_CPU_INTR_STALL_SUBTREE_START   < NV_VIRTUAL_FUNCTION_PRIV_CPU_INTR_LEAF__SIZE_1);
82 ct_assert(NV_CPU_INTR_STALL_SUBTREE_LAST    < NV_VIRTUAL_FUNCTION_PRIV_CPU_INTR_LEAF__SIZE_1);
83 ct_assert(NV_CPU_INTR_STALL_SUBTREE_START   < NV_VIRTUAL_FUNCTION_PRIV_CPU_INTR_LEAF_EN_SET__SIZE_1);
84 ct_assert(NV_CPU_INTR_STALL_SUBTREE_LAST    < NV_VIRTUAL_FUNCTION_PRIV_CPU_INTR_LEAF_EN_SET__SIZE_1);
85 ct_assert(NV_CPU_INTR_STALL_SUBTREE_START   < NV_VIRTUAL_FUNCTION_PRIV_CPU_INTR_LEAF_EN_CLEAR__SIZE_1);
86 ct_assert(NV_CPU_INTR_STALL_SUBTREE_LAST    < NV_VIRTUAL_FUNCTION_PRIV_CPU_INTR_LEAF_EN_CLEAR__SIZE_1);
87 
88 ct_assert(NV_VIRTUAL_FUNCTION_PRIV_CPU_INTR_LEAF__SIZE_1 == NV_VIRTUAL_FUNCTION_PRIV_CPU_INTR_LEAF_EN_SET__SIZE_1);
89 ct_assert(NV_VIRTUAL_FUNCTION_PRIV_CPU_INTR_LEAF__SIZE_1 == NV_VIRTUAL_FUNCTION_PRIV_CPU_INTR_LEAF_EN_CLEAR__SIZE_1);
90 
91 //
// A few design issues and intentions stated upfront:
// Q: Why are interrupts being enabled/disabled here instead of in their respective HALs?
// A: The intent is to keep the "alternate tree" (nonstall tree) invisible to the rest of RM.
//
// Q: Then how does a HAL register its interrupts on this alternate tree?
// A: It does not. The alternate tree is an aberration of nature meant to service **nonstall interrupts**
//    without using locking.
//
// Q: If the alternate tree does not respect locks taken by osAcquireRmSema, then how do we prevent
//    race conditions?
// A: We don't!! The plan here is to *manually* inspect every piece of code that gets executed in the ISR/DPC
//    for this tree and make sure concurrent actions from elsewhere do not lead us into an inconsistent state.
//    In the future, before adding code to this tree, **carefully inspect it yourself**.
//
// A final note: if and when RM gets fine-grained locks in the main interrupt tree, it might be worthwhile
// getting rid of this. More code is more complexity!!
108 //
109 NV_STATUS
110 intrStateLoad_TU102
111 (
112     OBJGPU  *pGpu,
113     Intr *pIntr,
114     NvU32    flags
115 )
116 {
117     NV_STATUS status = NV_OK;
118     InterruptTable    *pIntrTable;
119     InterruptTableIter iter;
120 
121     NV_ASSERT_OK_OR_RETURN(intrGetInterruptTable_HAL(pGpu, pIntr, &pIntrTable));
122 
123     //
124     // Make sure all leaf nodes are disabled before we enable them.  Older drivers
125     // and versions of mods leave them enabled.  Bug 3299004.
126     //
127     _intrClearLeafEnables_TU102(pGpu, pIntr);
128 
129     //
130     // Enable interrupts either in legacy NV_PMC_INTR tree or new NV_CTRL tree
131     // as per the MC interrupt vector table.
132     //
133     // We have to make an exception for the TMR engine though, since for now,
134     // it reports into both PMC and dev_ctrl. We need the PTIMER alarm in
135     // PMC, which is the only place where it reports, and we need it in
136     // dev_ctrl for the countdown/callback timer, which we use in the PF
    // and all the VFs.
138     //
139     pGpu->pmcRmOwnsIntrMask = INTERRUPT_MASK_DISABLED;
140     for (iter = vectIterAll(pIntrTable); vectIterNext(&iter);)
141     {
142         INTR_TABLE_ENTRY *pEntry = iter.pValue;
143         if (pEntry->pmcIntrMask != NV_PMC_INTR_INVALID_MASK)
144         {
145             pGpu->pmcRmOwnsIntrMask |= pEntry->pmcIntrMask;
146 
147             if (pEntry->mcEngine != MC_ENGINE_IDX_TMR)
148                 continue;
149         }
150 
151         if (pEntry->intrVector != NV_INTR_VECTOR_INVALID)
152         {
153             intrEnableLeaf_HAL(pGpu, pIntr, pEntry->intrVector);
154         }
155 
        if (pEntry->intrVectorNonStall != NV_INTR_VECTOR_INVALID)
158         {
159             intrEnableLeaf_HAL(pGpu, pIntr, pEntry->intrVectorNonStall);
160         }
161     }
162 
163     status = intrCacheIntrFields_HAL(pGpu, pIntr);
164     if (status != NV_OK)
165     {
166         goto exit;
167     }
168 
169 exit:
170     if (pIntr->getProperty(pIntr, PDB_PROP_INTR_ENABLE_DETAILED_LOGS))
171     {
172         intrDumpState_HAL(pGpu, pIntr);
173     }
174 
175     return status;
176 }
177 
178 NV_STATUS
179 intrStateUnload_TU102
180 (
181     OBJGPU  *pGpu,
182     Intr *pIntr,
183     NvU32    flags
184 )
185 {
186     // Disable all interrupts since we're unloading
187 
188     intrWriteRegTopEnClear_HAL(pGpu, pIntr, 0, 0xFFFFFFFF, NULL);
189     intrWriteRegTopEnClear_HAL(pGpu, pIntr, 1, 0xFFFFFFFF, NULL);
190 
191     _intrClearLeafEnables_TU102(pGpu, pIntr);
192 
193     return NV_OK;
194 }
195 
196 /*!
197  * @brief Cache few Intr fields for ease of use in interrupt or RPC context.
198  *
199  * @param[in]   pGpu    OBJGPU pointer
200  * @param[in]   pIntr   Intr pointer
201  */
202 NV_STATUS
203 intrCacheIntrFields_TU102
204 (
205     OBJGPU            *pGpu,
206     Intr              *pIntr
207 )
208 {
209     NV_STATUS status = NV_OK;
210 
211     {
212         NV2080_INTR_CATEGORY_SUBTREE_MAP uvmShared;
213         NV_ASSERT_OK_OR_RETURN(
214             intrGetSubtreeRange(pIntr,
215                                 NV2080_INTR_CATEGORY_UVM_SHARED,
216                                 &uvmShared));
217         //
218         // Assert to make sure we have only one client shared subtree.
219         // The below code assumes that.
220         //
221         NV_ASSERT_OR_RETURN(uvmShared.subtreeStart == uvmShared.subtreeEnd,
222                             NV_ERR_INVALID_STATE);
223 
224         // Now cache the leaf enable mask for the subtree shared with the client
225         NvU32 leafEnHi = intrReadRegLeafEnSet_HAL(pGpu, pIntr,
226             NV_CTRL_INTR_SUBTREE_TO_LEAF_IDX_START(uvmShared.subtreeStart),
227             NULL);
228         NvU32 leafEnLo = intrReadRegLeafEnSet_HAL(pGpu, pIntr,
229             NV_CTRL_INTR_SUBTREE_TO_LEAF_IDX_END(uvmShared.subtreeStart),
230             NULL);
231 
232         pIntr->uvmSharedCpuLeafEn = ((NvU64)(leafEnHi) << 32) | leafEnLo;
233         pIntr->uvmSharedCpuLeafEnDisableMask =
234             intrGetUvmSharedLeafEnDisableMask_HAL(pGpu, pIntr);
235     }
236 
237     //
238     // Cache the CPU_INTR_TOP_EN mask to clear when disabling stall
239     // interrupts (other interrupts are either not disabled or disabled
240     // selectively at leaf level)
241     //
242     pIntr->intrTopEnMask |= intrGetIntrTopLockedMask(pGpu, pIntr);
243 
244     OBJDISP *pDisp = GPU_GET_DISP(pGpu);
245 
246     // Cache client owned, shared interrupt, and display vectors for ease of use later
247     pIntr->accessCntrIntrVector      = intrGetVectorFromEngineId(pGpu, pIntr, MC_ENGINE_IDX_ACCESS_CNTR,      NV_FALSE);
248     if (!gpuIsCCFeatureEnabled(pGpu) || !gpuIsGspOwnedFaultBuffersEnabled(pGpu))
249     {
250         pIntr->replayableFaultIntrVector = intrGetVectorFromEngineId(pGpu, pIntr, MC_ENGINE_IDX_REPLAYABLE_FAULT, NV_FALSE);
251     }
252     else
253     {
254         pIntr->replayableFaultIntrVector = intrGetVectorFromEngineId(pGpu, pIntr, MC_ENGINE_IDX_REPLAYABLE_FAULT_CPU, NV_FALSE);
255     }
256     if (pDisp != NULL)
257     {
258         pIntr->displayIntrVector     = intrGetVectorFromEngineId(pGpu, pIntr, MC_ENGINE_IDX_DISP,             NV_FALSE);
259     }
260     else
261     {
262         pIntr->displayIntrVector     = NV_INTR_VECTOR_INVALID;
263     }
264 
265     //
266     // Ensure that both UVM vectors are in the same leaf register (check right
267     // now so we don't have to check later in latency critical paths where this
268     // is assumed to be true)
269     //
270     if (pIntr->replayableFaultIntrVector != NV_INTR_VECTOR_INVALID && pIntr->accessCntrIntrVector != NV_INTR_VECTOR_INVALID)
271     {
272         if (NV_CTRL_INTR_GPU_VECTOR_TO_LEAF_REG(pIntr->replayableFaultIntrVector) != NV_CTRL_INTR_GPU_VECTOR_TO_LEAF_REG(pIntr->accessCntrIntrVector))
273         {
274             NV_PRINTF(LEVEL_ERROR, "UVM interrupt vectors for replayable fault 0x%x "
275                 "and access counter 0x%x are in different CPU_INTR_LEAF registers\n",
276                 pIntr->replayableFaultIntrVector, pIntr->accessCntrIntrVector);
277             DBG_BREAKPOINT();
278             status = NV_ERR_GENERIC;
279             goto exit;
280         }
281     }
282 
283     {
284         //
285         // Now ensure that they're in the expected subtree (check right now so
286         // we don't have to check later in latency critical paths where this is
287         // assumed to be true)
288         //
289         NV2080_INTR_CATEGORY_SUBTREE_MAP uvmOwned;
290         NvU32 accessCntrSubtree = NV_CTRL_INTR_GPU_VECTOR_TO_SUBTREE(
291             pIntr->accessCntrIntrVector);
292         NV_ASSERT_OK_OR_RETURN(
293             intrGetSubtreeRange(pIntr,
294                                 NV2080_INTR_CATEGORY_UVM_OWNED,
295                                 &uvmOwned));
296         if (!(uvmOwned.subtreeStart <= accessCntrSubtree &&
297               accessCntrSubtree     <= uvmOwned.subtreeEnd))
298         {
299             NV_PRINTF(LEVEL_ERROR,
300                 "UVM owned interrupt vector for access counter is in an unexpected subtree\n"
301                 "Expected range = [0x%x, 0x%x], actual = 0x%x\n",
302                 uvmOwned.subtreeStart, uvmOwned.subtreeEnd, accessCntrSubtree);
303             DBG_BREAKPOINT();
304             status = NV_ERR_GENERIC;
305             goto exit;
306         }
307     }
308 
309 exit:
310 
311     return status;
312 }
313 
314 /*!
315  * @brief Get the base interrupt vector to use when indexing engine stall
316  *        interrupts
317  *
318  * @param[in]   pGpu    OBJGPU pointer
319  * @param[in]   pIntr   Intr pointer
320  *
321  * @returns  the base interrupt vector for engine stall interrupts
322  */
323 NvU32
324 intrGetStallBaseVector_TU102
325 (
326     OBJGPU     *pGpu,
327     Intr       *pIntr
328 )
329 {
330     NvU32 base = GPU_REG_RD32(pGpu, NV_CTRL_LEGACY_ENGINE_STALL_INTR_BASE_VECTORID);
331     return base;
332 }
333 
334 /*!
335  * @brief Enable a given interrupt vector in dev_ctrl at leaf level
336  *
337  * @param[in]   pGpu          OBJGPU pointer
338  * @param[in]   pIntr         Intr pointer
 * @param[in]   intrVector    interrupt vector to enable
340  */
341 void
342 intrEnableLeaf_TU102
343 (
344     OBJGPU     *pGpu,
345     Intr       *pIntr,
346     NvU32       intrVector
347 )
348 {
349     NvU32 reg = NV_CTRL_INTR_GPU_VECTOR_TO_LEAF_REG(intrVector);
350     NvU32 leafBit = NV_CTRL_INTR_GPU_VECTOR_TO_LEAF_BIT(intrVector);
351     NvU32 intrLeafEnSetSize = intrGetLeafSize_HAL(pGpu, pIntr);
352 
353     if (reg >= intrLeafEnSetSize)
354     {
355         NV_PRINTF(LEVEL_ERROR, "Exceeding the range of INTR leaf registers. "
356             "intrVector = 0x%x, Reg = 0x%x\n", intrVector, reg);
357         NV_ASSERT(0);
358         return;
359     }
360 
361     intrWriteRegLeafEnSet_HAL(pGpu, pIntr, reg, NVBIT(leafBit), NULL);
362 }
363 
364 /*!
365  * @brief Disable a given interrupt vector in dev_ctrl at leaf level
366  *
367  * @param[in]   pGpu          OBJGPU pointer
368  * @param[in]   pIntr         Intr pointer
 * @param[in]   intrVector    interrupt vector to disable
370  */
371 void
372 intrDisableLeaf_TU102
373 (
374     OBJGPU     *pGpu,
375     Intr       *pIntr,
376     NvU32       intrVector
377 )
378 {
379     NvU32 reg = NV_CTRL_INTR_GPU_VECTOR_TO_LEAF_REG(intrVector);
380     NvU32 leafBit = NV_CTRL_INTR_GPU_VECTOR_TO_LEAF_BIT(intrVector);
381     NvU32 intrLeafEnClearSize = intrGetLeafSize_HAL(pGpu, pIntr);
382 
383     if (reg >= intrLeafEnClearSize)
384     {
385         NV_PRINTF(LEVEL_ERROR, "Exceeding the range of INTR leaf registers. "
386             "intrVector = 0x%x, Reg = 0x%x\n", intrVector, reg);
387         NV_ASSERT(0);
388         return;
389     }
390 
391     intrWriteRegLeafEnClear_HAL(pGpu, pIntr, reg, NVBIT(leafBit), NULL);
392 }
393 
394 /*!
395  * @brief Disable/Enable stall interrupts in dev_ctrl
396  *
397  * @param[in]   pGpu          OBJGPU pointer
 * @param[in]   pIntr         Intr pointer
 * @param[in]   intrType      INTERRUPT_TYPE_DISABLED to disable stall interrupts; any other value enables them
 * @param[in]   pThreadState  thread state node pointer
400  */
401 void
402 intrSetStall_TU102
403 (
404     OBJGPU            *pGpu,
405     Intr              *pIntr,
406     NvU32              intrType,
407     THREAD_STATE_NODE *pThreadState
408 )
409 {
410     // dev_ctrl tree is not used for legacy-vGPU
411     if (IS_VIRTUAL_WITHOUT_SRIOV(pGpu))
412     {
413         return;
414     }
415 
416     if (intrType == INTERRUPT_TYPE_DISABLED)
417     {
418         _intrDisableStall_TU102(pGpu, pIntr, pThreadState);
419     }
420     else
421     {
422         _intrEnableStall_TU102(pGpu, pIntr, pThreadState);
423     }
424 }
425 
426 /*!
427  * @brief Clear all interrupt leaf nodes
428  *
429  * @param[in]   pGpu          OBJGPU pointer
430  * @param[in]   pIntr         Intr pointer
431  */
432 static void _intrClearLeafEnables_TU102
433 (
434     OBJGPU  *pGpu,
435     Intr *pIntr
436 )
437 {
438     NvU32 i;
439     NvU32 intrLeafSize = intrGetLeafSize_HAL(pGpu, pIntr);
440 
441     for (i = 0; i < intrLeafSize; i++)
442     {
443         intrWriteRegLeafEnClear_HAL(pGpu, pIntr, i, 0xFFFFFFFF, NULL);
444     }
445 }
446 
447 /*!
448  * @brief Enable all stall interrupts in dev_ctrl
449  *
450  * @param[in]   pGpu          OBJGPU pointer
451  * @param[in]   pIntr         Intr pointer
452  * @param[in]   pThreadState  thread state node pointer
453  */
454 static void
455 _intrEnableStall_TU102
456 (
457     OBJGPU            *pGpu,
458     Intr              *pIntr,
459     THREAD_STATE_NODE *pThreadState
460 )
461 {
462     NvU32 idx;
463 
464     //
465     // 1. Enable the UVM interrupts that RM currently owns at INTR_LEAF
466     // level.
467     //
468     NvU32 val = _intrGetUvmLeafMask_TU102(pGpu, pIntr);
469     idx = NV_CTRL_INTR_GPU_VECTOR_TO_LEAF_REG(pIntr->accessCntrIntrVector);
470     if (val != 0)
471     {
472         intrWriteRegLeafEnSet_HAL(pGpu, pIntr, idx, val, pThreadState);
473     }
474 
475     //
476     // 2. Enable all interrupts in the client shared subtree at INTR_LEAF
477     // level, based on the cached value.
478     //
479 
480     {
481         NV2080_INTR_CATEGORY_SUBTREE_MAP uvmShared;
482         NV_ASSERT_OK(intrGetSubtreeRange(pIntr,
483                                          NV2080_INTR_CATEGORY_UVM_SHARED,
484                                          &uvmShared));
485         //
486         // Assert to make sure we have only one client shared subtree.
487         // The below code assumes that.
488         //
489         NV_ASSERT(uvmShared.subtreeStart == uvmShared.subtreeEnd);
490         idx = uvmShared.subtreeStart;
491     }
492 
493     if (NvU64_HI32(pIntr->uvmSharedCpuLeafEn) != 0)
494     {
495         intrWriteRegLeafEnSet_HAL(pGpu, pIntr,
496                                   NV_CTRL_INTR_SUBTREE_TO_LEAF_IDX_START(idx),
497                                   NvU64_HI32(pIntr->uvmSharedCpuLeafEn),
498                                   pThreadState);
499     }
500     if (NvU64_LO32(pIntr->uvmSharedCpuLeafEn) != 0)
501     {
502         intrWriteRegLeafEnSet_HAL(pGpu, pIntr,
503                                   NV_CTRL_INTR_SUBTREE_TO_LEAF_IDX_END(idx),
504                                   NvU64_LO32(pIntr->uvmSharedCpuLeafEn),
505                                   pThreadState);
506     }
507 
508     // We use the assumption that 1 == ENABLE below
509     ct_assert(NV_VIRTUAL_FUNCTION_PRIV_CPU_INTR_TOP_EN_SET_SUBTREE_ENABLE == 1);
510 
511     {
512         //
513         // 3. Enable all interrupt subtrees (except nonstall) at top level.
514         // Nonstall enablement is handled by a different function.
515         //
516         NvU64 mask = NV_U64_MAX;
517 
518         mask &= ~intrGetIntrTopNonStallMask_HAL(pGpu, pIntr);
519 
520         if (NvU64_LO32(mask) != 0)
521         {
522             intrWriteRegTopEnSet_HAL(pGpu, pIntr,
523                                      0,
524                                      NvU64_LO32(mask),
525                                      pThreadState);
526         }
527         if (NvU64_HI32(mask) != 0)
528         {
529             intrWriteRegTopEnSet_HAL(pGpu, pIntr,
530                                      1,
531                                      NvU64_HI32(mask),
532                                      pThreadState);
533         }
534     }
535 }
536 
537 /*!
538  * @brief Disable all stall interrupts in dev_ctrl
539  *
540  * @param[in]   pGpu          OBJGPU pointer
541  * @param[in]   pIntr         Intr pointer
542  * @param[in]   pThreadState  thread state node pointer
543  */
544 static void
545 _intrDisableStall_TU102
546 (
547     OBJGPU            *pGpu,
548     Intr              *pIntr,
549     THREAD_STATE_NODE *pThreadState
550 )
551 {
552     NvU32 idx;
553 
554     NvU32 val;
555 
556     // 1. Disable the UVM interrupts that RM currently owns at INTR_LEAF level
557     idx = NV_CTRL_INTR_GPU_VECTOR_TO_LEAF_REG(pIntr->accessCntrIntrVector);
558     val = _intrGetUvmLeafMask_TU102(pGpu, pIntr);
559     if (val != 0)
560     {
561         intrWriteRegLeafEnClear_HAL(pGpu, pIntr, idx, val, pThreadState);
562     }
563 
564     //
565     // 2. Disable all interrupts in the client shared subtree at INTR_LEAF
566     // level, except the ones that can be handled outside the GPU lock.
567     //
568 
569     {
570         NV2080_INTR_CATEGORY_SUBTREE_MAP uvmShared;
571         NV_ASSERT_OK(intrGetSubtreeRange(pIntr,
572                                          NV2080_INTR_CATEGORY_UVM_SHARED,
573                                          &uvmShared));
574         //
575         // Assert to make sure we have only one client shared subtree.
576         // The below code assumes that.
577         //
578         NV_ASSERT(uvmShared.subtreeStart == uvmShared.subtreeEnd);
579         idx = uvmShared.subtreeStart;
580     }
581 
582     if (!gpuIsStateLoaded(pGpu))
583     {
584         //
585         // If GPU state load has not finished, there is nothing we can or want to
586         // do here, since our cached state of interrupt vectors isn't valid yet
587         // anyway.
588         //
589         intrWriteRegLeafEnClear_HAL(pGpu, pIntr,
590                                     NV_CTRL_INTR_SUBTREE_TO_LEAF_IDX_START(idx),
591                                     0xFFFFFFFF, pThreadState);
592         intrWriteRegLeafEnClear_HAL(pGpu, pIntr,
593                                     NV_CTRL_INTR_SUBTREE_TO_LEAF_IDX_END(idx),
594                                     0xFFFFFFFF, pThreadState);
595     }
596     else
597     {
598         if ((NvU64_HI32(pIntr->uvmSharedCpuLeafEnDisableMask) != 0) &&
599             (NvU64_HI32(pIntr->uvmSharedCpuLeafEn) != 0))
600         {
601             //
            // Only write to the register if both the enable mask and the
            // disable mask are non-zero. If there are no interrupts we're
604             // interested in handling in one of the leafs, the enable mask will
605             // be zero and the disable mask will be all 0xFs. There's no point
606             // writing the register in that case since interrupts are already
607             // not enabled. Using the cached value helps us avoid a register
608             // read in latency critical paths.
609             //
610             intrWriteRegLeafEnClear_HAL(pGpu, pIntr,
611                                         NV_CTRL_INTR_SUBTREE_TO_LEAF_IDX_START(idx),
612                                         NvU64_HI32(pIntr->uvmSharedCpuLeafEnDisableMask),
613                                         pThreadState);
614         }
615         if ((NvU64_LO32(pIntr->uvmSharedCpuLeafEnDisableMask) != 0) &&
616             (NvU64_LO32(pIntr->uvmSharedCpuLeafEn) != 0))
617         {
618             //
            // Only write to the register if both the enable mask and the
            // disable mask are non-zero. If there are no interrupts we're
621             // interested in handling in one of the leafs, the enable mask will
622             // be zero and the disable mask will be all 0xFs. There's no point
623             // writing the register in that case since interrupts are already
624             // not enabled. Using the cached value helps us avoid a register
625             // read in latency critical paths.
626             //
627             intrWriteRegLeafEnClear_HAL(pGpu, pIntr,
628                                         NV_CTRL_INTR_SUBTREE_TO_LEAF_IDX_END(idx),
629                                         NvU64_LO32(pIntr->uvmSharedCpuLeafEnDisableMask),
630                                         pThreadState);
631         }
632     }
633 
634     //
635     // 3. Disable some interrupt subtrees at top level (information about which
636     // ones to disable is cached in pIntr->intrTopEnMask)
637     //
638     if (NvU64_LO32(pIntr->intrTopEnMask) != 0)
639     {
640         intrWriteRegTopEnClear_HAL(pGpu, pIntr,
641                                    0,
642                                    NvU64_LO32(pIntr->intrTopEnMask),
643                                    pThreadState);
644     }
645     if (NvU64_HI32(pIntr->intrTopEnMask) != 0)
646     {
647         intrWriteRegTopEnClear_HAL(pGpu, pIntr,
648                                    1,
649                                    NvU64_HI32(pIntr->intrTopEnMask),
650                                    pThreadState);
651     }
652 }
653 
654 /*!
655  * @brief Clears a given interrupt vector at the dev_ctrl LEAF level
656  *
657  * @param[in]   pGpu          OBJGPU pointer
658  * @param[in]   pIntr         Intr pointer
659  * @param[in]   intrVector    interrupt vector to clear
660  * @param[in]   pThreadState  thread state node pointer
661  */
662 void
663 intrClearLeafVector_TU102
664 (
665     OBJGPU            *pGpu,
666     Intr              *pIntr,
667     NvU32              intrVector,
668     THREAD_STATE_NODE *pThreadState
669 )
670 {
671     NvU32 reg = NV_CTRL_INTR_GPU_VECTOR_TO_LEAF_REG(intrVector);
672     NvU32 bit = NV_CTRL_INTR_GPU_VECTOR_TO_LEAF_BIT(intrVector);
673 
674     intrWriteRegLeaf_HAL(pGpu, pIntr, reg, NVBIT(bit), pThreadState);
675 }
676 
677 /*!
678  * @brief Checks if the given interrupt vector is pending at the dev_ctrl LEAF level
679  *
680  * @param[in]   pGpu          OBJGPU pointer
681  * @param[in]   pIntr         Intr pointer
682  * @param[in]   intrVector    interrupt vector to check
683  * @param[in]   pThreadState  thread state node pointer
684  */
685 NvBool
686 intrIsVectorPending_TU102
687 (
688     OBJGPU            *pGpu,
689     Intr              *pIntr,
690     NvU32              intrVector,
691     THREAD_STATE_NODE *pThreadState
692 )
693 {
694     NvU32 reg = NV_CTRL_INTR_GPU_VECTOR_TO_LEAF_REG(intrVector);
695     NvU32 bit = NV_CTRL_INTR_GPU_VECTOR_TO_LEAF_BIT(intrVector);
696     NvU32 val = intrReadRegLeaf_HAL(pGpu, pIntr, reg, pThreadState);
697 
698     if (val & NVBIT(bit))
699         return NV_TRUE;
700     return NV_FALSE;
701 }
702 
703 /*!
704 * @brief Returns the INTR_LEAF mask for RM owned client interrupts.
705 *
706 * NOTE: Must be called after @intrStateLoad_TU102. This code assumes that the
707 * client owned interrupts are in the same leaf register. We would have checked
708 * whether that assumption is true in @intrStateLoad_TU102 and if it was
709 * violated, we'd have failed state load.
710 */
711 static NvU32
712 _intrGetUvmLeafMask_TU102
713 (
714     OBJGPU *pGpu,
715     Intr *pIntr
716 )
717 {
718     NvU32 val = 0;
719     KernelGmmu *pKernelGmmu = GPU_GET_KERNEL_GMMU(pGpu);
720     if (pKernelGmmu != NULL)
721     {
722         NvBool bRmOwnsReplayableFault = !!(pKernelGmmu->uvmSharedIntrRmOwnsMask & RM_UVM_SHARED_INTR_MASK_MMU_REPLAYABLE_FAULT_NOTIFY);
723         NvBool bRmOwnsAccessCntr      = !!(pKernelGmmu->uvmSharedIntrRmOwnsMask & RM_UVM_SHARED_INTR_MASK_HUB_ACCESS_COUNTER_NOTIFY);
724 
725         if (bRmOwnsReplayableFault)
726         {
727             val |= NVBIT(NV_CTRL_INTR_GPU_VECTOR_TO_LEAF_BIT(pIntr->replayableFaultIntrVector));
728         }
729         if (bRmOwnsAccessCntr)
730         {
731             val |= NVBIT(NV_CTRL_INTR_GPU_VECTOR_TO_LEAF_BIT(pIntr->accessCntrIntrVector));
732         }
733     }
734     return val;
735 }
736 
737 /*!
* @brief Returns a 64-bit mask, where all the bits set to 0 are the ones we
* intend to leave enabled in the client shared subtree even when we disable
* interrupts (for example, when we take the GPU lock).
*
* The non-replayable fault interrupt is shared with the client; as such, in
* the top half of the interrupt handler we only copy fault packets from the
* HW buffer to the appropriate SW buffers.
* The FIFO non-stall interrupt is used for runlist events, which also do not
* need to be blocked by the GPU lock (existing codepaths already ascertain that
* this is safe, so we're maintaining that behavior in NV_CTRL).
748 */
749 NvU64
750 intrGetUvmSharedLeafEnDisableMask_TU102
751 (
752     OBJGPU *pGpu,
753     Intr *pIntr
754 )
755 {
756     NvU32 intrVectorNonReplayableFault;
757     NvU32 intrVectorFifoNonstall = NV_INTR_VECTOR_INVALID;
758     NvU64 mask = 0;
759     NV2080_INTR_CATEGORY_SUBTREE_MAP uvmShared;
760 
761     // GSP RM services both MMU non-replayable fault and FIFO interrupts
762     if (IS_GSP_CLIENT(pGpu))
763     {
764         return ~mask;
765     }
766 
767     intrVectorNonReplayableFault = intrGetVectorFromEngineId(pGpu, pIntr, MC_ENGINE_IDX_NON_REPLAYABLE_FAULT, NV_FALSE);
768 
769     if (!IS_VIRTUAL(pGpu))
770     {
771         intrVectorFifoNonstall = intrGetVectorFromEngineId(pGpu, pIntr, MC_ENGINE_IDX_FIFO, NV_TRUE);
772     }
773 
774     if (intrVectorFifoNonstall != NV_INTR_VECTOR_INVALID)
775     {
776         // Ascertain that they're in the same subtree and same leaf
777         NV_ASSERT(NV_CTRL_INTR_GPU_VECTOR_TO_SUBTREE(intrVectorNonReplayableFault) ==
778                 NV_CTRL_INTR_GPU_VECTOR_TO_SUBTREE(intrVectorFifoNonstall));
779         NV_ASSERT(NV_CTRL_INTR_GPU_VECTOR_TO_LEAF_REG(intrVectorNonReplayableFault) ==
780                 NV_CTRL_INTR_GPU_VECTOR_TO_LEAF_REG(intrVectorFifoNonstall));
781     }
782 
783     NV_ASSERT_OK(intrGetSubtreeRange(pIntr,
784                                      NV2080_INTR_CATEGORY_UVM_SHARED,
785                                      &uvmShared));
786     //
787     // Ascertain that we only have 1 client subtree (we assume
788     // this since we cache only 64 bits).
789     //
790     NV_ASSERT(uvmShared.subtreeStart == uvmShared.subtreeEnd);
791 
792     //
    // Ascertain that the shared subtree covers only 2 leaves, as that is what
    // we currently support by caching only 64 bits
795     //
796     NV_ASSERT(
797         (NV_CTRL_INTR_SUBTREE_TO_LEAF_IDX_END(uvmShared.subtreeEnd) - 1) ==
798         NV_CTRL_INTR_SUBTREE_TO_LEAF_IDX_START(uvmShared.subtreeStart));
799 
800 
801     // Ascertain that they're in the first leaf
802     NV_ASSERT(
803         NV_CTRL_INTR_GPU_VECTOR_TO_LEAF_REG(intrVectorNonReplayableFault) ==
804         NV_CTRL_INTR_SUBTREE_TO_LEAF_IDX_START(uvmShared.subtreeStart));
805 
806     mask |= NVBIT64(NV_CTRL_INTR_GPU_VECTOR_TO_LEAF_BIT(intrVectorNonReplayableFault));
807 
808     if (intrVectorFifoNonstall != NV_INTR_VECTOR_INVALID)
809     {
810         mask |= NVBIT64(NV_CTRL_INTR_GPU_VECTOR_TO_LEAF_BIT(intrVectorFifoNonstall));
811     }
812 
813     mask <<= 32;
814 
815     return ~mask;
816 }
817 
818 /*!
 * @brief Gets the list of engines with pending stalling interrupts as per the interrupt trees
820  *
821  * @param[in]  pGpu
822  * @param[in]  pIntr
823  * @param[out] pEngines     List of engines that have pending stall interrupts
824  * @param[in]  pThreadState
825  *
826  * @return NV_OK if the list of engines that have pending stall interrupts was retrieved
827  */
828 NV_STATUS
829 intrGetPendingStallEngines_TU102
830 (
831     OBJGPU              *pGpu,
832     Intr                *pIntr,
833     MC_ENGINE_BITVECTOR *pEngines,
834     THREAD_STATE_NODE   *pThreadState
835 )
836 {
837     KernelGmmu *pKernelGmmu = GPU_GET_KERNEL_GMMU(pGpu);
838     InterruptTable    *pIntrTable;
839     InterruptTableIter iter;
    NvU64 sanityCheckSubtreeMask = 0;
    NvU32 numIntrLeaves = intrGetNumLeaves_HAL(pGpu, pIntr);
    NvU32 intrLeafValues[NV_MAX_INTR_LEAVES];

    NV_ASSERT(numIntrLeaves <= NV_MAX_INTR_LEAVES);
844 
845     sanityCheckSubtreeMask = intrGetIntrTopLegacyStallMask(pIntr);
846 
847     portMemSet(intrLeafValues, 0, numIntrLeaves * sizeof(NvU32));
848     bitVectorClrAll(pEngines);
849 
850     // dev_ctrl tree is not used for legacy-vGPU
851     if (IS_VIRTUAL_WITHOUT_SRIOV(pGpu))
852     {
853         return NV_OK;
854     }
855 
856     NV_ASSERT_OK_OR_RETURN(intrGetLeafStatus_HAL(pGpu, pIntr, intrLeafValues, pThreadState));
857     NV_ASSERT_OK_OR_RETURN(intrGetInterruptTable_HAL(pGpu, pIntr, &pIntrTable));
858 
859     for (iter = vectIterAll(pIntrTable); vectIterNext(&iter);)
860     {
861         INTR_TABLE_ENTRY *pEntry     = iter.pValue;
862         NvU32             intrVector = pEntry->intrVector;
863         NvU32             leaf;
864         NvU32             leafIndex;
865         NvU32             leafBit;
866 
867         // Check if this engine has a valid stalling interrupt vector in the new tree
868         if (intrVector == NV_INTR_VECTOR_INVALID)
869         {
870             continue;
871         }
872 
873         leafIndex = NV_CTRL_INTR_GPU_VECTOR_TO_LEAF_REG(intrVector);
874         leafBit = NV_CTRL_INTR_GPU_VECTOR_TO_LEAF_BIT(intrVector);
875 
876         //
877         // The leafIndex must be within the stall tree. Try to catch this on
878         // pre-release drivers. Don't need this on release drivers since this
879         // is only to catch issues during code development. Should never happen
880         // in practice
881         //
882 
883         if ((sanityCheckSubtreeMask &
884              NVBIT64(NV_CTRL_INTR_LEAF_IDX_TO_SUBTREE(leafIndex))) == 0)
885         {
886             NV_PRINTF(LEVEL_ERROR,
887                       "MC_ENGINE_IDX %u has invalid stall intr vector %u\n",
888                       pEntry->mcEngine,
889                       intrVector);
890             DBG_BREAKPOINT();
891             continue;
892         }
893         //
894         // Check if interrupt is pending. We skip checking if it is enabled in
895         // the leaf register since we mess around with the leaf enables in
896         // the interrupt disable path and will need special casing to handle it
897         // In the transition period from NV_PMC to NV_CTRL, the interrupt vector
898         // for engines that haven't yet switched would be INVALID, so we'd never
899         // get here anyway.
900         //
901         leaf = intrLeafValues[leafIndex] & NVBIT(leafBit);
902 
903         if (leaf == 0)
904         {
905             continue;
906         }
907 
908         // Add engine to bitvector
909         bitVectorSet(pEngines, pEntry->mcEngine);
910     }
911 
912     if (pKernelGmmu != NULL)
913     {
914         NvBool bRmOwnsReplayableFault = !!(pKernelGmmu->uvmSharedIntrRmOwnsMask & RM_UVM_SHARED_INTR_MASK_MMU_REPLAYABLE_FAULT_NOTIFY);
915         NvBool bRmOwnsAccessCntr      = !!(pKernelGmmu->uvmSharedIntrRmOwnsMask & RM_UVM_SHARED_INTR_MASK_HUB_ACCESS_COUNTER_NOTIFY);
916 
917         //
        // Add the non-replayable fault engine if there is something in the shadow buffer,
919         // as the interrupt itself is cleared earlier.
920         //
921         if (portAtomicOrS32(kgmmuGetFatalFaultIntrPendingState(pKernelGmmu, GPU_GFID_PF), 0))
922         {
923             bitVectorSet(pEngines, MC_ENGINE_IDX_NON_REPLAYABLE_FAULT);
924         }
925 
        // If RM doesn't own either UVM interrupt, remove it from the pending mask
927         if (!bRmOwnsReplayableFault)
928         {
929             bitVectorClr(pEngines, MC_ENGINE_IDX_REPLAYABLE_FAULT);
930         }
931 
932         if (!bRmOwnsAccessCntr)
933         {
934             bitVectorClr(pEngines, MC_ENGINE_IDX_ACCESS_CNTR);
935         }
936     }
937 
938     return NV_OK;
939 }
940 
941 /*!
 * @brief Checks and services MMU non-replayable fault interrupts that may not
943  * have been queued as DPC if we didn't get the GPU lock in the top half.
944  *
945  * If the MMU non-replayable fault interrupt was the only interrupt pending and
946  * we were unable to get the GPU lock in the top half, a DPC would not have
947  * been scheduled, but the non-replayable fault interrupt packet(s) would have
948  * been copied into the SW buffers. Try to do the bottom-half servicing of
949  * interrupts that could have been cleared in the top half.
950  *
951  * @param[in]   pGpu          OBJGPU pointer
952  * @param[in]   pIntr         Intr pointer
953  * @param[in]   pThreadState  THREAD_STATE_NODE pointer
954  */
955 NV_STATUS
956 intrCheckAndServiceNonReplayableFault_TU102
957 (
958     OBJGPU            *pGpu,
959     Intr              *pIntr,
960     THREAD_STATE_NODE *pThreadState
961 )
962 {
963     NV_STATUS status = NV_OK;
964     return status;
965 }
966 
967 /*!
968  * @brief Retrigger interrupts by toggling enables of those subtrees not
969  * toggled at top level in GPU lock acquire/release. Subtrees that are toggled
970  * at top level will be implicitly re-armed when the GPU lock is released.
971  *
972  * @param[in]   pGpu        GPU Object
973  * @param[in]   pIntr       Intr Object
974  */
975 void
976 intrRetriggerTopLevel_TU102
977 (
978     OBJGPU  *pGpu,
979     Intr *pIntr
980 )
981 {
982     NvU64 mask = 0;
983 
984     // We use the assumption that 1 == DISABLE below
985     ct_assert(NV_VIRTUAL_FUNCTION_PRIV_CPU_INTR_TOP_EN_CLEAR_SUBTREE_DISABLE == 1);
986 
987     //
988     // Toggle the top level interrupt enables for all interrupts whose top
989     // level enables are not toggled during RM lock acquire/release.
990     //
991     if (pGpu->getProperty(pGpu, PDB_PROP_GPU_ALTERNATE_TREE_HANDLE_LOCKLESS) &&
992         pGpu->getProperty(pGpu, PDB_PROP_GPU_ALTERNATE_TREE_ENABLED))
993     {
994         //
995         // 1. If the alternate tree (nonstall tree) is handled "lockless", it
996         // is not disabled during RM lock acquire, so needs re-arming.
997         //
998         mask |= intrGetIntrTopNonStallMask_HAL(pGpu, pIntr);
999     }
1000 
1001     // 2. UVM-owned interrupt tree (never disabled at top level)
1002     mask |= intrGetIntrTopCategoryMask(pIntr, NV2080_INTR_CATEGORY_UVM_OWNED);
1003 
1004     // 3. UVM/RM shared interrupt tree (never disabled at top level)
1005     mask |= intrGetIntrTopCategoryMask(pIntr, NV2080_INTR_CATEGORY_UVM_SHARED);
1006 
1007     //
1008     // Bypass GPU_REG_WR32 that requires the GPU lock to be held (for some
1009     // register filters) by using the OS interface directly.
1010     //
1011     // Clear all first, then set
1012     //
1013     if (NvU64_LO32(mask) != 0 &&
1014         0 < NV_VIRTUAL_FUNCTION_PRIV_CPU_INTR_TOP_EN_CLEAR__SIZE_1)
1015     {
1016         osGpuWriteReg032(pGpu,
1017             GPU_GET_VREG_OFFSET(pGpu,
1018                 NV_VIRTUAL_FUNCTION_PRIV_CPU_INTR_TOP_EN_CLEAR(0)),
1019             NvU64_LO32(mask));
1020     }
1021     if (NvU64_HI32(mask) != 0 &&
1022         1 < NV_VIRTUAL_FUNCTION_PRIV_CPU_INTR_TOP_EN_CLEAR__SIZE_1)
1023     {
1024         osGpuWriteReg032(pGpu,
1025             GPU_GET_VREG_OFFSET(pGpu,
1026                 NV_VIRTUAL_FUNCTION_PRIV_CPU_INTR_TOP_EN_CLEAR(1)),
1027             NvU64_HI32(mask));
1028     }
1029     if (NvU64_LO32(mask) != 0 &&
1030         0 < NV_VIRTUAL_FUNCTION_PRIV_CPU_INTR_TOP_EN_SET__SIZE_1)
1031     {
1032         osGpuWriteReg032(pGpu,
1033             GPU_GET_VREG_OFFSET(pGpu,
1034                 NV_VIRTUAL_FUNCTION_PRIV_CPU_INTR_TOP_EN_SET(0)),
1035             NvU64_LO32(mask));
1036     }
1037     if (NvU64_HI32(mask) != 0 &&
1038         1 < NV_VIRTUAL_FUNCTION_PRIV_CPU_INTR_TOP_EN_SET__SIZE_1)
1039     {
1040         osGpuWriteReg032(pGpu,
1041             GPU_GET_VREG_OFFSET(pGpu,
1042                 NV_VIRTUAL_FUNCTION_PRIV_CPU_INTR_TOP_EN_SET(1)),
1043             NvU64_HI32(mask));
1044     }
1045 }
1046 
1047 /*!
1048  * @brief read all leaf interrupt registers into an array
1049  *
1050  * @param[in]   pGpu       OBJGPU pointer
1051  * @param[in]   pIntr      Intr pointer
 * @param[out]  pLeafVals  array in which leaf values will be returned;
 *                         assumed to be sufficiently large
1054  */
1055 NV_STATUS
1056 intrGetLeafStatus_TU102
1057 (
1058     OBJGPU             *pGpu,
1059     Intr               *pIntr,
1060     NvU32              *pLeafVals,
1061     THREAD_STATE_NODE  *pThreadState
1062 )
1063 {
1064     NvU32 subtreeIndex;
1065     NvU32 leafIndex;
1066 
1067     FOR_EACH_INDEX_IN_MASK(64, subtreeIndex,
1068                            intrGetIntrTopLegacyStallMask(pIntr))
1069     {
1070         leafIndex = NV_CTRL_INTR_SUBTREE_TO_LEAF_IDX_START(subtreeIndex);
1071         if (pIntr->getProperty(pIntr, PDB_PROP_INTR_READ_ONLY_EVEN_NUMBERED_INTR_LEAF_REGS))
1072         {
1073             //
1074             // Since we know that on Turing, only one leaf per subtree has valid
1075             // interrupts, optimize to only read those leaf registers.
1076             //
1077             pLeafVals[leafIndex] = intrReadRegLeaf_HAL(pGpu, pIntr, leafIndex, pThreadState);
1078         }
1079         else
1080         {
1081             for (; leafIndex <= NV_CTRL_INTR_SUBTREE_TO_LEAF_IDX_END(subtreeIndex); leafIndex++)
1082             {
1083                 pLeafVals[leafIndex] = intrReadRegLeaf_HAL(pGpu, pIntr, leafIndex, pThreadState);
1084             }
1085         }
1086     } FOR_EACH_INDEX_IN_MASK_END
1087 
1088     return NV_OK;
1089 }
1090 
1091 /*!
 * @brief Returns a bitfield with only MC_ENGINE_IDX_DISP set if it's pending.
 *        On Turing+, there are multiple stall interrupt registers, and reading them
 *        all in the top half would be expensive. To satisfy bug 3220319, only find out
 *        if the display interrupt is pending. Fix this in bug 3279300.
1096  *
1097  * @param[in]  pGpu
 * @param[in]  pIntr
1099  * @param[out] pEngines     List of engines that have pending stall interrupts
1100  * @param[in]  pThreadState
1101  *
1102  * @return NV_OK if the list of engines that have pending stall interrupts was retrieved
1103  */
1104 NV_STATUS
1105 intrGetPendingDisplayIntr_TU102
1106 (
1107     OBJGPU              *pGpu,
1108     Intr                *pIntr,
1109     PMC_ENGINE_BITVECTOR pEngines,
1110     THREAD_STATE_NODE   *pThreadState
1111 )
1112 {
1113     KernelDisplay  *pKernelDisplay = GPU_GET_KERNEL_DISPLAY(pGpu);
1114 
1115     bitVectorClrAll(pEngines);
1116 
1117     if (IS_GPU_GC6_STATE_ENTERED(pGpu))
1118     {
1119         return NV_ERR_GPU_NOT_FULL_POWER;
1120     }
1121 
1122     if (!API_GPU_ATTACHED_SANITY_CHECK(pGpu))
1123     {
1124         return NV_ERR_GPU_IS_LOST;
1125     }
1126 
1127     if (pKernelDisplay != NULL && kdispGetDeferredVblankHeadMask(pKernelDisplay))
1128     {
1129         // Deferred vblank is pending which we need to handle
1130         bitVectorSet(pEngines, MC_ENGINE_IDX_DISP);
1131         // Nothing else to set here, return early
1132         return NV_OK;
1133     }
1134 
1135     if (pIntr->displayIntrVector == NV_INTR_VECTOR_INVALID)
1136     {
1137         return NV_OK;
1138     }
1139     else if (intrIsVectorPending_TU102(pGpu, pIntr, pIntr->displayIntrVector, pThreadState))
1140     {
1141         bitVectorSet(pEngines, MC_ENGINE_IDX_DISP);
1142     }
1143 
1144     return NV_OK;
1145 }
1146 
1147 
1148 /**
1149  * @brief Enable or disable the display interrupt.
1150  * This implements the missing functionality of PDB_PROP_INTR_USE_INTR_MASK_FOR_LOCKING
1151  * for Turing+: The ability to leave display interrrupts unmasked while the GPU lock is held
1152  * The PMC_INTR_MASK HW registers were deprecated in Pascal, but the Pascal-Volta interrupt
1153  * code still emulates them in SW. The Turing+ code did not implement any of the masking code,
1154  * but as seen in bug 3152190, the ability to leave the display interupt unmasked is still
1155  * needed. The ability to unmask the interrupts to enable them to show up in interrupt registers
1156  * is not needed, so this call is not needed at callsites that just do that
1157  * (_intrEnterCriticalSection / _intrExitCriticalSection)
1158  * This whole interrupts code mess needs refactored - bug 3279300
1159  *
1160  * @param[in] pGpu
1161  * @param[in] pIntr
1162  * @param[in] bEnable
1163  * @param[in] pThreadState - Needed for bypassing register filters in unlocked top half
1164  *
1165  */
1166 void
1167 intrSetDisplayInterruptEnable_TU102
1168 (
1169     OBJGPU            *pGpu,
1170     Intr              *pIntr,
1171     NvBool             bEnable,
1172     THREAD_STATE_NODE *pThreadState
1173 )
1174 {
1175     if (pIntr->displayIntrVector == NV_INTR_VECTOR_INVALID)
1176     {
1177         return;
1178     }
1179 
1180     NvU32 reg = NV_CTRL_INTR_GPU_VECTOR_TO_LEAF_REG(pIntr->displayIntrVector);
1181     NvU32 leafBit = NV_CTRL_INTR_GPU_VECTOR_TO_LEAF_BIT(pIntr->displayIntrVector);
1182 
1183     if (bEnable)
1184     {
1185         intrWriteRegLeafEnSet_HAL(pGpu, pIntr, reg, NVBIT(leafBit), pThreadState);
1186     }
1187     else
1188     {
1189         intrWriteRegLeafEnClear_HAL(pGpu, pIntr, reg, NVBIT(leafBit), pThreadState);
1190     }
1191 }
1192 
1193 /*!
 * @brief Dumps interrupt state (registers, vector table) for debugging purposes.
1195  *
1196  * @param[in] pGpu   OBJGPU pointer
1197  * @param[in] pIntr  Intr pointer
1198  */
1199 void
1200 intrDumpState_TU102
1201 (
1202     OBJGPU  *pGpu,
1203     Intr *pIntr
1204 )
1205 {
1206     InterruptTable    *pIntrTable;
1207     InterruptTableIter iter;
1208     NvU32              i;
1209     NvU32              intrLeafSize = intrGetLeafSize_HAL(pGpu, pIntr);
1210 
1211     NV_PRINTF(LEVEL_INFO, "Interrupt registers:\n");
1212     for (i = 0; i < NV_VIRTUAL_FUNCTION_PRIV_CPU_INTR_TOP__SIZE_1; i++)
1213     {
1214         NV_PRINTF(LEVEL_INFO, "INTR_TOP_EN_SET(%u)=0x%x\n", i,
1215                   intrReadRegTopEnSet_HAL(pGpu, pIntr, i, NULL));
1216     }
1217 
1218     for (i = 0; i < intrLeafSize; i++)
1219     {
1220         NV_PRINTF(LEVEL_INFO, "INTR_LEAF_EN_SET(%u)=0x%x\n", i,
1221                   intrReadRegLeafEnSet_HAL(pGpu, pIntr, i, NULL));
1222     }
1223 
1224     NV_PRINTF(LEVEL_INFO, "MC Interrupt table:\n");
1225     intrGetInterruptTable_HAL(pGpu, pIntr, &pIntrTable);
1226 
1227     for (i = 0, iter = vectIterAll(pIntrTable); vectIterNext(&iter); i++)
1228     {
1229         INTR_TABLE_ENTRY *pEntry = iter.pValue;
1230         PORT_UNREFERENCED_VARIABLE(pEntry);
1231 
1232         NV_PRINTF(LEVEL_INFO,
1233             "%2u: mcEngineIdx=%-4u intrVector=%-10u intrVectorNonStall=%-10u\n",
1234             i,
1235             pEntry->mcEngine,
1236             pEntry->intrVector,
1237             pEntry->intrVectorNonStall);
1238     }
1239 }
1240 
1241 
1242 NV_STATUS
1243 intrInitSubtreeMap_TU102
1244 (
1245     OBJGPU *pGpu,
1246     Intr   *pIntr
1247 )
1248 {
1249     NV2080_INTR_CATEGORY_SUBTREE_MAP *pCategoryEngine =
1250         &pIntr->subtreeMap[NV2080_INTR_CATEGORY_ESCHED_DRIVEN_ENGINE];
1251     pCategoryEngine->subtreeStart = NV_CPU_INTR_STALL_SUBTREE_START;
1252     pCategoryEngine->subtreeEnd   = NV_CPU_INTR_STALL_SUBTREE_LAST;
1253 
1254     NV2080_INTR_CATEGORY_SUBTREE_MAP *pCategoryEngineNotification =
1255         &pIntr->subtreeMap[NV2080_INTR_CATEGORY_ESCHED_DRIVEN_ENGINE_NOTIFICATION];
1256     pCategoryEngineNotification->subtreeStart = NV_VIRTUAL_FUNCTION_PRIV_CPU_INTR_TOP_SUBTREE(0);
1257     pCategoryEngineNotification->subtreeEnd   = NV_VIRTUAL_FUNCTION_PRIV_CPU_INTR_TOP_SUBTREE(0);
1258 
1259     NV2080_INTR_CATEGORY_SUBTREE_MAP *pCategoryUvmOwned =
1260         &pIntr->subtreeMap[NV2080_INTR_CATEGORY_UVM_OWNED];
1261     pCategoryUvmOwned->subtreeStart = NV_CPU_INTR_UVM_SUBTREE_START;
1262     pCategoryUvmOwned->subtreeEnd   = NV_CPU_INTR_UVM_SUBTREE_LAST;
1263 
1264     NV2080_INTR_CATEGORY_SUBTREE_MAP *pCategoryUvmShared =
1265         &pIntr->subtreeMap[NV2080_INTR_CATEGORY_UVM_SHARED];
1266     pCategoryUvmShared->subtreeStart = NV_CPU_INTR_UVM_SHARED_SUBTREE_START;
1267     pCategoryUvmShared->subtreeEnd   = NV_CPU_INTR_UVM_SHARED_SUBTREE_LAST;
1268 
1269     return NV_OK;
1270 }
1271 
1272 
1273 /*!
1274  * @brief Gets the number of leaf registers used
1275  */
1276 NvU32
1277 intrGetNumLeaves_TU102(OBJGPU *pGpu, Intr *pIntr)
1278 {
1279     ct_assert((NV_CTRL_INTR_SUBTREE_TO_LEAF_IDX_END(NV_CPU_INTR_STALL_SUBTREE_LAST) + 1) <= NV_MAX_INTR_LEAVES);
1280     return (NV_CTRL_INTR_SUBTREE_TO_LEAF_IDX_END(NV_CPU_INTR_STALL_SUBTREE_LAST) + 1);
1281 }
1282 
1283 /*!
 * @brief Gets the value of NV_VIRTUAL_FUNCTION_PRIV_CPU_INTR_LEAF__SIZE_1
1285  */
1286 NvU32
1287 intrGetLeafSize_TU102(OBJGPU *pGpu, Intr *pIntr)
1288 {
1289     return NV_VIRTUAL_FUNCTION_PRIV_CPU_INTR_LEAF__SIZE_1;
1290 }
1291 
1292 
1293 NvU64
1294 intrGetIntrTopNonStallMask_TU102
1295 (
1296     OBJGPU *pGpu,
1297     Intr   *pIntr
1298 )
1299 {
1300     // TODO Bug 3823562 Remove these asserts
1301     // Compile-time assert against the highest set bit that will be returned
1302     #define NV_CPU_INTR_NOSTALL_SUBTREE_HIGHEST NV_VIRTUAL_FUNCTION_PRIV_CPU_INTR_TOP_SUBTREE(0)
1303 
1304     ct_assert(NV_CPU_INTR_NOSTALL_SUBTREE_HIGHEST < NV_VIRTUAL_FUNCTION_PRIV_CPU_INTR_LEAF__SIZE_1);
1305     ct_assert(NV_CPU_INTR_NOSTALL_SUBTREE_HIGHEST < NV_VIRTUAL_FUNCTION_PRIV_CPU_INTR_LEAF_EN_SET__SIZE_1);
1306     ct_assert(NV_CPU_INTR_NOSTALL_SUBTREE_HIGHEST < NV_VIRTUAL_FUNCTION_PRIV_CPU_INTR_LEAF_EN_CLEAR__SIZE_1);
1307 
1308     NvU64 mask = intrGetIntrTopCategoryMask(pIntr,
1309         NV2080_INTR_CATEGORY_ESCHED_DRIVEN_ENGINE_NOTIFICATION);
1310 
1311     //
1312     // On all Ampere+ that use this TU102 HAL, Esched notification interrupts
1313     // are also included in this if PDB_PROP_GPU_SWRL_GRANULAR_LOCKING is set.
1314     //
1315 
1316     // Sanity check that Intr.subtreeMap is initialized
1317     NV_ASSERT(mask != 0);
1318     return mask;
1319 }
1320 
1321 
1322 /*!
1323  * @brief Decode the interrupt mode for SW to use
1324  *
 * @param[in]   pGpu        OBJGPU pointer
 * @param[in]   pIntr       Intr Object
 * @param[in]   intrEn      the enable value to decode
 *
 * @returns the decoded interrupt type
 *
1330  */
1331 NvU32
1332 intrDecodeStallIntrEn_TU102
1333 (
1334     OBJGPU  *pGpu,
1335     Intr *pIntr,
1336     NvU32    intrEn
1337 )
1338 {
1339     // mask with interrupts that RM owns
1340     if (pGpu->pmcRmOwnsIntrMask != 0)
1341     {
1342         intrEn &= pGpu->pmcRmOwnsIntrMask;
1343     }
1344 
1345     switch (intrEn)
1346     {
1347         case INTERRUPT_MASK_DISABLED:
1348             return INTERRUPT_TYPE_DISABLED;
1349         case INTERRUPT_MASK_HARDWARE:
1350             return INTERRUPT_TYPE_HARDWARE;
1351         case INTERRUPT_MASK_SOFTWARE:
1352             return INTERRUPT_TYPE_SOFTWARE;
1353         default:
1354             return INTERRUPT_TYPE_MULTI;
1355     }
1356 }
1357