/*
 * SPDX-FileCopyrightText: Copyright (c) 2021-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 * SPDX-License-Identifier: MIT
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 */

#define NVOC_KERN_GMMU_H_PRIVATE_ACCESS_ALLOWED

#include "gpu/mmu/kern_gmmu.h"
#include "gpu/mem_mgr/mem_mgr.h"
#include "vgpu/vgpu_events.h"
#include "nv_sriov_defines.h"
#include "kernel/gpu/intr/intr.h"
#include "kernel/gpu/gsp/kernel_gsp.h"
#include "kernel/gpu/conf_compute/ccsl.h"

#include "mmu/gmmu_fmt.h"
#include "published/hopper/gh100/dev_mmu.h"
#include "published/hopper/gh100/dev_fault.h"
#include "published/hopper/gh100/dev_vm.h"
#include "published/hopper/gh100/dev_vm_addendum.h"

/*!
 * Check if a specific GMMU format version is supported.
 */
NvBool
kgmmuFmtIsVersionSupported_GH10X(KernelGmmu *pKernelGmmu, NvU32 version)
{
    return (version == GMMU_FMT_VERSION_3);
}

/*!
 * Initialize the GMMU format families.
 */
NV_STATUS
kgmmuFmtFamiliesInit_GH100(OBJGPU *pGpu, KernelGmmu* pKernelGmmu)
{
    NvU32            i;
    NvU32            pdePcfHw = 0;
    NvU32            pdePcfSw = 0;
    NvU32            ptePcfHw = 0;
    NvU32            ptePcfSw = 0;

    // Initialize the sparse encoding in the PDE PCF field for V3
    GMMU_FMT_FAMILY *pFam = pKernelGmmu->pFmtFamilies[GMMU_FMT_VERSION_3 - 1];

    if (pFam != NULL)
    {
        // 1.Initialize sparsePde
        pdePcfSw |= (1 << SW_MMU_PCF_SPARSE_IDX);
        pdePcfSw |= (1 << SW_MMU_PCF_ATS_ALLOWED_IDX);
        NV_ASSERT_OR_RETURN((kgmmuTranslatePdePcfFromSw_HAL(pKernelGmmu, pdePcfSw, &pdePcfHw) == NV_OK),
                             NV_ERR_INVALID_ARGUMENT);
        gmmuFieldSetAperture(&pFam->pde.fldAperture, GMMU_APERTURE_INVALID,
                             pFam->sparsePde.v8);
        nvFieldSet32(&pFam->pde.fldPdePcf, pdePcfHw, pFam->sparsePde.v8);

        // 2.Initialize sparsePdeMulti
        for (i = 0; i < MMU_FMT_MAX_SUB_LEVELS; ++i)
        {
            const GMMU_FMT_PDE *pPdeFmt = &pFam->pdeMulti.subLevels[i];
            gmmuFieldSetAperture(&pPdeFmt->fldAperture, GMMU_APERTURE_INVALID,
                                 pFam->sparsePdeMulti.v8);
            // Set PDE PCF sparse bit only for sub-level 0 for PdeMulti
            if (i == 0)
            {
                nvFieldSet32(&pPdeFmt->fldPdePcf, pdePcfHw, pFam->sparsePdeMulti.v8);
            }
        }

        // 3.Initialize nv4kPte
        ptePcfSw |= (1 << SW_MMU_PCF_NV4K_IDX);
        nvFieldSetBool(&pFam->pte.fldValid, NV_FALSE, pFam->nv4kPte.v8);
        NV_ASSERT_OR_RETURN((kgmmuTranslatePtePcfFromSw_HAL(pKernelGmmu, ptePcfSw, &ptePcfHw) == NV_OK),
                             NV_ERR_INVALID_ARGUMENT);
        nvFieldSet32(&pFam->pte.fldPtePcf, ptePcfHw, pFam->nv4kPte.v8);

        // 4.Initialize sparsePte
        ptePcfSw = (1 << SW_MMU_PCF_SPARSE_IDX);
        nvFieldSetBool(&pFam->pte.fldValid, NV_FALSE, pFam->sparsePte.v8);
        NV_ASSERT_OR_RETURN((kgmmuTranslatePtePcfFromSw_HAL(pKernelGmmu, ptePcfSw, &ptePcfHw) == NV_OK),
                             NV_ERR_INVALID_ARGUMENT);
        nvFieldSet32(&pFam->pte.fldPtePcf, ptePcfHw, pFam->sparsePte.v8);
    }

    return NV_OK;
}

#define PTE_PCF_INVALID_LIST(fn) \
        fn(INVALID) \
        fn(NO_VALID_4KB_PAGE) \
        fn(SPARSE) \
        fn(MAPPING_NOWHERE)

#define PTE_PCF_VALID_LIST(fn) \
        fn(PRIVILEGE_RW_ATOMIC_CACHED_ACD) \
        fn(PRIVILEGE_RW_ATOMIC_CACHED_ACE) \
        fn(PRIVILEGE_RW_ATOMIC_UNCACHED_ACD) \
        fn(PRIVILEGE_RW_ATOMIC_UNCACHED_ACE) \
        fn(PRIVILEGE_RW_NO_ATOMIC_UNCACHED_ACE) \
        fn(PRIVILEGE_RW_NO_ATOMIC_CACHED_ACE) \
        fn(PRIVILEGE_RO_ATOMIC_UNCACHED_ACE) \
        fn(PRIVILEGE_RO_NO_ATOMIC_UNCACHED_ACE) \
        fn(PRIVILEGE_RO_NO_ATOMIC_CACHED_ACE) \
        fn(REGULAR_RW_ATOMIC_CACHED_ACD) \
        fn(REGULAR_RW_ATOMIC_CACHED_ACE) \
        fn(REGULAR_RW_ATOMIC_UNCACHED_ACD) \
        fn(REGULAR_RW_ATOMIC_UNCACHED_ACE) \
        fn(REGULAR_RW_NO_ATOMIC_CACHED_ACD) \
        fn(REGULAR_RW_NO_ATOMIC_CACHED_ACE) \
        fn(REGULAR_RW_NO_ATOMIC_UNCACHED_ACD) \
        fn(REGULAR_RW_NO_ATOMIC_UNCACHED_ACE) \
        fn(REGULAR_RO_ATOMIC_CACHED_ACD) \
        fn(REGULAR_RO_ATOMIC_CACHED_ACE) \
        fn(REGULAR_RO_ATOMIC_UNCACHED_ACD) \
        fn(REGULAR_RO_ATOMIC_UNCACHED_ACE) \
        fn(REGULAR_RO_NO_ATOMIC_CACHED_ACD) \
        fn(REGULAR_RO_NO_ATOMIC_CACHED_ACE) \
        fn(REGULAR_RO_NO_ATOMIC_UNCACHED_ACD) \
        fn(REGULAR_RO_NO_ATOMIC_UNCACHED_ACE)

#define PTE_PCF_HW_FROM_SW(name) \
        case (SW_MMU_PTE_PCF_##name): \
        { \
            *pPtePcfHw = NV_MMU_VER3_PTE_PCF_##name; \
            break; \
        }

#define PTE_PCF_SW_FROM_HW(name) \
        case (NV_MMU_VER3_PTE_PCF_##name): \
        { \
            *pPtePcfSw = SW_MMU_PTE_PCF_##name; \
            break; \
        }

//
// Takes a SW PTE PCF and translates to HW PTE PCF
// If a bit pattern is not supported by HW, return NV_ERR_NOT_SUPPORTED
//
NV_STATUS
kgmmuTranslatePtePcfFromSw_GH100
(
    KernelGmmu *pKernelGmmu,
    NvU32    ptePcfSw,
    NvU32   *pPtePcfHw
)
{
    switch (ptePcfSw)
    {
        PTE_PCF_INVALID_LIST(PTE_PCF_HW_FROM_SW)
        PTE_PCF_VALID_LIST(PTE_PCF_HW_FROM_SW)

        default:
        {
            NV_PRINTF(LEVEL_ERROR, "Unsupported SW PTE PCF pattern requested : %x\n", ptePcfSw);
            return NV_ERR_NOT_SUPPORTED;
        }
    }

    return NV_OK;
}

NV_STATUS
kgmmuTranslatePtePcfFromHw_GH100
(
    KernelGmmu *pKernelGmmu,
    NvU32       ptePcfHw,
    NvBool      bPteValid,
    NvU32      *pPtePcfSw
)
{
    if (!bPteValid)
    {
        switch (ptePcfHw)
        {
            PTE_PCF_INVALID_LIST(PTE_PCF_SW_FROM_HW)

            default: return NV_ERR_NOT_SUPPORTED;
        }
    }
    else
    {
        switch (ptePcfHw)
        {
            PTE_PCF_VALID_LIST(PTE_PCF_SW_FROM_HW)

            default:
            {
                NV_PRINTF(LEVEL_ERROR, "Unsupported HW PTE PCF pattern requested : %x\n", ptePcfHw);
                return NV_ERR_NOT_SUPPORTED;
            }
        }
    }

    return NV_OK;
}
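
//
// Illustrative sketch only (kept out of the build with #if 0): round-trips a
// sparse PTE PCF through the SW -> HW and HW -> SW helpers above. The helper
// name below is hypothetical and not part of the production code.
//
#if 0
static NV_STATUS
_kgmmuExamplePtePcfSparseRoundTrip(KernelGmmu *pKernelGmmu)
{
    NvU32 ptePcfHw = 0;
    NvU32 ptePcfSw = 0;

    // SW -> HW: SW_MMU_PTE_PCF_SPARSE maps to NV_MMU_VER3_PTE_PCF_SPARSE.
    NV_ASSERT_OK_OR_RETURN(
        kgmmuTranslatePtePcfFromSw_GH100(pKernelGmmu, SW_MMU_PTE_PCF_SPARSE, &ptePcfHw));

    //
    // HW -> SW: the PTE valid bit selects which lookup table is used, so a
    // sparse (invalid) encoding must be translated with bPteValid == NV_FALSE.
    //
    NV_ASSERT_OK_OR_RETURN(
        kgmmuTranslatePtePcfFromHw_GH100(pKernelGmmu, ptePcfHw, NV_FALSE, &ptePcfSw));

    NV_ASSERT(ptePcfSw == SW_MMU_PTE_PCF_SPARSE);
    return NV_OK;
}
#endif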

#define PDE_PCF_INVALID_LIST(fn) \
        fn(INVALID_ATS_ALLOWED) \
        fn(SPARSE_ATS_ALLOWED) \
        fn(INVALID_ATS_NOT_ALLOWED) \
        fn(SPARSE_ATS_NOT_ALLOWED)

#define PDE_PCF_VALID_LIST(fn) \
        fn(VALID_CACHED_ATS_ALLOWED) \
        fn(VALID_CACHED_ATS_NOT_ALLOWED) \
        fn(VALID_UNCACHED_ATS_ALLOWED) \
        fn(VALID_UNCACHED_ATS_NOT_ALLOWED)

#define PDE_PCF_HW_FROM_SW(name) \
        case (SW_MMU_PDE_PCF_##name): \
        { \
            *pPdePcfHw = NV_MMU_VER3_PDE_PCF_##name; \
            break; \
        }

#define PDE_PCF_SW_FROM_HW(name) \
        case (NV_MMU_VER3_PDE_PCF_##name): \
        { \
            *pPdePcfSw = SW_MMU_PDE_PCF_##name; \
            break; \
        }

//
// Takes a SW PDE PCF and translates to HW PDE PCF
// If a bit pattern is not supported by HW, return NV_ERR_NOT_SUPPORTED
//
NV_STATUS
kgmmuTranslatePdePcfFromSw_GH100
(
    KernelGmmu *pKernelGmmu,
    NvU32       pdePcfSw,
    NvU32      *pPdePcfHw
)
{
    switch (pdePcfSw)
    {
        PDE_PCF_INVALID_LIST(PDE_PCF_HW_FROM_SW)
        PDE_PCF_VALID_LIST(PDE_PCF_HW_FROM_SW)

        default: return NV_ERR_NOT_SUPPORTED;
    }

    return NV_OK;
}

//
// Takes a HW PDE PCF and translates to SW PDE PCF
// If a bit pattern is not supported by SW, return NV_ERR_NOT_SUPPORTED
//
NV_STATUS
kgmmuTranslatePdePcfFromHw_GH100
(
    KernelGmmu     *pKernelGmmu,
    NvU32           pdePcfHw,
    GMMU_APERTURE   aperture,
    NvU32          *pPdePcfSw
)
{
    if (aperture == GMMU_APERTURE_INVALID)
    {
        switch (pdePcfHw)
        {
            PDE_PCF_INVALID_LIST(PDE_PCF_SW_FROM_HW)

            default: return NV_ERR_NOT_SUPPORTED;
        }
    }
    else
    {
        switch (pdePcfHw)
        {
            PDE_PCF_VALID_LIST(PDE_PCF_SW_FROM_HW)

            default: return NV_ERR_NOT_SUPPORTED;
        }
    }

    return NV_OK;
}
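
//
// Illustrative sketch only (#if 0): the PDE PCF HW -> SW direction is keyed
// off the PDE aperture rather than a valid bit, so a sparse PDE must be
// translated back with GMMU_APERTURE_INVALID. The helper name is hypothetical.
//
#if 0
static NV_STATUS
_kgmmuExamplePdePcfSparseRoundTrip(KernelGmmu *pKernelGmmu)
{
    NvU32 pdePcfHw = 0;
    NvU32 pdePcfSw = 0;

    NV_ASSERT_OK_OR_RETURN(
        kgmmuTranslatePdePcfFromSw_GH100(pKernelGmmu,
                                         SW_MMU_PDE_PCF_SPARSE_ATS_ALLOWED,
                                         &pdePcfHw));
    NV_ASSERT_OK_OR_RETURN(
        kgmmuTranslatePdePcfFromHw_GH100(pKernelGmmu, pdePcfHw,
                                         GMMU_APERTURE_INVALID, &pdePcfSw));

    NV_ASSERT(pdePcfSw == SW_MMU_PDE_PCF_SPARSE_ATS_ALLOWED);
    return NV_OK;
}
#endif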

/*
 * @brief   Validates fabric base address.
 *
 * @param   pKernelGmmu
 * @param   fabricBaseAddr
 *
 * @returns On success, NV_OK.
 *          On failure, returns NV_ERR_XXX.
 */
NV_STATUS
kgmmuValidateFabricBaseAddress_GH100
(
    KernelGmmu *pKernelGmmu,
    NvU64       fabricBaseAddr
)
{
    OBJGPU        *pGpu = ENG_GET_GPU(pKernelGmmu);
    MemoryManager *pMemoryManager = GPU_GET_MEMORY_MANAGER(pGpu);
    NvU64 fbSizeBytes;

    fbSizeBytes = pMemoryManager->Ram.fbTotalMemSizeMb << 20;

    //
    // Hopper SKUs will be paired with NVSwitches (Laguna Seca) supporting 2K
    // mapslots that can cover 512GB each. Make sure that the fabric base
    // address being used is valid to cover whole frame buffer.
    //

    // Check if fabric address is aligned to mapslot size.
    if (fabricBaseAddr & (NVBIT64(39) - 1))
    {
        return NV_ERR_INVALID_ARGUMENT;
    }

    // Align fbSize to mapslot size.
    fbSizeBytes = RM_ALIGN_UP(fbSizeBytes, NVBIT64(39));

    //
    // Make sure the fabric address range covering the whole FB stays within
    // the addressable fabric space (2K mapslots * 512GB each = 2^50 bytes).
    //
    if ((fabricBaseAddr >= NVBIT64(50)) ||
        (fbSizeBytes > (NVBIT64(50) - fabricBaseAddr)))
    {
        return NV_ERR_INVALID_ARGUMENT;
    }

    return NV_OK;
}
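
//
// Worked example only (#if 0): with 2K mapslots of 512GB (NVBIT64(39) bytes)
// each, a fabric base address must sit on a 512GB boundary. The addresses and
// helper name below are hypothetical.
//
#if 0
static void
_kgmmuExampleFabricAddrAlignment(void)
{
    NvU64 alignedAddr   = 3ULL << 39;            // 1.5TB: multiple of 512GB, accepted
    NvU64 unalignedAddr = (3ULL << 39) + 0x1000; // not 512GB aligned, rejected

    NV_ASSERT((alignedAddr   & (NVBIT64(39) - 1)) == 0);
    NV_ASSERT((unalignedAddr & (NVBIT64(39) - 1)) != 0);
}
#endif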

/*!
 * @brief Get the engine ID associated with the Graphics Engine
 */
NvU32
kgmmuGetGraphicsEngineId_GH100
(
    KernelGmmu *pKernelGmmu
)
{
    return NV_PFAULT_MMU_ENG_ID_GRAPHICS;
}

NV_STATUS
kgmmuGetFaultRegisterMappings_GH100
(
    OBJGPU     *pGpu,
    KernelGmmu *pKernelGmmu,
    NvU32       index,
    NvP64      *pFaultBufferGet,
    NvP64      *pFaultBufferPut,
    NvP64      *pFaultBufferInfo,
    NvP64      *pHubIntr,
    NvP64      *pHubIntrEnSet,
    NvP64      *pHubIntrEnClear,
    NvU32      *faultMask,
    NvP64      *pPrefetchCtrl
)
{
    DEVICE_MAPPING *pMapping    = gpuGetDeviceMapping(pGpu, DEVICE_INDEX_GPU, 0);
    NvP64           bar0Mapping = NV_PTR_TO_NvP64(pMapping->gpuNvAddr);

    NV_ASSERT_OR_RETURN((index < NUM_FAULT_BUFFERS), NV_ERR_INVALID_ARGUMENT);

    //
    // If Hopper CC is not enabled or GSP doesn't entirely own the HW fault buffers,
    // use the Turing HAL.
    //
    if (!gpuIsCCFeatureEnabled(pGpu) || !gpuIsGspOwnedFaultBuffersEnabled(pGpu))
    {
        return kgmmuGetFaultRegisterMappings_TU102(pGpu, pKernelGmmu, index,
                                                   pFaultBufferGet, pFaultBufferPut,
                                                   pFaultBufferInfo, pHubIntr,
                                                   pHubIntrEnSet, pHubIntrEnClear,
                                                   faultMask, pPrefetchCtrl);
    }

    *pFaultBufferGet = 0;
    *pFaultBufferInfo = 0;
    *pHubIntr = 0;
    *pHubIntrEnSet = 0;
    *pHubIntrEnClear = 0;
    *faultMask = 0;
    *pPrefetchCtrl = 0;

    //
    // When Hopper CC is enabled, we repurpose the access counter registers to
    // hold the PUT pointer of the shadow buffers. Only GSP-RM can write the
    // PUT pointer to these PRIs. CPU has read-only access to these PRIs
    //
    if (index == REPLAYABLE_FAULT_BUFFER)
    {
        Intr *pIntr      = GPU_GET_INTR(pGpu);
        NvU32 intrVector = intrGetVectorFromEngineId(pGpu, pIntr, MC_ENGINE_IDX_REPLAYABLE_FAULT_CPU, NV_FALSE);
        struct GMMU_FAULT_BUFFER *pFaultBuffer;
        GMMU_CLIENT_SHADOW_FAULT_BUFFER *pClientShadowFaultBuf;
        FAULT_BUFFER_SHARED_MEMORY *pFaultBufSharedMem;
        NvU32 leafReg;
        NvU32 leafBit;

        leafReg = NV_CTRL_INTR_GPU_VECTOR_TO_LEAF_REG(intrVector);
        leafBit = NV_CTRL_INTR_GPU_VECTOR_TO_LEAF_BIT(intrVector);

        pFaultBuffer = &pKernelGmmu->mmuFaultBuffer[GPU_GFID_PF];
        pClientShadowFaultBuf =
            KERNEL_POINTER_FROM_NvP64(GMMU_CLIENT_SHADOW_FAULT_BUFFER *,
                        pFaultBuffer->pClientShadowFaultBuffer[index]);

        pFaultBufSharedMem =
            KERNEL_POINTER_FROM_NvP64(FAULT_BUFFER_SHARED_MEMORY *,
                        pClientShadowFaultBuf->pFaultBufferSharedMemoryAddress);

        *pHubIntr = NvP64_PLUS_OFFSET(bar0Mapping,
                     GPU_GET_VREG_OFFSET(pGpu, NV_VIRTUAL_FUNCTION_PRIV_CPU_INTR_LEAF(leafReg)));
        *pHubIntrEnSet = NvP64_PLUS_OFFSET(bar0Mapping,
                     GPU_GET_VREG_OFFSET(pGpu, NV_VIRTUAL_FUNCTION_PRIV_CPU_INTR_LEAF_EN_SET(leafReg)));
        *pHubIntrEnClear = NvP64_PLUS_OFFSET(bar0Mapping,
                     GPU_GET_VREG_OFFSET(pGpu, NV_VIRTUAL_FUNCTION_PRIV_CPU_INTR_LEAF_EN_CLEAR(leafReg)));
        *faultMask = NVBIT(leafBit);
        *pFaultBufferGet = (NvU32*) &(pFaultBufSharedMem->swGetIndex);
        *pFaultBufferPut = NvP64_PLUS_OFFSET(bar0Mapping,
                     GPU_GET_VREG_OFFSET(pGpu, NV_VIRTUAL_FUNCTION_PRIV_ACCESS_COUNTER_NOTIFY_BUFFER_HI));
    }
    else if (index == NON_REPLAYABLE_FAULT_BUFFER)
    {
        *pFaultBufferPut = NvP64_PLUS_OFFSET(bar0Mapping,
                     GPU_GET_VREG_OFFSET(pGpu, NV_VIRTUAL_FUNCTION_PRIV_ACCESS_COUNTER_NOTIFY_BUFFER_LO));
    }
    else
    {
        NV_ASSERT_OR_RETURN(0, NV_ERR_INVALID_ARGUMENT);
    }

    return NV_OK;
}
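
//
// Illustrative sketch only (#if 0): how a fault interrupt vector decomposes
// into the CPU_INTR_LEAF register index and bit used above. The vector value
// and helper name are hypothetical.
//
#if 0
static void
_kgmmuExampleLeafRegAndBit(void)
{
    NvU32 intrVector = 137; // hypothetical vector for the replayable-fault CPU interrupt
    NvU32 leafReg    = NV_CTRL_INTR_GPU_VECTOR_TO_LEAF_REG(intrVector); // which 32-bit leaf register
    NvU32 leafBit    = NV_CTRL_INTR_GPU_VECTOR_TO_LEAF_BIT(intrVector); // bit within that register
    NvU32 faultMask  = NVBIT(leafBit); // single-bit mask reported back to the caller

    (void)leafReg;
    (void)faultMask;
}
#endif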

NV_STATUS
kgmmuFaultBufferAllocSharedMemory_GH100
(
    OBJGPU *pGpu,
    KernelGmmu *pKernelGmmu,
    FAULT_BUFFER_TYPE index
)
{
    NV_STATUS status;
    GMMU_CLIENT_SHADOW_FAULT_BUFFER *pClientShadowFaultBuffer;
    MEMORY_DESCRIPTOR *pMemDesc;
    NvU64 flags = MEMDESC_FLAGS_NONE;

    if (pKernelGmmu->getProperty(pKernelGmmu, PDB_PROP_KGMMU_FAULT_BUFFER_DISABLED))
    {
        NV_PRINTF(LEVEL_ERROR, "Fault-Buffer is disabled. Flush Seq memory cannot be created\n");
        NV_ASSERT_OR_RETURN(0, NV_ERR_INVALID_STATE);
    }

    if (index != REPLAYABLE_FAULT_BUFFER)
    {
        return NV_OK;
    }

    if (!gpuIsCCFeatureEnabled(pGpu) || !gpuIsGspOwnedFaultBuffersEnabled(pGpu))
    {
        return NV_OK;
    }

    //
    // On systems with SEV enabled, the fault buffer flush sequence memory should be allocated
    // in unprotected sysmem as GSP will be reading this location to check whether the Replayable buffer is full.
    //
    flags |= MEMDESC_FLAGS_ALLOC_IN_UNPROTECTED_MEMORY;

    pClientShadowFaultBuffer = &pKernelGmmu->mmuFaultBuffer[GPU_GFID_PF].clientShadowFaultBuffer[index];
    status = memdescCreate(&pMemDesc, pGpu,
                           sizeof(FAULT_BUFFER_SHARED_MEMORY), RM_PAGE_SIZE,
                           NV_FALSE, ADDR_SYSMEM, NV_MEMORY_UNCACHED,
                           flags);
    if (status != NV_OK)
    {
        return status;
    }

    memdescTagAlloc(status, NV_FB_ALLOC_RM_INTERNAL_OWNER_UNNAMED_TAG_131,
                    pMemDesc);
    if (status != NV_OK)
    {
        goto destroy_memdesc;
    }

    status = memdescMap(pMemDesc, 0,
                        memdescGetSize(pMemDesc),
                        NV_TRUE, NV_PROTECT_READ_WRITE,
                        &pClientShadowFaultBuffer->pFaultBufferSharedMemoryAddress,
                        &pClientShadowFaultBuffer->pFaultBufferSharedMemoryPriv);
    if (status != NV_OK)
    {
        goto free_memory;
    }

    pClientShadowFaultBuffer->pFaultBufferSharedMemDesc = pMemDesc;

    return NV_OK;

free_memory:
    memdescFree(pMemDesc);

destroy_memdesc:
    memdescDestroy(pMemDesc);

    return status;
}
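
//
// Note on the unwind ordering above: memdescCreate is undone by memdescDestroy,
// and a successful memdescTagAlloc additionally needs memdescFree first. A
// teardown sketch (#if 0) mirroring kgmmuFaultBufferFreeSharedMemory_GH100
// below; the helper name and parameters are hypothetical.
//
#if 0
static void
_kgmmuExampleSharedMemTeardown(MEMORY_DESCRIPTOR *pMemDesc, NvP64 pCpuAddr, NvP64 pPriv)
{
    memdescUnmap(pMemDesc, NV_TRUE, osGetCurrentProcess(), pCpuAddr, pPriv); // undo memdescMap
    memdescFree(pMemDesc);                                                   // undo memdescTagAlloc
    memdescDestroy(pMemDesc);                                                // undo memdescCreate
}
#endif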

void
kgmmuFaultBufferFreeSharedMemory_GH100
(
    OBJGPU *pGpu,
    KernelGmmu *pKernelGmmu,
    FAULT_BUFFER_TYPE index
)
{
    MEMORY_DESCRIPTOR *pMemDesc;
    GMMU_CLIENT_SHADOW_FAULT_BUFFER *pClientShadowFaultBuffer;

    if (index != REPLAYABLE_FAULT_BUFFER)
    {
        return;
    }

    if (!gpuIsCCFeatureEnabled(pGpu) || !gpuIsGspOwnedFaultBuffersEnabled(pGpu))
    {
        return;
    }

    pClientShadowFaultBuffer = &pKernelGmmu->mmuFaultBuffer[GPU_GFID_PF].clientShadowFaultBuffer[index];
    pMemDesc = pClientShadowFaultBuffer->pFaultBufferSharedMemDesc;

    memdescUnmap(pMemDesc,
                 NV_TRUE, osGetCurrentProcess(),
                 pClientShadowFaultBuffer->pFaultBufferSharedMemoryAddress,
                 pClientShadowFaultBuffer->pFaultBufferSharedMemoryPriv);

    memdescFree(pMemDesc);
    memdescDestroy(pMemDesc);
    return;
}

/*
 * @brief GSP client can use this function to initiate a replayable fault buffer flush when the
 * HW fault buffer is owned by GSP.
 */
NV_STATUS
kgmmuIssueReplayableFaultBufferFlush_GH100
(
    OBJGPU *pGpu,
    KernelGmmu *pKernelGmmu
)
{
    KernelGsp *pKernelGsp = GPU_GET_KERNEL_GSP(pGpu);

    if (!gpuIsCCFeatureEnabled(pGpu) || !gpuIsGspOwnedFaultBuffersEnabled(pGpu) || !IS_GSP_CLIENT(pGpu))
    {
        return NV_ERR_NOT_SUPPORTED;
    }

    return kgspIssueNotifyOp_HAL(pGpu, pKernelGsp, GSP_NOTIFY_OP_FLUSH_REPLAYABLE_FAULT_BUFFER_OPCODE, NULL, 0);
}

/*
 * @brief The GSP client can use this function to toggle the prefetch ctrl register state.
 * The write of the register will be performed by GSP.
 *
 * @param[in]  pGpu         OBJGPU pointer
 * @param[in]  pKernelGmmu  KernelGmmu pointer
 * @param[in]  bEnable      Enable/Disable fault on prefetch.
 */
NV_STATUS
kgmmuToggleFaultOnPrefetch_GH100
(
    OBJGPU *pGpu,
    KernelGmmu *pKernelGmmu,
    NvBool bEnable
)
{
    KernelGsp *pKernelGsp = GPU_GET_KERNEL_GSP(pGpu);
    NvU32 arg = !!bEnable;

    if (!IS_GSP_CLIENT(pGpu))
    {
        return NV_ERR_NOT_SUPPORTED;
    }

    return kgspIssueNotifyOp_HAL(pGpu, pKernelGsp, GSP_NOTIFY_OP_TOGGLE_FAULT_ON_PREFETCH_OPCODE, &arg, 1 /* argc */);
}

/*
 * @brief When Hopper Confidential Compute is enabled, the put index of the
 *        client replayable/non-replayable shadow buffers gets stored in the
 *        access counter PRIs. This function is used by Kernel RM to read the put index.
 *
 * @param[in]  pGpu         OBJGPU pointer
 * @param[in]  pKernelGmmu  KernelGmmu pointer
 * @param[in]  type         Replayable/Non-replayable fault buffer
 *
 * @returns NvU32
 */
NvU32
kgmmuReadShadowBufPutIndex_GH100
(
    OBJGPU            *pGpu,
    KernelGmmu        *pKernelGmmu,
    FAULT_BUFFER_TYPE  type
)
{
    NvU32 val;
    if (type == REPLAYABLE_FAULT_BUFFER)
    {
        val = GPU_VREG_RD32(pGpu, NV_VIRTUAL_FUNCTION_PRIV_REPLAYABLE_FAULT_SHADOW_BUFFER_PUT);
    }
    else
    {
        val = GPU_VREG_RD32(pGpu, NV_VIRTUAL_FUNCTION_PRIV_NON_REPLAYABLE_FAULT_SHADOW_BUFFER_PUT);
        val = DRF_VAL(_VIRTUAL_FUNCTION_PRIV, _NON_REPLAYABLE_FAULT_SHADOW_BUFFER_PUT, _PTR, val);
    }
    return val;
}
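
//
// Illustrative sketch only (#if 0): a typical caller compares the PUT index
// read above against its cached GET index to decide whether new fault packets
// are pending. The helper name and cached index are hypothetical.
//
#if 0
static NvBool
_kgmmuExampleShadowBufWorkPending(OBJGPU *pGpu, KernelGmmu *pKernelGmmu, NvU32 cachedGetIndex)
{
    NvU32 putIndex = kgmmuReadShadowBufPutIndex_GH100(pGpu, pKernelGmmu, REPLAYABLE_FAULT_BUFFER);

    // New packets are available whenever PUT has moved past the cached GET.
    return (putIndex != cachedGetIndex);
}
#endif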

/*!
 * @brief Check if the given engineID is BAR1
 *
 * @param[in] pKernelGmmu  KernelGmmu object
 * @param[in] engineID     Engine ID
 *
 * @return True if BAR1
 */
NvBool
kgmmuIsFaultEngineBar1_GH100
(
    KernelGmmu *pKernelGmmu,
    NvU32       engineID
)
{
    return (engineID == NV_PFAULT_MMU_ENG_ID_BAR1);
}

/*!
 * @brief Check if the given engineID is BAR2
 *
 * @param[in] pKernelGmmu  KernelGmmu object
 * @param[in] engineID     Engine ID
 *
 * @return True if BAR2
 */
NvBool
kgmmuIsFaultEngineBar2_GH100
(
    KernelGmmu *pKernelGmmu,
    NvU32       engineID
)
{
    return (engineID == NV_PFAULT_MMU_ENG_ID_BAR2);
}

/*!
 * @brief Check if the given engineID is PHYSICAL
 *
 * @param[in] pKernelGmmu  KernelGmmu object
 * @param[in] engineID     Engine ID
 *
 * @return True if PHYSICAL
 */
NvBool
kgmmuIsFaultEnginePhysical_GH100
(
    KernelGmmu *pKernelGmmu,
    NvU32       engineID
)
{
    return (engineID == NV_PFAULT_MMU_ENG_ID_PHYSICAL);
}

NvU32
kgmmuReadClientShadowBufPutIndex_GH100
(
    OBJGPU            *pGpu,
    KernelGmmu        *pKernelGmmu,
    NvU32              gfid,
    FAULT_BUFFER_TYPE  type
)
{
    return 0;
}

void
kgmmuWriteClientShadowBufPutIndex_GH100
(
    OBJGPU            *pGpu,
    KernelGmmu        *pKernelGmmu,
    NvU32              gfid,
    FAULT_BUFFER_TYPE  type,
    NvU32              putIndex
)
{
}

/*
 * @brief Copies a single fault packet from the replayable/non-replayable
 *        HW fault buffer to the corresponding client shadow buffer
 *
 * @param[in]  pFaultBuffer        Pointer to GMMU_FAULT_BUFFER
 * @param[in]  type                Replayable/Non-replayable fault type
 * @param[in]  getIndex            Get pointer of the HW fault buffer
 * @param[in]  shadowBufPutIndex   Put pointer of the shadow buffer
 * @param[in]  maxBufferEntries    Maximum possible entries in the HW buffer
 * @param[in]  pThreadState        Pointer to THREAD_STATE_NODE
 * @param[out] pFaultsCopied       Number of fault packets copied by the function
 *
 * @returns NV_STATUS
 */
NV_STATUS
kgmmuCopyFaultPacketToClientShadowBuffer_GH100
(
    OBJGPU                   *pGpu,
    KernelGmmu               *pKernelGmmu,
    struct GMMU_FAULT_BUFFER *pFaultBuffer,
    FAULT_BUFFER_TYPE         type,
    NvU32                     getIndex,
    NvU32                     shadowBufPutIndex,
    NvU32                     maxBufferEntries,
    THREAD_STATE_NODE        *pThreadState,
    NvU32                    *pFaultsCopied
)
{
    struct HW_FAULT_BUFFER *pHwFaultBuffer = NULL;
    GMMU_CLIENT_SHADOW_FAULT_BUFFER *pClientShadowFaultBuf = NULL;
    GMMU_FAULT_PACKET faultPacket;
    NvU32 faultPacketsPerPage;
    NvU32 faultPacketPageIndex;
    NvU32 faultPacketPageOffset;
    void *pSrc;
    NvU8 *pDst;
    NV_STATUS status;
    NvU8 *pDstMetadata;
    NvU32 metadataStartIndex;
    NvU32 metadataPerPage;
    NvU32 metadataPageIndex;
    NvU32 metadataPageOffset;
    NvU8  validBit = 1;
    void *pCslCtx = NULL;

    if (!gpuIsCCFeatureEnabled(pGpu) || !gpuIsGspOwnedFaultBuffersEnabled(pGpu))
    {
        return kgmmuCopyFaultPacketToClientShadowBuffer_GV100(pGpu, pKernelGmmu,
                                                              pFaultBuffer,
                                                              type,
                                                              getIndex,
                                                              shadowBufPutIndex,
                                                              maxBufferEntries,
                                                              pThreadState,
                                                              pFaultsCopied);
    }

    *pFaultsCopied = 0;

    pHwFaultBuffer = &pFaultBuffer->hwFaultBuffers[type];
    pClientShadowFaultBuf = pFaultBuffer->pClientShadowFaultBuffer[type];

    // Read the fault packet from HW buffer
    pSrc = kgmmuFaultBufferGetFault_HAL(pGpu, pKernelGmmu, pHwFaultBuffer, getIndex);
    portMemCopy(&faultPacket, sizeof(GMMU_FAULT_PACKET), pSrc, sizeof(GMMU_FAULT_PACKET));

    //
    // The following is the sequence to be followed for replayable faults
    // as per production design when Hopper CC is enabled
    //
    if (type == REPLAYABLE_FAULT_BUFFER)
    {
        NvU32 nextGetIndex;

        kgmmuFaultBufferClearPackets_HAL(pGpu, pKernelGmmu, pHwFaultBuffer, getIndex, 1);

        //
        // Ensure all writes to the current entry are completed before updating the
        // GET pointer.
        //
        portAtomicMemoryFenceStore();

        nextGetIndex = (getIndex + 1) % maxBufferEntries;

        // Update cached GET to a valid value.
        pHwFaultBuffer->cachedGetIndex = nextGetIndex;

        // Increment the GET pointer to enable HW to write new fault packets
        kgmmuWriteFaultBufferGetPtr_HAL(pGpu, pKernelGmmu, type, pHwFaultBuffer->cachedGetIndex, pThreadState);

        // Check if there is space in the shadow buffer
        if (kgmmuIsReplayableShadowFaultBufferFull_HAL(pGpu, pKernelGmmu,
                                                       pClientShadowFaultBuf,
                                                       shadowBufPutIndex,
                                                       maxBufferEntries))
        {
            // The design allows the SW Replayable shadow fault buffer to overflow.
            return NV_OK;
        }
    }

    faultPacketsPerPage = RM_PAGE_SIZE / sizeof(GMMU_FAULT_PACKET);
    faultPacketPageIndex = shadowBufPutIndex / faultPacketsPerPage;
    faultPacketPageOffset = shadowBufPutIndex % faultPacketsPerPage;

    pDst = KERNEL_POINTER_FROM_NvP64(NvU8 *,
               pClientShadowFaultBuf->pBufferPages[faultPacketPageIndex].pAddress);
    pDst += (faultPacketPageOffset * sizeof(GMMU_FAULT_PACKET));

    //
    // Metadata is packed at the end of the buffer.
    // Calculate the page index and offset at which RM needs to fill the metadata
    // and copy it over.
    //
    metadataStartIndex = pClientShadowFaultBuf->metadataStartIndex;
    metadataPerPage = RM_PAGE_SIZE / sizeof(GMMU_FAULT_PACKET_METADATA);
    metadataPageIndex = shadowBufPutIndex / metadataPerPage;
    metadataPageOffset = shadowBufPutIndex % metadataPerPage;

    pDstMetadata = KERNEL_POINTER_FROM_NvP64(NvU8 *,
                   pClientShadowFaultBuf->pBufferPages[metadataStartIndex + metadataPageIndex].pAddress);
    pDstMetadata += (metadataPageOffset * sizeof(GMMU_FAULT_PACKET_METADATA));

    // Sanity check that the client reset the Valid bit.
    if (pDstMetadata[GMMU_FAULT_PACKET_METADATA_VALID_IDX] != 0)
    {
        NV_PRINTF(LEVEL_ERROR, "Plaintext valid bit not reset by client.\n");
        return NV_ERR_INVALID_STATE;
    }

    pCslCtx = kgmmuGetShadowFaultBufferCslContext(pGpu, pKernelGmmu, type);
    if (pCslCtx == NULL)
    {
        NV_PRINTF(LEVEL_ERROR, "CSL context for type 0x%x unexpectedly NULL\n", type);
        return NV_ERR_INVALID_STATE;
    }

    status = ccslEncrypt(pCslCtx,
                         sizeof(GMMU_FAULT_PACKET),
                         (NvU8*) &faultPacket,
                         &validBit,
                         GMMU_FAULT_PACKET_METADATA_VALID_SIZE,
                         pDst,
                         &pDstMetadata[GMMU_FAULT_PACKET_METADATA_AUTHTAG_IDX]);
    if (status != NV_OK)
    {
        if (status == NV_ERR_INSUFFICIENT_RESOURCES)
        {
            // IV overflow is considered fatal.
            NV_PRINTF(LEVEL_ERROR, "Fatal error detected in fault buffer packet encryption: IV overflow!\n");
            confComputeSetErrorState(pGpu, GPU_GET_CONF_COMPUTE(pGpu));
        }
        else
        {
            NV_PRINTF(LEVEL_ERROR, "Error detected in fault buffer packet encryption: 0x%x\n", status);
        }
        return status;
    }

    //
    // Ensure that the encrypted packet and authTag have reached point of coherence
    // before writing the plaintext valid bit.
    //
    portAtomicMemoryFenceStore();

    // Write the valid bit and increment the number of faults copied.
    portMemCopy((void*)&pDstMetadata[GMMU_FAULT_PACKET_METADATA_VALID_IDX],
                GMMU_FAULT_PACKET_METADATA_VALID_SIZE,
                &validBit,
                GMMU_FAULT_PACKET_METADATA_VALID_SIZE);

    *pFaultsCopied = 1;

    return NV_OK;
}
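
//
// Worked example only (#if 0) of the shadow-buffer indexing above: with 4KB
// pages and (for illustration) 32-byte fault packets there are 128 packets per
// page, so a PUT index of 130 lands at offset 2 within page 1. Real code uses
// sizeof(GMMU_FAULT_PACKET); the helper name and PUT value are hypothetical.
//
#if 0
static void
_kgmmuExampleShadowBufIndexing(void)
{
    NvU32 faultPacketsPerPage   = RM_PAGE_SIZE / sizeof(GMMU_FAULT_PACKET); // e.g. 4096 / 32 = 128
    NvU32 shadowBufPutIndex     = 130;                                      // hypothetical PUT index
    NvU32 faultPacketPageIndex  = shadowBufPutIndex / faultPacketsPerPage;  // 1
    NvU32 faultPacketPageOffset = shadowBufPutIndex % faultPacketsPerPage;  // 2

    (void)faultPacketPageIndex;
    (void)faultPacketPageOffset;
}
#endif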

/*
 * @brief Checks if the client shadow buffer has space
 *
 * @param[in]  pClientShadowFaultBuf  Pointer to the shadow buffer
 * @param[in]  shadowBufPutIndex      Put index inside shadow buffer
 * @param[in]  maxBufferEntries       Maximum possible entries in the HW buffer
 *
 * @returns NV_TRUE/NV_FALSE
 */
NvBool
kgmmuIsReplayableShadowFaultBufferFull_GH100
(
    OBJGPU                          *pGpu,
    KernelGmmu                      *pKernelGmmu,
    GMMU_CLIENT_SHADOW_FAULT_BUFFER *pClientShadowFaultBuf,
    NvU32                            shadowBufPutIndex,
    NvU32                            maxBufferEntries
)
{
    FAULT_BUFFER_SHARED_MEMORY *pFaultBufSharedMem;

    pFaultBufSharedMem =
        KERNEL_POINTER_FROM_NvP64(FAULT_BUFFER_SHARED_MEMORY *,
                        pClientShadowFaultBuf->pFaultBufferSharedMemoryAddress);

    return (pFaultBufSharedMem->swGetIndex ==
            ((shadowBufPutIndex + 1) % maxBufferEntries)) ? NV_TRUE : NV_FALSE;
}
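
//
// Worked example only (#if 0) of the full condition above: the ring buffer is
// reported full when advancing PUT by one slot (modulo the entry count) would
// land on the client's GET, i.e. one slot is always kept empty. The numbers
// and helper name are hypothetical.
//
#if 0
static void
_kgmmuExampleShadowBufFullCheck(void)
{
    NvU32 maxBufferEntries  = 8;
    NvU32 shadowBufPutIndex = 6;
    NvU32 swGetIndex        = 7;

    // (6 + 1) % 8 == 7 == GET, so the shadow buffer is considered full here.
    NV_ASSERT(swGetIndex == ((shadowBufPutIndex + 1) % maxBufferEntries));
}
#endif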

/*!
 * @brief Get the engine ID associated with the min CE
 *
 * @param[in] pKernelGmmu  KernelGmmu object
 *
 * @return Engine ID of the min CE
 */
NvU32
kgmmuGetMinCeEngineId_GH100
(
    KernelGmmu *pKernelGmmu
)
{
    return NV_PFAULT_MMU_ENG_ID_CE0;
}

/*!
 * @brief Get the engine ID associated with the max CE
 *
 * @param[in] pGpu         OBJGPU object
 * @param[in] pKernelGmmu  KernelGmmu object
 *
 * @return Engine ID of the max CE
 */
NvU32
kgmmuGetMaxCeEngineId_GH100
(
    OBJGPU     *pGpu,
    KernelGmmu *pKernelGmmu
)
{
    return NV_PFAULT_MMU_ENG_ID_CE9;
}

/**
  * @brief  Sign extend a fault address to a supported width as per UVM requirements
  */
void
kgmmuSignExtendFaultAddress_GH100
(
    OBJGPU     *pGpu,
    KernelGmmu *pKernelGmmu,
    NvU64      *pMmuFaultAddress
)
{
    NvU32 cpuAddrShift   = osGetCpuVaAddrShift();
    NvU32 gpuVaAddrShift = portUtilCountTrailingZeros64(pKernelGmmu->maxVASize);

    // Sign extend VA to ensure it's in canonical form if required
    if (gpuVaAddrShift >= cpuAddrShift)
    {
        switch (pGpu->busInfo.oorArch)
        {
            case OOR_ARCH_X86_64:
            case OOR_ARCH_ARM:
            case OOR_ARCH_AARCH64:
                *pMmuFaultAddress = (NvU64)(((NvS64)*pMmuFaultAddress << (64 - 57)) >>
                                            (64 - 57));
                break;
            case OOR_ARCH_PPC64LE:
                break;
            case OOR_ARCH_NONE:
                NV_ASSERT_FAILED("Invalid oor address mode type.");
                break;
        }
    }
    else
    {
        NV_PRINTF(LEVEL_ERROR, "UVM has not defined what to do here, doing nothing\n");
        NV_ASSERT(0);
    }
}
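
//
// Worked example only (#if 0) of the 57-bit sign extension above: shifting
// left and then arithmetically right by (64 - 57) replicates bit 56 into bits
// 63..57, producing a canonical address. The value and helper name are
// hypothetical.
//
#if 0
static void
_kgmmuExampleSignExtend57(void)
{
    NvU64 faultAddr = 0x0100000000000000ULL; // bit 56 set, upper bits clear
    NvU64 canonical = (NvU64)(((NvS64)faultAddr << (64 - 57)) >> (64 - 57));

    // Bits 63..57 now mirror bit 56.
    NV_ASSERT(canonical == 0xFF00000000000000ULL);
}
#endif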