1 /*
2  * SPDX-FileCopyrightText: Copyright (c) 2021-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
3  * SPDX-License-Identifier: MIT
4  *
5  * Permission is hereby granted, free of charge, to any person obtaining a
6  * copy of this software and associated documentation files (the "Software"),
7  * to deal in the Software without restriction, including without limitation
8  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
9  * and/or sell copies of the Software, and to permit persons to whom the
10  * Software is furnished to do so, subject to the following conditions:
11  *
12  * The above copyright notice and this permission notice shall be included in
13  * all copies or substantial portions of the Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
21  * DEALINGS IN THE SOFTWARE.
22  */
23 
24  /******************************************************************************
25 *
26 *       Kernel GMMU module header
27 *       Defines and structures used on CPU RM for the GMMU object.
28 *
29 ******************************************************************************/
30 
31 #define NVOC_KERN_GMMU_H_PRIVATE_ACCESS_ALLOWED
32 
33 #include "gpu/bif/kernel_bif.h"
34 #include "gpu/mmu/kern_gmmu.h"
35 #include "gpu/bus/kern_bus.h"
36 #include "gpu/nvlink/kernel_nvlink.h"
37 #include "gpu/mem_sys/kern_mem_sys.h"
38 #include "gpu/mem_mgr/mem_mgr.h"
39 #include "vgpu/vgpu_events.h"
40 #include "gpu/mem_mgr/mem_desc.h"
41 #include "os/os.h"
42 #include "rmapi/rmapi.h"
43 #include "gpu/gpu.h"
44 #include "nvRmReg.h"
45 #include "vgpu/rpc.h"
46 #include "kernel/gpu/intr/engine_idx.h"
47 
48 #include "kernel/gpu/conf_compute/ccsl.h"
49 
50 static void _kgmmuInitRegistryOverrides(OBJGPU *pGpu, KernelGmmu *pKernelGmmu);
51 
52 /*!
53  * KERNEL_GMMU constructor
54  *
55  * @param[in]  pGpu
56  * @param[in]  pKernelGmmu
57  * @param[in]  engDesc       Engine descriptor
58  *
59  * @return NV_OK on success, pertinent error code on failure.
60  */
61 NV_STATUS
62 kgmmuConstructEngine_IMPL(OBJGPU *pGpu, KernelGmmu *pKernelGmmu, ENGDESCRIPTOR engDesc)
63 {
64     NvU32  v;
65 
66     kgmmuDetermineMaxVASize_HAL(pGpu, pKernelGmmu);
67 
68     if (gpuIsCacheOnlyModeEnabled(pGpu))
69     {
70         pKernelGmmu->bHugePageSupported      = NV_FALSE;
71         pKernelGmmu->bPageSize512mbSupported = NV_FALSE;
72     }
73 
74     // Allocate and init MMU format families.
75     kgmmuFmtInitPdeApertures_HAL(pKernelGmmu, pKernelGmmu->pdeApertures);
76     kgmmuFmtInitPteApertures_HAL(pKernelGmmu, pKernelGmmu->pteApertures);
77 
78     for (v = 0; v < GMMU_FMT_MAX_VERSION_COUNT; ++v)
79     {
80         const NvU32 ver = g_gmmuFmtVersions[v];
81         if (kgmmuFmtIsVersionSupported_HAL(pKernelGmmu, ver))
82         {
83             GMMU_FMT_FAMILY *pFam = NULL;
84 
85             // Alloc version struct.
86             pFam = portMemAllocNonPaged(sizeof(*pFam));
87             NV_ASSERT_OR_RETURN((pFam != NULL), NV_ERR_NO_MEMORY);
88             portMemSet(pFam, 0, sizeof(*pFam));
89             pKernelGmmu->pFmtFamilies[v] = pFam;
90 
91             // Init PDE/PTE formats.
92             kgmmuFmtInitPdeMulti_HAL(pKernelGmmu, &pFam->pdeMulti, ver, pKernelGmmu->pdeApertures);
93             kgmmuFmtInitPde_HAL(pKernelGmmu, &pFam->pde, ver, pKernelGmmu->pdeApertures);
94             kgmmuFmtInitPte_HAL(pKernelGmmu, &pFam->pte, ver, pKernelGmmu->pteApertures,
95                 gpuIsUnifiedMemorySpaceEnabled(pGpu));
96 
97             kgmmuFmtInitPteComptagLine_HAL(pKernelGmmu, &pFam->pte, ver);
98         }
99         else
100         {
101             pKernelGmmu->pFmtFamilies[v] = NULL;
102         }
103     }
104 
105     NV_ASSERT_OK_OR_RETURN(kgmmuFmtInit(pKernelGmmu));
106 
107     portMemSet(&pKernelGmmu->mmuFaultBuffer, 0, sizeof(pKernelGmmu->mmuFaultBuffer));
108 
109     // Default placement for PDEs is in vidmem.
110     pKernelGmmu->PDEAperture = ADDR_FBMEM;
111     pKernelGmmu->PDEAttr = NV_MEMORY_WRITECOMBINED;
112     pKernelGmmu->PDEBAR1Aperture = ADDR_FBMEM;
113     pKernelGmmu->PDEBAR1Attr = NV_MEMORY_WRITECOMBINED;
114 
115     // Default placement for PTEs is in vidmem.
116     pKernelGmmu->PTEAperture = ADDR_FBMEM;
117     pKernelGmmu->PTEAttr = NV_MEMORY_WRITECOMBINED;
118     pKernelGmmu->PTEBAR1Aperture = ADDR_FBMEM;
119     pKernelGmmu->PTEBAR1Attr = NV_MEMORY_WRITECOMBINED;
120 
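    // The aperture and attribute defaults above may be overridden by the
    // INST_LOC registry keys applied in _kgmmuInitRegistryOverrides().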
121     _kgmmuInitRegistryOverrides(pGpu, pKernelGmmu);
122 
123     return NV_OK;
124 }
125 
126 static NV_STATUS
127 _kgmmuInitStaticInfo
128 (
129     OBJGPU *pGpu,
130     KernelGmmu *pKernelGmmu
131 )
132 {
133     NV2080_CTRL_INTERNAL_GMMU_GET_STATIC_INFO_PARAMS *pStaticInfo;
134     NV_STATUS status;
135 
136     //
137     // On vGPU, hardware management is handled by the host except in the full SR-IOV case.
138     // Thus, skip any further HW initialization when the guest does not own the hardware.
139     //
140     if (!(IS_VIRTUAL_WITHOUT_SRIOV(pGpu) ||
141           (IS_VIRTUAL_WITH_SRIOV(pGpu) && gpuIsWarBug200577889SriovHeavyEnabled(pGpu))))
142     {
143         // Init HAL specific features.
144         NV_ASSERT_OK_OR_RETURN(kgmmuFmtFamiliesInit_HAL(pGpu, pKernelGmmu));
145     }
146 
147     pStaticInfo = portMemAllocNonPaged(sizeof(*pStaticInfo));
148     NV_CHECK_OR_RETURN(LEVEL_ERROR, pStaticInfo != NULL, NV_ERR_INSUFFICIENT_RESOURCES);
149     portMemSet(pStaticInfo, 0, sizeof(*pStaticInfo));
150 
151     NV_CHECK_OK_OR_GOTO(status, LEVEL_ERROR,
152         kgmmuInitStaticInfo_HAL(pGpu, pKernelGmmu, pStaticInfo),
153         fail);
154 
155     pKernelGmmu->pStaticInfo = pStaticInfo;
156 
157 fail:
158     if (status != NV_OK)
159     {
160         portMemFree(pStaticInfo);
161     }
162 
163     return status;
164 }
165 
166 /*
167  * Initialize the Kernel GMMU state.
168  *
169  * @param      pGpu
170  * @param      pKernelGmmu
171  */
172 NV_STATUS kgmmuStateInitLocked_IMPL
173 (
174     OBJGPU     *pGpu,
175     KernelGmmu *pKernelGmmu
176 )
177 {
178     KernelBif *pKernelBif = GPU_GET_KERNEL_BIF(pGpu);
179     NV_STATUS  status;
180 
181     if (pKernelBif != NULL)
182     {
183         // This value shouldn't change after initialization, so cache it now
184         pKernelGmmu->sysmemBaseAddress = pKernelBif->dmaWindowStartAddress;
185     }
186 
187     status = _kgmmuInitStaticInfo(pGpu, pKernelGmmu);
188     if (status != NV_OK)
189     {
190         return status;
191     }
192 
193     return status;
194 }
195 
196 static NV_STATUS
197 _kgmmuCreateGlobalVASpace
198 (
199     OBJGPU  *pGpu,
200     KernelGmmu *pKernelGmmu,
201     NvU32 flags
202 )
203 {
204     NvU32       constructFlags = VASPACE_FLAGS_NONE;
205     OBJVASPACE *pGlobalVAS     = NULL;
206     NV_STATUS   rmStatus;
207     OBJGPUGRP  *pGpuGrp        = NULL;
208 
209     // Bail out early on sleep/suspend cases
210     if (flags & GPU_STATE_FLAGS_PRESERVING)
211         return NV_OK;
212     if (!gpumgrIsParentGPU(pGpu))
213         return NV_OK;
214 
215     //
216     // We create the device vaspace at this point. Assemble the flags needed
217     // for construction.
218     //
219 
220     // Allow PTE in SYS
221     constructFlags |= VASPACE_FLAGS_RETRY_PTE_ALLOC_IN_SYS;
222 
223     constructFlags |= VASPACE_FLAGS_DEFAULT_PARAMS;
224     constructFlags |= VASPACE_FLAGS_DEFAULT_SIZE;
225     constructFlags |= DRF_DEF(_VASPACE, _FLAGS, _BIG_PAGE_SIZE, _DEFAULT);
226 
227     pGpuGrp = gpumgrGetGpuGrpFromGpu(pGpu);
228     NV_ASSERT_OR_RETURN(pGpuGrp != NULL, NV_ERR_INVALID_DATA);
229 
230     rmStatus = gpugrpCreateGlobalVASpace(pGpuGrp, pGpu,
231                                          FERMI_VASPACE_A,
232                                          0, 0,
233                                          constructFlags,
234                                          &pGlobalVAS);
235     NV_ASSERT_OR_RETURN((NV_OK == rmStatus), rmStatus);
236 
237     return NV_OK;
238 }
239 
240 static NV_STATUS
241 _kgmmuDestroyGlobalVASpace
242 (
243     OBJGPU  *pGpu,
244     KernelGmmu *pKernelGmmu,
245     NvU32 flags
246 )
247 {
248     OBJGPUGRP *pGpuGrp = NULL;
249 
250     if (flags & GPU_STATE_FLAGS_PRESERVING)
251         return NV_OK;
252 
253     pGpuGrp = gpumgrGetGpuGrpFromGpu(pGpu);
254     return gpugrpDestroyGlobalVASpace(pGpuGrp, pGpu);
255 }
256 
257 /*
258  *  Helper function to enable ComputePeerMode
259  */
260 NV_STATUS
261 kgmmuEnableComputePeerAddressing_IMPL
262 (
263     OBJGPU *pGpu,
264     KernelGmmu *pKernelGmmu,
265     NvU32 flags
266 )
267 {
268     KernelBus *pKernelBus = GPU_GET_KERNEL_BUS(pGpu);
269     OBJSYS    *pSys = SYS_GET_INSTANCE();
270     NV_STATUS status = NV_OK;
271     RM_API *pRmApi = GPU_GET_PHYSICAL_RMAPI(pGpu);
272     NvBool bComputePeerMode = NV_FALSE;
273 
274     if (pSys->getProperty(pSys, PDB_PROP_SYS_NVSWITCH_IS_PRESENT) ||
275         kbusIsFlaSupported(pKernelBus))
276     {
277         bComputePeerMode = NV_TRUE;
278     }
279 
280     if (bComputePeerMode)
281     {
282         status = kgmmuEnableNvlinkComputePeerAddressing_HAL(pKernelGmmu);
283         if (status != NV_OK)
284         {
285             NV_PRINTF(LEVEL_ERROR,
286                         "Failed to enable GMMU compute peer addressing for GPU %x, status:%x\n",
287                         pGpu->gpuInstance, status);
288             return status;
289         }
290 
291         status = pRmApi->Control(pRmApi,
292                                 pGpu->hInternalClient,
293                                 pGpu->hInternalSubdevice,
294                                 NV2080_CTRL_CMD_INTERNAL_NVLINK_ENABLE_COMPUTE_PEER_ADDR,
295                                 NULL, 0);
296     }
297     return status;
298 }
299 
300 /*
301  *  State Post Load
302  */
303 NV_STATUS kgmmuStatePostLoad_IMPL
304 (
305     OBJGPU *pGpu,
306     KernelGmmu *pKernelGmmu,
307     NvU32 flags
308 )
309 {
310     NV_STATUS status = NV_OK;
311 
312     status = _kgmmuCreateGlobalVASpace(pGpu, pKernelGmmu, flags);
313 
314     if (status != NV_OK)
315     {
316         NV_PRINTF(LEVEL_ERROR,
317                     "Failed to create GVASpace, status:%x\n",
318                     status);
319         return status;
320     }
321 
322     status = kgmmuEnableComputePeerAddressing(pGpu, pKernelGmmu, flags);
323 
324     if (status != NV_OK)
325     {
326         NV_PRINTF(LEVEL_ERROR,
327                     "Failed to enable compute peer addressing, status:%x\n",
328                     status);
329         return status;
330     }
331 
332     return status;
333 }
334 
335 /*
336  *  State Pre Unload
337  */
338 NV_STATUS
339 kgmmuStatePreUnload_IMPL
340 (
341     OBJGPU *pGpu,
342     KernelGmmu *pKernelGmmu,
343     NvU32 flags
344 )
345 {
346     NV_STATUS status = NV_OK;
347 
348     status = _kgmmuDestroyGlobalVASpace(pGpu, pKernelGmmu, flags);
349 
350     if (status != NV_OK)
351     {
352         NV_PRINTF(LEVEL_ERROR,
353                     "Failed to destroy GVASpace, status:%x\n",
354                     status);
355         return status;
356     }
357     return status;
358 }
359 
360 /*!
361  * KernelGmmu destructor
362  *
363  * @param[in]  pKernelGmmu KernelGmmu object pointer
364  */
365 void
366 kgmmuDestruct_IMPL(KernelGmmu *pKernelGmmu)
367 {
368     NvU32       v;
369     NvU32       b;
370 
371     // Free per big page size format and format-family storage.
372     for (v = 0; v < GMMU_FMT_MAX_VERSION_COUNT; ++v)
373     {
374         if (NULL != pKernelGmmu->pFmtFamilies[v])
375         {
376             for (b = 0; b < GMMU_FMT_MAX_BIG_PAGE_SIZES; ++b)
377             {
378                 portMemFree(pKernelGmmu->pFmtFamilies[v]->pFmts[b]);
379                 pKernelGmmu->pFmtFamilies[v]->pFmts[b] = NULL;
380             }
381             portMemFree(pKernelGmmu->pFmtFamilies[v]);
382         }
383     }
384 }
385 
386 void
387 kgmmuStateDestroy_IMPL(OBJGPU *pGpu, KernelGmmu *pKernelGmmu)
388 {
389     if (NULL != pKernelGmmu->pStaticInfo)
390     {
391         portMemFree((void *)pKernelGmmu->pStaticInfo);
392         pKernelGmmu->pStaticInfo = NULL;
393     }
394     if (NULL != pKernelGmmu->pWarSmallPageTable)
395     {
396         memdescFree(pKernelGmmu->pWarSmallPageTable);
397         memdescDestroy(pKernelGmmu->pWarSmallPageTable);
398         pKernelGmmu->pWarSmallPageTable = NULL;
399     }
400     if (NULL != pKernelGmmu->pWarPageDirectory0)
401     {
402         memdescFree(pKernelGmmu->pWarPageDirectory0);
403         memdescDestroy(pKernelGmmu->pWarPageDirectory0);
404         pKernelGmmu->pWarPageDirectory0 = NULL;
405     }
406 }
407 
408 /*!
409  * Initializes KERN_GMMU state based on registry key overrides
410  *
411  * @param[in]  pGpu
412  * @param[in]  pKernelGmmu
413  */
414 static void
415 _kgmmuInitRegistryOverrides(OBJGPU *pGpu, KernelGmmu *pKernelGmmu)
416 {
417     NvU32 data;
418 
419     memdescOverrideInstLoc(DRF_VAL(_REG_STR_RM, _INST_LOC, _PDE, pGpu->instLocOverrides),
420                            "GMMU PDE",
421                            &pKernelGmmu->PDEAperture,
422                            &pKernelGmmu->PDEAttr);
423     memdescOverrideInstLoc(DRF_VAL(_REG_STR_RM, _INST_LOC, _BAR_PDE, pGpu->instLocOverrides),
424                            "BAR1 PDE",
425                            &pKernelGmmu->PDEBAR1Aperture,
426                            &pKernelGmmu->PDEBAR1Attr);
427     memdescOverrideInstLoc(DRF_VAL(_REG_STR_RM, _INST_LOC, _PTE, pGpu->instLocOverrides),
428                            "GMMU PTE",
429                            &pKernelGmmu->PTEAperture,
430                            &pKernelGmmu->PTEAttr);
431     memdescOverrideInstLoc(DRF_VAL(_REG_STR_RM, _INST_LOC, _BAR_PTE, pGpu->instLocOverrides),
432                            "BAR1 PTE",
433                            &pKernelGmmu->PTEBAR1Aperture,
434                            &pKernelGmmu->PTEBAR1Attr);
435 
436     //
437     // Check if we want to disable big page size per address space
438     //
439     pKernelGmmu->bEnablePerVaspaceBigPage = IsGM20X(pGpu);
440     if (NV_OK == osReadRegistryDword(pGpu,
441                    NV_REG_STR_RM_DISABLE_BIG_PAGE_PER_ADDRESS_SPACE, &data))
442     {
443         pKernelGmmu->bEnablePerVaspaceBigPage = !data;
444     }
445 
446     if (NV_OK == osReadRegistryDword(pGpu,
447                    NV_REG_STR_FERMI_BIG_PAGE_SIZE, &data))
448     {
449         if (pGpu->optimizeUseCaseOverride !=
450             NV_REG_STR_RM_OPTIMIZE_COMPUTE_OR_SPARSE_TEX_DEFAULT)
451         {
452             NV_PRINTF(LEVEL_ERROR,
453                       "The %s regkey cannot be used with the %s regkey!\n",
454                       NV_REG_STR_FERMI_BIG_PAGE_SIZE,
455                       NV_REG_STR_RM_OPTIMIZE_COMPUTE_OR_SPARSE_TEX);
456             return;
457         }
458         else
459         {
460             switch (data)
461             {
462                 case NV_REG_STR_FERMI_BIG_PAGE_SIZE_64KB:
463                 case NV_REG_STR_FERMI_BIG_PAGE_SIZE_128KB:
464                     pKernelGmmu->overrideBigPageSize = data;
465                     break;
466                 default:
467                     break;
468             }
469         }
470     }
471     else if (pGpu->optimizeUseCaseOverride !=
472              NV_REG_STR_RM_OPTIMIZE_COMPUTE_OR_SPARSE_TEX_DEFAULT)
473     {
474         switch (pGpu->optimizeUseCaseOverride)
475         {
476             case NV_REG_STR_RM_OPTIMIZE_COMPUTE_OR_SPARSE_TEX_SPARSE_TEX:
477                 pKernelGmmu->overrideBigPageSize = RM_PAGE_SIZE_64K;
478                 break;
479             case NV_REG_STR_RM_OPTIMIZE_COMPUTE_OR_SPARSE_TEX_COMPUTE:
480                 pKernelGmmu->overrideBigPageSize = RM_PAGE_SIZE_128K;
481                 break;
482             default:
483                 break;
484         }
485     }
486 
487     // Check if HW fault buffer is disabled
488     if (NV_OK == osReadRegistryDword(pGpu,
489                                      NV_REG_STR_RM_DISABLE_HW_FAULT_BUFFER, &data))
490     {
491         NV_PRINTF(LEVEL_ERROR,
492                   "Overriding HW Fault buffer state to 0x%x due to regkey!\n",
493                   data);
494         pKernelGmmu->setProperty(pKernelGmmu, PDB_PROP_KGMMU_FAULT_BUFFER_DISABLED, data);
495     }
496 
497 }
498 
499 GMMU_APERTURE
500 kgmmuGetMemAperture_IMPL
501 (
502     KernelGmmu        *pKernelGmmu,
503     MEMORY_DESCRIPTOR *pMemDesc
504 )
505 {
506     switch (memdescGetAddressSpace(pMemDesc))
507     {
508         case ADDR_FBMEM:
509             return GMMU_APERTURE_VIDEO;
510         case ADDR_SYSMEM:
511             if (NV_MEMORY_CACHED == memdescGetCpuCacheAttrib(pMemDesc))
512             {
513                 return GMMU_APERTURE_SYS_COH;
514             }
515             return GMMU_APERTURE_SYS_NONCOH;
516         default:
517             NV_ASSERT(0);
518             return GMMU_APERTURE_INVALID;
519     }
520 }
521 
522 /*!
523  * Initialize GMMU format structures dependent on big page size.
524  */
525 NV_STATUS
526 kgmmuFmtInit_IMPL(KernelGmmu *pKernelGmmu)
527 {
528     NvU32       v;
529     NvU32       b;
530 
531     // Allocate and init MMU formats for the supported big page sizes.
532     for (v = 0; v < GMMU_FMT_MAX_VERSION_COUNT; ++v)
533     {
534         const NvU32      ver  = g_gmmuFmtVersions[v];
535         GMMU_FMT_FAMILY *pFam = pKernelGmmu->pFmtFamilies[v];
536         if (NULL != pFam)
537         {
538             for (b = 0; b < GMMU_FMT_MAX_BIG_PAGE_SIZES; ++b)
539             {
540                 const NvU32 bigPageShift = g_gmmuFmtBigPageShifts[b];
541 
542                 // Allocate +1 level for the last dual-level.
543                 const NvU32 numLevels = GMMU_FMT_MAX_LEVELS + 1;
544                 const NvU32 size = sizeof(GMMU_FMT) + sizeof(MMU_FMT_LEVEL) * numLevels;
545                 MMU_FMT_LEVEL *pLvls;
546 
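                //
                // Single-allocation layout: the GMMU_FMT struct is immediately
                // followed by its MMU_FMT_LEVEL array of numLevels entries.
                //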
547                 // Allocate format and levels in one chunk.
548                 pFam->pFmts[b] = portMemAllocNonPaged(size);
549                 NV_ASSERT_OR_RETURN((pFam->pFmts[b] != NULL), NV_ERR_NO_MEMORY);
550                 portMemSet(pFam->pFmts[b], 0, size);
551 
552                 // Levels stored contiguously after the format struct.
553                 pLvls = (MMU_FMT_LEVEL *)(pFam->pFmts[b] + 1);
554 
555                 // Common init.
556                 pFam->pFmts[b]->version    = ver;
557                 pFam->pFmts[b]->pRoot      = pLvls;
558                 pFam->pFmts[b]->pPdeMulti  = &pFam->pdeMulti;
559                 pFam->pFmts[b]->pPde       = &pFam->pde;
560                 pFam->pFmts[b]->pPte       = &pFam->pte;
561 
562                 kgmmuFmtInitLevels_HAL(pKernelGmmu, pLvls, numLevels, ver, bigPageShift);
563                 kgmmuFmtInitCaps_HAL(pKernelGmmu, pFam->pFmts[b]);
564             }
565         }
566     }
567 
568     return NV_OK;
569 }
570 
571 /*!
572  * Retrieve GMMU format family based on version.
573  */
574 const GMMU_FMT_FAMILY *
575 kgmmuFmtGetFamily_IMPL(KernelGmmu *pKernelGmmu, NvU32 version)
576 {
577     NvU32       v;
578 
579     // Find a matching format.
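    // Iterate newest-to-oldest so that a request for version 0 resolves to the
    // newest supported family.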
580     for (v = GMMU_FMT_MAX_VERSION_COUNT; v > 0; --v)
581     {
582         if (0 == version)
583         {
584             // Pick newest default version if none requested.
585             if (NULL != pKernelGmmu->pFmtFamilies[v - 1])
586             {
587                 return pKernelGmmu->pFmtFamilies[v - 1];
588             }
589         }
590         else if (g_gmmuFmtVersions[v - 1] == version)
591         {
592             return pKernelGmmu->pFmtFamilies[v - 1];
593         }
594     }
595 
596     return NULL;
597 }
598 
599 /*!
600  * Returns GMMU settings that are static after GPU state init/load is
601  * finished.
602  */
603 const NV2080_CTRL_INTERNAL_GMMU_GET_STATIC_INFO_PARAMS *
604 kgmmuGetStaticInfo_IMPL
605 (
606     OBJGPU *pGpu,
607     KernelGmmu *pKernelGmmu
608 )
609 {
610     // Bail out if the KernelGmmu object is not yet available (state init has not completed).
611     NV_ASSERT_OR_ELSE(pKernelGmmu != NULL, return NULL);
612 
613     return pKernelGmmu->pStaticInfo;
614 }
615 
616 /*!
617  * @brief Initializes static info data from the Physical side.
618  *
619  * @param      pGpu
620  * @param      pKernelGmmu
621  * @param[out] pStaticInfo pointer to the static info init on Physical driver.
622  */
623 NV_STATUS
624 kgmmuInitStaticInfo_KERNEL
625 (
626     OBJGPU *pGpu,
627     KernelGmmu *pKernelGmmu,
628     NV2080_CTRL_INTERNAL_GMMU_GET_STATIC_INFO_PARAMS *pStaticInfo
629 )
630 {
631     RM_API *pRmApi = GPU_GET_PHYSICAL_RMAPI(pGpu);
632     NV_STATUS status;
633 
634     status = pRmApi->Control(pRmApi, pGpu->hInternalClient, pGpu->hInternalSubdevice,
635                                 NV2080_CTRL_CMD_INTERNAL_GMMU_GET_STATIC_INFO,
636                                 pStaticInfo, sizeof(*pStaticInfo));
637 
638     return status;
639 }
640 
641 /*!
642  * Retrieve GMMU format based on version and big page size.
643  */
644 const GMMU_FMT *
645 kgmmuFmtGet_IMPL(KernelGmmu *pKernelGmmu, NvU32 version, NvU64 bigPageSize)
646 {
647     const GMMU_FMT_FAMILY *pFmtFamily  = kgmmuFmtGetFamily(pKernelGmmu, version);
648 
649     if (NULL != pFmtFamily)
650     {
651         NvU32 b;
652 
653         // Pick default big page size if none requested.
654         if (0 == bigPageSize)
655         {
656             //
657             // Retrieve Big Page Size. If it is not yet set, set it to 64K.
658             // Useful when this method is invoked before big page size is set.
659             //
660             if (0 == (bigPageSize = kgmmuGetBigPageSize_HAL(pKernelGmmu)))
661                 bigPageSize = NVBIT64(16);
662         }
663 
664         // Find a matching format.
665         for (b = 0; b < GMMU_FMT_MAX_BIG_PAGE_SIZES; ++b)
666         {
667             if (NVBIT64(g_gmmuFmtBigPageShifts[b]) == bigPageSize)
668             {
669                 return pFmtFamily->pFmts[b];
670             }
671         }
672     }
673 
674     return NULL;
675 }
676 
677 /*!
678  * Check if a big page size is supported.
679  */
680 NvBool
681 kgmmuFmtIsBigPageSizeSupported_IMPL(KernelGmmu *pKernelGmmu, NvU64 bigPageSize)
682 {
683     if (kgmmuIsPerVaspaceBigPageEn(pKernelGmmu))
684     {
685         return NV_TRUE;
686     }
687     return kgmmuGetBigPageSize_HAL(pKernelGmmu) == bigPageSize;
688 }
689 
690 /*!
691  * @brief Returns the latest supported MMU fmt.
692  *
693  * @param[in]  pGpu          OBJGPU pointer
694  * @param[in]  pKernelGmmu   KernelGmmu pointer
695  *
696  * @returns const GMMU_FMT*
697  */
698 const GMMU_FMT*
699 kgmmuFmtGetLatestSupportedFormat_IMPL(OBJGPU *pGpu, KernelGmmu *pKernelGmmu)
700 {
701     NvU32       v;
702     NvU32       maxFmtVersionSupported = 0;
703 
704     for (v = 0; v < GMMU_FMT_MAX_VERSION_COUNT; ++v)
705     {
706         const NvU32 ver = g_gmmuFmtVersions[v];
707         if (kgmmuFmtIsVersionSupported_HAL(pKernelGmmu, ver))
708         {
709             maxFmtVersionSupported = maxFmtVersionSupported < ver ? ver : maxFmtVersionSupported;
710         }
711     }
712 
713     return kgmmuFmtGet(pKernelGmmu, maxFmtVersionSupported, 0);
714 }
715 
716 /*!
717  * @brief Calculates the total memory required for the page tables needed to
718  *        translate a given VA range.
719  *
720  * @param     pGpu
721  * @param     pKernelGmmu
722  * @param[in] pFmt              Pointer to GMMU format
723  * @param[in] vaBase            Start VA
724  * @param[in] vaLimit           End VA
725  * @param[in] pageSizeLockMask  Mask of page sizes locked down at VA reservation
726  *
727  * @returns total size of page tables.
728  */
729 NvU64
730 kgmmuGetSizeOfPageTables_IMPL
731 (
732     OBJGPU         *pGpu,
733     KernelGmmu     *pKernelGmmu,
734     const GMMU_FMT *pFmt,
735     NvU64           vaBase,
736     NvU64           vaLimit,
737     NvU64           pageSizeLockMask
738 )
739 {
740     const MMU_FMT_LEVEL *pPgTbl         = NULL;
741     NvU64                pgTblSize      = 0;
742     NvU64                numPgTblsCeil;
743     NvU64                numPgTblsFloor;
744     NvU64                numEntries;
745     NvU32                pageShift;
746 
747     // Loop over all page table sizes in mask
748     FOR_EACH_INDEX_IN_MASK(64, pageShift, pageSizeLockMask)
749     {
750         pPgTbl = mmuFmtFindLevelWithPageShift(pFmt->pRoot, pageShift);
751 
752         //
753         // Do not consider page directories. They are handled by
754         // @ref kgmmuGetSizeOfPageDirs.
755         //
756         if (!pPgTbl->bPageTable || (pPgTbl->numSubLevels != 0))
757         {
758             continue;
759         }
760 
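        //
        // Each page table at this level maps 2^(virtAddrBitHi + 1) bytes of VA
        // (e.g. a 4K-PTE table spans 2MB in the common formats), so
        // numPgTblsCeil is an upper bound on the tables touched by
        // [vaBase, vaLimit]. When the whole range fits inside the first span
        // (numPgTblsFloor == 0), only the entries actually needed are counted.
        //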
761         numPgTblsCeil  = NV_DIV_AND_CEIL(vaLimit, NVBIT64(pPgTbl->virtAddrBitHi + 1)) -
762                          (vaBase / NVBIT64(pPgTbl->virtAddrBitHi + 1)) + 1;
763         numPgTblsFloor = vaLimit / NVBIT64(pPgTbl->virtAddrBitHi + 1);
764 
765         // If full page tables are not used, allocate only as much as needed.
766         if (numPgTblsFloor == 0)
767         {
768             numEntries = mmuFmtVirtAddrToEntryIndex(pPgTbl, vaLimit) -
769                          mmuFmtVirtAddrToEntryIndex(pPgTbl, vaBase) + 1;
770             pgTblSize  += numEntries * pPgTbl->entrySize;
771         }
772         else
773         {
774             pgTblSize += numPgTblsCeil * mmuFmtLevelSize(pPgTbl);
775         }
776     }
777     FOR_EACH_INDEX_IN_MASK_END
778 
779     return pgTblSize;
780 }
781 
782 /*!
783  * @brief Calculates the total memory required for the page directories needed
784  *        to translate a given VA range.
785  *
786  * @param       pGpu
787  * @param       pKernelGmmu
788  * @param[in]   pFmt      Pointer to GMMU format
789  * @param[in]   vaBase    Start VA
790  * @param[in]   vaLimit   End VA
791  *
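 * @param[in]   pageSizeLockMask  Mask of page sizes locked down at VA reservation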
792  * @returns total size of page directories
793  */
794 NvU64
795 kgmmuGetSizeOfPageDirs_IMPL
796 (
797     OBJGPU         *pGpu,
798     KernelGmmu     *pKernelGmmu,
799     const GMMU_FMT *pFmt,
800     NvU64           vaBase,
801     NvU64           vaLimit,
802     NvU64           pageSizeLockMask
803 )
804 {
805     const MMU_FMT_LEVEL *pLevel = NULL;
806     NvU64                size   = 0;
807     NvU16                i;
808 
809     NV_ASSERT_OR_RETURN(pFmt != NULL, 0);
810 
811     pLevel = pFmt->pRoot;
812 
813     //
814     // Retain only the lowest set bit
815     //
816     // If the lowest set bit corresponds to a leaf page table (4K or 64K), we'll
817     // calculate memory for all upper-level page directories, and if the set bit
818     // corresponds to an upper-level page directory we'll factor in all levels
819     // from the root up to that level.
820     //
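    // For example, a mask with both 4K (0x1000) and 2MB (0x200000) set reduces
    // to 0x1000 after the (x & -x) step below, so directories for every level
    // down to the small-page tables are counted.
    //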
821     pageSizeLockMask = pageSizeLockMask & -((NvS64)pageSizeLockMask);
822 
823     // Accumulate size for all Page Directories.
824     for (i = 0; i < GMMU_FMT_MAX_LEVELS - 1; i++)
825     {
826         NvU64 vaPerEntry = mmuFmtEntryVirtAddrMask(pLevel) + 1;
827         NvU64 numEntries = NV_DIV_AND_CEIL(vaLimit, vaPerEntry) -
828                            (vaBase / vaPerEntry) + 1;
829         NvU64 levelSize  = numEntries * pLevel->entrySize;
830         levelSize        = NV_ROUNDUP(levelSize, RM_PAGE_SIZE);
831 
832         // Stop accumulating size once we are beyond the specified level.
833         if (mmuFmtLevelPageSize(pLevel) < pageSizeLockMask)
834         {
835             break;
836         }
837 
838         size += levelSize;
839 
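        //
        // The final directory level may have two parallel sublevels: index 1 is
        // the small (4K) page table branch chosen below, index 0 the big-page
        // branch. Directory sizing follows the 4K branch.
        //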
840         // If there's one sublevel choose that.
841         if (pLevel->numSubLevels == 1)
842         {
843             pLevel = &(pLevel->subLevels[0]);
844         }
845         else
846         {
847             // Choose the 4K page size sublevel.
848             pLevel = &(pLevel->subLevels[1]);
849         }
850         NV_ASSERT_OR_RETURN(pLevel != NULL, 0);
851 
852         // Stop accumulating size if we've exhausted all Page Dirs.
853         if (pLevel->bPageTable && (pLevel->numSubLevels == 0))
854         {
855             break;
856         }
857     }
858 
859     return size;
860 }
861 
862 /*
863  * Fill comptag field in PTE.
864  */
865 void kgmmuFieldSetKindCompTags_IMPL
866 (
867     KernelGmmu          *pGmmu,
868     const GMMU_FMT      *pFmt,
869     const MMU_FMT_LEVEL *pLevel,
870     const COMPR_INFO    *pCompr,
871     NvU64                physAddr,
872     NvU64                surfOffset,
873     NvU32                pteIndex,
874     NvU8                *pEntries
875 )
876 {
877     OBJGPU                            *pGpu                = ENG_GET_GPU(pGmmu);
878     GMMU_COMPR_INFO                    comprInfo           = {0};
879 
880     comprInfo.compressedKind        = pCompr->kind;
881     comprInfo.compPageShift         = pCompr->compPageShift;
882 
883     if (memmgrIsKind_HAL(GPU_GET_MEMORY_MANAGER(pGpu), FB_IS_KIND_COMPRESSIBLE, pCompr->kind))
884     {
885         const MEMORY_SYSTEM_STATIC_CONFIG *pMemorySystemConfig =
886             kmemsysGetStaticConfig(pGpu, GPU_GET_KERNEL_MEMORY_SYSTEM(pGpu));
887 
888         if (pCompr->bPhysBasedComptags)
889         {
890             NvBool bCallingContextPlugin;
891 
892             NV_ASSERT(pMemorySystemConfig->bOneToOneComptagLineAllocation || pMemorySystemConfig->bUseRawModeComptaglineAllocation);
893 
894             NV_ASSERT_OR_RETURN_VOID(vgpuIsCallingContextPlugin(pGpu, &bCallingContextPlugin) == NV_OK);
895             if (IS_VIRTUAL_WITH_SRIOV(pGpu) || bCallingContextPlugin ||
896                 pMemorySystemConfig->bUseRawModeComptaglineAllocation)
897             {
898                 // In raw mode or when SR-IOV is enabled, HW handles compression tags
899                 comprInfo.compTagLineMin = 1;
900             }
901             else
902             {
903                 comprInfo.compTagLineMin = memmgrDetermineComptag_HAL(pGpu, GPU_GET_MEMORY_MANAGER(pGpu), physAddr);
904             }
905 
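            //
            // The compression page index range covers every compression page
            // touched by the surface offsets mapped at this PTE level.
            //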
906             comprInfo.compPageIndexLo = surfOffset >> pCompr->compPageShift;
907             comprInfo.compPageIndexHi = (surfOffset + mmuFmtLevelPageSize(pLevel) - 1) >> pCompr->compPageShift;
908             comprInfo.compTagLineMultiplier = 1;
909         }
910         else
911         {
912             comprInfo.compPageIndexLo       = pCompr->compPageIndexLo;
913             comprInfo.compPageIndexHi       = pCompr->compPageIndexHi;
914             comprInfo.compTagLineMin        = pCompr->compTagLineMin;
915             comprInfo.compTagLineMultiplier = pCompr->compTagLineMultiplier;
916         }
917     }
918 
919     gmmuFmtInitPteCompTags(pFmt, pLevel, &comprInfo, surfOffset, pteIndex, 1, pEntries);
920 }
921 
922 NV_STATUS
923 kgmmuFaultBufferGetAddressSpace_IMPL
924 (
925     OBJGPU               *pGpu,
926     KernelGmmu           *pKernelGmmu,
927     NvU32                 index,
928     NvU32                *pFaultBufferAddrSpace,
929     NvU32                *pFaultBufferAttr
930 )
931 {
932     NvU32 faultBufferAddrSpace = ADDR_UNKNOWN;
933     NvU32 faultBufferAttr = 0;
934     NvBool bAllocInVidmem = NV_FALSE;
935 
936     bAllocInVidmem = gpuIsCCFeatureEnabled(pGpu);
937 
938     NV_ASSERT_OR_RETURN((index < NUM_FAULT_BUFFERS), NV_ERR_INVALID_ARGUMENT);
939 
940     if (index == NON_REPLAYABLE_FAULT_BUFFER)
941     {
942         faultBufferAddrSpace = bAllocInVidmem ? ADDR_FBMEM : ADDR_SYSMEM;
943         faultBufferAttr      = bAllocInVidmem ? NV_MEMORY_UNCACHED : NV_MEMORY_CACHED;
944         memdescOverrideInstLoc(DRF_VAL(_REG_STR_RM, _INST_LOC_3, _UVM_FAULT_BUFFER_NONREPLAYABLE, pGpu->instLocOverrides3),
945                                "UVM non-replayable fault", &faultBufferAddrSpace, &faultBufferAttr);
946     }
947     else if (index == REPLAYABLE_FAULT_BUFFER)
948     {
949         faultBufferAddrSpace = bAllocInVidmem ? ADDR_FBMEM : ADDR_SYSMEM;
950         faultBufferAttr      = bAllocInVidmem ? NV_MEMORY_UNCACHED : NV_MEMORY_CACHED;
951         memdescOverrideInstLoc(DRF_VAL(_REG_STR_RM, _INST_LOC_4, _UVM_FAULT_BUFFER_REPLAYABLE, pGpu->instLocOverrides4),
952                                "UVM replayable fault", &faultBufferAddrSpace, &faultBufferAttr);
953     }
954     //
955     // Whenever Hopper CC is enabled, HW requires both replayable and non-replayable
956     // fault buffers to be in CPR vidmem. It would be illegal to allocate the buffers
957     // in any other aperture
958     //
959     if (bAllocInVidmem && (faultBufferAddrSpace == ADDR_SYSMEM))
960     {
961         NV_PRINTF(LEVEL_ERROR, "Fault buffers must be in CPR vidmem when HCC is enabled\n");
962         NV_ASSERT(0);
963         return NV_ERR_INVALID_ARGUMENT;
964     }
965 
966     if (pFaultBufferAddrSpace != NULL)
967     {
968         *pFaultBufferAddrSpace = faultBufferAddrSpace;
969     }
970 
971     if (pFaultBufferAttr != NULL)
972     {
973         *pFaultBufferAttr = faultBufferAttr;
974     }
975 
976     return NV_OK;
977 }
978 
979 NV_STATUS
980 kgmmuFaultBufferCreateMemDesc_IMPL
981 (
982     OBJGPU               *pGpu,
983     KernelGmmu           *pKernelGmmu,
984     NvU32                 index,
985     NvU32                 faultBufferSize,
986     NvU64                 memDescFlags,
987     MEMORY_DESCRIPTOR   **ppMemDesc
988 )
989 {
990     NV_STATUS status;
991     MEMORY_DESCRIPTOR *pMemDesc = NULL;
992     NvU32 faultBufferAddrSpace = ADDR_UNKNOWN;
993     NvU32 faultBufferAttr = 0;
994     NvBool isContiguous = NV_FALSE;
995 
996     NV_ASSERT_OR_RETURN((index < NUM_FAULT_BUFFERS), NV_ERR_INVALID_ARGUMENT);
997 
998     status = kgmmuFaultBufferGetAddressSpace(pGpu, pKernelGmmu, index,
999                                              &faultBufferAddrSpace, &faultBufferAttr);
1000     if (status != NV_OK)
1001     {
1002         return status;
1003     }
1004 
1005     if ((IS_VIRTUAL(pGpu) && gpuIsWarBug200577889SriovHeavyEnabled(pGpu))
1006         || gpuIsCCFeatureEnabled(pGpu)
1007        )
1008     {
1009         // Allocate contiguous fault buffers for SR-IOV Heavy
1010         // Fault buffers get allocated in CPR vidmem when Hopper CC is enabled
1011         // We're almost assured to get contiguous allocations in vidmem
1012         isContiguous = NV_TRUE;
1013     }
1014 
1015     status = memdescCreate(&pMemDesc, pGpu,
1016                            RM_PAGE_ALIGN_UP(faultBufferSize), 0, isContiguous,
1017                            faultBufferAddrSpace, faultBufferAttr,
1018                            (memDescFlags | MEMDESC_FLAGS_LOST_ON_SUSPEND));
1019     if (status != NV_OK)
1020     {
1021         return status;
1022     }
1023 
1024     //
1025     // The GPU does not read fault buffer memory, so if the fault buffers are in sysmem, set
1026     // GpuCacheAttr to UNCACHED. The volatile bit in the PTEs makes HUB use L2-bypass mode,
1027     // saving the cycles otherwise spent caching in L2 while the MMU writes fault packets.
1028     //
1029     if (faultBufferAddrSpace == ADDR_SYSMEM &&
1030         pKernelGmmu->getProperty(pKernelGmmu, PDB_PROP_KGMMU_SYSMEM_FAULT_BUFFER_GPU_UNCACHED))
1031     {
1032         memdescSetGpuCacheAttrib(pMemDesc, NV_MEMORY_UNCACHED);
1033     }
1034 
1035     memdescSetPageSize(pMemDesc, AT_GPU, RM_PAGE_SIZE);
1036 
1037     *ppMemDesc = pMemDesc;
1038 
1039     return NV_OK;
1040 }
1041 
1042 NV_STATUS
1043 kgmmuFaultBufferUnregister_IMPL
1044 (
1045     OBJGPU               *pGpu,
1046     KernelGmmu           *pKernelGmmu,
1047     NvU32                 index
1048 )
1049 {
1050     struct HW_FAULT_BUFFER *pFaultBuffer;
1051     MEMORY_DESCRIPTOR      *pMemDesc;
1052 
1053     pFaultBuffer = &pKernelGmmu->mmuFaultBuffer[GPU_GFID_PF].hwFaultBuffers[index];
1054     pMemDesc = pFaultBuffer->pFaultBufferMemDesc;
1055 
1056     pFaultBuffer->faultBufferSize = 0;
1057     pFaultBuffer->pFaultBufferMemDesc = NULL;
1058 
1059     memdescDestroy(pMemDesc);
1060 
1061     return NV_OK;
1062 }
1063 
1064 NV_STATUS
1065 kgmmuFaultBufferAlloc_IMPL
1066 (
1067     OBJGPU         *pGpu,
1068     KernelGmmu     *pKernelGmmu,
1069     NvU32           index,
1070     NvU32           faultBufferSize
1071 )
1072 {
1073     NV_STATUS status;
1074     MEMORY_DESCRIPTOR *pMemDesc = NULL;
1075     struct HW_FAULT_BUFFER *pFaultBuffer;
1076     const char *name = (index == REPLAYABLE_FAULT_BUFFER ? NV_RM_SURF_NAME_REPLAYABLE_FAULT_BUFFER : NV_RM_SURF_NAME_NONREPLAYABLE_FAULT_BUFFER);
1077 
1078     NV_ASSERT_OR_RETURN((index < NUM_FAULT_BUFFERS), NV_ERR_INVALID_ARGUMENT);
1079 
1080     if (pKernelGmmu->getProperty(pKernelGmmu, PDB_PROP_KGMMU_FAULT_BUFFER_DISABLED))
1081         return NV_OK;
1082 
1083     pFaultBuffer = &pKernelGmmu->mmuFaultBuffer[GPU_GFID_PF].hwFaultBuffers[index];
1084 
1085     status = kgmmuFaultBufferCreateMemDesc(pGpu, pKernelGmmu, index, faultBufferSize,
1086                                            MEMDESC_FLAGS_NONE, &pMemDesc);
1087     if (status != NV_OK)
1088     {
1089         return status;
1090     }
1091 
1092     status = memdescAlloc(pMemDesc);
1093     if (status != NV_OK)
1094     {
1095         memdescDestroy(pMemDesc);
1096         return status;
1097     }
1098 
1099     memdescSetName(pGpu, pMemDesc, name, NULL);
1100 
1101     pFaultBuffer->faultBufferSize = faultBufferSize;
1102     pFaultBuffer->pFaultBufferMemDesc = pMemDesc;
1103 
1104     return status;
1105 }
1106 
1107 NV_STATUS
1108 kgmmuFaultBufferFree_IMPL
1109 (
1110     OBJGPU               *pGpu,
1111     KernelGmmu           *pKernelGmmu,
1112     NvU32                 index
1113 )
1114 {
1115     struct HW_FAULT_BUFFER *pFaultBuffer;
1116 
1117     NV_ASSERT_OR_RETURN((index < NUM_FAULT_BUFFERS), NV_ERR_INVALID_ARGUMENT);
1118 
1119     if (pKernelGmmu->getProperty(pKernelGmmu, PDB_PROP_KGMMU_FAULT_BUFFER_DISABLED))
1120         return NV_OK;
1121 
1122     pFaultBuffer = &pKernelGmmu->mmuFaultBuffer[GPU_GFID_PF].hwFaultBuffers[index];
1123 
1124     memdescFree(pFaultBuffer->pFaultBufferMemDesc);
1125 
1126     kgmmuFaultBufferUnregister(pGpu, pKernelGmmu, index);
1127 
1128     return NV_OK;
1129 }
1130 
1131 NV_STATUS
1132 kgmmuFaultBufferReplayableAllocate_IMPL
1133 (
1134     OBJGPU               *pGpu,
1135     KernelGmmu           *pKernelGmmu,
1136     NvHandle              hClient,
1137     NvHandle              hObject
1138 )
1139 {
1140     NV_STATUS               status;
1141     struct HW_FAULT_BUFFER *pFaultBuffer;
1142     NvU32                   faultBufferSize;
1143     NvU32                   numBufferPages;
1144     const NV2080_CTRL_INTERNAL_GMMU_GET_STATIC_INFO_PARAMS *pStaticInfo = kgmmuGetStaticInfo(pGpu, pKernelGmmu);
1145 
1146     if (IS_VIRTUAL_WITHOUT_SRIOV(pGpu) ||
1147         pKernelGmmu->getProperty(pKernelGmmu, PDB_PROP_KGMMU_FAULT_BUFFER_DISABLED))
1148     {
1149         return NV_OK;
1150     }
1151 
1152     pFaultBuffer = &pKernelGmmu->mmuFaultBuffer[GPU_GFID_PF].hwFaultBuffers[REPLAYABLE_FAULT_BUFFER];
1153     if (pFaultBuffer->pFaultBufferMemDesc != NULL)
1154     {
1155         return NV_ERR_NOT_SUPPORTED;
1156     }
1157 
1158     faultBufferSize = pStaticInfo->replayableFaultBufferSize;
1159 
1160     status = kgmmuFaultBufferAlloc(pGpu, pKernelGmmu,
1161                                    REPLAYABLE_FAULT_BUFFER,
1162                                    faultBufferSize);
1163     if (status != NV_OK)
1164     {
1165         return status;
1166     }
1167 
1168     if (IS_GSP_CLIENT(pGpu))
1169     {
1170         RM_API *pRmApi = GPU_GET_PHYSICAL_RMAPI(pGpu);
1171         NV2080_CTRL_INTERNAL_GMMU_REGISTER_FAULT_BUFFER_PARAMS *pParams;
1172 
1173         pParams = portMemAllocNonPaged(sizeof(*pParams));
1174         if (pParams == NULL)
1175         {
1176             kgmmuFaultBufferFree(pGpu, pKernelGmmu, REPLAYABLE_FAULT_BUFFER);
1177             return NV_ERR_NO_MEMORY;
1178         }
1179         portMemSet(pParams, 0, sizeof(*pParams));
1180 
1181         numBufferPages = RM_PAGE_ALIGN_UP(faultBufferSize) / RM_PAGE_SIZE;
1182         if (numBufferPages > NV_ARRAY_ELEMENTS(pParams->faultBufferPteArray))
1183         {
1184             portMemFree(pParams);
1185             kgmmuFaultBufferFree(pGpu, pKernelGmmu, REPLAYABLE_FAULT_BUFFER);
1186             return NV_ERR_BUFFER_TOO_SMALL;
1187         }
1188 
1189         memdescGetPhysAddrs(pFaultBuffer->pFaultBufferMemDesc,
1190                             AT_GPU, 0, RM_PAGE_SIZE,
1191                             numBufferPages, pParams->faultBufferPteArray);
1192 
1193         pParams->hClient            = hClient;
1194         pParams->hObject            = hObject;
1195         pParams->faultBufferSize    = faultBufferSize;
1196 
1197         status = pRmApi->Control(pRmApi,
1198                                  pGpu->hInternalClient,
1199                                  pGpu->hInternalSubdevice,
1200                                  NV2080_CTRL_CMD_INTERNAL_GMMU_REGISTER_FAULT_BUFFER,
1201                                  pParams, sizeof(*pParams));
1202 
1203         portMemFree(pParams);
1204         if (status != NV_OK)
1205         {
1206             kgmmuFaultBufferFree(pGpu, pKernelGmmu, REPLAYABLE_FAULT_BUFFER);
1207             return status;
1208         }
1209     }
1210 
1211     pKernelGmmu->mmuFaultBuffer[GPU_GFID_PF].hFaultBufferClient = hClient;
1212     pKernelGmmu->mmuFaultBuffer[GPU_GFID_PF].hFaultBufferObject = hObject;
1213     pKernelGmmu->mmuFaultBuffer[GPU_GFID_PF].faultBufferGenerationCounter = 0;
1214 
1215     return NV_OK;
1216 }
1217 
1218 NV_STATUS
1219 kgmmuFaultBufferReplayableDestroy_IMPL
1220 (
1221     OBJGPU      *pGpu,
1222     KernelGmmu  *pKernelGmmu
1223 )
1224 {
1225     NV_STATUS               status = NV_OK;
1226     struct HW_FAULT_BUFFER *pFaultBuffer;
1227 
1228     if (IS_VIRTUAL_WITHOUT_SRIOV(pGpu) ||
1229         pKernelGmmu->getProperty(pKernelGmmu, PDB_PROP_KGMMU_FAULT_BUFFER_DISABLED))
1230     {
1231         return NV_OK;
1232     }
1233 
1234     pFaultBuffer = &pKernelGmmu->mmuFaultBuffer[GPU_GFID_PF].hwFaultBuffers[REPLAYABLE_FAULT_BUFFER];
1235     if (pFaultBuffer->pFaultBufferMemDesc == NULL)
1236     {
1237         return NV_OK;
1238     }
1239 
1240     pKernelGmmu->mmuFaultBuffer[GPU_GFID_PF].hFaultBufferClient = 0;
1241     pKernelGmmu->mmuFaultBuffer[GPU_GFID_PF].hFaultBufferObject = 0;
1242 
1243     if (IS_GSP_CLIENT(pGpu))
1244     {
1245         RM_API *pRmApi = GPU_GET_PHYSICAL_RMAPI(pGpu);
1246         status = pRmApi->Control(pRmApi,
1247                                  pGpu->hInternalClient,
1248                                  pGpu->hInternalSubdevice,
1249                                  NV2080_CTRL_CMD_INTERNAL_GMMU_UNREGISTER_FAULT_BUFFER,
1250                                  NULL, 0);
1251         if (status != NV_OK)
1252         {
1253             NV_PRINTF(LEVEL_ERROR,
1254                       "Unregistering Replayable Fault buffer failed (status=0x%08x), proceeding...\n",
1255                       status);
1256         }
1257     }
1258 
1259     if (RMCFG_FEATURE_PLATFORM_GSP)
1260     {
1261         status = kgmmuFaultBufferUnregister(pGpu, pKernelGmmu, REPLAYABLE_FAULT_BUFFER);
1262     }
1263     else
1264     {
1265         status = kgmmuFaultBufferFree(pGpu, pKernelGmmu, REPLAYABLE_FAULT_BUFFER);
1266     }
1267 
1268     if (status != NV_OK)
1269     {
1270         NV_PRINTF(LEVEL_ERROR,
1271                   "Destroying Replayable Fault buffer failed (status=0x%08x), proceeding...\n",
1272                   status);
1273     }
1274 
1275     return NV_OK;
1276 }
1277 
1278 /*!
1279  * @brief: Encodes peer addresses to support NVSwitch systems.
1280  *
1281  * This function prepends the fabricBaseAddress to a physical address in order
1282  * to generate a unique peer address from the global fabric address space.
1283  *
1284  * @param[in] pAddresses        : Array of physical addresses to be encoded.
1285  * @param[in] fabricBaseAddress : Unique fabric base address.
1286  * @param[in] count             : Count of physical addresses.
1287  */
1288 static void
1289 _kgmmuEncodePeerAddrs
1290 (
1291     NvU64              *pAddresses,
1292     NvU64               fabricBaseAddress,
1293     NvU64               count
1294 )
1295 {
1296     NvU64 i;
1297 
1298     //
1299     // If there is no fabric address, this is a NOP. Note that this also acts as
1300     // an early-out path for other PEER addressing.
1301     //
1302     if (fabricBaseAddress == NVLINK_INVALID_FABRIC_ADDR)
1303     {
1304         return;
1305     }
1306 
1307     for (i = 0; i < count; i++)
1308     {
1309         pAddresses[i] = fabricBaseAddress + pAddresses[i];
1310     }
1311 }
1312 
1313 void
1314 kgmmuEncodePhysAddrs_IMPL
1315 (
1316     KernelGmmu         *pKernelGmmu,
1317     const GMMU_APERTURE aperture,
1318     NvU64              *pAddresses,
1319     NvU64               fabricBaseAddress,
1320     NvU64               count
1321 )
1322 {
1323     NV_ASSERT(aperture != GMMU_APERTURE_INVALID);
1324 
1325     if (aperture == GMMU_APERTURE_SYS_COH ||
1326         aperture == GMMU_APERTURE_SYS_NONCOH)
1327     {
1328         kgmmuEncodeSysmemAddrs_HAL(pKernelGmmu, pAddresses, count);
1329     }
1330     else if (aperture == GMMU_APERTURE_PEER)
1331     {
1332         _kgmmuEncodePeerAddrs(pAddresses, fabricBaseAddress, count);
1333     }
1334     else
1335     {
1336         return;
1337     }
1338 }
1339 
1340 NvU64
1341 kgmmuEncodePhysAddr_IMPL
1342 (
1343     KernelGmmu         *pKernelGmmu,
1344     const GMMU_APERTURE aperture,
1345     NvU64               physAddr,
1346     NvU64               fabricBaseAddress
1347 )
1348 {
1349     kgmmuEncodePhysAddrs(pKernelGmmu, aperture, &physAddr, fabricBaseAddress, 1);
1350     return physAddr;
1351 }
1352 
1353 static void
1354 _kgmmuClientShadowBufferQueueCopyData
1355 (
1356     NvLength      msgSize,
1357     NvLength      opIdx,
1358     QueueContext *pCtx,
1359     void         *pData,
1360     NvLength      count,
1361     NvBool        bCopyIn
1362 )
1363 {
1364     NvLength size;
1365     GMMU_CLIENT_SHADOW_FAULT_BUFFER *pClientShadowFaultBuffer = pCtx->pData;
1366     NvU8 *pQueueData, *pClientData = pData;
1367     void *pDst, *pSrc;
1368 
1369     if (count == 0)
1370         return;
1371 
1372     size = count * msgSize;
1373     pQueueData = KERNEL_POINTER_FROM_NvP64(NvU8 *, pClientShadowFaultBuffer->pBufferAddress);
1374     pQueueData = pQueueData + (opIdx * msgSize);
1375 
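    // bCopyIn == NV_TRUE copies the caller's data into the shadow buffer slot at
    // opIdx; otherwise the slot is copied out to the caller's buffer.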
1376     pDst = bCopyIn ? pQueueData : pClientData;
1377     pSrc = bCopyIn ? pClientData : pQueueData;
1378     portMemCopy(pDst, size, pSrc, size);
1379 }
1380 
1381 static NV_STATUS
1382 _kgmmuClientShadowFaultBufferQueueAllocate
1383 (
1384     OBJGPU           *pGpu,
1385     KernelGmmu       *pKernelGmmu,
1386     FAULT_BUFFER_TYPE index
1387 )
1388 {
1389     NV_STATUS status;
1390     GMMU_CLIENT_SHADOW_FAULT_BUFFER *pClientShadowFaultBuffer;
1391     MEMORY_DESCRIPTOR *pQueueMemDesc;
1392     NvU64 flags = MEMDESC_FLAGS_NONE;
1393 
1394     //
1395     // On systems with SEV enabled, the client shadow buffers should be allocated
1396     // in unprotected sysmem as GSP will be writing the fault packets to these
1397     // buffers. Since GSP will be encrypting the fault packets, we don't risk
1398     // leaking any information
1399     //
1400     flags |= MEMDESC_FLAGS_ALLOC_IN_UNPROTECTED_MEMORY;
1401 
1402     //
1403     // Shadow fault buffers are not implemented using circular queues when
1404     // Hopper CC is enabled
1405     //
1406     if (gpuIsCCFeatureEnabled(pGpu) && gpuIsGspOwnedFaultBuffersEnabled(pGpu))
1407         return NV_OK;
1408 
1409     pClientShadowFaultBuffer = &pKernelGmmu->mmuFaultBuffer[GPU_GFID_PF].clientShadowFaultBuffer[index];
1410 
1411     status = memdescCreate(&pQueueMemDesc, pGpu,
1412                            sizeof(GMMU_SHADOW_FAULT_BUF), RM_PAGE_SIZE,
1413                            NV_TRUE, ADDR_SYSMEM, NV_MEMORY_CACHED,
1414                            flags);
1415     if (status != NV_OK)
1416     {
1417         return status;
1418     }
1419 
1420     status = memdescAlloc(pQueueMemDesc);
1421     if (status != NV_OK)
1422     {
1423         memdescDestroy(pQueueMemDesc);
1424         return status;
1425     }
1426 
1427     status = memdescMap(pQueueMemDesc, 0,
1428                         memdescGetSize(pQueueMemDesc),
1429                         NV_TRUE, NV_PROTECT_READ_WRITE,
1430                         &pClientShadowFaultBuffer->pQueueAddress,
1431                         &pClientShadowFaultBuffer->pQueuePriv);
1432     if (status != NV_OK)
1433     {
1434         memdescFree(pQueueMemDesc);
1435         memdescDestroy(pQueueMemDesc);
1436         return status;
1437     }
1438 
1439     pClientShadowFaultBuffer->queueContext.pCopyData = _kgmmuClientShadowBufferQueueCopyData;
1440     pClientShadowFaultBuffer->queueContext.pData = pClientShadowFaultBuffer;
1441     pClientShadowFaultBuffer->pQueueMemDesc = pQueueMemDesc;
1442 
1443     return NV_OK;
1444 }
1445 
1446 void
1447 kgmmuClientShadowFaultBufferQueueDestroy_IMPL
1448 (
1449     OBJGPU           *pGpu,
1450     KernelGmmu       *pKernelGmmu,
1451     NvBool            bFreeQueue,
1452     FAULT_BUFFER_TYPE index
1453 )
1454 {
1455     GMMU_CLIENT_SHADOW_FAULT_BUFFER *pClientShadowFaultBuffer;
1456     MEMORY_DESCRIPTOR *pQueueMemDesc;
1457 
1458     //
1459     // Shadow fault buffers are not implemented using circular queues when
1460     // Hopper CC is enabled. So, there is nothing to free here
1461     //
1462     if (gpuIsCCFeatureEnabled(pGpu) && gpuIsGspOwnedFaultBuffersEnabled(pGpu))
1463         return;
1464 
1465     pClientShadowFaultBuffer = &pKernelGmmu->mmuFaultBuffer[GPU_GFID_PF].clientShadowFaultBuffer[index];
1466 
1467     pQueueMemDesc = pClientShadowFaultBuffer->pQueueMemDesc;
1468 
1469     pClientShadowFaultBuffer->pQueueMemDesc = NULL;
1470     pClientShadowFaultBuffer->pQueueAddress = NvP64_NULL;
1471     pClientShadowFaultBuffer->pQueuePriv = NvP64_NULL;
1472 
1473     if (bFreeQueue)
1474     {
1475         memdescFree(pQueueMemDesc);
1476     }
1477     memdescDestroy(pQueueMemDesc);
1478 }
1479 
1480 static NV_STATUS
1481 _kgmmuClientShadowFaultBufferPagesAllocate
1482 (
1483     OBJGPU           *pGpu,
1484     KernelGmmu       *pKernelGmmu,
1485     NvU32             shadowFaultBufferSize,
1486     NvU32             shadowFaultBufferMetadataSize,
1487     FAULT_BUFFER_TYPE index
1488 )
1489 {
1490     NV_STATUS status;
1491     GMMU_CLIENT_SHADOW_FAULT_BUFFER *pClientShadowFaultBuffer;
1492     MEMORY_DESCRIPTOR *pMemDesc;
1493     NvU64 flags = MEMDESC_FLAGS_NONE;
1494     NvU32 shadowFaultBufferSizeTotal;
1495 
1496     //
1497     // On systems with SEV enabled, the client shadow buffers should be allocated
1498     // in unprotected sysmem as GSP will be writing the fault packets to these
1499     // buffers. Since GSP will be encrypting the fault packets, we don't risk
1500     // leaking any information
1501     //
1502     flags |= MEMDESC_FLAGS_ALLOC_IN_UNPROTECTED_MEMORY;
1503 
1504     pClientShadowFaultBuffer = &pKernelGmmu->mmuFaultBuffer[GPU_GFID_PF].clientShadowFaultBuffer[index];
1505 
1506     shadowFaultBufferSizeTotal = RM_PAGE_ALIGN_UP(shadowFaultBufferSize) + RM_PAGE_ALIGN_UP(shadowFaultBufferMetadataSize);
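    //
    // A single allocation backs both regions: the page-aligned fault packet
    // buffer followed by a page-aligned metadata region;
    // pFaultBufferMetadataAddress is pointed at the metadata portion below.
    //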
1507 
1508     status = memdescCreate(&pMemDesc, pGpu,
1509                            shadowFaultBufferSizeTotal, RM_PAGE_SIZE,
1510                            NV_FALSE, ADDR_SYSMEM, NV_MEMORY_CACHED,
1511                            flags);
1512     if (status != NV_OK)
1513     {
1514         return status;
1515     }
1516 
1517     status = memdescAlloc(pMemDesc);
1518     if (status != NV_OK)
1519     {
1520         memdescDestroy(pMemDesc);
1521         return status;
1522     }
1523 
1524     status = memdescMap(pMemDesc, 0,
1525                         memdescGetSize(pMemDesc),
1526                         NV_TRUE, NV_PROTECT_READ_WRITE,
1527                         &pClientShadowFaultBuffer->pBufferAddress,
1528                         &pClientShadowFaultBuffer->pBufferPriv);
1529     if (status != NV_OK)
1530     {
1531         memdescFree(pMemDesc);
1532         memdescDestroy(pMemDesc);
1533         return status;
1534     }
1535 
1536     pClientShadowFaultBuffer->pFaultBufferMetadataAddress =
1537                              ((NvP64)(((NvU64) pClientShadowFaultBuffer->pBufferAddress) +
1538                               RM_PAGE_ALIGN_UP(shadowFaultBufferSize)));
1539     pClientShadowFaultBuffer->pBufferMemDesc = pMemDesc;
1540 
1541     return NV_OK;
1542 }
1543 
1544 void
1545 kgmmuClientShadowFaultBufferPagesDestroy_IMPL
1546 (
1547     OBJGPU           *pGpu,
1548     KernelGmmu       *pKernelGmmu,
1549     NvBool            bFreePages,
1550     FAULT_BUFFER_TYPE index
1551 )
1552 {
1553     MEMORY_DESCRIPTOR *pMemDesc;
1554     GMMU_CLIENT_SHADOW_FAULT_BUFFER *pClientShadowFaultBuffer;
1555     GMMU_FAULT_BUFFER_PAGE *pBufferPage;
1556     NvU32 i;
1557 
1558     pClientShadowFaultBuffer = &pKernelGmmu->mmuFaultBuffer[GPU_GFID_PF].clientShadowFaultBuffer[index];
1559     pMemDesc = pClientShadowFaultBuffer->pBufferMemDesc;
1560 
1561     if (bFreePages)
1562     {
1563         memdescUnmap(pMemDesc,
1564                      NV_TRUE, osGetCurrentProcess(),
1565                      pClientShadowFaultBuffer->pBufferAddress,
1566                      pClientShadowFaultBuffer->pBufferPriv);
1567 
1568         memdescFree(pMemDesc);
1569     }
1570     else
1571     {
1572         for (i = 0; i < pClientShadowFaultBuffer->numBufferPages; i++)
1573         {
1574             pBufferPage = &pClientShadowFaultBuffer->pBufferPages[i];
1575 
1576             memdescUnmap(pMemDesc, NV_TRUE, osGetCurrentProcess(),
1577                          pBufferPage->pAddress, pBufferPage->pPriv);
1578         }
1579         portMemFree(pClientShadowFaultBuffer->pBufferPages);
1580     }
1581     memdescDestroy(pMemDesc);
1582 }
1583 
1584 NV_STATUS
1585 kgmmuClientShadowFaultBufferRegister_IMPL
1586 (
1587     OBJGPU           *pGpu,
1588     KernelGmmu       *pKernelGmmu,
1589     FAULT_BUFFER_TYPE index
1590 )
1591 {
1592     NV_STATUS status = NV_OK;
1593     struct GMMU_FAULT_BUFFER *pFaultBuffer;
1594     GMMU_CLIENT_SHADOW_FAULT_BUFFER *pClientShadowFaultBuffer;
1595     GMMU_SHADOW_FAULT_BUF *pQueue;
1596     MEMORY_DESCRIPTOR *pBufferMemDesc;
1597     RmPhysAddr shadowFaultBufferQueuePhysAddr;
1598     NvU32 queueCapacity, numBufferPages;
1599     NvU32 faultBufferSize;
1600     NvU32 shadowFaultBufferMetadataSize;
1601     const NV2080_CTRL_INTERNAL_GMMU_GET_STATIC_INFO_PARAMS *pStaticInfo = kgmmuGetStaticInfo(pGpu, pKernelGmmu);
1602     NvBool bQueueAllocated = NV_FALSE;
1603 
1604     pFaultBuffer = &pKernelGmmu->mmuFaultBuffer[GPU_GFID_PF];
1605     pClientShadowFaultBuffer = &pFaultBuffer->clientShadowFaultBuffer[index];
1606 
1607     if (index == NON_REPLAYABLE_FAULT_BUFFER)
1608     {
1609         faultBufferSize = pStaticInfo->nonReplayableFaultBufferSize;
1610         shadowFaultBufferMetadataSize = pStaticInfo->nonReplayableShadowFaultBufferMetadataSize;
1611     }
1612     else if (index == REPLAYABLE_FAULT_BUFFER)
1613     {
1614         faultBufferSize = pStaticInfo->replayableFaultBufferSize;
1615         shadowFaultBufferMetadataSize = pStaticInfo->replayableShadowFaultBufferMetadataSize;
1616     }
1617     else
1618     {
1619         NV_ASSERT_OR_RETURN(0, NV_ERR_INVALID_ARGUMENT);
1620     }
1621 
1622     //
1623     // We don't use circular queues for shadow fault buffers when Hopper
1624     // CC is enabled
1625     //
1626     if (!gpuIsCCFeatureEnabled(pGpu) || !gpuIsGspOwnedFaultBuffersEnabled(pGpu))
1627     {
1628         pQueue = KERNEL_POINTER_FROM_NvP64(GMMU_SHADOW_FAULT_BUF *,
1629                                            pClientShadowFaultBuffer->pQueueAddress);
1630         queueCapacity = faultBufferSize / NVC369_BUF_SIZE;
1631 
1632         status = queueInitNonManaged(pQueue, queueCapacity);
1633         if (status != NV_OK)
1634         {
1635             return status;
1636         }
1637         bQueueAllocated = NV_TRUE;
1638     }
1639 
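    //
    // Non-GSP-client path: publish the shadow buffer pointer under
    // pShadowFaultBufLock. GSP-client path: register the buffer pages (and, when
    // applicable, the queue) with GSP through the internal control call below,
    // then publish the pointer.
    //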
1640     if (!IS_GSP_CLIENT(pGpu))
1641     {
1642         portSyncSpinlockAcquire(pFaultBuffer->pShadowFaultBufLock);
1643 
1644         if (pFaultBuffer->pClientShadowFaultBuffer[index] == NULL)
1645         {
1646             pFaultBuffer->pClientShadowFaultBuffer[index] = pClientShadowFaultBuffer;
1647         }
1648         else
1649         {
1650             status = NV_ERR_NOT_SUPPORTED;
1651         }
1652 
1653         portSyncSpinlockRelease(pFaultBuffer->pShadowFaultBufLock);
1654 
1655         if (status != NV_OK)
1656         {
1657             if (bQueueAllocated)
1658                 queueDestroy(pQueue);
1659             return status;
1660         }
1661     }
1662     else
1663     {
1664         RM_API *pRmApi = GPU_GET_PHYSICAL_RMAPI(pGpu);
1665         NV2080_CTRL_INTERNAL_GMMU_REGISTER_CLIENT_SHADOW_FAULT_BUFFER_PARAMS *pParams;
1666 
1667         pParams = portMemAllocNonPaged(sizeof(*pParams));
1668         if (pParams == NULL)
1669         {
1670             if (bQueueAllocated)
1671                 queueDestroy(pQueue);
1672             return NV_ERR_NO_MEMORY;
1673         }
1674         portMemSet(pParams, 0, sizeof(*pParams));
1675 
1676         pBufferMemDesc = pClientShadowFaultBuffer->pBufferMemDesc;
1677         numBufferPages = memdescGetSize(pBufferMemDesc) >> RM_PAGE_SHIFT;
1678         if (numBufferPages > NV_ARRAY_ELEMENTS(pParams->shadowFaultBufferPteArray))
1679         {
1680             portMemFree(pParams);
1681             if (bQueueAllocated)
1682                 queueDestroy(pQueue);
1683             return NV_ERR_BUFFER_TOO_SMALL;
1684         }
1685 
1686         memdescGetPhysAddrs(pBufferMemDesc,
1687                             AT_GPU,
1688                             0, RM_PAGE_SIZE,
1689                             numBufferPages, pParams->shadowFaultBufferPteArray);
1690 
1691         if (!gpuIsCCFeatureEnabled(pGpu) || !gpuIsGspOwnedFaultBuffersEnabled(pGpu))
1692         {
1693             shadowFaultBufferQueuePhysAddr = memdescGetPhysAddr(pClientShadowFaultBuffer->pQueueMemDesc,
1694                                                                 AT_GPU, 0);
1695             pParams->shadowFaultBufferQueuePhysAddr = shadowFaultBufferQueuePhysAddr;
1696         }
1697         pParams->shadowFaultBufferSize         = faultBufferSize;
1698         pParams->shadowFaultBufferMetadataSize = shadowFaultBufferMetadataSize;
1699         pParams->shadowFaultBufferType         = (index == NON_REPLAYABLE_FAULT_BUFFER) ?
1700                                                  NV2080_CTRL_FAULT_BUFFER_NON_REPLAYABLE :
1701                                                  NV2080_CTRL_FAULT_BUFFER_REPLAYABLE;
1702 
1703         if (gpuIsCCFeatureEnabled(pGpu) && gpuIsGspOwnedFaultBuffersEnabled(pGpu) && index == REPLAYABLE_FAULT_BUFFER)
1704         {
1705             pParams->faultBufferSharedMemoryPhysAddr = memdescGetPhysAddr(pClientShadowFaultBuffer->pFaultBufferSharedMemDesc,
1706                                                                           AT_GPU, 0);
1707         }
1708 
1709         status = pRmApi->Control(pRmApi,
1710                                  pGpu->hInternalClient,
1711                                  pGpu->hInternalSubdevice,
1712                                  NV2080_CTRL_CMD_INTERNAL_GMMU_REGISTER_CLIENT_SHADOW_FAULT_BUFFER,
1713                                  pParams, sizeof(*pParams));
1714 
1715         portMemFree(pParams);
1716         if (status != NV_OK)
1717         {
1718             if (bQueueAllocated)
1719                 queueDestroy(pQueue);
1720             return status;
1721         }
1722 
1723         pFaultBuffer->pClientShadowFaultBuffer[index] = pClientShadowFaultBuffer;
1724     }
1725 
1726     return NV_OK;
1727 }
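
//
// Sizing note (illustrative, not part of the register path itself): the GSP
// client path above hands the shadow buffer to GSP-RM one RM page at a time,
// so the page count is simply the buffer size shifted down by RM_PAGE_SHIFT.
// Assuming, purely for illustration, a 1 MiB shadow buffer and 4 KiB RM pages:
//
//     numBufferPages = (1 << 20) >> RM_PAGE_SHIFT;   // 256 pages
//
// Registration fails with NV_ERR_BUFFER_TOO_SMALL when that count exceeds
// NV_ARRAY_ELEMENTS(pParams->shadowFaultBufferPteArray).
//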
1728 
1729 void
1730 kgmmuClientShadowFaultBufferUnregister_IMPL
1731 (
1732     OBJGPU           *pGpu,
1733     KernelGmmu       *pKernelGmmu,
1734     FAULT_BUFFER_TYPE index
1735 )
1736 {
1737     NV_STATUS status = NV_OK;
1738     GMMU_CLIENT_SHADOW_FAULT_BUFFER *pClientShadowFaultBuffer;
1739     GMMU_SHADOW_FAULT_BUF *pQueue;
1740     struct GMMU_FAULT_BUFFER *pFaultBuffer;
1741 
1742     pFaultBuffer = &pKernelGmmu->mmuFaultBuffer[GPU_GFID_PF];
1743 
1744     if (!IS_GSP_CLIENT(pGpu))
1745     {
1746         portSyncSpinlockAcquire(pFaultBuffer->pShadowFaultBufLock);
1747 
1748         pFaultBuffer->pClientShadowFaultBuffer[index] = NULL;
1749 
1750         portSyncSpinlockRelease(pFaultBuffer->pShadowFaultBufLock);
1751     }
1752     else
1753     {
1754         RM_API *pRmApi = GPU_GET_PHYSICAL_RMAPI(pGpu);
1755         NV2080_CTRL_INTERNAL_GMMU_UNREGISTER_CLIENT_SHADOW_FAULT_BUFFER_PARAMS params;
1756 
1757         portMemSet(&params, 0, sizeof(params));
1758 
1759         params.shadowFaultBufferType = (index == NON_REPLAYABLE_FAULT_BUFFER) ?
1760                                        NV2080_CTRL_FAULT_BUFFER_NON_REPLAYABLE :
1761                                        NV2080_CTRL_FAULT_BUFFER_REPLAYABLE;
1762         status = pRmApi->Control(pRmApi,
1763                                  pGpu->hInternalClient,
1764                                  pGpu->hInternalSubdevice,
1765                                  NV2080_CTRL_CMD_INTERNAL_GMMU_UNREGISTER_CLIENT_SHADOW_FAULT_BUFFER,
1766                                  &params, sizeof(params));
1767         if (status != NV_OK)
1768         {
1769             NV_PRINTF(LEVEL_ERROR,
1770                       "Unregistering %s fault buffer failed (status=0x%08x), proceeding...\n",
1771                       (index == NON_REPLAYABLE_FAULT_BUFFER) ? "non-replayable" : "replayable",
1772                       status);
1773         }
1774 
1775         pFaultBuffer->pClientShadowFaultBuffer[index] = NULL;
1776     }
1777 
1778     if (!gpuIsCCFeatureEnabled(pGpu) || !gpuIsGspOwnedFaultBuffersEnabled(pGpu))
1779     {
1780         pClientShadowFaultBuffer = &pFaultBuffer->clientShadowFaultBuffer[index];
1781         pQueue = KERNEL_POINTER_FROM_NvP64(GMMU_SHADOW_FAULT_BUF *,
1782                                            pClientShadowFaultBuffer->pQueueAddress);
1783         queueDestroy(pQueue);
1784     }
1785 }
1786 
1787 /*!
1788  * @brief Creates shadow fault buffer for client handling of replayable/non-replayable
1789  *        faults in the CPU-RM, and registers it in the GSP-RM.
1790  *
1791  * @param[in] pGpu
1792  * @param[in] pKernelGmmu
1793  * @param[in] index         Replayable or non-replayable fault buffer
1794  *
1795  * @returns NV_OK on success, or an appropriate error code on failure.
1796  */
1797 NV_STATUS
1798 kgmmuClientShadowFaultBufferAllocate_IMPL
1799 (
1800     OBJGPU            *pGpu,
1801     KernelGmmu        *pKernelGmmu,
1802     FAULT_BUFFER_TYPE  index
1803 )
1804 {
1805     NV_STATUS   status;
1806     const NV2080_CTRL_INTERNAL_GMMU_GET_STATIC_INFO_PARAMS *pStaticInfo = kgmmuGetStaticInfo(pGpu, pKernelGmmu);
1807     NvU32 faultBufferSize;
1808     NvU32 shadowFaultBufferMetadataSize;
1809 
1810     ct_assert((RM_PAGE_SIZE % sizeof(struct GMMU_FAULT_PACKET)) == 0);
1811 
1812     NV_ASSERT_OR_RETURN(!pKernelGmmu->getProperty(pKernelGmmu, PDB_PROP_KGMMU_FAULT_BUFFER_DISABLED), NV_ERR_INVALID_STATE);
1813 
1814     NV_ASSERT_OR_RETURN(pStaticInfo->nonReplayableFaultBufferSize != 0, NV_ERR_INVALID_STATE);
1815 
1816     if (index == NON_REPLAYABLE_FAULT_BUFFER)
1817     {
1818         faultBufferSize = pStaticInfo->nonReplayableFaultBufferSize;
1819         shadowFaultBufferMetadataSize = pStaticInfo->nonReplayableShadowFaultBufferMetadataSize;
1820     }
1821     else if (index == REPLAYABLE_FAULT_BUFFER)
1822     {
1823         faultBufferSize = pStaticInfo->replayableFaultBufferSize;
1824         shadowFaultBufferMetadataSize = pStaticInfo->replayableShadowFaultBufferMetadataSize;
1825     }
1826     else
1827     {
1828         NV_ASSERT_OR_RETURN(0, NV_ERR_INVALID_ARGUMENT);
1829     }
1830 
1831     status = _kgmmuClientShadowFaultBufferQueueAllocate(pGpu, pKernelGmmu, index);
1832     if (status != NV_OK)
1833     {
1834         return status;
1835     }
1836 
1837     status = _kgmmuClientShadowFaultBufferPagesAllocate(pGpu, pKernelGmmu,
1838                                                         faultBufferSize,
1839                                                         shadowFaultBufferMetadataSize,
1840                                                         index);
1841     if (status != NV_OK)
1842     {
1843         goto destroy_queue_and_exit;
1844     }
1845 
1846     status = kgmmuFaultBufferAllocSharedMemory_HAL(pGpu, pKernelGmmu, index);
1847     if (status != NV_OK)
1848     {
1849         goto destroy_pages_and_exit;
1850     }
1851 
1852     status = kgmmuClientShadowFaultBufferRegister(pGpu, pKernelGmmu,
1853                                                   index);
1854     if (status != NV_OK)
1855     {
1856         goto destroy_shared_memory_and_exit;
1857     }
1858 
1859     return NV_OK;
1860 
1861 destroy_shared_memory_and_exit:
1862     kgmmuFaultBufferFreeSharedMemory_HAL(pGpu, pKernelGmmu, index);
1863 destroy_pages_and_exit:
1864     kgmmuClientShadowFaultBufferPagesDestroy(pGpu, pKernelGmmu, NV_TRUE,
1865                                              index);
1866 destroy_queue_and_exit:
1867     kgmmuClientShadowFaultBufferQueueDestroy(pGpu, pKernelGmmu, NV_TRUE,
1868                                              index);
1869     return status;
1870 }
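
//
// Usage sketch (illustrative only, not called from here): a caller that wants
// CPU-side handling of non-replayable faults would pair the allocate and
// destroy entry points roughly as follows:
//
//     NV_ASSERT_OK_OR_RETURN(
//         kgmmuClientShadowFaultBufferAllocate(pGpu, pKernelGmmu,
//                                              NON_REPLAYABLE_FAULT_BUFFER));
//     ...
//     kgmmuClientShadowFaultBufferDestroy(pGpu, pKernelGmmu,
//                                         NON_REPLAYABLE_FAULT_BUFFER);
//
// On failure the allocate path above unwinds in reverse order of construction
// (register -> shared memory -> pages -> queue), so no partially built state
// is left registered with GSP-RM.
//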
1871 
1872 /*!
1873  * @brief Unregisters the client shadow fault buffer (via GSP-RM when
1874  *        applicable) and destroys its CPU-RM backing pages and queue.
1875  *
1876  * @param[in] pGpu
1877  * @param[in] pKernelGmmu
1878  * @param[in] index         Replayable or non-replayable fault buffer
1879  * @returns NV_OK
1880  */
1881 NV_STATUS
1882 kgmmuClientShadowFaultBufferDestroy_IMPL
1883 (
1884     OBJGPU           *pGpu,
1885     KernelGmmu       *pKernelGmmu,
1886     FAULT_BUFFER_TYPE index
1887 )
1888 {
1889     GMMU_CLIENT_SHADOW_FAULT_BUFFER *pClientShadowFaultBuffer;
1890     NvBool bFreeMemory = !RMCFG_FEATURE_PLATFORM_GSP;
1891 
1892     pClientShadowFaultBuffer =
1893         pKernelGmmu->mmuFaultBuffer[GPU_GFID_PF].pClientShadowFaultBuffer[index];
1894 
1895     if (pClientShadowFaultBuffer != NvP64_NULL)
1896     {
1897         kgmmuClientShadowFaultBufferUnregister(pGpu, pKernelGmmu,
1898                                                index);
1899 
1900         kgmmuFaultBufferFreeSharedMemory_HAL(pGpu, pKernelGmmu, index);
1901 
1902         kgmmuClientShadowFaultBufferPagesDestroy(pGpu, pKernelGmmu, bFreeMemory,
1903                                                  index);
1904         kgmmuClientShadowFaultBufferQueueDestroy(pGpu, pKernelGmmu, bFreeMemory,
1905                                                  index);
1906     }
1907 
1908     return NV_OK;
1909 }
1910 
1911 /*!
1912  * Returns the minimum allocation size, in bytes, above which an allocation is rounded up to the big page size
1913  *
1914  * @param[in]  pKernelGmmu
1915  *
1916  * @return NvU64
1917  */
1918 NvU64
1919 kgmmuGetMinBigPageSize_IMPL(KernelGmmu *pKernelGmmu)
1920 {
1921     //
1922     // Set the minimum size in the heap that we will round up to a big page instead
1923     // of just 4KB. HW doesn't like 4KB pages in video memory, but SW wants to pack
1924     // physical memory sometimes. Typically UMDs that really care about perf use
1925     // suballocation for larger RM allocations anyway.
1926     //
1927     // Promote allocations bigger than half the big page size.
1928     // (This is a policy change for big page sizes/VA space.)
1929     //
1930     return RM_PAGE_SIZE_64K >> 1;
1931 }
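
//
// Worked example (illustrative): with RM_PAGE_SIZE_64K the threshold returned
// above is 32 KiB, so heap code applying this policy would behave roughly as
// follows (allocSize is a hypothetical request size):
//
//     NvU64 minBig   = kgmmuGetMinBigPageSize(pKernelGmmu);          // 32 KiB
//     NvU64 pageSize = (allocSize > minBig) ? RM_PAGE_SIZE_64K       // 64 KiB
//                                           : RM_PAGE_SIZE;          //  4 KiB
//
// i.e. a 40 KiB allocation is promoted to a 64 KiB big page while an 8 KiB
// allocation stays on 4 KiB pages. The actual consumer of this value lives in
// the heap/VA-space code, not in this file.
//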
1932 
1933 /*!
1934  * @brief Initializes the init block for an engine
1935  *
1936  * @param[in] pKernelGmmu
1937  * @param[in] pInstBlkDesc    Memory descriptor for the instance block of the engine
1938  * @param[in] pVAS            OBJVASPACE pointer of the engine
1939  * @param[in] subctxId        Subcontext ID value
1940  * @param[in] pInstBlkParams  Pointer to the structure storing the parameters passed by the caller
1941  *
1942  * @returns NV_STATUS
1943  */
1944 NV_STATUS
1945 kgmmuInstBlkInit_IMPL
1946 (
1947     KernelGmmu           *pKernelGmmu,
1948     MEMORY_DESCRIPTOR    *pInstBlkDesc,
1949     OBJVASPACE           *pVAS,
1950     NvU32                 subctxId,
1951     INST_BLK_INIT_PARAMS *pInstBlkParams
1952 )
1953 {
1954     OBJGPU   *pGpu   = ENG_GET_GPU(pKernelGmmu);
1955     KernelBus *pKernelBus = GPU_GET_KERNEL_BUS(pGpu);
1956     NvU8     *pInstBlk;      // CPU VA of instance block.
1957     NvU64     vaLimitData;
1958     NvU32     vaLimitOffset;
1959     NvU32     dirBaseHiOffset;
1960     NvU32     dirBaseHiData;
1961     NvU32     dirBaseLoOffset;
1962     NvU32     dirBaseLoData;
1963     NvU32     atsOffset;
1964     NvU32     atsData;
1965     NvU32     magicValueOffset;
1966     NvU32     magicValueData;
1967     NV_STATUS status = NV_OK;
1968 
1969     NV_ASSERT(!gpumgrGetBcEnabledStatus(pGpu));
1970 
1971     // Get VA limit
1972     status = kgmmuInstBlkVaLimitGet_HAL(pKernelGmmu, pVAS, subctxId, pInstBlkParams, &vaLimitOffset, &vaLimitData);
1973     NV_ASSERT_OR_RETURN((status == NV_OK), status);
1974 
1975     // Get page dir base
1976     NV_ASSERT_OK_OR_RETURN(kgmmuInstBlkPageDirBaseGet_HAL(pGpu, pKernelGmmu,
1977         pVAS, pInstBlkParams, subctxId,
1978         &dirBaseLoOffset, &dirBaseLoData, &dirBaseHiOffset, &dirBaseHiData));
1979 
1980     if ((pVAS != NULL) && vaspaceIsAtsEnabled(pVAS))
1981     {
1982         // Coherent link ATS parameters are only set on the new VMM path.
1983         status = kgmmuInstBlkAtsGet_HAL(pKernelGmmu, pVAS, subctxId, &atsOffset, &atsData);
1984         NV_ASSERT_OR_RETURN((status == NV_OK), status);
1985     }
1986     else
1987     {
1988         atsOffset = 0;
1989         atsData = 0;
1990     }
1991 
1992     status = kgmmuInstBlkMagicValueGet_HAL(pKernelGmmu, &magicValueOffset, &magicValueData);
1993 
1994     // Write the fields out
1995     pInstBlk = pInstBlkParams->pInstBlk;
1996 
1997     if (pInstBlk != NULL)
1998     {
1999         if (vaLimitOffset != 0)
2000         {
2001             // TO DO: FMODEL fails with MEM_WR64
2002             if (IS_SIMULATION(pGpu))
2003             {
2004                 MEM_WR32(pInstBlk + vaLimitOffset + 0, NvU64_LO32(vaLimitData));
2005                 MEM_WR32(pInstBlk + vaLimitOffset + 4, NvU64_HI32(vaLimitData));
2006             }
2007             else
2008             {
2009                 MEM_WR64(pInstBlk + vaLimitOffset, vaLimitData);
2010             }
2011         }
2012 
2013         MEM_WR32(pInstBlk + dirBaseHiOffset, dirBaseHiData);
2014         MEM_WR32(pInstBlk + dirBaseLoOffset, dirBaseLoData);
2015 
2016         if (atsOffset != 0)
2017             MEM_WR32(pInstBlk + atsOffset, atsData);
2018 
2019         if (status == NV_OK)
2020             MEM_WR32(pInstBlk + magicValueOffset, magicValueData);
2021     }
2022     else
2023     {
2024         MemoryManager *pMemoryManager = GPU_GET_MEMORY_MANAGER(pGpu);
2025 
2026         pInstBlk = memmgrMemDescBeginTransfer(pMemoryManager, pInstBlkDesc,
2027                                               TRANSFER_FLAGS_SHADOW_ALLOC);
2028         if (pInstBlk == NULL)
2029         {
2030             return NV_ERR_INSUFFICIENT_RESOURCES;
2031         }
2032 
2033         if (vaLimitOffset != 0)
2034         {
2035             // TO DO: FMODEL fails with MEM_WR64
2036             if (IS_SIMULATION(pGpu))
2037             {
2038                 MEM_WR32(pInstBlk + vaLimitOffset + 0, NvU64_LO32(vaLimitData));
2039                 MEM_WR32(pInstBlk + vaLimitOffset + 4, NvU64_HI32(vaLimitData));
2040             }
2041             else
2042             {
2043                 MEM_WR64(pInstBlk + vaLimitOffset, vaLimitData);
2044             }
2045         }
2046 
2047         MEM_WR32(pInstBlk + dirBaseHiOffset, dirBaseHiData);
2048         MEM_WR32(pInstBlk + dirBaseLoOffset, dirBaseLoData);
2049 
2050         if (atsOffset != 0)
2051             MEM_WR32(pInstBlk + atsOffset, atsData);
2052 
2053         if (status == NV_OK)
2054             MEM_WR32(pInstBlk + magicValueOffset, magicValueData);
2055 
2056         memmgrMemDescEndTransfer(pMemoryManager, pInstBlkDesc,
2057                                  TRANSFER_FLAGS_SHADOW_ALLOC);
2058     }
2059 
2060     if (!pInstBlkParams->bDeferFlush)
2061     {
2062         kbusFlush_HAL(pGpu, pKernelBus, BUS_FLUSH_USE_PCIE_READ
2063                                         | kbusGetFlushAperture(pKernelBus, memdescGetAddressSpace(pInstBlkDesc)));
2064     }
2065 
2066     return NV_OK;
2067 }
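
//
// Note on the split 64-bit write above (illustrative): on fmodel simulation
// the VA limit is written as two 32-bit halves; with the little-endian
// instance block layout assumed by the code above, the two forms are
// equivalent:
//
//     MEM_WR64(pInstBlk + vaLimitOffset, vaLimitData);
//     // is the same as
//     MEM_WR32(pInstBlk + vaLimitOffset + 0, NvU64_LO32(vaLimitData));
//     MEM_WR32(pInstBlk + vaLimitOffset + 4, NvU64_HI32(vaLimitData));
//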
2068 
2069 GMMU_APERTURE
2070 kgmmuGetExternalAllocAperture_IMPL
2071 (
2072     NvU32 addressSpace
2073 )
2074 {
2075     switch (addressSpace)
2076     {
2077         case ADDR_FBMEM:
2078             return GMMU_APERTURE_VIDEO;
2079         case ADDR_FABRIC_V2:
2080         case ADDR_FABRIC_MC:
2081             return GMMU_APERTURE_PEER;
2082         case ADDR_SYSMEM:
2083         case ADDR_VIRTUAL:
2084             return GMMU_APERTURE_SYS_COH;
2085         default:
2086             NV_PRINTF(LEVEL_ERROR, "Unexpected addressSpace (%u) when mapping to GMMU_APERTURE.\n",
2087                       addressSpace);
2088             NV_ASSERT(0);
2089             return GMMU_APERTURE_SYS_COH;
2090     }
2091 }
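
//
// Usage sketch (illustrative): callers typically derive the aperture straight
// from a memory descriptor's address space, e.g.
//
//     GMMU_APERTURE aperture =
//         kgmmuGetExternalAllocAperture(memdescGetAddressSpace(pMemDesc));
//
// where pMemDesc is a hypothetical MEMORY_DESCRIPTOR for the external
// allocation being mapped.
//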
2092 
2093 /*!
2094  * @brief Updates RM's ownership of the access counter notify interrupt.
2095  *
2096  * @param[in] pGpu
2097  * @param[in] pKernelGmmu
2098  * @param[in] bOwnedByRm   NV_TRUE if RM owns the interrupt, NV_FALSE otherwise
2099  */
2100 void
2101 kgmmuAccessCntrChangeIntrOwnership_IMPL
2102 (
2103     OBJGPU     *pGpu,
2104     KernelGmmu *pKernelGmmu,
2105     NvBool      bOwnedByRm
2106 )
2107 {
2108     //
2109     // Disable the interrupt when RM loses the ownership and enable it back when
2110     // RM regains it. nvUvmInterfaceOwnAccessCntIntr() will rely on this behavior.
2111     //
2112     if (bOwnedByRm)
2113         pKernelGmmu->uvmSharedIntrRmOwnsMask |= RM_UVM_SHARED_INTR_MASK_HUB_ACCESS_COUNTER_NOTIFY;
2114     else
2115         pKernelGmmu->uvmSharedIntrRmOwnsMask &= ~RM_UVM_SHARED_INTR_MASK_HUB_ACCESS_COUNTER_NOTIFY;
2116 }
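
//
// Illustrative check (not the actual interrupt path): code that needs to know
// whether RM currently services the access counter notify interrupt can test
// the shared-interrupt ownership mask updated above:
//
//     if (pKernelGmmu->uvmSharedIntrRmOwnsMask &
//         RM_UVM_SHARED_INTR_MASK_HUB_ACCESS_COUNTER_NOTIFY)
//     {
//         // RM owns the interrupt; UVM has not taken ownership.
//     }
//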
2117 
2118 /**
2119  * @brief Provides an opportunity to register some IntrService during intrStateInit.
2120  */
2121 void
2122 kgmmuRegisterIntrService_IMPL
2123 (
2124     OBJGPU              *pGpu,
2125     KernelGmmu          *pKernelGmmu,
2126     IntrServiceRecord   pRecords[MC_ENGINE_IDX_MAX]
2127 )
2128 {
2129     NvU32 engineIdx;
2130     NvU16 *pEngineIdxList;
2131     NvU32 listSize;
2132 
2133     static NvU16 engineIdxList[] = {
2134         MC_ENGINE_IDX_REPLAYABLE_FAULT,
2135         MC_ENGINE_IDX_REPLAYABLE_FAULT_ERROR,
2136     };
2137 
2138     static NvU16 engineIdxListForCC[] = {
2139         MC_ENGINE_IDX_REPLAYABLE_FAULT_CPU,
2140         MC_ENGINE_IDX_NON_REPLAYABLE_FAULT_CPU,
2141     };
2142 
2143     if (IS_GSP_CLIENT(pGpu) && gpuIsCCFeatureEnabled(pGpu) && gpuIsGspOwnedFaultBuffersEnabled(pGpu))
2144     {
2145         pEngineIdxList = engineIdxListForCC;
2146         listSize = NV_ARRAY_ELEMENTS(engineIdxListForCC);
2147     }
2148     else
2149     {
2150         pEngineIdxList = engineIdxList;
2151         listSize = NV_ARRAY_ELEMENTS(engineIdxList);
2152     }
2153 
2154     for (NvU32 tableIdx = 0; tableIdx < listSize; tableIdx++)
2155     {
2156         engineIdx = pEngineIdxList[tableIdx];
2157         NV_ASSERT(pRecords[engineIdx].pInterruptService == NULL);
2158         pRecords[engineIdx].pInterruptService = staticCast(pKernelGmmu, IntrService);
2159     }
2160 }
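
//
// Dispatch sketch (hedged, simplified): once registered, the interrupt core
// can route a pending fault interrupt to this object by engine index roughly
// as follows; everything except pRecords[].pInterruptService and the
// engineIdx field is illustrative:
//
//     IntrService *pIntrService =
//         pRecords[MC_ENGINE_IDX_REPLAYABLE_FAULT].pInterruptService;
//     if (pIntrService != NULL)
//     {
//         IntrServiceServiceInterruptArguments args = {0};
//         args.engineIdx = MC_ENGINE_IDX_REPLAYABLE_FAULT;
//         intrservServiceInterrupt(pGpu, pIntrService, &args);
//     }
//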
2161 
2162 /**
2163  * @brief Service stall interrupts.
2164  *
2165  * @returns Zero, or any implementation-chosen nonzero value. If the same nonzero value is returned enough
2166  *          times the interrupt is considered stuck.
2167  */
2168 NvU32
2169 kgmmuServiceInterrupt_IMPL
2170 (
2171     OBJGPU      *pGpu,
2172     KernelGmmu  *pKernelGmmu,
2173     IntrServiceServiceInterruptArguments *pParams
2174 )
2175 {
2176     NV_STATUS status;
2177 
2178     NV_ASSERT_OR_RETURN(pParams != NULL, 0);
2179 
2180     switch (pParams->engineIdx)
2181     {
2182         case MC_ENGINE_IDX_REPLAYABLE_FAULT:
2183         {
2184             status = kgmmuServiceReplayableFault_HAL(pGpu, pKernelGmmu);
2185             if (status != NV_OK)
2186             {
2187                 NV_ASSERT_OK_FAILED("Failed to service replayable MMU fault error",
2188                     status);
2189             }
2190             break;
2191         }
2192         case MC_ENGINE_IDX_REPLAYABLE_FAULT_ERROR:
2193         {
2194             status = kgmmuReportFaultBufferOverflow_HAL(pGpu, pKernelGmmu);
2195             if (status != NV_OK)
2196             {
2197                 NV_ASSERT_OK_FAILED(
2198                     "Failed to report replayable MMU fault buffer overflow error",
2199                     status);
2200             }
2201             break;
2202         }
2203         case MC_ENGINE_IDX_NON_REPLAYABLE_FAULT_CPU:
2204         {
2205             osQueueMMUFaultHandler(pGpu);
2206             status = NV_OK;
2207             break;
2208         }
2209         case MC_ENGINE_IDX_REPLAYABLE_FAULT_CPU:
2210         {
2211             NV_PRINTF(LEVEL_ERROR, "Unexpected replayable interrupt routed to RM. Verify UVM took ownership.\n");
2212             status = NV_ERR_INVALID_STATE;
2213             break;
2214         }
2215         default:
2216         {
2217             NV_ASSERT_FAILED("Invalid engineIdx");
2218             break;
2219         }
2220     }
2221 
2222     return 0;
2223 }
2224 
2225 /*!
2226  * @brief Extract the PTE fields from the PTE and
2227  * set the corresponding flags/fields in pPteInfo.
2228  *
2229  * @param[in]  pKernelGmmu
2230  * @param[in]  pPte        Pointer to the PTE contents
2231  * @param[out] pPteInfo    NV0080_CTRL_DMA_PTE_INFO_PTE_BLOCK pointer to cmd params to populate
2232  * @param[in]  pFmt        Pointer to the GMMU format
2233  * @param[in]  pLevelFmt   Format of the level
2234  *
2235  *
2236  * @returns none
2237  */
2238 void
2239 kgmmuExtractPteInfo_IMPL
2240 (
2241     KernelGmmu                          *pKernelGmmu,
2242     GMMU_ENTRY_VALUE                    *pPte,
2243     NV0080_CTRL_DMA_PTE_INFO_PTE_BLOCK  *pPteInfo,
2244     const GMMU_FMT                      *pFmt,
2245     const MMU_FMT_LEVEL                 *pLevelFmt
2246 )
2247 {
2248     OBJGPU             *pGpu = ENG_GET_GPU(pKernelGmmu);
2249     MemoryManager      *pMemoryManager = GPU_GET_MEMORY_MANAGER(pGpu);
2250     const GMMU_FMT_PTE *pFmtPte = pFmt->pPte;
2251     NvBool              bPteValid;
2252 
2253     bPteValid = nvFieldGetBool(&pFmtPte->fldValid, pPte->v8);
2254 
2255     pPteInfo->pteFlags = FLD_SET_DRF_NUM(0080_CTRL, _DMA_PTE_INFO, _PARAMS_FLAGS_VALID,
2256         bPteValid, pPteInfo->pteFlags);
2257 
2258     if (pFmtPte->version != GMMU_FMT_VERSION_3)
2259     {
2260         pPteInfo->pteFlags = FLD_SET_DRF_NUM(0080_CTRL, _DMA_PTE_INFO, _PARAMS_FLAGS_ENCRYPTED,
2261             nvFieldGetBool(&pFmtPte->fldEncrypted, pPte->v8), pPteInfo->pteFlags);
2262     }
2263 
2264     switch (gmmuFieldGetAperture(&pFmtPte->fldAperture, pPte->v8))
2265     {
2266         case GMMU_APERTURE_VIDEO:
2267             pPteInfo->pteFlags = FLD_SET_DRF(0080_CTRL, _DMA_PTE_INFO, _PARAMS_FLAGS_APERTURE,
2268                 _VIDEO_MEMORY, pPteInfo->pteFlags);
2269             break;
2270         case GMMU_APERTURE_PEER:
2271             pPteInfo->pteFlags = FLD_SET_DRF(0080_CTRL, _DMA_PTE_INFO, _PARAMS_FLAGS_APERTURE,
2272                 _PEER_MEMORY, pPteInfo->pteFlags);
2273             break;
2274         case GMMU_APERTURE_SYS_COH:
2275             pPteInfo->pteFlags = FLD_SET_DRF(0080_CTRL, _DMA_PTE_INFO, _PARAMS_FLAGS_APERTURE,
2276                 _SYSTEM_COHERENT_MEMORY, pPteInfo->pteFlags);
2277             break;
2278         case GMMU_APERTURE_SYS_NONCOH:
2279             pPteInfo->pteFlags = FLD_SET_DRF(0080_CTRL, _DMA_PTE_INFO, _PARAMS_FLAGS_APERTURE,
2280                 _SYSTEM_NON_COHERENT_MEMORY, pPteInfo->pteFlags);
2281             break;
2282         case GMMU_APERTURE_INVALID:
2283         default:
2284             NV_ASSERT(0);
2285             break;
2286     }
2287 
2288     if (pFmtPte->version == GMMU_FMT_VERSION_3)
2289     {
2291         NvU32        ptePcfHw;
2292         NvU32        ptePcfSw = 0;
2293 
2294         // In Version 3, parse the PCF bits and return those
2295         ptePcfHw = nvFieldGet32(&pFmtPte->fldPtePcf, pPte->v8);
2296         NV_ASSERT(kgmmuTranslatePtePcfFromHw_HAL(pKernelGmmu, ptePcfHw, bPteValid, &ptePcfSw) == NV_OK);
2297 
2298         // Valid 2MB PTEs follow the same format as 64K and 4K PTEs
2299         if (bPteValid)
2300         {
2301             if (!(ptePcfSw & (1 << SW_MMU_PCF_UNCACHED_IDX)))
2302             {
2303                 pPteInfo->pteFlags = FLD_SET_DRF(0080_CTRL, _DMA_PTE_INFO,
2304                         _PARAMS_FLAGS_GPU_CACHED, _TRUE, pPteInfo->pteFlags);
2305             }
2306             if (ptePcfSw & (1 << SW_MMU_PCF_RO_IDX))
2307             {
2308                 pPteInfo->pteFlags = FLD_SET_DRF(0080_CTRL, _DMA_PTE_INFO,
2309                         _PARAMS_FLAGS_READ_ONLY, _TRUE, pPteInfo->pteFlags);
2310             }
2311             if (ptePcfSw & (1 << SW_MMU_PCF_NOATOMIC_IDX))
2312             {
2313                 pPteInfo->pteFlags = FLD_SET_DRF(0080_CTRL, _DMA_PTE_INFO,
2314                         _PARAMS_FLAGS_ATOMIC, _DISABLE, pPteInfo->pteFlags);
2315             }
2316             if (ptePcfSw & (1 << SW_MMU_PCF_REGULAR_IDX))
2317             {
2318                 pPteInfo->pteFlags = FLD_SET_DRF(0080_CTRL, _DMA_PTE_INFO,
2319                         _PARAMS_FLAGS_PRIVILEGED, _FALSE, pPteInfo->pteFlags);
2320             }
2321             if (ptePcfSw & (1 << SW_MMU_PCF_ACE_IDX))
2322             {
2323                 pPteInfo->pteFlags = FLD_SET_DRF(0080_CTRL, _DMA_PTE_INFO,
2324                         _PARAMS_FLAGS_ACCESS_COUNTING, _ENABLE, pPteInfo->pteFlags);
2325             }
2326         }
2327         else
2328         {
2329             if (pLevelFmt->numSubLevels == 0)
2330             {
2331                 if (ptePcfSw & (1 << SW_MMU_PCF_SPARSE_IDX))
2332                 {
2333                     pPteInfo->pteFlags = FLD_SET_DRF(0080_CTRL, _DMA_PTE_INFO,
2334                             _PARAMS_FLAGS_GPU_CACHED, _FALSE, pPteInfo->pteFlags);
2335                 }
2336                 else
2337                 {
2338                     pPteInfo->pteFlags = FLD_SET_DRF(0080_CTRL, _DMA_PTE_INFO,
2339                             _PARAMS_FLAGS_GPU_CACHED, _TRUE, pPteInfo->pteFlags);
2340                 }
2341             }
2342             else
2343             {
2344                 NvU32  pdePcfHw = 0;
2345                 NvU32  pdePcfSw = 0;
2346 
2347                 pdePcfHw = nvFieldGet32(&pFmt->pPde->fldPdePcf, pPte->v8);
2348                 NV_ASSERT(kgmmuTranslatePdePcfFromHw_HAL(pKernelGmmu, pdePcfHw, GMMU_APERTURE_INVALID, &pdePcfSw) == NV_OK);
2349                 if (pdePcfSw & (1 << SW_MMU_PCF_SPARSE_IDX))
2350                 {
2351                     pPteInfo->pteFlags = FLD_SET_DRF(0080_CTRL, _DMA_PTE_INFO,
2352                             _PARAMS_FLAGS_GPU_CACHED, _FALSE, pPteInfo->pteFlags);
2353                 }
2354                 else
2355                 {
2356                     pPteInfo->pteFlags = FLD_SET_DRF(0080_CTRL, _DMA_PTE_INFO,
2357                             _PARAMS_FLAGS_GPU_CACHED, _TRUE, pPteInfo->pteFlags);
2358                 }
2359 
2360             }
2361         }
2362     }
2363     else
2364     {
2365         pPteInfo->pteFlags = FLD_SET_DRF_NUM(0080_CTRL, _DMA_PTE_INFO, _PARAMS_FLAGS_GPU_CACHED,
2366             !nvFieldGetBool(&pFmtPte->fldVolatile, pPte->v8), pPteInfo->pteFlags);
2367 
2368         if (nvFieldIsValid32(&pFmtPte->fldReadDisable.desc) &&
2369             nvFieldIsValid32(&pFmtPte->fldWriteDisable.desc))
2370         {
2371             if (nvFieldGetBool(&pFmtPte->fldWriteDisable, pPte->v8))
2372             {
2373                 pPteInfo->pteFlags = FLD_SET_DRF(0080_CTRL, _DMA_PTE_INFO,
2374                         _PARAMS_FLAGS_SHADER_ACCESS, _READ_ONLY, pPteInfo->pteFlags);
2375             }
2376             else if (nvFieldGetBool(&pFmtPte->fldReadDisable, pPte->v8))
2377             {
2378                 pPteInfo->pteFlags = FLD_SET_DRF(0080_CTRL, _DMA_PTE_INFO,
2379                         _PARAMS_FLAGS_SHADER_ACCESS, _WRITE_ONLY, pPteInfo->pteFlags);
2380             }
2381             else
2382             {
2383                 pPteInfo->pteFlags = FLD_SET_DRF(0080_CTRL, _DMA_PTE_INFO,
2384                         _PARAMS_FLAGS_SHADER_ACCESS, _READ_WRITE, pPteInfo->pteFlags);
2385             }
2386         }
2387         else
2388         {
2389             pPteInfo->pteFlags = FLD_SET_DRF(0080_CTRL, _DMA_PTE_INFO, _PARAMS_FLAGS_SHADER_ACCESS,
2390                 _NOT_SUPPORTED, pPteInfo->pteFlags);
2391         }
2392 
2393         pPteInfo->pteFlags = FLD_SET_DRF_NUM(0080_CTRL, _DMA_PTE_INFO, _PARAMS_FLAGS_READ_ONLY,
2394             nvFieldGetBool(&pFmtPte->fldReadOnly, pPte->v8), pPteInfo->pteFlags);
2395 
2396         // Get comptagline
2397         pPteInfo->comptagLine = nvFieldGet32(&pFmtPte->fldCompTagLine, pPte->v8);
2398     }
2399 
2400     // Get kind
2401     pPteInfo->kind = nvFieldGet32(&pFmtPte->fldKind, pPte->v8);
2402 
2403     //
2404     // Decode the comptags value from kind.  GF100 only supports 2 bits per rop tile,
2405     // but future chips will use the other layouts.
2406     //
2407     if (memmgrIsKind_HAL(pMemoryManager, FB_IS_KIND_COMPRESSIBLE_1, pPteInfo->kind))
2408     {
2409         pPteInfo->pteFlags = FLD_SET_DRF(0080_CTRL, _DMA_PTE_INFO, _PARAMS_FLAGS_COMPTAGS, _1, pPteInfo->pteFlags);
2410     }
2411     else if (memmgrIsKind_HAL(pMemoryManager, FB_IS_KIND_COMPRESSIBLE_2, pPteInfo->kind))
2412     {
2413         pPteInfo->pteFlags = FLD_SET_DRF(0080_CTRL, _DMA_PTE_INFO, _PARAMS_FLAGS_COMPTAGS, _2, pPteInfo->pteFlags);
2414     }
2415     else if (memmgrIsKind_HAL(pMemoryManager, FB_IS_KIND_COMPRESSIBLE_4, pPteInfo->kind))
2416     {
2417         pPteInfo->pteFlags = FLD_SET_DRF(0080_CTRL, _DMA_PTE_INFO, _PARAMS_FLAGS_COMPTAGS, _4, pPteInfo->pteFlags);
2418     }
2419     else
2420     {
2421         pPteInfo->pteFlags = FLD_SET_DRF(0080_CTRL, _DMA_PTE_INFO, _PARAMS_FLAGS_COMPTAGS, _NONE, pPteInfo->pteFlags);
2422     }
2423 }
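
//
// Worked example (illustrative): for a version-3 PTE that is valid and whose
// decoded SW PCF is
//
//     ptePcfSw = (1 << SW_MMU_PCF_RO_IDX) | (1 << SW_MMU_PCF_NOATOMIC_IDX);
//
// the decode above reports _FLAGS_READ_ONLY as _TRUE and _FLAGS_ATOMIC as
// _DISABLE, and sets _FLAGS_GPU_CACHED to _TRUE because the uncached bit is
// not set.
//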
2424 
2425 NvS32*
2426 kgmmuGetFatalFaultIntrPendingState_IMPL
2427 (
2428     KernelGmmu *pKernelGmmu,
2429     NvU8 gfid
2430 )
2431 {
2432     return &pKernelGmmu->mmuFaultBuffer[gfid].fatalFaultIntrPending;
2433 }
2434 
2435 struct HW_FAULT_BUFFER*
2436 kgmmuGetHwFaultBufferPtr_IMPL
2437 (
2438     KernelGmmu *pKernelGmmu,
2439     NvU8 gfid,
2440     NvU8 faultBufferIndex
2441 )
2442 {
2443     return &pKernelGmmu->mmuFaultBuffer[gfid].hwFaultBuffers[faultBufferIndex];
2444 }
2445 
2446 NvU64
2447 kgmmuGetFaultBufferGenCnt_IMPL
2448 (
2449     OBJGPU     *pGpu,
2450     KernelGmmu *pKernelGmmu,
2451     NvU8        gfid
2452 )
2453 {
2454     return pKernelGmmu->mmuFaultBuffer[gfid].faultBufferGenerationCounter;
2455 }
2456 
2457 void *
2458 kgmmuGetShadowFaultBufferCslContext_IMPL
2459 (
2460     OBJGPU *pGpu,
2461     KernelGmmu *pKernelGmmu,
2462     FAULT_BUFFER_TYPE type
2463 )
2464 {
2465     ConfidentialCompute *pConfCompute = GPU_GET_CONF_COMPUTE(pGpu);
2466 
2467     if (!gpuIsCCFeatureEnabled(pGpu))
2468     {
2469         return NULL;
2470     }
2471 
2472     NV_ASSERT_OR_RETURN(
2473         pConfCompute->getProperty(pConfCompute, PDB_PROP_CONFCOMPUTE_ENCRYPT_ENABLED),
2474         NULL);
2475 
2476     switch (type)
2477     {
2478         case NON_REPLAYABLE_FAULT_BUFFER:
2479             return pConfCompute->pNonReplayableFaultCcslCtx;
2480         case REPLAYABLE_FAULT_BUFFER:
2481             return pConfCompute->pReplayableFaultCcslCtx;
2482         default:
2483             break;
2484     }
2485 
2486     return NULL;
2487 }
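
//
// Usage sketch (illustrative only): a consumer that needs to decrypt shadow
// fault packets under Confidential Compute first fetches the CCSL context for
// the buffer type:
//
//     void *pCslCtx = kgmmuGetShadowFaultBufferCslContext(pGpu, pKernelGmmu,
//                                                         NON_REPLAYABLE_FAULT_BUFFER);
//     if (pCslCtx == NULL)
//         return NV_ERR_INVALID_STATE;  // CC disabled or encryption not enabled
//
// The decrypt call itself belongs to the CCSL library and is not shown here.
//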
2488