1 /*
2  * SPDX-FileCopyrightText: Copyright (c) 2021-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
3  * SPDX-License-Identifier: MIT
4  *
5  * Permission is hereby granted, free of charge, to any person obtaining a
6  * copy of this software and associated documentation files (the "Software"),
7  * to deal in the Software without restriction, including without limitation
8  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
9  * and/or sell copies of the Software, and to permit persons to whom the
10  * Software is furnished to do so, subject to the following conditions:
11  *
12  * The above copyright notice and this permission notice shall be included in
13  * all copies or substantial portions of the Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
21  * DEALINGS IN THE SOFTWARE.
22  */
23 
24  /******************************************************************************
25 *
26 *       Kernel GMMU module header
27 *       Defines and structures used on CPU RM for the GMMU object.
28 *
29 ******************************************************************************/
30 
31 #define NVOC_KERN_GMMU_H_PRIVATE_ACCESS_ALLOWED
32 
33 #include "gpu/bif/kernel_bif.h"
34 #include "gpu/mmu/kern_gmmu.h"
35 #include "gpu/bus/kern_bus.h"
36 #include "gpu/nvlink/kernel_nvlink.h"
37 #include "gpu/mem_sys/kern_mem_sys.h"
38 #include "gpu/mem_mgr/mem_mgr.h"
39 #include "vgpu/vgpu_events.h"
40 #include "gpu/mem_mgr/mem_desc.h"
41 #include "gpu/subdevice/subdevice.h"
42 #include "os/os.h"
43 #include "rmapi/rmapi.h"
44 #include "gpu/gpu.h"
45 #include "nvRmReg.h"
46 #include "vgpu/rpc.h"
47 #include "kernel/gpu/intr/engine_idx.h"
48 
49 #include "kernel/gpu/conf_compute/ccsl.h"
50 
51 static void _kgmmuInitRegistryOverrides(OBJGPU *pGpu, KernelGmmu *pKernelGmmu);
52 
53 /*!
54  * KERNEL_GMMU constructor
55  *
56  * @param[in]  pGpu
57  * @param[in]  pKernelGmmu
58  * @param[in]  engDesc       Engine descriptor
59  *
60  * @return NV_OK on success, pertinent error code on failure.
61  */
62 NV_STATUS
63 kgmmuConstructEngine_IMPL(OBJGPU *pGpu, KernelGmmu *pKernelGmmu, ENGDESCRIPTOR engDesc)
64 {
65     NvU32  v;
66 
67     kgmmuDetermineMaxVASize_HAL(pGpu, pKernelGmmu);
68 
69     if (gpuIsCacheOnlyModeEnabled(pGpu))
70     {
71         pKernelGmmu->bHugePageSupported      = NV_FALSE;
72         pKernelGmmu->bPageSize512mbSupported = NV_FALSE;
73     }
74 
75     // Allocate and init MMU format families.
76     kgmmuFmtInitPdeApertures_HAL(pKernelGmmu, pKernelGmmu->pdeApertures);
77     kgmmuFmtInitPteApertures_HAL(pKernelGmmu, pKernelGmmu->pteApertures);
78 
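    // One GMMU_FMT_FAMILY is allocated per supported format version below;
    // unsupported versions are left as NULL entries in pFmtFamilies.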
79     for (v = 0; v < GMMU_FMT_MAX_VERSION_COUNT; ++v)
80     {
81         const NvU32 ver = g_gmmuFmtVersions[v];
82         if (kgmmuFmtIsVersionSupported_HAL(pKernelGmmu, ver))
83         {
84             GMMU_FMT_FAMILY *pFam = NULL;
85 
86             // Alloc version struct.
87             pFam = portMemAllocNonPaged(sizeof(*pFam));
88             NV_ASSERT_OR_RETURN((pFam != NULL), NV_ERR_NO_MEMORY);
89             portMemSet(pFam, 0, sizeof(*pFam));
90             pKernelGmmu->pFmtFamilies[v] = pFam;
91 
92             // Init PDE/PTE formats.
93             kgmmuFmtInitPdeMulti_HAL(pKernelGmmu, &pFam->pdeMulti, ver, pKernelGmmu->pdeApertures);
94             kgmmuFmtInitPde_HAL(pKernelGmmu, &pFam->pde, ver, pKernelGmmu->pdeApertures);
95             kgmmuFmtInitPte_HAL(pKernelGmmu, &pFam->pte, ver, pKernelGmmu->pteApertures,
96                 gpuIsUnifiedMemorySpaceEnabled(pGpu));
97 
98             kgmmuFmtInitPteComptagLine_HAL(pKernelGmmu, &pFam->pte, ver);
99         }
100         else
101         {
102             pKernelGmmu->pFmtFamilies[v] = NULL;
103         }
104     }
105 
106     NV_ASSERT_OK_OR_RETURN(kgmmuFmtInit(pKernelGmmu));
107 
108     portMemSet(&pKernelGmmu->mmuFaultBuffer, 0, sizeof(pKernelGmmu->mmuFaultBuffer));
109 
110     // Default placement for PDEs is in vidmem.
111     pKernelGmmu->PDEAperture = ADDR_FBMEM;
112     pKernelGmmu->PDEAttr = NV_MEMORY_WRITECOMBINED;
113     pKernelGmmu->PDEBAR1Aperture = ADDR_FBMEM;
114     pKernelGmmu->PDEBAR1Attr = NV_MEMORY_WRITECOMBINED;
115 
116     // Default placement for PTEs is in vidmem.
117     pKernelGmmu->PTEAperture = ADDR_FBMEM;
118     pKernelGmmu->PTEAttr = NV_MEMORY_WRITECOMBINED;
119     pKernelGmmu->PTEBAR1Aperture = ADDR_FBMEM;
120     pKernelGmmu->PTEBAR1Attr = NV_MEMORY_WRITECOMBINED;
121 
122     _kgmmuInitRegistryOverrides(pGpu, pKernelGmmu);
123 
124     return NV_OK;
125 }
126 
127 static NV_STATUS
128 _kgmmuInitStaticInfo
129 (
130     OBJGPU *pGpu,
131     KernelGmmu *pKernelGmmu
132 )
133 {
134     RM_API *pRmApi = GPU_GET_PHYSICAL_RMAPI(pGpu);
135     NV_STATUS status;
136 
137     //
138     // On vGPU, all hardware management is done by the host except for full SR-IOV.
139     // Thus, skip this HW initialization on vGPU guests other than full SR-IOV.
140     //
141     if (!(IS_VIRTUAL_WITHOUT_SRIOV(pGpu) ||
142           (IS_VIRTUAL_WITH_SRIOV(pGpu) && gpuIsWarBug200577889SriovHeavyEnabled(pGpu))))
143     {
144         // Init HAL specific features.
145         NV_ASSERT_OK_OR_RETURN(kgmmuFmtFamiliesInit_HAL(pGpu, pKernelGmmu));
146     }
147 
148     pKernelGmmu->pStaticInfo = portMemAllocNonPaged(sizeof(*pKernelGmmu->pStaticInfo));
149     NV_CHECK_OR_RETURN(LEVEL_ERROR, pKernelGmmu->pStaticInfo != NULL, NV_ERR_INSUFFICIENT_RESOURCES);
150     portMemSet(pKernelGmmu->pStaticInfo, 0, sizeof(*pKernelGmmu->pStaticInfo));
151 
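    // Fetch the static GMMU properties from the physical RM (GSP / host RM)
    // via an internal control call.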
152     NV_CHECK_OK_OR_GOTO(status, LEVEL_ERROR,
153         pRmApi->Control(pRmApi, pGpu->hInternalClient, pGpu->hInternalSubdevice,
154                                 NV2080_CTRL_CMD_INTERNAL_GMMU_GET_STATIC_INFO,
155                                 pKernelGmmu->pStaticInfo, sizeof(*pKernelGmmu->pStaticInfo)), fail);
156 
157 fail:
158     if (status != NV_OK)
159     {
160         portMemFree(pKernelGmmu->pStaticInfo);
161     }
162 
163     return status;
164 }
165 
166 /*
167  * Initialize the Kernel GMMU state.
168  *
169  * @param      pGpu
170  * @param      pKernelGmmu
171  */
172 NV_STATUS kgmmuStateInitLocked_IMPL
173 (
174     OBJGPU     *pGpu,
175     KernelGmmu *pKernelGmmu
176 )
177 {
178     KernelBif *pKernelBif = GPU_GET_KERNEL_BIF(pGpu);
179     NV_STATUS  status;
180 
181     if (pKernelBif != NULL)
182     {
183         // This value shouldn't change after initialization, so cache it now
184         pKernelGmmu->sysmemBaseAddress = pKernelBif->dmaWindowStartAddress;
185     }
186 
187     status = _kgmmuInitStaticInfo(pGpu, pKernelGmmu);
188     if (status != NV_OK)
189     {
190         return status;
191     }
192 
193     if (IS_VIRTUAL_WITH_SRIOV(pGpu))
194     {
195         VGPU_STATIC_INFO *pVSI = GPU_GET_STATIC_INFO(pGpu);
196         pGpu->setProperty(pGpu, PDB_PROP_GPU_ATS_SUPPORTED, pVSI->bAtsSupported);
197     }
198 
199     // Setup Fault buffer if enabled
200     if (!pKernelGmmu->getProperty(pKernelGmmu, PDB_PROP_KGMMU_FAULT_BUFFER_DISABLED))
201     {
202         NV_ASSERT_OK_OR_RETURN(kgmmuFaultBufferInit_HAL(pGpu, pKernelGmmu));
203     }
204 
205     return status;
206 }
207 
208 static NV_STATUS
209 _kgmmuCreateGlobalVASpace
210 (
211     OBJGPU  *pGpu,
212     KernelGmmu *pKernelGmmu,
213     NvU32 flags
214 )
215 {
216     NvU32       constructFlags = VASPACE_FLAGS_NONE;
217     OBJVASPACE *pGlobalVAS     = NULL;
218     NV_STATUS   rmStatus;
219     OBJGPUGRP  *pGpuGrp        = NULL;
220 
221     // Bail out early on sleep/suspend cases
222     if (flags & GPU_STATE_FLAGS_PRESERVING)
223         return NV_OK;
224     if (!gpumgrIsParentGPU(pGpu))
225         return NV_OK;
226 
227     //
228     // We create the device vaspace at this point. Assemble the flags needed
229     // for construction.
230     //
231 
232     // Allow PTE in SYS
233     constructFlags |= VASPACE_FLAGS_RETRY_PTE_ALLOC_IN_SYS;
234     constructFlags |= DRF_DEF(_VASPACE, _FLAGS, _BIG_PAGE_SIZE, _DEFAULT);
235 
236     pGpuGrp = gpumgrGetGpuGrpFromGpu(pGpu);
237     NV_ASSERT_OR_RETURN(pGpuGrp != NULL, NV_ERR_INVALID_DATA);
238 
239     rmStatus = gpugrpCreateGlobalVASpace(pGpuGrp, pGpu,
240                                          FERMI_VASPACE_A,
241                                          0, 0,
242                                          constructFlags,
243                                          &pGlobalVAS);
244     NV_ASSERT_OR_RETURN((NV_OK == rmStatus), rmStatus);
245 
246     return NV_OK;
247 }
248 
249 static NV_STATUS
250 _kgmmuDestroyGlobalVASpace
251 (
252     OBJGPU  *pGpu,
253     KernelGmmu *pKernelGmmu,
254     NvU32 flags
255 )
256 {
257     OBJGPUGRP *pGpuGrp = NULL;
258 
259     if (flags & GPU_STATE_FLAGS_PRESERVING)
260         return NV_OK;
261 
262     pGpuGrp = gpumgrGetGpuGrpFromGpu(pGpu);
263     return gpugrpDestroyGlobalVASpace(pGpuGrp, pGpu);
264 }
265 
266 /*
267  *  Helper function to enable ComputePeerMode
268  */
269 NV_STATUS
270 kgmmuEnableComputePeerAddressing_IMPL
271 (
272     OBJGPU *pGpu,
273     KernelGmmu *pKernelGmmu,
274     NvU32 flags
275 )
276 {
277     KernelBus *pKernelBus = GPU_GET_KERNEL_BUS(pGpu);
278     OBJSYS    *pSys = SYS_GET_INSTANCE();
279     NV_STATUS status = NV_OK;
280     RM_API *pRmApi = GPU_GET_PHYSICAL_RMAPI(pGpu);
281     NvBool bComputePeerMode = NV_FALSE;
282 
283     if (pSys->getProperty(pSys, PDB_PROP_SYS_NVSWITCH_IS_PRESENT) ||
284         kbusIsFlaSupported(pKernelBus))
285     {
286         bComputePeerMode = NV_TRUE;
287     }
288 
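    //
    // When compute peer mode applies, program NVLink compute peer addressing
    // in the GMMU HAL and then enable it on the physical RM side via an
    // internal control call.
    //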
289     if (bComputePeerMode)
290     {
291         status = kgmmuEnableNvlinkComputePeerAddressing_HAL(pKernelGmmu);
292         if (status != NV_OK)
293         {
294             NV_PRINTF(LEVEL_ERROR,
295                         "Failed to enable GMMU compute peer addressing for GPU %x, status: %x\n",
296                         pGpu->gpuInstance, status);
297             return status;
298         }
299 
300         status = pRmApi->Control(pRmApi,
301                                 pGpu->hInternalClient,
302                                 pGpu->hInternalSubdevice,
303                                 NV2080_CTRL_CMD_INTERNAL_NVLINK_ENABLE_COMPUTE_PEER_ADDR,
304                                 NULL, 0);
305     }
306     return status;
307 }
308 
309 /*
310  *  State Post Load
311  */
312 NV_STATUS kgmmuStatePostLoad_IMPL
313 (
314     OBJGPU *pGpu,
315     KernelGmmu *pKernelGmmu,
316     NvU32 flags
317 )
318 {
319     NV_STATUS status = NV_OK;
320 
321     status = _kgmmuCreateGlobalVASpace(pGpu, pKernelGmmu, flags);
322 
323     if (status != NV_OK)
324     {
325         NV_PRINTF(LEVEL_ERROR,
326                     "Failed to create GVASpace, status:%x\n",
327                     status);
328         return status;
329     }
330 
331     status = kgmmuEnableComputePeerAddressing(pGpu, pKernelGmmu, flags);
332 
333     if (status != NV_OK)
334     {
335         NV_PRINTF(LEVEL_ERROR,
336                     "Failed to enable compute peer addressing, status:%x\n",
337                     status);
338         return status;
339     }
340 
341     NV_ASSERT_OK_OR_RETURN(kgmmuInitCeMmuFaultIdRange_HAL(pGpu, pKernelGmmu));
342 
343     return status;
344 }
345 
346 /*
347  *  State Pre Unload
348  */
349 NV_STATUS
350 kgmmuStatePreUnload_IMPL
351 (
352     OBJGPU *pGpu,
353     KernelGmmu *pKernelGmmu,
354     NvU32 flags
355 )
356 {
357     NV_STATUS status = NV_OK;
358 
359     status = _kgmmuDestroyGlobalVASpace(pGpu, pKernelGmmu, flags);
360 
361     if (status != NV_OK)
362     {
363         NV_PRINTF(LEVEL_ERROR,
364                     "Failed to destroy GVASpace, status:%x\n",
365                     status);
366         return status;
367     }
368     return status;
369 }
370 
371 /*!
372  * KernelGmmu destructor
373  *
374  * @param[in]  pKernelGmmu KernelGmmu object pointer
375  */
376 void
377 kgmmuDestruct_IMPL(KernelGmmu *pKernelGmmu)
378 {
379     NvU32       v;
380     NvU32       b;
381 
382     // Free the per-big-page-size formats and format-family storage.
383     for (v = 0; v < GMMU_FMT_MAX_VERSION_COUNT; ++v)
384     {
385         if (NULL != pKernelGmmu->pFmtFamilies[v])
386         {
387             for (b = 0; b < GMMU_FMT_MAX_BIG_PAGE_SIZES; ++b)
388             {
389                 portMemFree(pKernelGmmu->pFmtFamilies[v]->pFmts[b]);
390                 pKernelGmmu->pFmtFamilies[v]->pFmts[b] = NULL;
391             }
392             portMemFree(pKernelGmmu->pFmtFamilies[v]);
393         }
394     }
395 }
396 
397 void
398 kgmmuStateDestroy_IMPL(OBJGPU *pGpu, KernelGmmu *pKernelGmmu)
399 {
400     if (NULL != pKernelGmmu->pStaticInfo)
401     {
402         portMemFree((void *)pKernelGmmu->pStaticInfo);
403         pKernelGmmu->pStaticInfo = NULL;
404     }
405     if (NULL != pKernelGmmu->pWarSmallPageTable)
406     {
407         memdescFree(pKernelGmmu->pWarSmallPageTable);
408         memdescDestroy(pKernelGmmu->pWarSmallPageTable);
409         pKernelGmmu->pWarSmallPageTable = NULL;
410     }
411     if (NULL != pKernelGmmu->pWarPageDirectory0)
412     {
413         memdescFree(pKernelGmmu->pWarPageDirectory0);
414         memdescDestroy(pKernelGmmu->pWarPageDirectory0);
415         pKernelGmmu->pWarPageDirectory0 = NULL;
416     }
417 
418     // Only if faultBuffer is enabled
419     if (!pKernelGmmu->getProperty(pKernelGmmu, PDB_PROP_KGMMU_FAULT_BUFFER_DISABLED))
420     {
421         (void)kgmmuFaultBufferDestroy_HAL(pGpu, pKernelGmmu);
422     }
423 }
424 
425 NV_STATUS
426 kgmmuStateLoad_IMPL
427 (
428     OBJGPU     *pGpu,
429     KernelGmmu *pKernelGmmu,
430     NvU32       flags
431 )
432 {
433     NV_STATUS status = NV_OK;
434 
435     // Only if faultBuffer is enabled
436     if (!pKernelGmmu->getProperty(pKernelGmmu, PDB_PROP_KGMMU_FAULT_BUFFER_DISABLED))
437     {
438         status = kgmmuFaultBufferLoad_HAL(pGpu, pKernelGmmu,
439                                           NON_REPLAYABLE_FAULT_BUFFER, GPU_GFID_PF);
440         NV_ASSERT_OK_OR_RETURN(status);
441 
442         //
443         // Note1: We check both the replayable fault buffer enablement and the
444         // PM codepath because the replayable fault buffer is client-controlled
445         // and may or may not be enabled at the time of S3 entry / exit.
446         // Also, the replayable fault buffer needs to be disabled / re-enabled
447         // across S3 entry / exit since the client is unaware that its state is
448         // lost during S3 entry.
449         //
450         if ((pKernelGmmu->getProperty(pKernelGmmu,
451                         PDB_PROP_KGMMU_REPLAYABLE_FAULT_BUFFER_IN_USE)) &&
452                         (pGpu->getProperty(pGpu, PDB_PROP_GPU_IN_PM_CODEPATH)))
453         {
454             status = kgmmuFaultBufferLoad_HAL(pGpu, pKernelGmmu,
455                                               REPLAYABLE_FAULT_BUFFER, GPU_GFID_PF);
456         }
457         return status;
458     }
459 
460     return NV_OK;
461 }
462 
463 NV_STATUS
464 kgmmuStateUnload_IMPL
465 (
466     OBJGPU     *pGpu,
467     KernelGmmu *pKernelGmmu,
468     NvU32       flags
469 )
470 {
471     NV_STATUS status = NV_OK;
472 
473     // Only if faultBuffer is enabled
474     if (!pKernelGmmu->getProperty(pKernelGmmu, PDB_PROP_KGMMU_FAULT_BUFFER_DISABLED))
475     {
476         status = kgmmuFaultBufferUnload_HAL(pGpu, pKernelGmmu,
477                                             NON_REPLAYABLE_FAULT_BUFFER, GPU_GFID_PF);
478         NV_ASSERT_OK_OR_RETURN(status);
479 
480         // See Note1:
481         if ((pKernelGmmu->getProperty(pKernelGmmu,
482                         PDB_PROP_KGMMU_REPLAYABLE_FAULT_BUFFER_IN_USE)) &&
483                         (pGpu->getProperty(pGpu, PDB_PROP_GPU_IN_PM_CODEPATH)))
484         {
485             status = kgmmuFaultBufferUnload_HAL(pGpu, pKernelGmmu,
486                                                 REPLAYABLE_FAULT_BUFFER, GPU_GFID_PF);
487         }
488         return status;
489     }
490 
491     return NV_OK;
492 }
493 
494 /*!
495  * Initializes KERN_GMMU state based on registry key overrides
496  *
497  * @param[in]  pGpu
498  * @param[in]  pKernelGmmu
499  */
500 static void
501 _kgmmuInitRegistryOverrides(OBJGPU *pGpu, KernelGmmu *pKernelGmmu)
502 {
503     NvU32 data;
504 
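    // Apply any registry-specified instance-location overrides for the GMMU
    // and BAR1 PDE/PTE apertures and memory attributes.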
505     memdescOverrideInstLoc(DRF_VAL(_REG_STR_RM, _INST_LOC, _PDE, pGpu->instLocOverrides),
506                            "GMMU PDE",
507                            &pKernelGmmu->PDEAperture,
508                            &pKernelGmmu->PDEAttr);
509     memdescOverrideInstLoc(DRF_VAL(_REG_STR_RM, _INST_LOC, _BAR_PDE, pGpu->instLocOverrides),
510                            "BAR1 PDE",
511                            &pKernelGmmu->PDEBAR1Aperture,
512                            &pKernelGmmu->PDEBAR1Attr);
513     memdescOverrideInstLoc(DRF_VAL(_REG_STR_RM, _INST_LOC, _PTE, pGpu->instLocOverrides),
514                            "GMMU PTE",
515                            &pKernelGmmu->PTEAperture,
516                            &pKernelGmmu->PTEAttr);
517     memdescOverrideInstLoc(DRF_VAL(_REG_STR_RM, _INST_LOC, _BAR_PTE, pGpu->instLocOverrides),
518                            "BAR1 PTE",
519                            &pKernelGmmu->PTEBAR1Aperture,
520                            &pKernelGmmu->PTEBAR1Attr);
521 
522     //
523     // Check if we want to disable big page size per address space
524     //
525     pKernelGmmu->bEnablePerVaspaceBigPage = IsGM20X(pGpu);
526     if (NV_OK == osReadRegistryDword(pGpu,
527                    NV_REG_STR_RM_DISABLE_BIG_PAGE_PER_ADDRESS_SPACE, &data))
528     {
529         pKernelGmmu->bEnablePerVaspaceBigPage = !data;
530     }
531 
532     if (NV_OK == osReadRegistryDword(pGpu,
533                    NV_REG_STR_FERMI_BIG_PAGE_SIZE, &data))
534     {
535         if (pGpu->optimizeUseCaseOverride !=
536             NV_REG_STR_RM_OPTIMIZE_COMPUTE_OR_SPARSE_TEX_DEFAULT)
537         {
538             NV_PRINTF(LEVEL_ERROR,
539                       "The %s regkey cannot be used with the %s regkey!\n",
540                       NV_REG_STR_FERMI_BIG_PAGE_SIZE,
541                       NV_REG_STR_RM_OPTIMIZE_COMPUTE_OR_SPARSE_TEX);
542             return;
543         }
544         else
545         {
546             switch (data)
547             {
548                 case NV_REG_STR_FERMI_BIG_PAGE_SIZE_64KB:
549                 case NV_REG_STR_FERMI_BIG_PAGE_SIZE_128KB:
550                     pKernelGmmu->overrideBigPageSize = data;
551                     break;
552                 default:
553                     break;
554             }
555         }
556     }
557     else if (pGpu->optimizeUseCaseOverride !=
558              NV_REG_STR_RM_OPTIMIZE_COMPUTE_OR_SPARSE_TEX_DEFAULT)
559     {
560         switch (pGpu->optimizeUseCaseOverride)
561         {
562             case NV_REG_STR_RM_OPTIMIZE_COMPUTE_OR_SPARSE_TEX_SPARSE_TEX:
563                 pKernelGmmu->overrideBigPageSize = RM_PAGE_SIZE_64K;
564                 break;
565             case NV_REG_STR_RM_OPTIMIZE_COMPUTE_OR_SPARSE_TEX_COMPUTE:
566                 pKernelGmmu->overrideBigPageSize = RM_PAGE_SIZE_128K;
567                 break;
568             default:
569                 break;
570         }
571     }
572 
573     // Check if HW fault buffer is disabled
574     if (NV_OK == osReadRegistryDword(pGpu,
575                                      NV_REG_STR_RM_DISABLE_HW_FAULT_BUFFER, &data))
576     {
577         NV_PRINTF(LEVEL_ERROR,
578                   "Overriding HW Fault buffer state to 0x%x due to regkey!\n",
579                   data);
580         pKernelGmmu->setProperty(pKernelGmmu, PDB_PROP_KGMMU_FAULT_BUFFER_DISABLED, data);
581     }
582 
583 }
584 
585 GMMU_APERTURE
586 kgmmuGetMemAperture_IMPL
587 (
588     KernelGmmu        *pKernelGmmu,
589     MEMORY_DESCRIPTOR *pMemDesc
590 )
591 {
592     switch (memdescGetAddressSpace(pMemDesc))
593     {
594         case ADDR_FBMEM:
595             return GMMU_APERTURE_VIDEO;
596         case ADDR_SYSMEM:
597             if (NV_MEMORY_CACHED == memdescGetCpuCacheAttrib(pMemDesc))
598             {
599                 return GMMU_APERTURE_SYS_COH;
600             }
601             return GMMU_APERTURE_SYS_NONCOH;
602         default:
603             NV_ASSERT(0);
604             return GMMU_APERTURE_INVALID;
605     }
606 }
607 
608 /*!
609  * Initialize GMMU format structures dependent on big page size.
610  */
611 NV_STATUS
612 kgmmuFmtInit_IMPL(KernelGmmu *pKernelGmmu)
613 {
614     NvU32       v;
615     NvU32       b;
616 
617     // Allocate and init MMU formats for the supported big page sizes.
618     for (v = 0; v < GMMU_FMT_MAX_VERSION_COUNT; ++v)
619     {
620         const NvU32      ver  = g_gmmuFmtVersions[v];
621         GMMU_FMT_FAMILY *pFam = pKernelGmmu->pFmtFamilies[v];
622         if (NULL != pFam)
623         {
624             for (b = 0; b < GMMU_FMT_MAX_BIG_PAGE_SIZES; ++b)
625             {
626                 const NvU32 bigPageShift = g_gmmuFmtBigPageShifts[b];
627 
628                 // Allocate +1 level for the last dual-level.
629                 const NvU32 numLevels = GMMU_FMT_MAX_LEVELS + 1;
630                 const NvU32 size = sizeof(GMMU_FMT) + sizeof(MMU_FMT_LEVEL) * numLevels;
631                 MMU_FMT_LEVEL *pLvls;
632 
633                 // Allocate format and levels in one chunk.
634                 pFam->pFmts[b] = portMemAllocNonPaged(size);
635                 NV_ASSERT_OR_RETURN((pFam->pFmts[b] != NULL), NV_ERR_NO_MEMORY);
636                 portMemSet(pFam->pFmts[b], 0, size);
637 
638                 // Levels stored contiguously after the format struct.
639                 pLvls = (MMU_FMT_LEVEL *)(pFam->pFmts[b] + 1);
640 
641                 // Common init.
642                 pFam->pFmts[b]->version    = ver;
643                 pFam->pFmts[b]->pRoot      = pLvls;
644                 pFam->pFmts[b]->pPdeMulti  = &pFam->pdeMulti;
645                 pFam->pFmts[b]->pPde       = &pFam->pde;
646                 pFam->pFmts[b]->pPte       = &pFam->pte;
647 
648                 kgmmuFmtInitLevels_HAL(pKernelGmmu, pLvls, numLevels, ver, bigPageShift);
649                 kgmmuFmtInitCaps_HAL(pKernelGmmu, pFam->pFmts[b]);
650             }
651         }
652     }
653 
654     return NV_OK;
655 }
656 
657 /*!
658  * Retrieve GMMU format family based on version.
659  */
660 const GMMU_FMT_FAMILY *
661 kgmmuFmtGetFamily_IMPL(KernelGmmu *pKernelGmmu, NvU32 version)
662 {
663     NvU32       v;
664 
665     // Find a matching format.
666     for (v = GMMU_FMT_MAX_VERSION_COUNT; v > 0; --v)
667     {
668         if (0 == version)
669         {
670             // Pick newest default version if none requested.
671             if (NULL != pKernelGmmu->pFmtFamilies[v - 1])
672             {
673                 return pKernelGmmu->pFmtFamilies[v - 1];
674             }
675         }
676         else if (g_gmmuFmtVersions[v - 1] == version)
677         {
678             return pKernelGmmu->pFmtFamilies[v - 1];
679         }
680     }
681 
682     return NULL;
683 }
684 
685 /*!
686  * Returns GMMU settings that are static after GPU state init/load is
687  * finished.
688  */
689 const NV2080_CTRL_INTERNAL_GMMU_GET_STATIC_INFO_PARAMS *
690 kgmmuGetStaticInfo_IMPL
691 (
692     OBJGPU *pGpu,
693     KernelGmmu *pKernelGmmu
694 )
695 {
696     // Bail out if the KernelGmmu object is not available (e.g. state init has not completed).
697     NV_ASSERT_OR_ELSE(pKernelGmmu != NULL, return NULL);
698 
699     return pKernelGmmu->pStaticInfo;
700 }
701 
702 /*!
703  * Retrieve GMMU format based on version and big page size.
704  */
705 const GMMU_FMT *
706 kgmmuFmtGet_IMPL(KernelGmmu *pKernelGmmu, NvU32 version, NvU64 bigPageSize)
707 {
708     const GMMU_FMT_FAMILY *pFmtFamily  = kgmmuFmtGetFamily(pKernelGmmu, version);
709 
710     if (NULL != pFmtFamily)
711     {
712         NvU32 b;
713 
714         // Pick default big page size if none requested.
715         if (0 == bigPageSize)
716         {
717             //
718             // Retrieve Big Page Size. If it is not yet set, set it to 64K.
719             // Useful when this method is invoked before big page size is set.
720             //
721             if (0 == (bigPageSize = kgmmuGetBigPageSize_HAL(pKernelGmmu)))
722                 bigPageSize = NVBIT64(16);
723         }
724 
725         // Find a matching format.
726         for (b = 0; b < GMMU_FMT_MAX_BIG_PAGE_SIZES; ++b)
727         {
728             if (NVBIT64(g_gmmuFmtBigPageShifts[b]) == bigPageSize)
729             {
730                 return pFmtFamily->pFmts[b];
731             }
732         }
733     }
734 
735     return NULL;
736 }
737 
738 /*!
739  * Check if a big page size is supported.
740  */
741 NvBool
742 kgmmuFmtIsBigPageSizeSupported_IMPL(KernelGmmu *pKernelGmmu, NvU64 bigPageSize)
743 {
744     if (kgmmuIsPerVaspaceBigPageEn(pKernelGmmu))
745     {
746         return NV_TRUE;
747     }
748     return kgmmuGetBigPageSize_HAL(pKernelGmmu) == bigPageSize;
749 }
750 
751 /*!
752  * @brief Returns the latest supported MMU format.
753  *
754  * @param[in]  pGpu          OBJGPU pointer
755  * @param[in]  pKernelGmmu   KernelGmmu pointer
756  *
757  * @returns const GMMU_FMT*
758  */
759 const GMMU_FMT*
760 kgmmuFmtGetLatestSupportedFormat_IMPL(OBJGPU *pGpu, KernelGmmu *pKernelGmmu)
761 {
762     NvU32       v;
763     NvU32       maxFmtVersionSupported = 0;
764 
765     for (v = 0; v < GMMU_FMT_MAX_VERSION_COUNT; ++v)
766     {
767         const NvU32 ver = g_gmmuFmtVersions[v];
768         if (kgmmuFmtIsVersionSupported_HAL(pKernelGmmu, ver))
769         {
770             maxFmtVersionSupported = maxFmtVersionSupported < ver ? ver : maxFmtVersionSupported;
771         }
772     }
773 
774     return kgmmuFmtGet(pKernelGmmu, maxFmtVersionSupported, 0);
775 }
776 
777 /*!
778  * @brief Calculates the total memory required for the page tables needed
779  *        to translate a given VA range.
780  *
781  * @param     pGpu
782  * @param     pKernelGmmu
783  * @param[in] pFmt              Pointer to GMMU format
784  * @param[in] vaBase            Start VA
785  * @param[in] vaLimit           End VA
786  * @param[in] pageSizeLockMask  Mask of page sizes locked down at VA reservation
787  *
788  * @returns total size of page tables.
789  */
790 NvU64
791 kgmmuGetSizeOfPageTables_IMPL
792 (
793     OBJGPU         *pGpu,
794     KernelGmmu     *pKernelGmmu,
795     const GMMU_FMT *pFmt,
796     NvU64           vaBase,
797     NvU64           vaLimit,
798     NvU64           pageSizeLockMask
799 )
800 {
801     const MMU_FMT_LEVEL *pPgTbl         = NULL;
802     NvU64                pgTblSize      = 0;
803     NvU64                numPgTblsCeil;
804     NvU64                numPgTblsFloor;
805     NvU64                numEntries;
806     NvU32                pageShift;
807 
808     // Loop over all page table sizes in mask
809     FOR_EACH_INDEX_IN_MASK(64, pageShift, pageSizeLockMask)
810     {
811         pPgTbl = mmuFmtFindLevelWithPageShift(pFmt->pRoot, pageShift);
812 
813         //
814         // Do not consider page directories. They are handled by
815         // @ref kgmmuGetSizeOfPageDirs.
816         //
817         if (!pPgTbl->bPageTable || (pPgTbl->numSubLevels != 0))
818         {
819             continue;
820         }
821 
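        //
        // Each page table at this level maps 2^(virtAddrBitHi + 1) bytes of VA,
        // so count how many such tables the range [vaBase, vaLimit] touches.
        //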
822         numPgTblsCeil  = NV_DIV_AND_CEIL(vaLimit, NVBIT64(pPgTbl->virtAddrBitHi + 1)) -
823                          (vaBase / NVBIT64(pPgTbl->virtAddrBitHi + 1)) + 1;
824         numPgTblsFloor = vaLimit / NVBIT64(pPgTbl->virtAddrBitHi + 1);
825 
826         // If full page tables are not used, allocate only as much as needed.
827         if (numPgTblsFloor == 0)
828         {
829             numEntries = mmuFmtVirtAddrToEntryIndex(pPgTbl, vaLimit) -
830                          mmuFmtVirtAddrToEntryIndex(pPgTbl, vaBase) + 1;
831             pgTblSize  += numEntries * pPgTbl->entrySize;
832         }
833         else
834         {
835             pgTblSize += numPgTblsCeil * mmuFmtLevelSize(pPgTbl);
836         }
837     }
838     FOR_EACH_INDEX_IN_MASK_END
839 
840     return pgTblSize;
841 }
842 
843 /*!
844  * @brief Calculates the total memory required for the page directories needed
845  *        to translate a given VA range.
846  *
847  * @param       pGpu
848  * @param       pKernelGmmu
849  * @param[in]   pFmt      Pointer to GMMU format
850  * @param[in]   vaBase    Start VA
851  * @param[in]   vaLimit   End VA
852  *
853  * @returns total size of page directories
854  */
855 NvU64
856 kgmmuGetSizeOfPageDirs_IMPL
857 (
858     OBJGPU         *pGpu,
859     KernelGmmu     *pKernelGmmu,
860     const GMMU_FMT *pFmt,
861     NvU64           vaBase,
862     NvU64           vaLimit,
863     NvU64           pageSizeLockMask
864 )
865 {
866     const MMU_FMT_LEVEL *pLevel = NULL;
867     NvU64                size   = 0;
868     NvU16                i;
869 
870     NV_ASSERT_OR_RETURN(pFmt != NULL, 0);
871 
872     pLevel = pFmt->pRoot;
873 
874     //
875     // Retain only the lowest set bit
876     //
877     // If the lowest set bit corresponds to a leaf page table (4K or 64K), we'll
878     // calculate memory for all upper-level page directories; if the set bit
879     // corresponds to an upper-level page directory, we'll factor in all levels
880     // from the root up to that level.
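    // For example, a mask of (4K | 2M) == 0x201000 reduces to 0x1000 here.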
881     //
882     pageSizeLockMask = pageSizeLockMask & -((NvS64)pageSizeLockMask);
883 
884     // Accumulate size for all Page Directories.
885     for (i = 0; i < GMMU_FMT_MAX_LEVELS - 1; i++)
886     {
887         NvU64 vaPerEntry = mmuFmtEntryVirtAddrMask(pLevel) + 1;
888         NvU64 numEntries = NV_DIV_AND_CEIL(vaLimit, vaPerEntry) -
889                            (vaBase / vaPerEntry) + 1;
890         NvU64 levelSize  = numEntries * pLevel->entrySize;
891         levelSize        = NV_ROUNDUP(levelSize, RM_PAGE_SIZE);
892 
893         // Stop accumulating size once we are beyond the specified level.
894         if (mmuFmtLevelPageSize(pLevel) < pageSizeLockMask)
895         {
896             break;
897         }
898 
899         size += levelSize;
900 
901         // If there's one sublevel choose that.
902         if (pLevel->numSubLevels == 1)
903         {
904             pLevel = &(pLevel->subLevels[0]);
905         }
906         else
907         {
908             // Choose the 4K page size sublevel.
909             pLevel = &(pLevel->subLevels[1]);
910         }
911         NV_ASSERT_OR_RETURN(pLevel != NULL, 0);
912 
913         // Stop accumulating size if we've exhausted all Page Dirs.
914         if (pLevel->bPageTable && (pLevel->numSubLevels == 0))
915         {
916             break;
917         }
918     }
919 
920     return size;
921 }
922 
923 /*
924  * Fill comptag field in PTE.
925  */
926 void kgmmuFieldSetKindCompTags_IMPL
927 (
928     KernelGmmu          *pGmmu,
929     const GMMU_FMT      *pFmt,
930     const MMU_FMT_LEVEL *pLevel,
931     const COMPR_INFO    *pCompr,
932     NvU64                physAddr,
933     NvU64                surfOffset,
934     NvU32                pteIndex,
935     NvU8                *pEntries
936 )
937 {
938     OBJGPU                            *pGpu                = ENG_GET_GPU(pGmmu);
939     GMMU_COMPR_INFO                    comprInfo           = {0};
940 
941     comprInfo.compressedKind        = pCompr->kind;
942     comprInfo.compPageShift         = pCompr->compPageShift;
943 
944     if (memmgrIsKind_HAL(GPU_GET_MEMORY_MANAGER(pGpu), FB_IS_KIND_COMPRESSIBLE, pCompr->kind))
945     {
946         const MEMORY_SYSTEM_STATIC_CONFIG *pMemorySystemConfig =
947             kmemsysGetStaticConfig(pGpu, GPU_GET_KERNEL_MEMORY_SYSTEM(pGpu));
948 
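        //
        // Physically-based comptags are derived below from the physical address
        // and surface offset; otherwise the caller-supplied comptag range in
        // pCompr is used as-is.
        //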
949         if (pCompr->bPhysBasedComptags)
950         {
951             NvBool bCallingContextPlugin;
952 
953             NV_ASSERT(pMemorySystemConfig->bOneToOneComptagLineAllocation || pMemorySystemConfig->bUseRawModeComptaglineAllocation);
954 
955             NV_ASSERT_OR_RETURN_VOID(vgpuIsCallingContextPlugin(pGpu, &bCallingContextPlugin) == NV_OK);
956             if (IS_VIRTUAL_WITH_SRIOV(pGpu) || bCallingContextPlugin ||
957                 pMemorySystemConfig->bUseRawModeComptaglineAllocation)
958             {
959                 // In raw mode or when SR-IOV is enabled, HW handles compression tags
960                 comprInfo.compTagLineMin = 1;
961             }
962             else
963             {
964                 comprInfo.compTagLineMin = memmgrDetermineComptag_HAL(pGpu, GPU_GET_MEMORY_MANAGER(pGpu), physAddr);
965             }
966 
967             comprInfo.compPageIndexLo = surfOffset >> pCompr->compPageShift;
968             comprInfo.compPageIndexHi = (surfOffset + mmuFmtLevelPageSize(pLevel) - 1) >> pCompr->compPageShift;
969             comprInfo.compTagLineMultiplier = 1;
970         }
971         else
972         {
973             comprInfo.compPageIndexLo       = pCompr->compPageIndexLo;
974             comprInfo.compPageIndexHi       = pCompr->compPageIndexHi;
975             comprInfo.compTagLineMin        = pCompr->compTagLineMin;
976             comprInfo.compTagLineMultiplier = pCompr->compTagLineMultiplier;
977         }
978     }
979 
980     gmmuFmtInitPteCompTags(pFmt, pLevel, &comprInfo, surfOffset, pteIndex, 1, pEntries);
981 }
982 
983 NV_STATUS
984 kgmmuFaultBufferGetAddressSpace_IMPL
985 (
986     OBJGPU               *pGpu,
987     KernelGmmu           *pKernelGmmu,
988     NvU32                 index,
989     NvU32                *pFaultBufferAddrSpace,
990     NvU32                *pFaultBufferAttr
991 )
992 {
993     NvU32 faultBufferAddrSpace = ADDR_UNKNOWN;
994     NvU32 faultBufferAttr = 0;
995     NvBool bAllocInVidmem = NV_FALSE;
996 
997     bAllocInVidmem = gpuIsCCFeatureEnabled(pGpu);
998 
999     NV_ASSERT_OR_RETURN((index < NUM_FAULT_BUFFERS), NV_ERR_INVALID_ARGUMENT);
1000 
1001     if (index == NON_REPLAYABLE_FAULT_BUFFER)
1002     {
1003         faultBufferAddrSpace = bAllocInVidmem ? ADDR_FBMEM : ADDR_SYSMEM;
1004         faultBufferAttr      = bAllocInVidmem ? NV_MEMORY_UNCACHED : NV_MEMORY_CACHED;
1005         memdescOverrideInstLoc(DRF_VAL(_REG_STR_RM, _INST_LOC_3, _UVM_FAULT_BUFFER_NONREPLAYABLE, pGpu->instLocOverrides3),
1006                                "UVM non-replayable fault", &faultBufferAddrSpace, &faultBufferAttr);
1007     }
1008     else if (index == REPLAYABLE_FAULT_BUFFER)
1009     {
1010         faultBufferAddrSpace = bAllocInVidmem ? ADDR_FBMEM : ADDR_SYSMEM;
1011         faultBufferAttr      = bAllocInVidmem ? NV_MEMORY_UNCACHED : NV_MEMORY_CACHED;
1012         memdescOverrideInstLoc(DRF_VAL(_REG_STR_RM, _INST_LOC_4, _UVM_FAULT_BUFFER_REPLAYABLE, pGpu->instLocOverrides4),
1013                                "UVM replayable fault", &faultBufferAddrSpace, &faultBufferAttr);
1014     }
1015     //
1016     // Whenever Hopper CC is enabled, HW requires both replayable and non-replayable
1017     // fault buffers to be in CPR vidmem. It would be illegal to allocate the buffers
1018     // in any other aperture
1019     //
1020     if (bAllocInVidmem && (faultBufferAddrSpace == ADDR_SYSMEM))
1021     {
1022         NV_PRINTF(LEVEL_ERROR, "Fault buffers must be in CPR vidmem when HCC is enabled\n");
1023         NV_ASSERT(0);
1024         return NV_ERR_INVALID_ARGUMENT;
1025     }
1026 
1027     if (pFaultBufferAddrSpace != NULL)
1028     {
1029         *pFaultBufferAddrSpace = faultBufferAddrSpace;
1030     }
1031 
1032     if (pFaultBufferAttr != NULL)
1033     {
1034         *pFaultBufferAttr = faultBufferAttr;
1035     }
1036 
1037     return NV_OK;
1038 }
1039 
1040 NV_STATUS
1041 kgmmuFaultBufferCreateMemDesc_IMPL
1042 (
1043     OBJGPU               *pGpu,
1044     KernelGmmu           *pKernelGmmu,
1045     NvU32                 index,
1046     NvU32                 faultBufferSize,
1047     NvU64                 memDescFlags,
1048     MEMORY_DESCRIPTOR   **ppMemDesc
1049 )
1050 {
1051     NV_STATUS status;
1052     MEMORY_DESCRIPTOR *pMemDesc = NULL;
1053     NvU32 faultBufferAddrSpace = ADDR_UNKNOWN;
1054     NvU32 faultBufferAttr = 0;
1055     NvBool isContiguous = NV_FALSE;
1056 
1057     NV_ASSERT_OR_RETURN((index < NUM_FAULT_BUFFERS), NV_ERR_INVALID_ARGUMENT);
1058 
1059     status = kgmmuFaultBufferGetAddressSpace(pGpu, pKernelGmmu, index,
1060                                              &faultBufferAddrSpace, &faultBufferAttr);
1061     if (status != NV_OK)
1062     {
1063         return status;
1064     }
1065 
1066     if ((IS_VIRTUAL(pGpu) && gpuIsWarBug200577889SriovHeavyEnabled(pGpu))
1067         || gpuIsCCFeatureEnabled(pGpu)
1068        )
1069     {
1070         // Allocate contiguous fault buffers for SR-IOV Heavy
1071         // Fault buffers get allocated in CPR vidmem when Hopper CC is enabled
1072         // We're almost assured to get contiguous allocations in vidmem
1073         isContiguous = NV_TRUE;
1074     }
1075 
1076     status = memdescCreate(&pMemDesc, pGpu,
1077                            RM_PAGE_ALIGN_UP(faultBufferSize), 0, isContiguous,
1078                            faultBufferAddrSpace, faultBufferAttr,
1079                            (memDescFlags | MEMDESC_FLAGS_LOST_ON_SUSPEND));
1080     if (status != NV_OK)
1081     {
1082         return status;
1083     }
1084 
1085     //
1086     // The GPU doesn't read fault buffer memory, so if the fault buffers are in sysmem, set the GPU
1087     // cache attribute to UNCACHED. The volatile bit in the PTEs makes HUB use L2-bypass mode, saving
1088     // the extra cycles that would otherwise be spent caching in L2 while the MMU writes fault packets.
1089     //
1090     if (faultBufferAddrSpace == ADDR_SYSMEM &&
1091         pKernelGmmu->getProperty(pKernelGmmu, PDB_PROP_KGMMU_SYSMEM_FAULT_BUFFER_GPU_UNCACHED))
1092     {
1093         memdescSetGpuCacheAttrib(pMemDesc, NV_MEMORY_UNCACHED);
1094     }
1095 
1096     memdescSetPageSize(pMemDesc, AT_GPU, RM_PAGE_SIZE);
1097 
1098     *ppMemDesc = pMemDesc;
1099 
1100     return NV_OK;
1101 }
1102 
1103 NV_STATUS
1104 kgmmuFaultBufferUnregister_IMPL
1105 (
1106     OBJGPU               *pGpu,
1107     KernelGmmu           *pKernelGmmu,
1108     NvU32                 index
1109 )
1110 {
1111     struct HW_FAULT_BUFFER *pFaultBuffer;
1112     MEMORY_DESCRIPTOR      *pMemDesc;
1113 
1114     pFaultBuffer = &pKernelGmmu->mmuFaultBuffer[GPU_GFID_PF].hwFaultBuffers[index];
1115     pMemDesc = pFaultBuffer->pFaultBufferMemDesc;
1116 
1117     pFaultBuffer->faultBufferSize = 0;
1118     pFaultBuffer->pFaultBufferMemDesc = NULL;
1119 
1120     memdescDestroy(pMemDesc);
1121 
1122     return NV_OK;
1123 }
1124 
1125 NV_STATUS
1126 kgmmuFaultBufferAlloc_IMPL
1127 (
1128     OBJGPU         *pGpu,
1129     KernelGmmu     *pKernelGmmu,
1130     NvU32           index,
1131     NvU32           faultBufferSize
1132 )
1133 {
1134     NV_STATUS status;
1135     MEMORY_DESCRIPTOR *pMemDesc = NULL;
1136     struct HW_FAULT_BUFFER *pFaultBuffer;
1137     const char *name = (index == REPLAYABLE_FAULT_BUFFER ? NV_RM_SURF_NAME_REPLAYABLE_FAULT_BUFFER : NV_RM_SURF_NAME_NONREPLAYABLE_FAULT_BUFFER);
1138 
1139     NV_ASSERT_OR_RETURN((index < NUM_FAULT_BUFFERS), NV_ERR_INVALID_ARGUMENT);
1140 
1141     if (pKernelGmmu->getProperty(pKernelGmmu, PDB_PROP_KGMMU_FAULT_BUFFER_DISABLED))
1142         return NV_OK;
1143 
1144     pFaultBuffer = &pKernelGmmu->mmuFaultBuffer[GPU_GFID_PF].hwFaultBuffers[index];
1145 
1146     status = kgmmuFaultBufferCreateMemDesc(pGpu, pKernelGmmu, index, faultBufferSize,
1147                                            MEMDESC_FLAGS_NONE, &pMemDesc);
1148     if (status != NV_OK)
1149     {
1150         return status;
1151     }
1152 
1153     memdescTagAlloc(status, NV_FB_ALLOC_RM_INTERNAL_OWNER_UNNAMED_TAG_31,
1154                     pMemDesc);
1155     if (status != NV_OK)
1156     {
1157         memdescDestroy(pMemDesc);
1158         return status;
1159     }
1160 
1161     memdescSetName(pGpu, pMemDesc, name, NULL);
1162 
1163     pFaultBuffer->faultBufferSize = faultBufferSize;
1164     pFaultBuffer->pFaultBufferMemDesc = pMemDesc;
1165 
1166     return status;
1167 }
1168 
1169 NV_STATUS
1170 kgmmuFaultBufferFree_IMPL
1171 (
1172     OBJGPU               *pGpu,
1173     KernelGmmu           *pKernelGmmu,
1174     NvU32                 index
1175 )
1176 {
1177     struct HW_FAULT_BUFFER *pFaultBuffer;
1178 
1179     NV_ASSERT_OR_RETURN((index < NUM_FAULT_BUFFERS), NV_ERR_INVALID_ARGUMENT);
1180 
1181     if (pKernelGmmu->getProperty(pKernelGmmu, PDB_PROP_KGMMU_FAULT_BUFFER_DISABLED))
1182         return NV_OK;
1183 
1184     pFaultBuffer = &pKernelGmmu->mmuFaultBuffer[GPU_GFID_PF].hwFaultBuffers[index];
1185 
1186     memdescFree(pFaultBuffer->pFaultBufferMemDesc);
1187 
1188     kgmmuFaultBufferUnregister(pGpu, pKernelGmmu, index);
1189 
1190     return NV_OK;
1191 }
1192 
1193 NV_STATUS
1194 kgmmuFaultBufferReplayableAllocate_IMPL
1195 (
1196     OBJGPU               *pGpu,
1197     KernelGmmu           *pKernelGmmu,
1198     NvHandle              hClient,
1199     NvHandle              hObject
1200 )
1201 {
1202     NV_STATUS               status;
1203     struct HW_FAULT_BUFFER *pFaultBuffer;
1204     NvU32                   faultBufferSize;
1205     NvU32                   numBufferPages;
1206     const NV2080_CTRL_INTERNAL_GMMU_GET_STATIC_INFO_PARAMS *pStaticInfo = kgmmuGetStaticInfo(pGpu, pKernelGmmu);
1207 
1208     if (IS_VIRTUAL_WITHOUT_SRIOV(pGpu) ||
1209         pKernelGmmu->getProperty(pKernelGmmu, PDB_PROP_KGMMU_FAULT_BUFFER_DISABLED))
1210     {
1211         return NV_OK;
1212     }
1213 
1214     pFaultBuffer = &pKernelGmmu->mmuFaultBuffer[GPU_GFID_PF].hwFaultBuffers[REPLAYABLE_FAULT_BUFFER];
1215     if (pFaultBuffer->pFaultBufferMemDesc != NULL)
1216     {
1217         return NV_ERR_NOT_SUPPORTED;
1218     }
1219 
1220     faultBufferSize = pStaticInfo->replayableFaultBufferSize;
1221 
1222     status = kgmmuFaultBufferAlloc(pGpu, pKernelGmmu,
1223                                    REPLAYABLE_FAULT_BUFFER,
1224                                    faultBufferSize);
1225     if (status != NV_OK)
1226     {
1227         return status;
1228     }
1229 
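    //
    // On GSP-client builds the buffer pages are handed to GSP-RM for
    // registration; otherwise the fault buffer is programmed directly
    // through the HAL.
    //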
1230     if (IS_GSP_CLIENT(pGpu))
1231     {
1232         RM_API *pRmApi = GPU_GET_PHYSICAL_RMAPI(pGpu);
1233         NV2080_CTRL_INTERNAL_GMMU_REGISTER_FAULT_BUFFER_PARAMS *pParams;
1234 
1235         pParams = portMemAllocNonPaged(sizeof(*pParams));
1236         if (pParams == NULL)
1237         {
1238             kgmmuFaultBufferFree(pGpu, pKernelGmmu, REPLAYABLE_FAULT_BUFFER);
1239             return NV_ERR_NO_MEMORY;
1240         }
1241         portMemSet(pParams, 0, sizeof(*pParams));
1242 
1243         numBufferPages = RM_PAGE_ALIGN_UP(faultBufferSize) / RM_PAGE_SIZE;
1244         if (numBufferPages > NV_ARRAY_ELEMENTS(pParams->faultBufferPteArray))
1245         {
1246             portMemFree(pParams);
1247             kgmmuFaultBufferFree(pGpu, pKernelGmmu, REPLAYABLE_FAULT_BUFFER);
1248             return NV_ERR_BUFFER_TOO_SMALL;
1249         }
1250 
1251         memdescGetPhysAddrs(pFaultBuffer->pFaultBufferMemDesc,
1252                             AT_GPU, 0, RM_PAGE_SIZE,
1253                             numBufferPages, pParams->faultBufferPteArray);
1254 
1255         pParams->hClient            = hClient;
1256         pParams->hObject            = hObject;
1257         pParams->faultBufferSize    = faultBufferSize;
1258 
1259         status = pRmApi->Control(pRmApi,
1260                                  pGpu->hInternalClient,
1261                                  pGpu->hInternalSubdevice,
1262                                  NV2080_CTRL_CMD_INTERNAL_GMMU_REGISTER_FAULT_BUFFER,
1263                                  pParams, sizeof(*pParams));
1264 
1265         portMemFree(pParams);
1266         if (status != NV_OK)
1267         {
1268             kgmmuFaultBufferFree(pGpu, pKernelGmmu, REPLAYABLE_FAULT_BUFFER);
1269             return status;
1270         }
1271     }
1272     else
1273     {
1274         status = kgmmuFaultBufferLoad_HAL(pGpu, pKernelGmmu, REPLAYABLE_FAULT_BUFFER, GPU_GFID_PF);
1275 
1276         if (status != NV_OK)
1277         {
1278             kgmmuFaultBufferFree(pGpu, pKernelGmmu, REPLAYABLE_FAULT_BUFFER);
1279             return status;
1280         }
1281 
1282         // for non-gsp builds, set the pdb property here.
1283         pKernelGmmu->setProperty(pKernelGmmu,
1284                                  PDB_PROP_KGMMU_REPLAYABLE_FAULT_BUFFER_IN_USE,
1285                                  NV_TRUE);
1286     }
1287 
1288     pKernelGmmu->mmuFaultBuffer[GPU_GFID_PF].hFaultBufferClient = hClient;
1289     pKernelGmmu->mmuFaultBuffer[GPU_GFID_PF].hFaultBufferObject = hObject;
1290     pKernelGmmu->mmuFaultBuffer[GPU_GFID_PF].faultBufferGenerationCounter = 0;
1291 
1292     return NV_OK;
1293 }
1294 
1295 NV_STATUS
1296 kgmmuFaultBufferReplayableDestroy_IMPL
1297 (
1298     OBJGPU      *pGpu,
1299     KernelGmmu  *pKernelGmmu
1300 )
1301 {
1302     NV_STATUS               status = NV_OK;
1303     struct HW_FAULT_BUFFER *pFaultBuffer;
1304 
1305     if (IS_VIRTUAL_WITHOUT_SRIOV(pGpu) ||
1306         pKernelGmmu->getProperty(pKernelGmmu, PDB_PROP_KGMMU_FAULT_BUFFER_DISABLED))
1307     {
1308         return NV_OK;
1309     }
1310 
1311     pFaultBuffer = &pKernelGmmu->mmuFaultBuffer[GPU_GFID_PF].hwFaultBuffers[REPLAYABLE_FAULT_BUFFER];
1312     if (pFaultBuffer->pFaultBufferMemDesc == NULL)
1313     {
1314         return NV_OK;
1315     }
1316 
1317     pKernelGmmu->mmuFaultBuffer[GPU_GFID_PF].hFaultBufferClient = 0;
1318     pKernelGmmu->mmuFaultBuffer[GPU_GFID_PF].hFaultBufferObject = 0;
1319 
1320     if (IS_GSP_CLIENT(pGpu))
1321     {
1322         RM_API *pRmApi = GPU_GET_PHYSICAL_RMAPI(pGpu);
1323         status = pRmApi->Control(pRmApi,
1324                                  pGpu->hInternalClient,
1325                                  pGpu->hInternalSubdevice,
1326                                  NV2080_CTRL_CMD_INTERNAL_GMMU_UNREGISTER_FAULT_BUFFER,
1327                                  NULL, 0);
1328         if (status != NV_OK)
1329         {
1330             NV_PRINTF(LEVEL_ERROR,
1331                       "Unregistering Replayable Fault buffer failed (status=0x%08x), proceeding...\n",
1332                       status);
1333         }
1334     }
1335     else
1336     {
1337         status = kgmmuFaultBufferUnload_HAL(pGpu, pKernelGmmu, REPLAYABLE_FAULT_BUFFER, GPU_GFID_PF);
1338         if (status != NV_OK)
1339         {
1340             NV_PRINTF(LEVEL_ERROR,
1341                       "Unloading Replayable Fault buffer failed (status=0x%08x), proceeding...\n",
1342                       status);
1343         }
1344         else
1345         {
1346             // for non-gsp builds, reset the pdb property here.
1347             pKernelGmmu->setProperty(pKernelGmmu,
1348                                      PDB_PROP_KGMMU_REPLAYABLE_FAULT_BUFFER_IN_USE,
1349                                      NV_FALSE);
1350         }
1351     }
1352 
1353     if (RMCFG_FEATURE_PLATFORM_GSP)
1354     {
1355         status = kgmmuFaultBufferUnregister(pGpu, pKernelGmmu, REPLAYABLE_FAULT_BUFFER);
1356     }
1357     else
1358     {
1359         status = kgmmuFaultBufferFree(pGpu, pKernelGmmu, REPLAYABLE_FAULT_BUFFER);
1360     }
1361 
1362     if (status != NV_OK)
1363     {
1364         NV_PRINTF(LEVEL_ERROR,
1365                   "Destroying Replayable Fault buffer failed (status=0x%08x), proceeding...\n",
1366                   status);
1367     }
1368 
1369     return NV_OK;
1370 }
1371 
1372 /*!
1373  * @brief: Encodes peer addresses to support NVSwitch systems.
1374  *
1375  * This function prepends the fabricBaseAddress to a physical address in order
1376  * to generate a unique peer address from the global fabric address space.
1377  *
1378  * @param[in] pAddresses        : Array of physical addresses to be encoded.
1379  * @param[in] fabricBaseAddress : Unique fabric base address.
1380  * @param[in] count             : Count of physical addresses.
1381  */
1382 static void
1383 _kgmmuEncodePeerAddrs
1384 (
1385     NvU64              *pAddresses,
1386     NvU64               fabricBaseAddress,
1387     NvU64               count
1388 )
1389 {
1390     NvU64 i;
1391 
1392     //
1393     // If there is no fabric address, this is a NOP. Note that this also acts as
1394     // an early-out path for other PEER addressing.
1395     //
1396     if (fabricBaseAddress == NVLINK_INVALID_FABRIC_ADDR)
1397     {
1398         return;
1399     }
1400 
1401     for (i = 0; i < count; i++)
1402     {
1403         pAddresses[i] = fabricBaseAddress + pAddresses[i];
1404     }
1405 }
1406 
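/*!
 * @brief Encodes an array of physical addresses for the given aperture.
 *
 * Sysmem addresses are encoded via kgmmuEncodeSysmemAddrs_HAL, peer addresses
 * have the fabric base address applied, and all other apertures are left
 * unchanged.
 */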
1407 void
1408 kgmmuEncodePhysAddrs_IMPL
1409 (
1410     KernelGmmu         *pKernelGmmu,
1411     const GMMU_APERTURE aperture,
1412     NvU64              *pAddresses,
1413     NvU64               fabricBaseAddress,
1414     NvU64               count
1415 )
1416 {
1417     NV_ASSERT(aperture != GMMU_APERTURE_INVALID);
1418 
1419     if (aperture == GMMU_APERTURE_SYS_COH ||
1420         aperture == GMMU_APERTURE_SYS_NONCOH)
1421     {
1422         kgmmuEncodeSysmemAddrs_HAL(pKernelGmmu, pAddresses, count);
1423     }
1424     else if (aperture == GMMU_APERTURE_PEER)
1425     {
1426         _kgmmuEncodePeerAddrs(pAddresses, fabricBaseAddress, count);
1427     }
1428     else
1429     {
1430         return;
1431     }
1432 }
1433 
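/*!
 * @brief Single-address convenience wrapper around kgmmuEncodePhysAddrs().
 */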
1434 NvU64
1435 kgmmuEncodePhysAddr_IMPL
1436 (
1437     KernelGmmu         *pKernelGmmu,
1438     const GMMU_APERTURE aperture,
1439     NvU64               physAddr,
1440     NvU64               fabricBaseAddress
1441 )
1442 {
1443     kgmmuEncodePhysAddrs(pKernelGmmu, aperture, &physAddr, fabricBaseAddress, 1);
1444     return physAddr;
1445 }
1446 
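/*!
 * Queue copy callback for the client shadow fault buffer: copies 'count'
 * messages of 'msgSize' bytes between the caller's buffer and the shadow
 * buffer slot at 'opIdx', in the direction selected by 'bCopyIn'.
 */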
1447 static void
1448 _kgmmuClientShadowBufferQueueCopyData
1449 (
1450     NvLength      msgSize,
1451     NvLength      opIdx,
1452     QueueContext *pCtx,
1453     void         *pData,
1454     NvLength      count,
1455     NvBool        bCopyIn
1456 )
1457 {
1458     NvLength size;
1459     GMMU_CLIENT_SHADOW_FAULT_BUFFER *pClientShadowFaultBuffer = pCtx->pData;
1460     NvU8 *pQueueData, *pClientData = pData;
1461     void *pDst, *pSrc;
1462 
1463     if (count == 0)
1464         return;
1465 
1466     size = count * msgSize;
1467     pQueueData = KERNEL_POINTER_FROM_NvP64(NvU8 *, pClientShadowFaultBuffer->pBufferAddress);
1468     pQueueData = pQueueData + (opIdx * msgSize);
1469 
1470     pDst = bCopyIn ? pQueueData : pClientData;
1471     pSrc = bCopyIn ? pClientData : pQueueData;
1472     portMemCopy(pDst, size, pSrc, size);
1473 }
1474 
1475 static NV_STATUS
1476 _kgmmuClientShadowFaultBufferQueueAllocate
1477 (
1478     OBJGPU           *pGpu,
1479     KernelGmmu       *pKernelGmmu,
1480     FAULT_BUFFER_TYPE index
1481 )
1482 {
1483     NV_STATUS status;
1484     GMMU_CLIENT_SHADOW_FAULT_BUFFER *pClientShadowFaultBuffer;
1485     MEMORY_DESCRIPTOR *pQueueMemDesc;
1486     NvU64 flags = MEMDESC_FLAGS_NONE;
1487 
1488     //
1489     // On systems with SEV enabled, the client shadow buffers should be allocated
1490     // in unprotected sysmem as GSP will be writing the fault packets to these
1491     // buffers. Since GSP will be encrypting the fault packets, we don't risk
1492     // leaking any information
1493     //
1494     flags |= MEMDESC_FLAGS_ALLOC_IN_UNPROTECTED_MEMORY;
1495 
1496     //
1497     // Shadow fault buffers are not implemented using circular queues when
1498     // Hopper CC is enabled
1499     //
1500     if (gpuIsCCFeatureEnabled(pGpu) && gpuIsGspOwnedFaultBuffersEnabled(pGpu))
1501         return NV_OK;
1502 
1503     pClientShadowFaultBuffer = &pKernelGmmu->mmuFaultBuffer[GPU_GFID_PF].clientShadowFaultBuffer[index];
1504 
1505     status = memdescCreate(&pQueueMemDesc, pGpu,
1506                            sizeof(GMMU_SHADOW_FAULT_BUF), RM_PAGE_SIZE,
1507                            NV_TRUE, ADDR_SYSMEM, NV_MEMORY_CACHED,
1508                            flags);
1509     if (status != NV_OK)
1510     {
1511         return status;
1512     }
1513 
1514     memdescTagAlloc(status, NV_FB_ALLOC_RM_INTERNAL_OWNER_UNNAMED_TAG_32,
1515                     pQueueMemDesc);
1516     if (status != NV_OK)
1517     {
1518         memdescDestroy(pQueueMemDesc);
1519         return status;
1520     }
1521 
1522     status = memdescMap(pQueueMemDesc, 0,
1523                         memdescGetSize(pQueueMemDesc),
1524                         NV_TRUE, NV_PROTECT_READ_WRITE,
1525                         &pClientShadowFaultBuffer->pQueueAddress,
1526                         &pClientShadowFaultBuffer->pQueuePriv);
1527     if (status != NV_OK)
1528     {
1529         memdescFree(pQueueMemDesc);
1530         memdescDestroy(pQueueMemDesc);
1531         return status;
1532     }
1533 
1534     pClientShadowFaultBuffer->queueContext.pCopyData = _kgmmuClientShadowBufferQueueCopyData;
1535     pClientShadowFaultBuffer->queueContext.pData = pClientShadowFaultBuffer;
1536     pClientShadowFaultBuffer->pQueueMemDesc = pQueueMemDesc;
1537 
1538     return NV_OK;
1539 }
1540 
1541 void
1542 kgmmuClientShadowFaultBufferQueueDestroy_IMPL
1543 (
1544     OBJGPU           *pGpu,
1545     KernelGmmu       *pKernelGmmu,
1546     NvBool            bFreeQueue,
1547     FAULT_BUFFER_TYPE index
1548 )
1549 {
1550     GMMU_CLIENT_SHADOW_FAULT_BUFFER *pClientShadowFaultBuffer;
1551     MEMORY_DESCRIPTOR *pQueueMemDesc;
1552 
1553     //
1554     // Shadow fault buffers are not implemented using circular queues when
1555     // Hopper CC is enabled. So, there is nothing to free here
1556     //
1557     if (gpuIsCCFeatureEnabled(pGpu) && gpuIsGspOwnedFaultBuffersEnabled(pGpu))
1558         return;
1559 
1560     pClientShadowFaultBuffer = &pKernelGmmu->mmuFaultBuffer[GPU_GFID_PF].clientShadowFaultBuffer[index];
1561 
1562     pQueueMemDesc = pClientShadowFaultBuffer->pQueueMemDesc;
1563 
1564     pClientShadowFaultBuffer->pQueueMemDesc = NULL;
1565     pClientShadowFaultBuffer->pQueueAddress = NvP64_NULL;
1566     pClientShadowFaultBuffer->pQueuePriv = NvP64_NULL;
1567 
1568     if (bFreeQueue)
1569     {
1570         memdescFree(pQueueMemDesc);
1571     }
1572     memdescDestroy(pQueueMemDesc);
1573 }
1574 
1575 static NV_STATUS
1576 _kgmmuClientShadowFaultBufferPagesAllocate
1577 (
1578     OBJGPU           *pGpu,
1579     KernelGmmu       *pKernelGmmu,
1580     NvU32             shadowFaultBufferSize,
1581     NvU32             shadowFaultBufferMetadataSize,
1582     FAULT_BUFFER_TYPE index
1583 )
1584 {
1585     NV_STATUS status;
1586     GMMU_CLIENT_SHADOW_FAULT_BUFFER *pClientShadowFaultBuffer;
1587     MEMORY_DESCRIPTOR *pMemDesc;
1588     NvU64 flags = MEMDESC_FLAGS_NONE;
1589     NvU32 shadowFaultBufferSizeTotal;
1590 
1591     //
1592     // On systems with SEV enabled, the client shadow buffers should be allocated
1593     // in unprotected sysmem as GSP will be writing the fault packets to these
1594     // buffers. Since GSP will be encrypting the fault packets, we don't risk
1595     // leaking any information.
1596     //
1597     flags |= MEMDESC_FLAGS_ALLOC_IN_UNPROTECTED_MEMORY;
1598 
1599     pClientShadowFaultBuffer = &pKernelGmmu->mmuFaultBuffer[GPU_GFID_PF].clientShadowFaultBuffer[index];
1600 
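    //
    // The allocation holds two page-aligned regions back to back: the fault
    // packet buffer followed by the fault packet metadata. The metadata pointer
    // computed below starts at the first page boundary past the packet region.
    //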
1601     shadowFaultBufferSizeTotal = RM_PAGE_ALIGN_UP(shadowFaultBufferSize) + RM_PAGE_ALIGN_UP(shadowFaultBufferMetadataSize);
1602 
1603     status = memdescCreate(&pMemDesc, pGpu,
1604                            shadowFaultBufferSizeTotal, RM_PAGE_SIZE,
1605                            NV_FALSE, ADDR_SYSMEM, NV_MEMORY_CACHED,
1606                            flags);
1607     if (status != NV_OK)
1608     {
1609         return status;
1610     }
1611 
1612     memdescTagAlloc(status, NV_FB_ALLOC_RM_INTERNAL_OWNER_UNNAMED_TAG_33,
1613                     pMemDesc);
1614     if (status != NV_OK)
1615     {
1616         memdescDestroy(pMemDesc);
1617         return status;
1618     }
1619 
1620     status = memdescMap(pMemDesc, 0,
1621                         memdescGetSize(pMemDesc),
1622                         NV_TRUE, NV_PROTECT_READ_WRITE,
1623                         &pClientShadowFaultBuffer->pBufferAddress,
1624                         &pClientShadowFaultBuffer->pBufferPriv);
1625     if (status != NV_OK)
1626     {
1627         memdescFree(pMemDesc);
1628         memdescDestroy(pMemDesc);
1629         return status;
1630     }
1631 
1632     pClientShadowFaultBuffer->pFaultBufferMetadataAddress =
1633                              ((NvP64)(((NvU64) pClientShadowFaultBuffer->pBufferAddress) +
1634                               RM_PAGE_ALIGN_UP(shadowFaultBufferSize)));
1635     pClientShadowFaultBuffer->pBufferMemDesc = pMemDesc;
1636 
1637     return NV_OK;
1638 }
1639 
1640 void
1641 kgmmuClientShadowFaultBufferPagesDestroy_IMPL
1642 (
1643     OBJGPU           *pGpu,
1644     KernelGmmu       *pKernelGmmu,
1645     NvBool            bFreePages,
1646     FAULT_BUFFER_TYPE index
1647 )
1648 {
1649     MEMORY_DESCRIPTOR *pMemDesc;
1650     GMMU_CLIENT_SHADOW_FAULT_BUFFER *pClientShadowFaultBuffer;
1651     GMMU_FAULT_BUFFER_PAGE *pBufferPage;
1652     NvU32 i;
1653 
1654     pClientShadowFaultBuffer = &pKernelGmmu->mmuFaultBuffer[GPU_GFID_PF].clientShadowFaultBuffer[index];
1655     pMemDesc = pClientShadowFaultBuffer->pBufferMemDesc;
1656 
1657     if (bFreePages)
1658     {
1659         memdescUnmap(pMemDesc,
1660                      NV_TRUE, osGetCurrentProcess(),
1661                      pClientShadowFaultBuffer->pBufferAddress,
1662                      pClientShadowFaultBuffer->pBufferPriv);
1663 
1664         memdescFree(pMemDesc);
1665     }
1666     else
1667     {
1668         for (i = 0; i < pClientShadowFaultBuffer->numBufferPages; i++)
1669         {
1670             pBufferPage = &pClientShadowFaultBuffer->pBufferPages[i];
1671 
1672             memdescUnmap(pMemDesc, NV_TRUE, osGetCurrentProcess(),
1673                          pBufferPage->pAddress, pBufferPage->pPriv);
1674         }
1675         portMemFree(pClientShadowFaultBuffer->pBufferPages);
1676     }
1677     memdescDestroy(pMemDesc);
1678 }
1679 
1680 NV_STATUS
1681 kgmmuClientShadowFaultBufferRegister_IMPL
1682 (
1683     OBJGPU           *pGpu,
1684     KernelGmmu       *pKernelGmmu,
1685     FAULT_BUFFER_TYPE index
1686 )
1687 {
1688     NV_STATUS status = NV_OK;
1689     struct GMMU_FAULT_BUFFER *pFaultBuffer;
1690     GMMU_CLIENT_SHADOW_FAULT_BUFFER *pClientShadowFaultBuffer;
1691     GMMU_SHADOW_FAULT_BUF *pQueue;
1692     MEMORY_DESCRIPTOR *pBufferMemDesc;
1693     RmPhysAddr shadowFaultBufferQueuePhysAddr;
1694     NvU32 queueCapacity, numBufferPages;
1695     NvU32 faultBufferSize;
1696     NvU32 shadowFaultBufferMetadataSize;
1697     const NV2080_CTRL_INTERNAL_GMMU_GET_STATIC_INFO_PARAMS *pStaticInfo = kgmmuGetStaticInfo(pGpu, pKernelGmmu);
1698     NvBool bQueueAllocated = NV_FALSE;
1699 
1700     pFaultBuffer = &pKernelGmmu->mmuFaultBuffer[GPU_GFID_PF];
1701     pClientShadowFaultBuffer = &pFaultBuffer->clientShadowFaultBuffer[index];
1702 
1703     if (index == NON_REPLAYABLE_FAULT_BUFFER)
1704     {
1705         faultBufferSize = pStaticInfo->nonReplayableFaultBufferSize;
1706         shadowFaultBufferMetadataSize = pStaticInfo->nonReplayableShadowFaultBufferMetadataSize;
1707     }
1708     else if (index == REPLAYABLE_FAULT_BUFFER)
1709     {
1710         faultBufferSize = pStaticInfo->replayableFaultBufferSize;
1711         shadowFaultBufferMetadataSize = pStaticInfo->replayableShadowFaultBufferMetadataSize;
1712     }
1713     else
1714     {
1715         NV_ASSERT_OR_RETURN(0, NV_ERR_INVALID_ARGUMENT);
1716     }
1717 
1718     //
1719     // We don't use circular queues for shadow fault buffers when Hopper
1720     // CC is enabled
1721     //
1722     if (!gpuIsCCFeatureEnabled(pGpu) || !gpuIsGspOwnedFaultBuffersEnabled(pGpu))
1723     {
1724         pQueue = KERNEL_POINTER_FROM_NvP64(GMMU_SHADOW_FAULT_BUF *,
1725                                            pClientShadowFaultBuffer->pQueueAddress);
1726         queueCapacity = faultBufferSize / NVC369_BUF_SIZE;
1727 
1728         status = queueInitNonManaged(pQueue, queueCapacity);
1729         if (status != NV_OK)
1730         {
1731             return status;
1732         }
1733         bQueueAllocated = NV_TRUE;
1734     }
1735 
1736     if (!IS_GSP_CLIENT(pGpu))
1737     {
1738         portSyncSpinlockAcquire(pFaultBuffer->pShadowFaultBufLock);
1739 
1740         if (pFaultBuffer->pClientShadowFaultBuffer[index] == NULL)
1741         {
1742             pFaultBuffer->pClientShadowFaultBuffer[index] = pClientShadowFaultBuffer;
1743         }
1744         else
1745         {
1746             status = NV_ERR_NOT_SUPPORTED;
1747         }
1748 
1749         portSyncSpinlockRelease(pFaultBuffer->pShadowFaultBufLock);
1750 
1751         if (status != NV_OK)
1752         {
1753             if (bQueueAllocated)
1754                 queueDestroy(pQueue);
1755             return status;
1756         }
1757     }
1758     else
1759     {
1760         RM_API *pRmApi = GPU_GET_PHYSICAL_RMAPI(pGpu);
1761         NV2080_CTRL_INTERNAL_GMMU_REGISTER_CLIENT_SHADOW_FAULT_BUFFER_PARAMS *pParams;
1762 
1763         pParams = portMemAllocNonPaged(sizeof(*pParams));
1764         if (pParams == NULL)
1765         {
1766             if (bQueueAllocated)
1767                 queueDestroy(pQueue);
1768             return NV_ERR_NO_MEMORY;
1769         }
1770         portMemSet(pParams, 0, sizeof(*pParams));
1771 
1772         pBufferMemDesc = pClientShadowFaultBuffer->pBufferMemDesc;
1773         numBufferPages = memdescGetSize(pBufferMemDesc) >> RM_PAGE_SHIFT;
1774         if (numBufferPages > NV_ARRAY_ELEMENTS(pParams->shadowFaultBufferPteArray))
1775         {
1776             portMemFree(pParams);
1777             if (bQueueAllocated)
1778                 queueDestroy(pQueue);
1779             return NV_ERR_BUFFER_TOO_SMALL;
1780         }
1781 
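        //
        // Report the physical address of every 4KB page backing the shadow
        // buffer so the physical RM (GSP-RM) can locate and map the buffer.
        //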
1782         memdescGetPhysAddrs(pBufferMemDesc,
1783                             AT_GPU,
1784                             0, RM_PAGE_SIZE,
1785                             numBufferPages, pParams->shadowFaultBufferPteArray);
1786 
1787         if (!gpuIsCCFeatureEnabled(pGpu) || !gpuIsGspOwnedFaultBuffersEnabled(pGpu))
1788         {
1789             shadowFaultBufferQueuePhysAddr = memdescGetPhysAddr(pClientShadowFaultBuffer->pQueueMemDesc,
1790                                                                 AT_GPU, 0);
1791             pParams->shadowFaultBufferQueuePhysAddr = shadowFaultBufferQueuePhysAddr;
1792         }
1793         pParams->shadowFaultBufferSize         = faultBufferSize;
1794         pParams->shadowFaultBufferMetadataSize = shadowFaultBufferMetadataSize;
1795         pParams->shadowFaultBufferType         = (index == NON_REPLAYABLE_FAULT_BUFFER) ?
1796                                                  NV2080_CTRL_FAULT_BUFFER_NON_REPLAYABLE :
1797                                                  NV2080_CTRL_FAULT_BUFFER_REPLAYABLE;
1798 
1799         if (gpuIsCCFeatureEnabled(pGpu) && gpuIsGspOwnedFaultBuffersEnabled(pGpu) && index == REPLAYABLE_FAULT_BUFFER)
1800         {
1801             pParams->faultBufferSharedMemoryPhysAddr = memdescGetPhysAddr(pClientShadowFaultBuffer->pFaultBufferSharedMemDesc,
1802                                                                           AT_GPU, 0);
1803         }
1804 
1805         status = pRmApi->Control(pRmApi,
1806                                  pGpu->hInternalClient,
1807                                  pGpu->hInternalSubdevice,
1808                                  NV2080_CTRL_CMD_INTERNAL_GMMU_REGISTER_CLIENT_SHADOW_FAULT_BUFFER,
1809                                  pParams, sizeof(*pParams));
1810 
1811         portMemFree(pParams);
1812         if (status != NV_OK)
1813         {
1814             if (bQueueAllocated)
1815                 queueDestroy(pQueue);
1816             return status;
1817         }
1818 
1819         pFaultBuffer->pClientShadowFaultBuffer[index] = pClientShadowFaultBuffer;
1820     }
1821 
1822     return NV_OK;
1823 }
1824 
1825 void
1826 kgmmuClientShadowFaultBufferUnregister_IMPL
1827 (
1828     OBJGPU           *pGpu,
1829     KernelGmmu       *pKernelGmmu,
1830     FAULT_BUFFER_TYPE index
1831 )
1832 {
1833     NV_STATUS status = NV_OK;
1834     GMMU_CLIENT_SHADOW_FAULT_BUFFER *pClientShadowFaultBuffer;
1835     GMMU_SHADOW_FAULT_BUF *pQueue;
1836     struct GMMU_FAULT_BUFFER *pFaultBuffer;
1837 
1838     pFaultBuffer = &pKernelGmmu->mmuFaultBuffer[GPU_GFID_PF];
1839 
1840     if (!IS_GSP_CLIENT(pGpu))
1841     {
1842         portSyncSpinlockAcquire(pFaultBuffer->pShadowFaultBufLock);
1843 
1844         pFaultBuffer->pClientShadowFaultBuffer[index] = NULL;
1845 
1846         portSyncSpinlockRelease(pFaultBuffer->pShadowFaultBufLock);
1847     }
1848     else
1849     {
1850         RM_API *pRmApi = GPU_GET_PHYSICAL_RMAPI(pGpu);
1851         NV2080_CTRL_INTERNAL_GMMU_UNREGISTER_CLIENT_SHADOW_FAULT_BUFFER_PARAMS params;
1852 
1853         portMemSet(&params, 0, sizeof(params));
1854 
1855         params.shadowFaultBufferType = (index == NON_REPLAYABLE_FAULT_BUFFER) ?
1856                                        NV2080_CTRL_FAULT_BUFFER_NON_REPLAYABLE :
1857                                        NV2080_CTRL_FAULT_BUFFER_REPLAYABLE;
1858         status = pRmApi->Control(pRmApi,
1859                                  pGpu->hInternalClient,
1860                                  pGpu->hInternalSubdevice,
1861                                  NV2080_CTRL_CMD_INTERNAL_GMMU_UNREGISTER_CLIENT_SHADOW_FAULT_BUFFER,
1862                                  &params, sizeof(params));
1863         if (status != NV_OK)
1864         {
1865             NV_PRINTF(LEVEL_ERROR,
1866                       "Unregistering %s fault buffer failed (status=0x%08x), proceeding...\n",
1867                       (index == NON_REPLAYABLE_FAULT_BUFFER) ? "non-replayable" : "replayable",
1868                       status);
1869         }
1870 
1871         pFaultBuffer->pClientShadowFaultBuffer[index] = NULL;
1872     }
1873 
1874     if (!gpuIsCCFeatureEnabled(pGpu) || !gpuIsGspOwnedFaultBuffersEnabled(pGpu))
1875     {
1876         pClientShadowFaultBuffer = &pFaultBuffer->clientShadowFaultBuffer[index];
1877         pQueue = KERNEL_POINTER_FROM_NvP64(GMMU_SHADOW_FAULT_BUF *,
1878                                            pClientShadowFaultBuffer->pQueueAddress);
1879         queueDestroy(pQueue);
1880     }
1881 }
1882 
1883 /*!
1884  * @brief Creates shadow fault buffer for client handling of replayable/non-replayable
1885  *        faults in the CPU-RM, and registers it in the GSP-RM.
1886  *
1887  * @param[in] pGpu
1888  * @param[in] pKernelGmmu
1889  * @param[in] index         Replayable or non-replayable fault buffer
1890  *
1891  * @returns NV_OK on success, pertinent error code on failure.
1892  */
1893 NV_STATUS
1894 kgmmuClientShadowFaultBufferAllocate_IMPL
1895 (
1896     OBJGPU            *pGpu,
1897     KernelGmmu        *pKernelGmmu,
1898     FAULT_BUFFER_TYPE  index
1899 )
1900 {
1901     NV_STATUS   status;
1902     const NV2080_CTRL_INTERNAL_GMMU_GET_STATIC_INFO_PARAMS *pStaticInfo = kgmmuGetStaticInfo(pGpu, pKernelGmmu);
1903     NvU32 faultBufferSize;
1904     NvU32 shadowFaultBufferMetadataSize;
1905 
1906     ct_assert((RM_PAGE_SIZE % sizeof(struct GMMU_FAULT_PACKET)) == 0);
1907 
1908     NV_ASSERT_OR_RETURN(!pKernelGmmu->getProperty(pKernelGmmu, PDB_PROP_KGMMU_FAULT_BUFFER_DISABLED), NV_ERR_INVALID_STATE);
1909 
1910     NV_ASSERT_OR_RETURN(pStaticInfo->nonReplayableFaultBufferSize != 0, NV_ERR_INVALID_STATE);
1911 
1912     if (index == NON_REPLAYABLE_FAULT_BUFFER)
1913     {
1914         faultBufferSize = pStaticInfo->nonReplayableFaultBufferSize;
1915         shadowFaultBufferMetadataSize = pStaticInfo->nonReplayableShadowFaultBufferMetadataSize;
1916     }
1917     else if (index == REPLAYABLE_FAULT_BUFFER)
1918     {
1919         faultBufferSize = pStaticInfo->replayableFaultBufferSize;
1920         shadowFaultBufferMetadataSize = pStaticInfo->replayableShadowFaultBufferMetadataSize;
1921     }
1922     else
1923     {
1924         NV_ASSERT_OR_RETURN(0, NV_ERR_INVALID_ARGUMENT);
1925     }
1926 
1927     status = _kgmmuClientShadowFaultBufferQueueAllocate(pGpu, pKernelGmmu, index);
1928     if (status != NV_OK)
1929     {
1930         return status;
1931     }
1932 
1933     status = _kgmmuClientShadowFaultBufferPagesAllocate(pGpu, pKernelGmmu,
1934                                                         faultBufferSize,
1935                                                         shadowFaultBufferMetadataSize,
1936                                                         index);
1937     if (status != NV_OK)
1938     {
1939         goto destroy_queue_and_exit;
1940     }
1941 
1942     status = kgmmuFaultBufferAllocSharedMemory_HAL(pGpu, pKernelGmmu, index);
1943     if (status != NV_OK)
1944     {
1945         goto destroy_pages_and_exit;
1946     }
1947 
1948     status = kgmmuClientShadowFaultBufferRegister(pGpu, pKernelGmmu,
1949                                                   index);
1950     if (status != NV_OK)
1951     {
1952         goto destroy_shared_memory_and_exit;
1953     }
1954 
1955     return NV_OK;
1956 
1957 destroy_shared_memory_and_exit:
1958     kgmmuFaultBufferFreeSharedMemory_HAL(pGpu, pKernelGmmu, index);
1959 destroy_pages_and_exit:
1960     kgmmuClientShadowFaultBufferPagesDestroy(pGpu, pKernelGmmu, NV_TRUE,
1961                                              index);
1962 destroy_queue_and_exit:
1963     kgmmuClientShadowFaultBufferQueueDestroy(pGpu, pKernelGmmu, NV_TRUE,
1964                                              index);
1965     return status;
1966 }
1967 
1968 /*!
1969  * @brief Unregisters the client shadow fault buffer in the GSP-RM and destroys
1970  *        it in the CPU-RM.
1971  *
1972  * @param[in] pGpu
1973  * @param[in] pKernelGmmu
1974  *
1975  * @returns NV_OK
1976  */
1977 NV_STATUS
1978 kgmmuClientShadowFaultBufferDestroy_IMPL
1979 (
1980     OBJGPU           *pGpu,
1981     KernelGmmu       *pKernelGmmu,
1982     FAULT_BUFFER_TYPE index
1983 )
1984 {
1985     GMMU_CLIENT_SHADOW_FAULT_BUFFER *pClientShadowFaultBuffer;
1986     NvBool bFreeMemory = !RMCFG_FEATURE_PLATFORM_GSP;
1987 
1988     pClientShadowFaultBuffer =
1989         pKernelGmmu->mmuFaultBuffer[GPU_GFID_PF].pClientShadowFaultBuffer[index];
1990 
1991     if (pClientShadowFaultBuffer != NvP64_NULL)
1992     {
1993         kgmmuClientShadowFaultBufferUnregister(pGpu, pKernelGmmu,
1994                                                index);
1995 
1996         kgmmuFaultBufferFreeSharedMemory_HAL(pGpu, pKernelGmmu, index);
1997 
1998         kgmmuClientShadowFaultBufferPagesDestroy(pGpu, pKernelGmmu, bFreeMemory,
1999                                                  index);
2000         kgmmuClientShadowFaultBufferQueueDestroy(pGpu, pKernelGmmu, bFreeMemory,
2001                                                  index);
2002     }
2003 
2004     return NV_OK;
2005 }
2006 
2007 /*!
2008  * Returns the minimum allocation size, in bytes, that is aligned up to the big-page size.
2009  *
2010  * @param[in]  pKernelGmmu
2011  *
2012  * @return NvU64 Minimum allocation size in bytes
2013  */
2014 NvU64
2015 kgmmuGetMinBigPageSize_IMPL(KernelGmmu *pKernelGmmu)
2016 {
2017     //
2018     // Set the minimum size in the heap that we will round up to a big page instead
2019     // of just 4KB. HW doesn't like 4KB pages in video memory, but SW wants to pack
2020     // physical memory sometimes. Typically UMDs that really care about perf use
2021     // suballocation for larger RM allocations anyway.
2022     //
2023     // Promote allocations bigger than half the big page size.
2024     // (This is a policy change for big page sizes/VASpace.)
2025     //
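    //
    // For example, with a 64KB big page size this threshold is 32KB: allocations
    // of 32KB or more get rounded up to the big page size.
    //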
2026     return RM_PAGE_SIZE_64K >> 1;
2027 }
2028 
2029 /*!
2030  * @brief Initializes the init block for an engine
2031  *
2032  * @param[in] pKernelGmmu
2033  * @param[in] pInstBlkDesc    Memory descriptor for the instance block of the engine
2034  * @param[in] pVAS            OBJVASPACE pointer of the engine
2035  * @param[in] subctxId        Subcontext ID value
2036  * @param[in] pInstBlkParams  Pointer to the structure storing the parameters passed by the caller
2037  *
2038  * @returns NV_STATUS
2039  */
2040 NV_STATUS
2041 kgmmuInstBlkInit_IMPL
2042 (
2043     KernelGmmu           *pKernelGmmu,
2044     MEMORY_DESCRIPTOR    *pInstBlkDesc,
2045     OBJVASPACE           *pVAS,
2046     NvU32                 subctxId,
2047     INST_BLK_INIT_PARAMS *pInstBlkParams
2048 )
2049 {
2050     OBJGPU   *pGpu   = ENG_GET_GPU(pKernelGmmu);
2051     KernelBus *pKernelBus = GPU_GET_KERNEL_BUS(pGpu);
2052     NvU8     *pInstBlk;      // CPU VA of instance block.
2053     NvU64     vaLimitData;
2054     NvU32     vaLimitOffset;
2055     NvU32     dirBaseHiOffset;
2056     NvU32     dirBaseHiData;
2057     NvU32     dirBaseLoOffset;
2058     NvU32     dirBaseLoData;
2059     NvU32     atsOffset = 0;
2060     NvU32     atsData = 0;
2061     NvU32     pasid;
2062     NvU32     magicValueOffset;
2063     NvU32     magicValueData;
2064     NV_STATUS status = NV_OK;
2065 
2066     NV_ASSERT(!gpumgrGetBcEnabledStatus(pGpu));
2067 
2068     // Get VA limit
2069     status = kgmmuInstBlkVaLimitGet_HAL(pKernelGmmu, pVAS, subctxId, pInstBlkParams, &vaLimitOffset, &vaLimitData);
2070     NV_ASSERT_OR_RETURN((status == NV_OK), status);
2071 
2072     // Get page dir base
2073     NV_ASSERT_OK_OR_RETURN(kgmmuInstBlkPageDirBaseGet_HAL(pGpu, pKernelGmmu,
2074         pVAS, pInstBlkParams, subctxId,
2075         &dirBaseLoOffset, &dirBaseLoData, &dirBaseHiOffset, &dirBaseHiData));
2076 
2077     //
2078     // Enable ATS in instance block only when both ATS is enabled in the
2079     // vaspace and a valid PASID is provisioned through
2080     // NV0080_CTRL_CMD_DMA_SET_PAGE_DIRECTORY.
2081     //
2082     if ((pVAS != NULL) && vaspaceIsAtsEnabled(pVAS))
2083     {
2084         if ((status = vaspaceGetPasid(pVAS, &pasid)) == NV_OK)
2085         {
2086             // Coherent link ATS parameters are only set on the new VMM path.
2087             status = kgmmuInstBlkAtsGet_HAL(pKernelGmmu, pVAS, subctxId,
2088                                             &atsOffset, &atsData);
2089             NV_ASSERT_OR_RETURN((status == NV_OK), status);
2090         }
2091         else
2092         {
2093             // Proceed with ATS disabled in instance block if PASID is not yet provisioned
2094             NV_ASSERT_OR_RETURN((status == NV_ERR_NOT_READY), status);
2095         }
2096     }
2097 
2098     status = kgmmuInstBlkMagicValueGet_HAL(pKernelGmmu, &magicValueOffset, &magicValueData);
2099 
2100     // Write the fields out
2101     pInstBlk = pInstBlkParams->pInstBlk;
2102 
2103     if (pInstBlk != NULL)
2104     {
2105         if (vaLimitOffset != 0)
2106         {
2107             // TO DO: FMODEL fails with MEM_WR64
2108             if (IS_SIMULATION(pGpu))
2109             {
2110                 MEM_WR32(pInstBlk + vaLimitOffset + 0, NvU64_LO32(vaLimitData));
2111                 MEM_WR32(pInstBlk + vaLimitOffset + 4, NvU64_HI32(vaLimitData));
2112             }
2113             else
2114             {
2115                 MEM_WR64(pInstBlk + vaLimitOffset, vaLimitData);
2116             }
2117         }
2118 
2119         MEM_WR32(pInstBlk + dirBaseHiOffset, dirBaseHiData);
2120         MEM_WR32(pInstBlk + dirBaseLoOffset, dirBaseLoData);
2121 
2122         if (atsOffset != 0)
2123             MEM_WR32(pInstBlk + atsOffset, atsData);
2124 
2125         if (status == NV_OK)
2126             MEM_WR32(pInstBlk + magicValueOffset, magicValueData);
2127     }
2128     else
2129     {
2130         MemoryManager *pMemoryManager = GPU_GET_MEMORY_MANAGER(pGpu);
2131 
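        //
        // No caller-supplied CPU mapping: create a temporary shadow mapping of
        // the instance block for these writes and release it afterwards.
        //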
2132         pInstBlk = memmgrMemDescBeginTransfer(pMemoryManager, pInstBlkDesc,
2133                                               TRANSFER_FLAGS_SHADOW_ALLOC);
2134         if (pInstBlk == NULL)
2135         {
2136             return NV_ERR_INSUFFICIENT_RESOURCES;
2137         }
2138 
2139         if (vaLimitOffset != 0)
2140         {
2141             // TO DO: FMODEL fails with MEM_WR64
2142             if (IS_SIMULATION(pGpu))
2143             {
2144                 MEM_WR32(pInstBlk + vaLimitOffset + 0, NvU64_LO32(vaLimitData));
2145                 MEM_WR32(pInstBlk + vaLimitOffset + 4, NvU64_HI32(vaLimitData));
2146             }
2147             else
2148             {
2149                 MEM_WR64(pInstBlk + vaLimitOffset, vaLimitData);
2150             }
2151         }
2152 
2153         MEM_WR32(pInstBlk + dirBaseHiOffset, dirBaseHiData);
2154         MEM_WR32(pInstBlk + dirBaseLoOffset, dirBaseLoData);
2155 
2156         if (atsOffset != 0)
2157             MEM_WR32(pInstBlk + atsOffset, atsData);
2158 
2159         if (status == NV_OK)
2160             MEM_WR32(pInstBlk + magicValueOffset, magicValueData);
2161 
2162         memmgrMemDescEndTransfer(pMemoryManager, pInstBlkDesc,
2163                                  TRANSFER_FLAGS_SHADOW_ALLOC);
2164     }
2165 
2166     if (!pInstBlkParams->bDeferFlush)
2167     {
2168         kbusFlush_HAL(pGpu, pKernelBus, kbusGetFlushAperture(pKernelBus, memdescGetAddressSpace(pInstBlkDesc)));
2169     }
2170 
2171     return NV_OK;
2172 }
2173 
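/*!
 * @brief Map an RM address space to the GMMU aperture used for external allocations.
 *
 * Illustrative mapping performed by the switch below (assuming the usual
 * NV_ADDRESS_SPACE values):
 *
 *   kgmmuGetExternalAllocAperture(ADDR_FBMEM)     -> GMMU_APERTURE_VIDEO
 *   kgmmuGetExternalAllocAperture(ADDR_FABRIC_V2) -> GMMU_APERTURE_PEER
 *   kgmmuGetExternalAllocAperture(ADDR_SYSMEM)    -> GMMU_APERTURE_SYS_COH
 *
 * Unknown address spaces assert and fall back to GMMU_APERTURE_SYS_COH.
 *
 * @param[in] addressSpace  Address space of the allocation
 */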
2174 GMMU_APERTURE
2175 kgmmuGetExternalAllocAperture_IMPL
2176 (
2177     NvU32 addressSpace
2178 )
2179 {
2180     switch (addressSpace)
2181     {
2182         case ADDR_FBMEM:
2183             return GMMU_APERTURE_VIDEO;
2184         case ADDR_FABRIC_V2:
2185         case ADDR_FABRIC_MC:
2186             return GMMU_APERTURE_PEER;
2187         case ADDR_SYSMEM:
2188         case ADDR_VIRTUAL:
2189             return GMMU_APERTURE_SYS_COH;
2190         default:
2191             NV_PRINTF(LEVEL_ERROR, "Unexpected addressSpace (%u) when mapping to GMMU_APERTURE.\n",
2192                       addressSpace);
2193             NV_ASSERT(0);
2194             return GMMU_APERTURE_SYS_COH;
2195     }
2196 }
2197 
2198 /*!
2199  * @brief Change ownership of the access counter notification interrupt between RM and UVM.
2200  *
2201  * @param pGpu
2202  * @param pKernelGmmu
2203  * @param bOwnedByRm   NV_TRUE if RM owns the interrupt, NV_FALSE otherwise
2204  */
2205 void
2206 kgmmuAccessCntrChangeIntrOwnership_IMPL
2207 (
2208     OBJGPU     *pGpu,
2209     KernelGmmu *pKernelGmmu,
2210     NvBool      bOwnedByRm
2211 )
2212 {
2213     //
2214     // Disable the interrupt when RM loses the ownership and enable it back when
2215     // RM regains it. nvUvmInterfaceOwnAccessCntIntr() will rely on this behavior.
2216     //
2217     if (bOwnedByRm)
2218         pKernelGmmu->uvmSharedIntrRmOwnsMask |= RM_UVM_SHARED_INTR_MASK_HUB_ACCESS_COUNTER_NOTIFY;
2219     else
2220         pKernelGmmu->uvmSharedIntrRmOwnsMask &= ~RM_UVM_SHARED_INTR_MASK_HUB_ACCESS_COUNTER_NOTIFY;
2221 }
2222 
2223 /**
2224  * @brief Provides an opportunity to register some IntrService during intrStateInit.
2225  */
2226 void
2227 kgmmuRegisterIntrService_IMPL
2228 (
2229     OBJGPU              *pGpu,
2230     KernelGmmu          *pKernelGmmu,
2231     IntrServiceRecord   pRecords[MC_ENGINE_IDX_MAX]
2232 )
2233 {
2234     NvU32 engineIdx;
2235     NvU16 *pEngineIdxList;
2236     NvU32 listSize;
2237 
2238     static NvU16 engineIdxList[] = {
2239         MC_ENGINE_IDX_REPLAYABLE_FAULT,
2240         MC_ENGINE_IDX_REPLAYABLE_FAULT_ERROR,
2241     };
2242 
2243     static NvU16 engineIdxListForCC[] = {
2244         MC_ENGINE_IDX_REPLAYABLE_FAULT_CPU,
2245         MC_ENGINE_IDX_NON_REPLAYABLE_FAULT_CPU,
2246     };
2247 
2248     if (IS_GSP_CLIENT(pGpu) && gpuIsCCFeatureEnabled(pGpu) && gpuIsGspOwnedFaultBuffersEnabled(pGpu))
2249     {
2250         pEngineIdxList = engineIdxListForCC;
2251         listSize = NV_ARRAY_ELEMENTS(engineIdxListForCC);
2252     }
2253     else
2254     {
2255         pEngineIdxList = engineIdxList;
2256         listSize = NV_ARRAY_ELEMENTS(engineIdxList);
2257     }
2258 
2259     for (NvU32 tableIdx = 0; tableIdx < listSize; tableIdx++)
2260     {
2261         engineIdx = (pEngineIdxList)[tableIdx];
2262         NV_ASSERT(pRecords[engineIdx].pInterruptService == NULL);
2263         pRecords[engineIdx].pInterruptService = staticCast(pKernelGmmu, IntrService);
2264     }
2265 
2266     if (!IS_GSP_CLIENT(pGpu))
2267     {
2268         engineIdx = MC_ENGINE_IDX_GMMU;
2269         NV_ASSERT(pRecords[engineIdx].pInterruptService == NULL);
2270         pRecords[engineIdx].pInterruptService = staticCast(pKernelGmmu, IntrService);
2271 
2272         NV_ASSERT(pRecords[engineIdx].pNotificationService == NULL);
2273         pRecords[engineIdx].bFifoWaiveNotify = NV_FALSE;
2274         pRecords[engineIdx].pNotificationService = staticCast(pKernelGmmu, IntrService);
2275 
2276         static NvU16 physicalEngineIdxList[] = {
2277             MC_ENGINE_IDX_NON_REPLAYABLE_FAULT,
2278             MC_ENGINE_IDX_NON_REPLAYABLE_FAULT_ERROR,
2279             MC_ENGINE_IDX_INFO_FAULT
2280         };
2281 
2282         for (NvU32 tableIdx = 0; tableIdx < NV_ARRAY_ELEMENTS(physicalEngineIdxList); tableIdx++)
2283         {
2284             engineIdx = physicalEngineIdxList[tableIdx];
2285             NV_ASSERT(pRecords[engineIdx].pInterruptService == NULL);
2286             pRecords[engineIdx].pInterruptService = staticCast(pKernelGmmu, IntrService);
2287         }
2288     }
2289 }
2290 
2291 /**
2292  * @brief Clears the stall interrupt leaf vector and returns whether to call ServiceInterrupt.
2293  * @details Normally there's no need to override this function as its default is used by almost all handlers,
2294  *          but MC_ENGINE_IDX_NON_REPLAYABLE_FAULT is cleared in the top half.
2295  *
2296  * @returns NV_TRUE indicating that the interrupt should be handled.
2297  */
2298 NvBool
2299 kgmmuClearInterrupt_IMPL
2300 (
2301     OBJGPU                             *pGpu,
2302     KernelGmmu                         *pKernelGmmu,
2303     IntrServiceClearInterruptArguments *pParams)
2304 {
2305     NV_ASSERT_OR_RETURN(pParams != NULL, 0);
2306     if (pParams->engineIdx == MC_ENGINE_IDX_NON_REPLAYABLE_FAULT)
2307     {
2308         // Skip clearing the interrupt; just return success.
2309         return NV_TRUE;
2310     }
2311     else
2312     {
2313         // Fallthrough to default handler, which will clear the interrupt.
2314         return intrservClearInterrupt_IMPL(pGpu, staticCast(pKernelGmmu, IntrService), pParams);
2315     }
2316 }
2317 
2318 /**
2319  * @brief Service stall interrupts.
2320  *
2321  * @returns Zero, or any implementation-chosen nonzero value. If the same nonzero value is returned enough
2322  *          times the interrupt is considered stuck.
2323  */
2324 NvU32
2325 kgmmuServiceInterrupt_IMPL
2326 (
2327     OBJGPU      *pGpu,
2328     KernelGmmu  *pKernelGmmu,
2329     IntrServiceServiceInterruptArguments *pParams
2330 )
2331 {
2332     NV_STATUS status;
2333 
2334     NV_ASSERT_OR_RETURN(pParams != NULL, 0);
2335 
2336     switch (pParams->engineIdx)
2337     {
2338         case MC_ENGINE_IDX_GMMU:
2339         {
2340             return kgmmuService_HAL(pGpu, pKernelGmmu);
2341         }
2342         case MC_ENGINE_IDX_NON_REPLAYABLE_FAULT:
2343         {
2344 
2345             //
2346             // This interrupt was already cleared in the top half and "serviced"
2347             // in the top half since copy from HW fault buffer always happens
2348             // in the top half. This servicing is merely copying from the SW
2349             // fault buffer, so doesn't need interrupt clearing. Also, we will
2350             // only copy from the SW fault buffer if the fatalFaultIntrPending
2351             // cache tells us that there is something to copy. Else, we'll just
2352             // return early and rely on another interrupt to fire that will
2353             // eventually update this state. In the top half, we will
2354             // unconditionally write GET back, which will force HW to send us a
2355             // new pulse as long as GET != PUT and we'd be eventually guaranteed
2356             // to copy something into the SW fault buffer.
2357             //
2358             if (portAtomicCompareAndSwapS32(&pKernelGmmu->mmuFaultBuffer[GPU_GFID_PF].fatalFaultIntrPending, 0, 1))
2359             {
2360                 status = kgmmuServiceNonReplayableFault_HAL(pGpu, pKernelGmmu);
2361                 if (status != NV_OK)
2362                 {
2363                     NV_ASSERT_OK_FAILED(
2364                         "Failed to service non-replayable MMU fault error",
2365                         status);
2366                 }
2367             }
2368 
2369             break;
2370         }
2371         case MC_ENGINE_IDX_NON_REPLAYABLE_FAULT_ERROR:
2372         {
2373             status = kgmmuReportFaultBufferOverflow_HAL(pGpu, pKernelGmmu);
2374             if (status != NV_OK)
2375             {
2376                 NV_ASSERT_OK_FAILED(
2377                     "Failed to report non-replayable MMU fault buffer overflow error",
2378                     status);
2379             }
2380             break;
2381         }
2382         case MC_ENGINE_IDX_REPLAYABLE_FAULT:
2383         {
2384             status = kgmmuServiceReplayableFault_HAL(pGpu, pKernelGmmu);
2385             if (status != NV_OK)
2386             {
2387                 NV_ASSERT_OK_FAILED("Failed to service replayable MMU fault error",
2388                     status);
2389             }
2390             break;
2391         }
2392         case MC_ENGINE_IDX_REPLAYABLE_FAULT_ERROR:
2393         {
2394             status = kgmmuReportFaultBufferOverflow_HAL(pGpu, pKernelGmmu);
2395             if (status != NV_OK)
2396             {
2397                 NV_ASSERT_OK_FAILED(
2398                     "Failed to report replayable MMU fault buffer overflow error",
2399                     status);
2400             }
2401             break;
2402         }
2403         case MC_ENGINE_IDX_NON_REPLAYABLE_FAULT_CPU:
2404         {
2405             //
2406             // This interrupt vector is used to enqueue the UVM top half so any outstanding Non-Replayable
2407             // faults can get processed by UVM. However, since the GSP notification mechanism is interrupt based
2408             // and the top half of the RM interrupt routine will always call into UVM's top half, it is safe to NOP here
2409             // knowing that UVM handling already gets invoked whenever the RM top half is executed.
2410             //
2411             status = NV_OK;
2412             break;
2413         }
2414         case MC_ENGINE_IDX_REPLAYABLE_FAULT_CPU:
2415         {
2416             NV_PRINTF(LEVEL_ERROR, "Unexpected replayable interrupt routed to RM. Verify UVM took ownership.\n");
2417             status = NV_ERR_INVALID_STATE;
2418             break;
2419         }
2420         case MC_ENGINE_IDX_INFO_FAULT:
2421         {
2422             status = kgmmuServicePriFaults_HAL(pGpu, pKernelGmmu);
2423             if (status != NV_OK)
2424             {
2425                 NV_ASSERT_OK_FAILED("Failed to service PRI fault error", status);
2426             }
2427             break;
2428         }
2429         default:
2430         {
2431             NV_ASSERT_FAILED("Invalid engineIdx");
2432             break;
2433         }
2434     }
2435 
2436     return 0;
2437 }
2438 
2439 /*!
2440  * @brief Extract the PTE fields from the PTE and
2441  * set the corresponding flags/fields in pPteInfo.
2442  *
2443  * @param[in]  pKernelGmmu
2444  * @param[in]  pPte        Pointer to the PTE contents
2445  * @param[out] pPteInfo    Pointer to the NV0080_CTRL_DMA_PTE_INFO_PTE_BLOCK within the cmd params
2446  * @param[in]  pFmt        GMMU format used to interpret the PTE
2447  * @param[in]  pLevelFmt   Format of the level
2448  *
2449  *
2450  * @returns none
2451  */
2452 void
2453 kgmmuExtractPteInfo_IMPL
2454 (
2455     KernelGmmu                          *pKernelGmmu,
2456     GMMU_ENTRY_VALUE                    *pPte,
2457     NV0080_CTRL_DMA_PTE_INFO_PTE_BLOCK  *pPteInfo,
2458     const GMMU_FMT                      *pFmt,
2459     const MMU_FMT_LEVEL                 *pLevelFmt
2460 )
2461 {
2462     OBJGPU             *pGpu = ENG_GET_GPU(pKernelGmmu);
2463     MemoryManager      *pMemoryManager = GPU_GET_MEMORY_MANAGER(pGpu);
2464     const GMMU_FMT_PTE *pFmtPte = pFmt->pPte;
2465     NvBool              bPteValid;
2466 
2467     bPteValid = nvFieldGetBool(&pFmtPte->fldValid, pPte->v8);
2468 
2469     pPteInfo->pteFlags = FLD_SET_DRF_NUM(0080_CTRL, _DMA_PTE_INFO, _PARAMS_FLAGS_VALID,
2470         bPteValid, pPteInfo->pteFlags);
2471 
2472     if (pFmtPte->version != GMMU_FMT_VERSION_3)
2473     {
2474         pPteInfo->pteFlags = FLD_SET_DRF_NUM(0080_CTRL, _DMA_PTE_INFO, _PARAMS_FLAGS_ENCRYPTED,
2475             nvFieldGetBool(&pFmtPte->fldEncrypted, pPte->v8), pPteInfo->pteFlags);
2476     }
2477 
2478     switch (gmmuFieldGetAperture(&pFmtPte->fldAperture, pPte->v8))
2479     {
2480         case GMMU_APERTURE_VIDEO:
2481             pPteInfo->pteFlags = FLD_SET_DRF(0080_CTRL, _DMA_PTE_INFO, _PARAMS_FLAGS_APERTURE,
2482                 _VIDEO_MEMORY, pPteInfo->pteFlags);
2483             break;
2484         case GMMU_APERTURE_PEER:
2485             pPteInfo->pteFlags = FLD_SET_DRF(0080_CTRL, _DMA_PTE_INFO, _PARAMS_FLAGS_APERTURE,
2486                 _PEER_MEMORY, pPteInfo->pteFlags);
2487             break;
2488         case GMMU_APERTURE_SYS_COH:
2489             pPteInfo->pteFlags = FLD_SET_DRF(0080_CTRL, _DMA_PTE_INFO, _PARAMS_FLAGS_APERTURE,
2490                 _SYSTEM_COHERENT_MEMORY, pPteInfo->pteFlags);
2491             break;
2492         case GMMU_APERTURE_SYS_NONCOH:
2493             pPteInfo->pteFlags = FLD_SET_DRF(0080_CTRL, _DMA_PTE_INFO, _PARAMS_FLAGS_APERTURE,
2494                 _SYSTEM_NON_COHERENT_MEMORY, pPteInfo->pteFlags);
2495             break;
2496         case GMMU_APERTURE_INVALID:
2497         default:
2498             NV_ASSERT(0);
2499             break;
2500     }
2501 
2502     if (pFmtPte->version == GMMU_FMT_VERSION_3)
2503     {
2504         KernelGmmu  *pKernelGmmu = GPU_GET_KERNEL_GMMU(pGpu);
2505         NvU32        ptePcfHw;
2506         NvU32        ptePcfSw = 0;
2507 
2508         // In Version 3, parse the PCF bits and return those
2509         ptePcfHw = nvFieldGet32(&pFmtPte->fldPtePcf, pPte->v8);
2510         NV_ASSERT(kgmmuTranslatePtePcfFromHw_HAL(pKernelGmmu, ptePcfHw, bPteValid, &ptePcfSw) == NV_OK);
2511 
2512         // Valid 2MB PTEs follow the same format as 64K and 4K PTEs
2513         if (bPteValid)
2514         {
2515             if (!(ptePcfSw & (1 << SW_MMU_PCF_UNCACHED_IDX)))
2516             {
2517                 pPteInfo->pteFlags = FLD_SET_DRF(0080_CTRL, _DMA_PTE_INFO,
2518                         _PARAMS_FLAGS_GPU_CACHED, _TRUE, pPteInfo->pteFlags);
2519             }
2520             if (ptePcfSw & (1 << SW_MMU_PCF_RO_IDX))
2521             {
2522                 pPteInfo->pteFlags = FLD_SET_DRF(0080_CTRL, _DMA_PTE_INFO,
2523                         _PARAMS_FLAGS_READ_ONLY, _TRUE, pPteInfo->pteFlags);
2524             }
2525             if (ptePcfSw & (1 << SW_MMU_PCF_NOATOMIC_IDX))
2526             {
2527                 pPteInfo->pteFlags = FLD_SET_DRF(0080_CTRL, _DMA_PTE_INFO,
2528                         _PARAMS_FLAGS_ATOMIC, _DISABLE, pPteInfo->pteFlags);
2529             }
2530             if (ptePcfSw & (1 << SW_MMU_PCF_REGULAR_IDX))
2531             {
2532                 pPteInfo->pteFlags = FLD_SET_DRF(0080_CTRL, _DMA_PTE_INFO,
2533                         _PARAMS_FLAGS_PRIVILEGED, _FALSE, pPteInfo->pteFlags);
2534             }
2535             if (ptePcfSw & (1 << SW_MMU_PCF_ACE_IDX))
2536             {
2537                 pPteInfo->pteFlags = FLD_SET_DRF(0080_CTRL, _DMA_PTE_INFO,
2538                         _PARAMS_FLAGS_ACCESS_COUNTING, _ENABLE, pPteInfo->pteFlags);
2539             }
2540         }
2541         else
2542         {
2543             if (pLevelFmt->numSubLevels == 0)
2544             {
2545                 if (ptePcfSw & (1 << SW_MMU_PCF_SPARSE_IDX))
2546                 {
2547                     pPteInfo->pteFlags = FLD_SET_DRF(0080_CTRL, _DMA_PTE_INFO,
2548                             _PARAMS_FLAGS_GPU_CACHED, _FALSE, pPteInfo->pteFlags);
2549                 }
2550                 else
2551                 {
2552                     pPteInfo->pteFlags = FLD_SET_DRF(0080_CTRL, _DMA_PTE_INFO,
2553                             _PARAMS_FLAGS_GPU_CACHED, _TRUE, pPteInfo->pteFlags);
2554                 }
2555             }
2556             else
2557             {
2558                 NvU32  pdePcfHw = 0;
2559                 NvU32  pdePcfSw = 0;
2560 
2561                 pdePcfHw = nvFieldGet32(&pFmt->pPde->fldPdePcf, pPte->v8);
2562                 NV_ASSERT(kgmmuTranslatePdePcfFromHw_HAL(pKernelGmmu, pdePcfHw, GMMU_APERTURE_INVALID, &pdePcfSw) == NV_OK);
2563                 if (pdePcfSw & (1 << SW_MMU_PCF_SPARSE_IDX))
2564                 {
2565                     pPteInfo->pteFlags = FLD_SET_DRF(0080_CTRL, _DMA_PTE_INFO,
2566                             _PARAMS_FLAGS_GPU_CACHED, _FALSE, pPteInfo->pteFlags);
2567                 }
2568                 else
2569                 {
2570                     pPteInfo->pteFlags = FLD_SET_DRF(0080_CTRL, _DMA_PTE_INFO,
2571                             _PARAMS_FLAGS_GPU_CACHED, _TRUE, pPteInfo->pteFlags);
2572                 }
2573 
2574             }
2575         }
2576     }
2577     else
2578     {
2579         pPteInfo->pteFlags = FLD_SET_DRF_NUM(0080_CTRL, _DMA_PTE_INFO, _PARAMS_FLAGS_GPU_CACHED,
2580             !nvFieldGetBool(&pFmtPte->fldVolatile, pPte->v8), pPteInfo->pteFlags);
2581 
2582         if (nvFieldIsValid32(&pFmtPte->fldReadDisable.desc) &&
2583             nvFieldIsValid32(&pFmtPte->fldWriteDisable.desc))
2584         {
2585             if (nvFieldGetBool(&pFmtPte->fldWriteDisable, pPte->v8))
2586             {
2587                 pPteInfo->pteFlags = FLD_SET_DRF(0080_CTRL, _DMA_PTE_INFO,
2588                         _PARAMS_FLAGS_SHADER_ACCESS, _READ_ONLY, pPteInfo->pteFlags);
2589             }
2590             else if (nvFieldGetBool(&pFmtPte->fldReadDisable, pPte->v8))
2591             {
2592                 pPteInfo->pteFlags = FLD_SET_DRF(0080_CTRL, _DMA_PTE_INFO,
2593                         _PARAMS_FLAGS_SHADER_ACCESS, _WRITE_ONLY, pPteInfo->pteFlags);
2594             }
2595             else
2596             {
2597                 pPteInfo->pteFlags = FLD_SET_DRF(0080_CTRL, _DMA_PTE_INFO,
2598                         _PARAMS_FLAGS_SHADER_ACCESS, _READ_WRITE, pPteInfo->pteFlags);
2599             }
2600         }
2601         else
2602         {
2603             pPteInfo->pteFlags = FLD_SET_DRF(0080_CTRL, _DMA_PTE_INFO, _PARAMS_FLAGS_SHADER_ACCESS,
2604                 _NOT_SUPPORTED, pPteInfo->pteFlags);
2605         }
2606 
2607         pPteInfo->pteFlags = FLD_SET_DRF_NUM(0080_CTRL, _DMA_PTE_INFO, _PARAMS_FLAGS_READ_ONLY,
2608             nvFieldGetBool(&pFmtPte->fldReadOnly, pPte->v8), pPteInfo->pteFlags);
2609 
2610         // Get comptagline
2611         pPteInfo->comptagLine = nvFieldGet32(&pFmtPte->fldCompTagLine, pPte->v8);
2612     }
2613 
2614     // Get kind
2615     pPteInfo->kind = nvFieldGet32(&pFmtPte->fldKind, pPte->v8);
2616 
2617     //
2618     // Decode the comptags value from the kind. GF100 only supports 2 bits per ROP tile,
2619     // but future chips will use the other layouts.
2620     //
2621     if (memmgrIsKind_HAL(pMemoryManager, FB_IS_KIND_COMPRESSIBLE_1, pPteInfo->kind))
2622     {
2623         pPteInfo->pteFlags = FLD_SET_DRF(0080_CTRL, _DMA_PTE_INFO, _PARAMS_FLAGS_COMPTAGS, _1, pPteInfo->pteFlags);
2624     }
2625     else if (memmgrIsKind_HAL(pMemoryManager, FB_IS_KIND_COMPRESSIBLE_2, pPteInfo->kind))
2626     {
2627         pPteInfo->pteFlags = FLD_SET_DRF(0080_CTRL, _DMA_PTE_INFO, _PARAMS_FLAGS_COMPTAGS, _2, pPteInfo->pteFlags);
2628     }
2629     else if (memmgrIsKind_HAL(pMemoryManager, FB_IS_KIND_COMPRESSIBLE_4, pPteInfo->kind))
2630     {
2631         pPteInfo->pteFlags = FLD_SET_DRF(0080_CTRL, _DMA_PTE_INFO, _PARAMS_FLAGS_COMPTAGS, _4, pPteInfo->pteFlags);
2632     }
2633     else
2634     {
2635         pPteInfo->pteFlags = FLD_SET_DRF(0080_CTRL, _DMA_PTE_INFO, _PARAMS_FLAGS_COMPTAGS, _NONE, pPteInfo->pteFlags);
2636     }
2637 }
2638 
2639 NvS32*
2640 kgmmuGetFatalFaultIntrPendingState_IMPL
2641 (
2642     KernelGmmu *pKernelGmmu,
2643     NvU8 gfid
2644 )
2645 {
2646     return &pKernelGmmu->mmuFaultBuffer[gfid].fatalFaultIntrPending;
2647 }
2648 
2649 struct HW_FAULT_BUFFER*
2650 kgmmuGetHwFaultBufferPtr_IMPL
2651 (
2652     KernelGmmu *pKernelGmmu,
2653     NvU8 gfid,
2654     NvU8 faultBufferIndex
2655 )
2656 {
2657     return &pKernelGmmu->mmuFaultBuffer[gfid].hwFaultBuffers[faultBufferIndex];
2658 }
2659 
2660 NvU64
2661 kgmmuGetFaultBufferGenCnt_IMPL
2662 (
2663     OBJGPU     *pGpu,
2664     KernelGmmu *pKernelGmmu,
2665     NvU8        gfid
2666 )
2667 {
2668     return pKernelGmmu->mmuFaultBuffer[gfid].faultBufferGenerationCounter;
2669 }
2670 
2671 void *
2672 kgmmuGetShadowFaultBufferCslContext_IMPL
2673 (
2674     OBJGPU *pGpu,
2675     KernelGmmu *pKernelGmmu,
2676     FAULT_BUFFER_TYPE type
2677 )
2678 {
2679     ConfidentialCompute *pConfCompute = GPU_GET_CONF_COMPUTE(pGpu);
2680 
2681     if (!gpuIsCCFeatureEnabled(pGpu))
2682     {
2683         return NULL;
2684     }
2685 
2686     switch (type)
2687     {
2688         case NON_REPLAYABLE_FAULT_BUFFER:
2689             return pConfCompute->pNonReplayableFaultCcslCtx;
2690         case REPLAYABLE_FAULT_BUFFER:
2691             return pConfCompute->pReplayableFaultCcslCtx;
2692         default:
2693             break;
2694     }
2695 
2696     return NULL;
2697 }
2698 
2699 NV_STATUS
2700 kgmmuFaultBufferMap_IMPL
2701 (
2702     OBJGPU     *pGpu,
2703     KernelGmmu *pKernelGmmu,
2704     NvU32       index,
2705     NvU32       gfid
2706 )
2707 {
2708     MEMORY_DESCRIPTOR      *pMemDesc;
2709     struct HW_FAULT_BUFFER *pFaultBuffer;
2710     MemoryManager          *pMemoryManager = GPU_GET_MEMORY_MANAGER(pGpu);
2711 
2712     NvU64       vaddr;
2713     NV_STATUS   status = NV_OK;
2714 
2715     // Return early if fault buffer is disabled
2716     if (pKernelGmmu->getProperty(pKernelGmmu, PDB_PROP_KGMMU_FAULT_BUFFER_DISABLED))
2717         return NV_OK;
2718 
2719     NV_ASSERT_OR_RETURN((index < NUM_FAULT_BUFFERS), NV_ERR_INVALID_ARGUMENT);
2720     NV_ASSERT_OR_RETURN(!IS_GSP_CLIENT(pGpu), NV_ERR_INVALID_STATE);
2721 
2722     pFaultBuffer = &pKernelGmmu->mmuFaultBuffer[gfid].hwFaultBuffers[index];
2723     pMemDesc = pFaultBuffer->pFaultBufferMemDesc;
2724 
2725     memmgrSetMemDescPageSize_HAL(pGpu, pMemoryManager, pMemDesc, AT_GPU, RM_ATTR_PAGE_SIZE_4KB);
2726 
2727     {
2728         status = kbusMapCpuInvisibleBar2Aperture_HAL(pGpu, GPU_GET_KERNEL_BUS(pGpu), pMemDesc,
2729                                                      &vaddr, pMemDesc->Size, 0, gfid);
2730         if (status != NV_OK)
2731         {
2732             return status;
2733         }
2734 
2735         NV_ASSERT(pFaultBuffer->bar2FaultBufferAddr == 0);
2736         pFaultBuffer->bar2FaultBufferAddr = vaddr;
2737     }
2738 
2739     if (IS_GFID_PF(gfid))
2740     {
2741         if (pMemDesc->_addressSpace == ADDR_FBMEM && !RMCFG_FEATURE_PLATFORM_GSP)
2742         {
2743             //
2744             // For MODS and testing, the buffer can be allocated in FB. In such cases map it
2745             // through BAR2, as RM is the sole owner of this buffer. BAR1 mappings would need
2746             // code refactoring because the BAR1 VA space is allocated lazily.
2747             //
2748             pFaultBuffer->kernelVaddr = NV_PTR_TO_NvP64(kbusMapRmAperture_HAL(pGpu, pMemDesc));
2749             if (!pFaultBuffer->kernelVaddr)
2750             {
2751                 NV_ASSERT(0);
2752                 return NV_ERR_INVALID_ADDRESS;
2753             }
2754         }
2755         else
2756         {
2757             if (memdescGetContiguity(pMemDesc, AT_GPU))
2758             {
2759                 status = memdescMap(pMemDesc, 0, pMemDesc->Size, NV_TRUE, NV_PROTECT_READ_WRITE,
2760                                     &pFaultBuffer->kernelVaddr, &pFaultBuffer->hCpuFaultBuffer);
2761                 if (status != NV_OK)
2762                 {
2763                     return status;
2764                 }
2765             }
2766             else
2767             {
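                //
                // Discontiguous sysmem: map each 4KB page of the fault buffer
                // individually and remember the per-page mappings so they can be
                // torn down on failure or unmap.
                //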
2768                 NvU32 i, j;
2769                 NvU32 numPages = NV_ROUNDUP(pMemDesc->Size, RM_PAGE_SIZE) / RM_PAGE_SIZE;
2770                 GMMU_FAULT_BUFFER_PAGE *pBufferPage;
2771 
2772                 pFaultBuffer->pBufferPages = portMemAllocNonPaged(numPages * sizeof(GMMU_FAULT_BUFFER_PAGE));
2773                 if (pFaultBuffer->pBufferPages == NULL)
2774                 {
2775                     return NV_ERR_NO_MEMORY;
2776                 }
2777 
2778                 for (i = 0; i < numPages; i++)
2779                 {
2780                     pBufferPage = &pFaultBuffer->pBufferPages[i];
2781 
2782                     status = memdescMap(pMemDesc, i * RM_PAGE_SIZE, RM_PAGE_SIZE, 1, NV_PROTECT_READ_WRITE,
2783                                         &pBufferPage->pAddress, &pBufferPage->pPriv);
2784                     if (status != NV_OK)
2785                     {
2786                         break;
2787                     }
2788                 }
2789 
2790                 if (status != NV_OK)
2791                 {
2792                     for (j = 0; j < i; j++)
2793                     {
2794                         pBufferPage = &pFaultBuffer->pBufferPages[j];
2795 
2796                         memdescUnmap(pMemDesc, NV_TRUE, osGetCurrentProcess(),
2797                                      pBufferPage->pAddress, pBufferPage->pPriv);
2798                     }
2799 
2800                     portMemFree(pFaultBuffer->pBufferPages);
2801 
2802                     return status;
2803                 }
2804             }
2805         }
2806 
2807         if (memdescGetContiguity(pMemDesc, AT_GPU))
2808         {
2809             portMemSet(NvP64_VALUE(pFaultBuffer->kernelVaddr), 0, (NvLength)pMemDesc->Size);
2810         }
2811         else
2812         {
2813             NvU32 i;
2814             for (i = 0; i * RM_PAGE_SIZE < pMemDesc->Size; i++)
2815             {
2816                 GMMU_FAULT_BUFFER_PAGE *page = &pFaultBuffer->pBufferPages[i];
2817                 portMemSet(NvP64_VALUE(page->pAddress), 0, RM_PAGE_SIZE);
2818             }
2819         }
2820     }
2821 
2822     return status;
2823 }
2824 
2825 NV_STATUS
2826 kgmmuFaultBufferUnmap_IMPL
2827 (
2828     OBJGPU               *pGpu,
2829     KernelGmmu           *pKernelGmmu,
2830     NvU32                 index,
2831     NvU32                 gfid
2832 )
2833 {
2834     struct HW_FAULT_BUFFER *pFaultBuffer;
2835 
2836     NV_ASSERT_OR_RETURN((index < NUM_FAULT_BUFFERS), NV_ERR_INVALID_ARGUMENT);
2837 
2838     // Return early if fault buffer is disabled
2839     if (pKernelGmmu->getProperty(pKernelGmmu, PDB_PROP_KGMMU_FAULT_BUFFER_DISABLED))
2840         return NV_OK;
2841 
2842     pFaultBuffer = &pKernelGmmu->mmuFaultBuffer[gfid].hwFaultBuffers[index];
2843 
2844     if (pFaultBuffer == NULL)
2845     {
2846         return NV_OK;
2847     }
2848 
2849     if (pFaultBuffer->pFaultBufferMemDesc != NULL)
2850     {
2851         if (IS_GFID_PF(gfid))
2852         {
2853             // kbusUnmapRmAperture cannot handle discontiguous allocations on GSP-RM
2854             if (pFaultBuffer->pFaultBufferMemDesc->_addressSpace == ADDR_FBMEM &&
2855                 !RMCFG_FEATURE_PLATFORM_GSP)
2856             {
2857                 kbusUnmapRmAperture_HAL(pGpu,
2858                                         pFaultBuffer->pFaultBufferMemDesc,
2859                                         (NvU8 **)&pFaultBuffer->kernelVaddr,
2860                                         NV_TRUE);
2861             }
2862             else
2863             {
2864                 if (memdescGetContiguity(pFaultBuffer->pFaultBufferMemDesc, AT_GPU))
2865                 {
2866                     memdescUnmap(pFaultBuffer->pFaultBufferMemDesc, NV_TRUE, osGetCurrentProcess(),
2867                                  pFaultBuffer->kernelVaddr, pFaultBuffer->hCpuFaultBuffer);
2868                 }
2869                 else
2870                 {
2871                     if (pFaultBuffer->pBufferPages != NULL)
2872                     {
2873                         NvU32 i;
2874                         NvU32 numPages = NV_ROUNDUP(pFaultBuffer->pFaultBufferMemDesc->Size, RM_PAGE_SIZE) / RM_PAGE_SIZE;
2875 
2876                         for (i = 0; i < numPages; i++)
2877                         {
2878                             GMMU_FAULT_BUFFER_PAGE *pBufferPage;
2879 
2880                             pBufferPage = &pFaultBuffer->pBufferPages[i];
2881 
2882                             memdescUnmap(pFaultBuffer->pFaultBufferMemDesc, NV_TRUE, osGetCurrentProcess(),
2883                                          pBufferPage->pAddress, pBufferPage->pPriv);
2884                         }
2885 
2886                         portMemFree(pFaultBuffer->pBufferPages);
2887                     }
2888                 }
2889             }
2890         }
2891 
2892         {
2893             kbusUnmapCpuInvisibleBar2Aperture_HAL(pGpu, GPU_GET_KERNEL_BUS(pGpu),
2894                 pFaultBuffer->pFaultBufferMemDesc, pFaultBuffer->bar2FaultBufferAddr, gfid);
2895         }
2896     }
2897 
2898     pFaultBuffer->pBufferPages = NULL;
2899     pFaultBuffer->kernelVaddr = NvP64_NULL;
2900     pFaultBuffer->bar2FaultBufferAddr = 0;
2901     return NV_OK;
2902 }
2903 
2904 NV_STATUS
2905 kgmmuServiceVfPriFaults_IMPL
2906 (
2907     OBJGPU     *pGpu,
2908     KernelGmmu *pKernelGmmu,
2909     NvU32       faultType
2910 )
2911 {
2912     NV_STATUS status = NV_OK;
2913     NV2080_CTRL_CMD_GPU_HANDLE_VF_PRI_FAULT_PARAMS params;
2914 
2915     NV_ASSERT_OR_RETURN(IS_VIRTUAL_WITH_SRIOV(pGpu), NV_ERR_INVALID_ARGUMENT);
2916 
2917     if (faultType == NV2080_CTRL_CMD_GPU_HANDLE_VF_PRI_FAULT_TYPE_INVALID)
2918         return NV_ERR_INVALID_PARAMETER;
2919 
2920     portMemSet(&params, 0, sizeof(params));
2921     params.faultType = faultType;
2922 
2923     NV_RM_RPC_CONTROL(pGpu, pGpu->hDefaultClientShare, pGpu->hDefaultClientShareSubDevice,
2924                       NV2080_CTRL_CMD_GPU_HANDLE_VF_PRI_FAULT, &params, sizeof(params), status);
2925 
2926     return status;
2927 }
2928 
2929 NV_STATUS
2930 kgmmuFaultCancelTargeted_VF
2931 (
2932     OBJGPU                 *pGpu,
2933     KernelGmmu             *pKernelGmmu,
2934     GMMU_FAULT_CANCEL_INFO *pCancelInfo
2935 )
2936 {
2937     TLB_INVALIDATE_PARAMS params;
2938 
2939     // Clear struct before use.
2940     portMemSet(&params, 0, sizeof(TLB_INVALIDATE_PARAMS));
2941     gpuSetTimeout(pGpu, GPU_TIMEOUT_DEFAULT, &params.timeout, 0);
2942 
2943     params.gfid = GPU_GFID_PF;
2944 
2945     // Bug 2029506 fix will remove kgmmuFaultCancelIssueInvalidate call here
2946     return kgmmuFaultCancelIssueInvalidate_HAL(pGpu, pKernelGmmu, pCancelInfo,
2947                                                &params, NV_FALSE);
2948 }
2949 
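/*!
 * @brief Return the number of FB bytes to reserve for HW fault buffers.
 *
 * Sums the page-aligned default size of each HW fault buffer whose address
 * space resolves to FB; returns 0 when fault buffers are disabled.
 */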
2950 NvU32
2951 kgmmuGetFaultBufferReservedFbSpaceSize_IMPL
2952 (
2953     OBJGPU                 *pGpu,
2954     KernelGmmu             *pKernelGmmu
2955 )
2956 {
2957     NvU32 reservedBytes = 0;
2958     NvU32 faultBufferAddrSpace;
2959     NvU32 faultBufferSize;
2960     NvU32 i;
2961     NV_STATUS status;
2962 
2963     if (pKernelGmmu->getProperty(pKernelGmmu, PDB_PROP_KGMMU_FAULT_BUFFER_DISABLED))
2964         return 0;
2965 
2966     for (i = 0; i < NUM_FAULT_BUFFERS; i++)
2967     {
2968         status = kgmmuFaultBufferGetAddressSpace(pGpu, pKernelGmmu, i,
2969                                                  &faultBufferAddrSpace, NULL);
2970         NV_ASSERT(status == NV_OK);
2971         if (status != NV_OK || faultBufferAddrSpace != ADDR_FBMEM)
2972         {
2973             continue;
2974         }
2975 
2976         faultBufferSize = kgmmuSetAndGetDefaultFaultBufferSize_HAL(pGpu, pKernelGmmu, i, GPU_GFID_PF);
2977         reservedBytes += RM_PAGE_ALIGN_UP(faultBufferSize);
2978     }
2979 
2980     return reservedBytes;
2981 }
2982 
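/*!
 * Control call returning static GMMU info: the default replayable and
 * non-replayable fault buffer sizes, plus the shadow fault buffer metadata
 * sizes when Confidential Computing with GSP-owned fault buffers is enabled.
 *
 * @param[in]   pSubdevice
 * @param[out]  pParams
 *
 * @return NV_OK
 */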
NV_STATUS
subdeviceCtrlCmdGmmuGetStaticInfo_IMPL
(
    Subdevice *pSubdevice,
    NV2080_CTRL_INTERNAL_GMMU_GET_STATIC_INFO_PARAMS *pParams
)
{
    OBJGPU     *pGpu        = GPU_RES_GET_GPU(pSubdevice);
    KernelGmmu *pKernelGmmu = GPU_GET_KERNEL_GMMU(pGpu);

    pParams->replayableFaultBufferSize = kgmmuSetAndGetDefaultFaultBufferSize_HAL(pGpu, pKernelGmmu,
                                                                                  REPLAYABLE_FAULT_BUFFER,
                                                                                  GPU_GFID_PF);
    pParams->nonReplayableFaultBufferSize = kgmmuSetAndGetDefaultFaultBufferSize_HAL(pGpu, pKernelGmmu,
                                                                                     NON_REPLAYABLE_FAULT_BUFFER,
                                                                                     GPU_GFID_PF);

    if (gpuIsCCFeatureEnabled(pGpu) && gpuIsGspOwnedFaultBuffersEnabled(pGpu))
    {
        NvU32 maxNumPacketsReplayable = pParams->replayableFaultBufferSize / sizeof(struct GMMU_FAULT_PACKET);
        NvU32 maxNumPacketsNonReplayable = pParams->nonReplayableFaultBufferSize / sizeof(struct GMMU_FAULT_PACKET);

        pParams->replayableShadowFaultBufferMetadataSize    = sizeof(struct GMMU_FAULT_PACKET_METADATA) * maxNumPacketsReplayable;
        pParams->nonReplayableShadowFaultBufferMetadataSize = sizeof(struct GMMU_FAULT_PACKET_METADATA) * maxNumPacketsNonReplayable;
    }

    return NV_OK;
}

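/*!
 * Describe a client-allocated fault buffer into a new memory descriptor.
 *
 * For a contiguous buffer the descriptor is described directly from the
 * first page address; otherwise the descriptor's page array is filled from
 * the provided pages.
 *
 * @param[in]  pGpu
 * @param[in]  pKernelGmmu
 * @param[in]  index              Fault buffer index (replayable/non-replayable)
 * @param[in]  pFaultBufferPages  Physical page addresses backing the buffer
 * @param[in]  faultBufferSize    Size of the fault buffer in bytes
 *
 * @return NV_OK on success, pertinent error code on failure.
 */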
static NV_STATUS
_kgmmuFaultBufferDescribe
(
    OBJGPU               *pGpu,
    KernelGmmu           *pKernelGmmu,
    NvU32                 index,
    NvU64                *pFaultBufferPages,
    NvU32                 faultBufferSize
)
{
    NV_STATUS status;
    MEMORY_DESCRIPTOR *pMemDesc = NULL;
    struct HW_FAULT_BUFFER *pFaultBuffer;
    NvU32 faultBufferAddrSpace = ADDR_UNKNOWN;

    NV_ASSERT_OR_RETURN((index < NUM_FAULT_BUFFERS), NV_ERR_INVALID_ARGUMENT);

    status = kgmmuFaultBufferCreateMemDesc(pGpu, pKernelGmmu, index, faultBufferSize,
                                           (MEMDESC_FLAGS_GUEST_ALLOCATED |
                                            MEMDESC_FLAGS_EXT_PAGE_ARRAY_MEM),
                                           &pMemDesc);
    if (status != NV_OK)
    {
        return status;
    }

    pFaultBuffer = &pKernelGmmu->mmuFaultBuffer[GPU_GFID_PF].hwFaultBuffers[index];

    pFaultBuffer->faultBufferSize = faultBufferSize;
    pFaultBuffer->pFaultBufferMemDesc = NULL;

    {
        NvBool bIsContiguous = memdescGetContiguity(pMemDesc, AT_GPU);

        if (bIsContiguous)
        {
            status = kgmmuFaultBufferGetAddressSpace(pGpu, pKernelGmmu, index, &faultBufferAddrSpace, NULL);
            if (status != NV_OK)
            {
                memdescDestroy(pMemDesc);
                return status;
            }

            memdescDescribe(pMemDesc, faultBufferAddrSpace,
                            pFaultBufferPages[0], faultBufferSize);
        }
        else
        {
            memdescFillPages(pMemDesc, 0, pFaultBufferPages,
                             RM_PAGE_ALIGN_UP(faultBufferSize)/RM_PAGE_SIZE,
                             RM_PAGE_SIZE);
        }
    }

    pFaultBuffer->pFaultBufferMemDesc = pMemDesc;

    return NV_OK;
}

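/*!
 * Set up the replayable fault buffer from client-provided pages and load it
 * into HW. No-op on virtual GPUs without SR-IOV or when fault buffers are
 * disabled; fails if a replayable fault buffer is already registered.
 *
 * @param[in]  pGpu
 * @param[in]  pKernelGmmu
 * @param[in]  hClient            Client owning the fault buffer
 * @param[in]  hObject            Fault buffer object handle
 * @param[in]  faultBufferSize    Size of the fault buffer in bytes
 * @param[in]  pFaultBufferPages  Physical page addresses backing the buffer
 *
 * @return NV_OK on success, pertinent error code on failure.
 */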
NV_STATUS
kgmmuFaultBufferReplayableSetup_IMPL
(
    OBJGPU               *pGpu,
    KernelGmmu           *pKernelGmmu,
    NvHandle              hClient,
    NvHandle              hObject,
    NvU32                 faultBufferSize,
    NvU64                *pFaultBufferPages
)
{
    NV_STATUS status;
    struct HW_FAULT_BUFFER *pFaultBuffer;

    if (IS_VIRTUAL_WITHOUT_SRIOV(pGpu) ||
        pKernelGmmu->getProperty(pKernelGmmu, PDB_PROP_KGMMU_FAULT_BUFFER_DISABLED))
    {
        return NV_OK;
    }

    pFaultBuffer = &pKernelGmmu->mmuFaultBuffer[GPU_GFID_PF].hwFaultBuffers[REPLAYABLE_FAULT_BUFFER];
    if (pFaultBuffer->pFaultBufferMemDesc != NULL)
    {
        return NV_ERR_NOT_SUPPORTED;
    }

    status = _kgmmuFaultBufferDescribe(pGpu, pKernelGmmu, REPLAYABLE_FAULT_BUFFER,
                                       pFaultBufferPages, faultBufferSize);
    if (status != NV_OK)
    {
        return status;
    }

    status = kgmmuFaultBufferLoad_HAL(pGpu, pKernelGmmu, REPLAYABLE_FAULT_BUFFER, GPU_GFID_PF);
    if (status != NV_OK)
    {
        kgmmuFaultBufferUnregister(pGpu, pKernelGmmu, REPLAYABLE_FAULT_BUFFER);
        return status;
    }

    pKernelGmmu->mmuFaultBuffer[GPU_GFID_PF].hFaultBufferClient = hClient;
    pKernelGmmu->mmuFaultBuffer[GPU_GFID_PF].hFaultBufferObject = hObject;

    pKernelGmmu->setProperty(pKernelGmmu,
                             PDB_PROP_KGMMU_REPLAYABLE_FAULT_BUFFER_IN_USE,
                             NV_TRUE);

    return NV_OK;
}

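/*!
 * Internal control call to register the replayable fault buffer described by
 * the client-supplied PTE array.
 *
 * @param[in]  pSubdevice
 * @param[in]  pParams
 *
 * @return NV_OK on success, pertinent error code on failure.
 */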
NV_STATUS
subdeviceCtrlCmdInternalGmmuRegisterFaultBuffer_IMPL
(
    Subdevice *pSubdevice,
    NV2080_CTRL_INTERNAL_GMMU_REGISTER_FAULT_BUFFER_PARAMS *pParams
)
{
    OBJGPU     *pGpu        = GPU_RES_GET_GPU(pSubdevice);
    KernelGmmu *pKernelGmmu = GPU_GET_KERNEL_GMMU(pGpu);

    NV_PRINTF(LEVEL_INFO, "GMMU_REGISTER_FAULT_BUFFER\n");
    return kgmmuFaultBufferReplayableSetup(pGpu, pKernelGmmu,
                                           pParams->hClient,
                                           pParams->hObject,
                                           pParams->faultBufferSize,
                                           pParams->faultBufferPteArray);
}

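/*!
 * Internal control call to unregister the replayable fault buffer, clearing
 * the in-use property on success.
 *
 * @param[in]  pSubdevice
 *
 * @return NV_OK on success, pertinent error code on failure.
 */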
NV_STATUS
subdeviceCtrlCmdInternalGmmuUnregisterFaultBuffer_IMPL
(
    Subdevice *pSubdevice
)
{
    OBJGPU     *pGpu        = GPU_RES_GET_GPU(pSubdevice);
    KernelGmmu *pKernelGmmu = GPU_GET_KERNEL_GMMU(pGpu);
    NV_STATUS status;

    NV_PRINTF(LEVEL_INFO, "GMMU_UNREGISTER_FAULT_BUFFER\n");
    status = kgmmuFaultBufferReplayableDestroy(pGpu, pKernelGmmu);
    if (status == NV_OK)
    {
        pKernelGmmu->setProperty(pKernelGmmu,
                                 PDB_PROP_KGMMU_REPLAYABLE_FAULT_BUFFER_IN_USE,
                                 NV_FALSE);
    }
    return status;
}