/*
 * SPDX-FileCopyrightText: Copyright (c) 2021-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 * SPDX-License-Identifier: MIT
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 */

/******************************************************************************
 *
 * Kernel GMMU module header
 * Defines and structures used on CPU RM for the GMMU object.
 *
 ******************************************************************************/

#define NVOC_KERN_GMMU_H_PRIVATE_ACCESS_ALLOWED

#include "gpu/bif/kernel_bif.h"
#include "gpu/mmu/kern_gmmu.h"
#include "gpu/bus/kern_bus.h"
#include "gpu/nvlink/kernel_nvlink.h"
#include "gpu/mem_sys/kern_mem_sys.h"
#include "gpu/mem_mgr/mem_mgr.h"
#include "vgpu/vgpu_events.h"
#include "gpu/mem_mgr/mem_desc.h"
#include "gpu/subdevice/subdevice.h"
#include "os/os.h"
#include "rmapi/rmapi.h"
#include "gpu/gpu.h"
#include "nvRmReg.h"
#include "vgpu/rpc.h"
#include "kernel/gpu/intr/engine_idx.h"

#include "kernel/gpu/conf_compute/ccsl.h"

static void _kgmmuInitRegistryOverrides(OBJGPU *pGpu, KernelGmmu *pKernelGmmu);

/*!
 * KERNEL_GMMU constructor
 *
 * @param[in] pGpu
 * @param[in] pKernelGmmu
 * @param[in] engDesc       Engine descriptor
 *
 * @return NV_OK on success, pertinent error code on failure.
 */
NV_STATUS
kgmmuConstructEngine_IMPL(OBJGPU *pGpu, KernelGmmu *pKernelGmmu, ENGDESCRIPTOR engDesc)
{
    NvU32 v;

    kgmmuDetermineMaxVASize_HAL(pGpu, pKernelGmmu);

    if (gpuIsCacheOnlyModeEnabled(pGpu))
    {
        pKernelGmmu->bHugePageSupported      = NV_FALSE;
        pKernelGmmu->bPageSize512mbSupported = NV_FALSE;
    }

    // Allocate and init MMU format families.
    kgmmuFmtInitPdeApertures_HAL(pKernelGmmu, pKernelGmmu->pdeApertures);
    kgmmuFmtInitPteApertures_HAL(pKernelGmmu, pKernelGmmu->pteApertures);

    for (v = 0; v < GMMU_FMT_MAX_VERSION_COUNT; ++v)
    {
        const NvU32 ver = g_gmmuFmtVersions[v];
        if (kgmmuFmtIsVersionSupported_HAL(pKernelGmmu, ver))
        {
            GMMU_FMT_FAMILY *pFam = NULL;

            // Alloc version struct.
            pFam = portMemAllocNonPaged(sizeof(*pFam));
            NV_ASSERT_OR_RETURN((pFam != NULL), NV_ERR_NO_MEMORY);
            portMemSet(pFam, 0, sizeof(*pFam));
            pKernelGmmu->pFmtFamilies[v] = pFam;

            // Init PDE/PTE formats.
            kgmmuFmtInitPdeMulti_HAL(pKernelGmmu, &pFam->pdeMulti, ver, pKernelGmmu->pdeApertures);
            kgmmuFmtInitPde_HAL(pKernelGmmu, &pFam->pde, ver, pKernelGmmu->pdeApertures);
            kgmmuFmtInitPte_HAL(pKernelGmmu, &pFam->pte, ver, pKernelGmmu->pteApertures,
                                gpuIsUnifiedMemorySpaceEnabled(pGpu));

            kgmmuFmtInitPteComptagLine_HAL(pKernelGmmu, &pFam->pte, ver);
        }
        else
        {
            pKernelGmmu->pFmtFamilies[v] = NULL;
        }
    }

    NV_ASSERT_OK_OR_RETURN(kgmmuFmtInit(pKernelGmmu));

    portMemSet(&pKernelGmmu->mmuFaultBuffer, 0, sizeof(pKernelGmmu->mmuFaultBuffer));

    // Default placement for PDEs is in vidmem.
    pKernelGmmu->PDEAperture     = ADDR_FBMEM;
    pKernelGmmu->PDEAttr         = NV_MEMORY_WRITECOMBINED;
    pKernelGmmu->PDEBAR1Aperture = ADDR_FBMEM;
    pKernelGmmu->PDEBAR1Attr     = NV_MEMORY_WRITECOMBINED;

    // Default placement for PTEs is in vidmem.
    pKernelGmmu->PTEAperture     = ADDR_FBMEM;
    pKernelGmmu->PTEAttr         = NV_MEMORY_WRITECOMBINED;
    pKernelGmmu->PTEBAR1Aperture = ADDR_FBMEM;
    pKernelGmmu->PTEBAR1Attr     = NV_MEMORY_WRITECOMBINED;

    _kgmmuInitRegistryOverrides(pGpu, pKernelGmmu);

    return NV_OK;
}

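/*!
 * Helper to allocate and populate the GMMU static info.
 *
 * Initializes HAL format families where the host owns HW management, then
 * queries Physical RM via the internal control call for static GMMU data
 * (fault buffer sizes and related parameters).
 *
 * @param[in] pGpu
 * @param[in] pKernelGmmu
 *
 * @return NV_OK on success, pertinent error code on failure.
 */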
static NV_STATUS
_kgmmuInitStaticInfo
(
    OBJGPU     *pGpu,
    KernelGmmu *pKernelGmmu
)
{
    RM_API *pRmApi = GPU_GET_PHYSICAL_RMAPI(pGpu);
    NV_STATUS status;

    //
    // On vGPU, all hardware management is done by the host except for full SR-IOV.
    // Thus, only do any further HW initialization on the host.
    //
    if (!(IS_VIRTUAL_WITHOUT_SRIOV(pGpu) ||
          (IS_VIRTUAL_WITH_SRIOV(pGpu) && gpuIsWarBug200577889SriovHeavyEnabled(pGpu))))
    {
        // Init HAL specific features.
        NV_ASSERT_OK_OR_RETURN(kgmmuFmtFamiliesInit_HAL(pGpu, pKernelGmmu));
    }

    pKernelGmmu->pStaticInfo = portMemAllocNonPaged(sizeof(*pKernelGmmu->pStaticInfo));
    NV_CHECK_OR_RETURN(LEVEL_ERROR, pKernelGmmu->pStaticInfo != NULL, NV_ERR_INSUFFICIENT_RESOURCES);
    portMemSet(pKernelGmmu->pStaticInfo, 0, sizeof(*pKernelGmmu->pStaticInfo));

    NV_CHECK_OK_OR_GOTO(status, LEVEL_ERROR,
        pRmApi->Control(pRmApi, pGpu->hInternalClient, pGpu->hInternalSubdevice,
                        NV2080_CTRL_CMD_INTERNAL_GMMU_GET_STATIC_INFO,
                        pKernelGmmu->pStaticInfo, sizeof(*pKernelGmmu->pStaticInfo)), fail);

fail:
    if (status != NV_OK)
    {
        portMemFree(pKernelGmmu->pStaticInfo);
        // Clear the pointer so kgmmuStateDestroy_IMPL cannot free it again.
        pKernelGmmu->pStaticInfo = NULL;
    }

    return status;
}

/*
 * Initialize the Kernel GMMU state.
 *
 * @param pGpu
 * @param pKernelGmmu
 */
NV_STATUS kgmmuStateInitLocked_IMPL
(
    OBJGPU     *pGpu,
    KernelGmmu *pKernelGmmu
)
{
    KernelBif *pKernelBif = GPU_GET_KERNEL_BIF(pGpu);
    NV_STATUS  status;

    if (pKernelBif != NULL)
    {
        // This value shouldn't change after initialization, so cache it now
        pKernelGmmu->sysmemBaseAddress = pKernelBif->dmaWindowStartAddress;
    }

    status = _kgmmuInitStaticInfo(pGpu, pKernelGmmu);
    if (status != NV_OK)
    {
        return status;
    }

    if (IS_VIRTUAL_WITH_SRIOV(pGpu))
    {
        VGPU_STATIC_INFO *pVSI = GPU_GET_STATIC_INFO(pGpu);
        pGpu->setProperty(pGpu, PDB_PROP_GPU_ATS_SUPPORTED, pVSI->bAtsSupported);
    }

    // Setup Fault buffer if enabled
    if (!pKernelGmmu->getProperty(pKernelGmmu, PDB_PROP_KGMMU_FAULT_BUFFER_DISABLED))
    {
        NV_ASSERT_OK_OR_RETURN(kgmmuFaultBufferInit_HAL(pGpu, pKernelGmmu));
    }

    return status;
}

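/*!
 * Creates the global (device-wide) virtual address space for this GPU group.
 * Skipped on power-preserving state transitions and on non-parent GPUs.
 *
 * @param[in] pGpu
 * @param[in] pKernelGmmu
 * @param[in] flags        GPU state flags
 *
 * @return NV_OK on success, pertinent error code on failure.
 */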
static NV_STATUS
_kgmmuCreateGlobalVASpace
(
    OBJGPU     *pGpu,
    KernelGmmu *pKernelGmmu,
    NvU32       flags
)
{
    NvU32       constructFlags = VASPACE_FLAGS_NONE;
    OBJVASPACE *pGlobalVAS     = NULL;
    NV_STATUS   rmStatus;
    OBJGPUGRP  *pGpuGrp        = NULL;

    // Bail out early on sleep/suspend cases
    if (flags & GPU_STATE_FLAGS_PRESERVING)
        return NV_OK;
    if (!gpumgrIsParentGPU(pGpu))
        return NV_OK;

    //
    // We create the device vaspace at this point. Assemble the flags needed
    // for construction.
    //

    // Allow PTE in SYS
    constructFlags |= VASPACE_FLAGS_RETRY_PTE_ALLOC_IN_SYS;
    constructFlags |= DRF_DEF(_VASPACE, _FLAGS, _BIG_PAGE_SIZE, _DEFAULT);

    pGpuGrp = gpumgrGetGpuGrpFromGpu(pGpu);
    NV_ASSERT_OR_RETURN(pGpuGrp != NULL, NV_ERR_INVALID_DATA);

    rmStatus = gpugrpCreateGlobalVASpace(pGpuGrp, pGpu,
                                         FERMI_VASPACE_A,
                                         0, 0,
                                         constructFlags,
                                         &pGlobalVAS);
    NV_ASSERT_OR_RETURN((NV_OK == rmStatus), rmStatus);

    return NV_OK;
}

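/*!
 * Tears down the global virtual address space created at post-load.
 * A no-op on power-preserving state transitions.
 */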
static NV_STATUS
_kgmmuDestroyGlobalVASpace
(
    OBJGPU     *pGpu,
    KernelGmmu *pKernelGmmu,
    NvU32       flags
)
{
    OBJGPUGRP *pGpuGrp = NULL;

    if (flags & GPU_STATE_FLAGS_PRESERVING)
        return NV_OK;

    pGpuGrp = gpumgrGetGpuGrpFromGpu(pGpu);
    return gpugrpDestroyGlobalVASpace(pGpuGrp, pGpu);
}

/*
 * Helper function to enable ComputePeerMode
 */
NV_STATUS
kgmmuEnableComputePeerAddressing_IMPL
(
    OBJGPU     *pGpu,
    KernelGmmu *pKernelGmmu,
    NvU32       flags
)
{
    KernelBus *pKernelBus       = GPU_GET_KERNEL_BUS(pGpu);
    OBJSYS    *pSys             = SYS_GET_INSTANCE();
    NV_STATUS  status           = NV_OK;
    RM_API    *pRmApi           = GPU_GET_PHYSICAL_RMAPI(pGpu);
    NvBool     bComputePeerMode = NV_FALSE;

    if (pSys->getProperty(pSys, PDB_PROP_SYS_NVSWITCH_IS_PRESENT) ||
        kbusIsFlaSupported(pKernelBus))
    {
        bComputePeerMode = NV_TRUE;
    }

    if (bComputePeerMode)
    {
        status = kgmmuEnableNvlinkComputePeerAddressing_HAL(pKernelGmmu);
        if (status != NV_OK)
        {
            NV_PRINTF(LEVEL_ERROR,
                      "Failed to enable compute peer addressing for GPU %x, status:%x\n",
                      pGpu->gpuInstance, status);
            return status;
        }

        status = pRmApi->Control(pRmApi,
                                 pGpu->hInternalClient,
                                 pGpu->hInternalSubdevice,
                                 NV2080_CTRL_CMD_INTERNAL_NVLINK_ENABLE_COMPUTE_PEER_ADDR,
                                 NULL, 0);
    }
    return status;
}

/*
 * State Post Load
 */
NV_STATUS kgmmuStatePostLoad_IMPL
(
    OBJGPU     *pGpu,
    KernelGmmu *pKernelGmmu,
    NvU32       flags
)
{
    NV_STATUS status = NV_OK;

    status = _kgmmuCreateGlobalVASpace(pGpu, pKernelGmmu, flags);

    if (status != NV_OK)
    {
        NV_PRINTF(LEVEL_ERROR,
                  "Failed to create GVASpace, status:%x\n",
                  status);
        return status;
    }

    status = kgmmuEnableComputePeerAddressing(pGpu, pKernelGmmu, flags);

    if (status != NV_OK)
    {
        NV_PRINTF(LEVEL_ERROR,
                  "Failed to enable compute peer addressing, status:%x\n",
                  status);
        return status;
    }

    NV_ASSERT_OK_OR_RETURN(kgmmuInitCeMmuFaultIdRange_HAL(pGpu, pKernelGmmu));

    return status;
}

/*
 * State Pre Unload
 */
NV_STATUS
kgmmuStatePreUnload_IMPL
(
    OBJGPU     *pGpu,
    KernelGmmu *pKernelGmmu,
    NvU32       flags
)
{
    NV_STATUS status = NV_OK;

    status = _kgmmuDestroyGlobalVASpace(pGpu, pKernelGmmu, flags);

    if (status != NV_OK)
    {
        NV_PRINTF(LEVEL_ERROR,
                  "Failed to destroy GVASpace, status:%x\n",
                  status);
        return status;
    }
    return status;
}

/*!
 * KernelGmmu destructor
 *
 * @param[in] pKernelGmmu KernelGmmu object pointer
 */
void
kgmmuDestruct_IMPL(KernelGmmu *pKernelGmmu)
{
    NvU32 v;
    NvU32 b;

    // Free per big page size format and format-family storage.
    for (v = 0; v < GMMU_FMT_MAX_VERSION_COUNT; ++v)
    {
        if (NULL != pKernelGmmu->pFmtFamilies[v])
        {
            for (b = 0; b < GMMU_FMT_MAX_BIG_PAGE_SIZES; ++b)
            {
                portMemFree(pKernelGmmu->pFmtFamilies[v]->pFmts[b]);
                pKernelGmmu->pFmtFamilies[v]->pFmts[b] = NULL;
            }
            portMemFree(pKernelGmmu->pFmtFamilies[v]);
        }
    }
}

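/*!
 * Releases state allocated during init: the static info, the WAR page-table
 * memory descriptors, and (when enabled) the HW fault buffer.
 *
 * @param[in] pGpu
 * @param[in] pKernelGmmu
 */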
void
kgmmuStateDestroy_IMPL(OBJGPU *pGpu, KernelGmmu *pKernelGmmu)
{
    if (NULL != pKernelGmmu->pStaticInfo)
    {
        portMemFree((void *)pKernelGmmu->pStaticInfo);
        pKernelGmmu->pStaticInfo = NULL;
    }
    if (NULL != pKernelGmmu->pWarSmallPageTable)
    {
        memdescFree(pKernelGmmu->pWarSmallPageTable);
        memdescDestroy(pKernelGmmu->pWarSmallPageTable);
        pKernelGmmu->pWarSmallPageTable = NULL;
    }
    if (NULL != pKernelGmmu->pWarPageDirectory0)
    {
        memdescFree(pKernelGmmu->pWarPageDirectory0);
        memdescDestroy(pKernelGmmu->pWarPageDirectory0);
        pKernelGmmu->pWarPageDirectory0 = NULL;
    }

    // Only if faultBuffer is enabled
    if (!pKernelGmmu->getProperty(pKernelGmmu, PDB_PROP_KGMMU_FAULT_BUFFER_DISABLED))
    {
        (void)kgmmuFaultBufferDestroy_HAL(pGpu, pKernelGmmu);
    }
}

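/*!
 * State load: (re)enables the HW fault buffers. The non-replayable buffer is
 * always loaded when fault buffers are enabled; the replayable buffer is
 * reloaded only in the PM codepath while a client has it in use (see Note1).
 */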
NV_STATUS
kgmmuStateLoad_IMPL
(
    OBJGPU     *pGpu,
    KernelGmmu *pKernelGmmu,
    NvU32       flags
)
{
    NV_STATUS status = NV_OK;

    // Only if faultBuffer is enabled
    if (!pKernelGmmu->getProperty(pKernelGmmu, PDB_PROP_KGMMU_FAULT_BUFFER_DISABLED))
    {
        status = kgmmuFaultBufferLoad_HAL(pGpu, pKernelGmmu,
                                          NON_REPLAYABLE_FAULT_BUFFER, GPU_GFID_PF);
        NV_ASSERT_OK_OR_RETURN(status);

        //
        // Note1: We check both enablement of replayable fault buffer as well
        // as PM codepath because replayable fault buffer is client controlled
        // and it may or may not be enabled at the time of S3 entry / exit.
        // Also, the state of the replayable fault buffer needs to be
        // disabled / enabled during S3 entry / exit since the client is
        // unaware of its state being lost during S3 entry.
        //
        if ((pKernelGmmu->getProperty(pKernelGmmu,
                                      PDB_PROP_KGMMU_REPLAYABLE_FAULT_BUFFER_IN_USE)) &&
            (pGpu->getProperty(pGpu, PDB_PROP_GPU_IN_PM_CODEPATH)))
        {
            status = kgmmuFaultBufferLoad_HAL(pGpu, pKernelGmmu,
                                              REPLAYABLE_FAULT_BUFFER, GPU_GFID_PF);
        }
        return status;
    }

    return NV_OK;
}

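/*!
 * State unload: mirror image of kgmmuStateLoad_IMPL; disables the HW fault
 * buffers before the GPU state is torn down or suspended.
 */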
NV_STATUS
kgmmuStateUnload_IMPL
(
    OBJGPU     *pGpu,
    KernelGmmu *pKernelGmmu,
    NvU32       flags
)
{
    NV_STATUS status = NV_OK;

    // Only if faultBuffer is enabled
    if (!pKernelGmmu->getProperty(pKernelGmmu, PDB_PROP_KGMMU_FAULT_BUFFER_DISABLED))
    {
        status = kgmmuFaultBufferUnload_HAL(pGpu, pKernelGmmu,
                                            NON_REPLAYABLE_FAULT_BUFFER, GPU_GFID_PF);
        NV_ASSERT_OK_OR_RETURN(status);

        // See Note1:
        if ((pKernelGmmu->getProperty(pKernelGmmu,
                                      PDB_PROP_KGMMU_REPLAYABLE_FAULT_BUFFER_IN_USE)) &&
            (pGpu->getProperty(pGpu, PDB_PROP_GPU_IN_PM_CODEPATH)))
        {
            status = kgmmuFaultBufferUnload_HAL(pGpu, pKernelGmmu,
                                                REPLAYABLE_FAULT_BUFFER, GPU_GFID_PF);
        }
        return status;
    }

    return NV_OK;
}

/*!
 * Initializes KERN_GMMU state based on registry key overrides
 *
 * @param[in] pGpu
 * @param[in] pKernelGmmu
 */
static void
_kgmmuInitRegistryOverrides(OBJGPU *pGpu, KernelGmmu *pKernelGmmu)
{
    NvU32 data;

    memdescOverrideInstLoc(DRF_VAL(_REG_STR_RM, _INST_LOC, _PDE, pGpu->instLocOverrides),
                           "GMMU PDE",
                           &pKernelGmmu->PDEAperture,
                           &pKernelGmmu->PDEAttr);
    memdescOverrideInstLoc(DRF_VAL(_REG_STR_RM, _INST_LOC, _BAR_PDE, pGpu->instLocOverrides),
                           "BAR1 PDE",
                           &pKernelGmmu->PDEBAR1Aperture,
                           &pKernelGmmu->PDEBAR1Attr);
    memdescOverrideInstLoc(DRF_VAL(_REG_STR_RM, _INST_LOC, _PTE, pGpu->instLocOverrides),
                           "GMMU PTE",
                           &pKernelGmmu->PTEAperture,
                           &pKernelGmmu->PTEAttr);
    memdescOverrideInstLoc(DRF_VAL(_REG_STR_RM, _INST_LOC, _BAR_PTE, pGpu->instLocOverrides),
                           "BAR1 PTE",
                           &pKernelGmmu->PTEBAR1Aperture,
                           &pKernelGmmu->PTEBAR1Attr);

    //
    // Check if we want to disable big page size per address space
    //
    pKernelGmmu->bEnablePerVaspaceBigPage = IsGM20X(pGpu);
    if (NV_OK == osReadRegistryDword(pGpu,
                    NV_REG_STR_RM_DISABLE_BIG_PAGE_PER_ADDRESS_SPACE, &data))
    {
        pKernelGmmu->bEnablePerVaspaceBigPage = !data;
    }

    if (NV_OK == osReadRegistryDword(pGpu,
                    NV_REG_STR_FERMI_BIG_PAGE_SIZE, &data))
    {
        if (pGpu->optimizeUseCaseOverride !=
            NV_REG_STR_RM_OPTIMIZE_COMPUTE_OR_SPARSE_TEX_DEFAULT)
        {
            NV_PRINTF(LEVEL_ERROR,
                      "The %s regkey cannot be used with the %s regkey!\n",
                      NV_REG_STR_FERMI_BIG_PAGE_SIZE,
                      NV_REG_STR_RM_OPTIMIZE_COMPUTE_OR_SPARSE_TEX);
            return;
        }
        else
        {
            switch (data)
            {
                case NV_REG_STR_FERMI_BIG_PAGE_SIZE_64KB:
                case NV_REG_STR_FERMI_BIG_PAGE_SIZE_128KB:
                    pKernelGmmu->overrideBigPageSize = data;
                    break;
                default:
                    break;
            }
        }
    }
    else if (pGpu->optimizeUseCaseOverride !=
             NV_REG_STR_RM_OPTIMIZE_COMPUTE_OR_SPARSE_TEX_DEFAULT)
    {
        switch (pGpu->optimizeUseCaseOverride)
        {
            case NV_REG_STR_RM_OPTIMIZE_COMPUTE_OR_SPARSE_TEX_SPARSE_TEX:
                pKernelGmmu->overrideBigPageSize = RM_PAGE_SIZE_64K;
                break;
            case NV_REG_STR_RM_OPTIMIZE_COMPUTE_OR_SPARSE_TEX_COMPUTE:
                pKernelGmmu->overrideBigPageSize = RM_PAGE_SIZE_128K;
                break;
            default:
                break;
        }
    }

    // Check if HW fault buffer is disabled
    if (NV_OK == osReadRegistryDword(pGpu,
                    NV_REG_STR_RM_DISABLE_HW_FAULT_BUFFER, &data))
    {
        NV_PRINTF(LEVEL_ERROR,
                  "Overriding HW Fault buffer state to 0x%x due to regkey!\n",
                  data);
        pKernelGmmu->setProperty(pKernelGmmu, PDB_PROP_KGMMU_FAULT_BUFFER_DISABLED, data);
    }
}

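/*!
 * Translates a memory descriptor's address space into the GMMU aperture
 * encoding (vidmem, coherent sysmem, or non-coherent sysmem).
 */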
GMMU_APERTURE
kgmmuGetMemAperture_IMPL
(
    KernelGmmu        *pKernelGmmu,
    MEMORY_DESCRIPTOR *pMemDesc
)
{
    switch (memdescGetAddressSpace(pMemDesc))
    {
        case ADDR_FBMEM:
            return GMMU_APERTURE_VIDEO;
        case ADDR_SYSMEM:
            if (NV_MEMORY_CACHED == memdescGetCpuCacheAttrib(pMemDesc))
            {
                return GMMU_APERTURE_SYS_COH;
            }
            return GMMU_APERTURE_SYS_NONCOH;
        default:
            NV_ASSERT(0);
            return GMMU_APERTURE_INVALID;
    }
}

/*!
 * Initialize GMMU format structures dependent on big page size.
 */
NV_STATUS
kgmmuFmtInit_IMPL(KernelGmmu *pKernelGmmu)
{
    NvU32 v;
    NvU32 b;

    // Allocate and init MMU formats for the supported big page sizes.
    for (v = 0; v < GMMU_FMT_MAX_VERSION_COUNT; ++v)
    {
        const NvU32      ver  = g_gmmuFmtVersions[v];
        GMMU_FMT_FAMILY *pFam = pKernelGmmu->pFmtFamilies[v];
        if (NULL != pFam)
        {
            for (b = 0; b < GMMU_FMT_MAX_BIG_PAGE_SIZES; ++b)
            {
                const NvU32 bigPageShift = g_gmmuFmtBigPageShifts[b];

                // Allocate +1 level for the last dual-level.
                const NvU32 numLevels = GMMU_FMT_MAX_LEVELS + 1;
                const NvU32 size = sizeof(GMMU_FMT) + sizeof(MMU_FMT_LEVEL) * numLevels;
                MMU_FMT_LEVEL *pLvls;

                // Allocate format and levels in one chunk.
                pFam->pFmts[b] = portMemAllocNonPaged(size);
                NV_ASSERT_OR_RETURN((pFam->pFmts[b] != NULL), NV_ERR_NO_MEMORY);
                portMemSet(pFam->pFmts[b], 0, size);

                // Levels stored contiguously after the format struct.
                pLvls = (MMU_FMT_LEVEL *)(pFam->pFmts[b] + 1);

                // Common init.
                pFam->pFmts[b]->version   = ver;
                pFam->pFmts[b]->pRoot     = pLvls;
                pFam->pFmts[b]->pPdeMulti = &pFam->pdeMulti;
                pFam->pFmts[b]->pPde      = &pFam->pde;
                pFam->pFmts[b]->pPte      = &pFam->pte;

                kgmmuFmtInitLevels_HAL(pKernelGmmu, pLvls, numLevels, ver, bigPageShift);
                kgmmuFmtInitCaps_HAL(pKernelGmmu, pFam->pFmts[b]);
            }
        }
    }

    return NV_OK;
}

/*!
 * Retrieve GMMU format family based on version.
 */
const GMMU_FMT_FAMILY *
kgmmuFmtGetFamily_IMPL(KernelGmmu *pKernelGmmu, NvU32 version)
{
    NvU32 v;

    // Find a matching format.
    for (v = GMMU_FMT_MAX_VERSION_COUNT; v > 0; --v)
    {
        if (0 == version)
        {
            // Pick newest default version if none requested.
            if (NULL != pKernelGmmu->pFmtFamilies[v - 1])
            {
                return pKernelGmmu->pFmtFamilies[v - 1];
            }
        }
        else if (g_gmmuFmtVersions[v - 1] == version)
        {
            return pKernelGmmu->pFmtFamilies[v - 1];
        }
    }

    return NULL;
}

/*!
 * Returns GMMU settings that are static after GPU state init/load is
 * finished.
 */
const NV2080_CTRL_INTERNAL_GMMU_GET_STATIC_INFO_PARAMS *
kgmmuGetStaticInfo_IMPL
(
    OBJGPU     *pGpu,
    KernelGmmu *pKernelGmmu
)
{
    // Bail out if the KernelGmmu object is unavailable (state init not done).
    NV_ASSERT_OR_ELSE(pKernelGmmu != NULL, return NULL);

    return pKernelGmmu->pStaticInfo;
}

/*!
 * Retrieve GMMU format based on version and big page size.
 */
const GMMU_FMT *
kgmmuFmtGet_IMPL(KernelGmmu *pKernelGmmu, NvU32 version, NvU64 bigPageSize)
{
    const GMMU_FMT_FAMILY *pFmtFamily = kgmmuFmtGetFamily(pKernelGmmu, version);

    if (NULL != pFmtFamily)
    {
        NvU32 b;

        // Pick default big page size if none requested.
        if (0 == bigPageSize)
        {
            //
            // Retrieve Big Page Size. If it is not yet set, set it to 64K.
            // Useful when this method is invoked before big page size is set.
            //
            if (0 == (bigPageSize = kgmmuGetBigPageSize_HAL(pKernelGmmu)))
                bigPageSize = NVBIT64(16);
        }

        // Find a matching format.
        for (b = 0; b < GMMU_FMT_MAX_BIG_PAGE_SIZES; ++b)
        {
            if (NVBIT64(g_gmmuFmtBigPageShifts[b]) == bigPageSize)
            {
                return pFmtFamily->pFmts[b];
            }
        }
    }

    return NULL;
}

/*!
 * Check if a big page size is supported.
 */
NvBool
kgmmuFmtIsBigPageSizeSupported_IMPL(KernelGmmu *pKernelGmmu, NvU64 bigPageSize)
{
    if (kgmmuIsPerVaspaceBigPageEn(pKernelGmmu))
    {
        return NV_TRUE;
    }
    return kgmmuGetBigPageSize_HAL(pKernelGmmu) == bigPageSize;
}

/*!
 * @brief Returns the latest supported MMU fmt.
 *
 * @param[in] pGpu        OBJGPU pointer
 * @param[in] pKernelGmmu KernelGmmu pointer
 *
 * @returns const GMMU_FMT*
 */
const GMMU_FMT*
kgmmuFmtGetLatestSupportedFormat_IMPL(OBJGPU *pGpu, KernelGmmu *pKernelGmmu)
{
    NvU32 v;
    NvU32 maxFmtVersionSupported = 0;

    for (v = 0; v < GMMU_FMT_MAX_VERSION_COUNT; ++v)
    {
        const NvU32 ver = g_gmmuFmtVersions[v];
        if (kgmmuFmtIsVersionSupported_HAL(pKernelGmmu, ver))
        {
            maxFmtVersionSupported = maxFmtVersionSupported < ver ? ver : maxFmtVersionSupported;
        }
    }

    return kgmmuFmtGet(pKernelGmmu, maxFmtVersionSupported, 0);
}

/*!
 * @brief Calculates the total memory required for the page tables needed
 *        to translate a given VA range.
 *
 * @param      pGpu
 * @param      pKernelGmmu
 * @param[in]  pFmt             Pointer to GMMU format
 * @param[in]  vaBase           Start VA
 * @param[in]  vaLimit          End VA
 * @param[in]  pageSizeLockMask Mask of page sizes locked down at VA reservation
 *
 * @returns total size of page tables.
 */
NvU64
kgmmuGetSizeOfPageTables_IMPL
(
    OBJGPU         *pGpu,
    KernelGmmu     *pKernelGmmu,
    const GMMU_FMT *pFmt,
    NvU64           vaBase,
    NvU64           vaLimit,
    NvU64           pageSizeLockMask
)
{
    const MMU_FMT_LEVEL *pPgTbl = NULL;
    NvU64 pgTblSize = 0;
    NvU64 numPgTblsCeil;
    NvU64 numPgTblsFloor;
    NvU64 numEntries;
    NvU32 pageShift;

    // Loop over all page table sizes in mask
    FOR_EACH_INDEX_IN_MASK(64, pageShift, pageSizeLockMask)
    {
        pPgTbl = mmuFmtFindLevelWithPageShift(pFmt->pRoot, pageShift);

        //
        // Do not consider page directories. They are handled by
        // @ref kgmmuGetSizeOfPageDirs.
        //
        if (!pPgTbl->bPageTable || (pPgTbl->numSubLevels != 0))
        {
            continue;
        }

        numPgTblsCeil = NV_DIV_AND_CEIL(vaLimit, NVBIT64(pPgTbl->virtAddrBitHi + 1)) -
                        (vaBase / NVBIT64(pPgTbl->virtAddrBitHi + 1)) + 1;
        numPgTblsFloor = vaLimit / NVBIT64(pPgTbl->virtAddrBitHi + 1);

        // If full page tables are not used, allocate only as much as needed.
        if (numPgTblsFloor == 0)
        {
            numEntries = mmuFmtVirtAddrToEntryIndex(pPgTbl, vaLimit) -
                         mmuFmtVirtAddrToEntryIndex(pPgTbl, vaBase) + 1;
            pgTblSize += numEntries * pPgTbl->entrySize;
        }
        else
        {
            pgTblSize += numPgTblsCeil * mmuFmtLevelSize(pPgTbl);
        }
    }
    FOR_EACH_INDEX_IN_MASK_END

    return pgTblSize;
}

/*!
 * @brief Calculates the total memory required for the page directories needed
 *        to translate a given VA range.
 *
 * @param      pGpu
 * @param      pKernelGmmu
 * @param[in]  pFmt    Pointer to GMMU format
 * @param[in]  vaBase  Start VA
 * @param[in]  vaLimit End VA
 *
 * @returns total size of page directories
 */
NvU64
kgmmuGetSizeOfPageDirs_IMPL
(
    OBJGPU         *pGpu,
    KernelGmmu     *pKernelGmmu,
    const GMMU_FMT *pFmt,
    NvU64           vaBase,
    NvU64           vaLimit,
    NvU64           pageSizeLockMask
)
{
    const MMU_FMT_LEVEL *pLevel = NULL;
    NvU64 size = 0;
    NvU16 i;

    NV_ASSERT_OR_RETURN(pFmt != NULL, 0);

    pLevel = pFmt->pRoot;

    //
    // Retain only the lowest set bit.
    //
    // If the lowest set bit corresponds to a leaf page table (4K or 64K), we'll
    // calculate memory for all upper level page directories, and if the set bit
    // corresponds to an upper level page directory we'll factor in all levels
    // from the root up to that level.
    //
    pageSizeLockMask = pageSizeLockMask & -((NvS64)pageSizeLockMask);

    // Accumulate size for all Page Directories.
    for (i = 0; i < GMMU_FMT_MAX_LEVELS - 1; i++)
    {
        NvU64 vaPerEntry = mmuFmtEntryVirtAddrMask(pLevel) + 1;
        NvU64 numEntries = NV_DIV_AND_CEIL(vaLimit, vaPerEntry) -
                           (vaBase / vaPerEntry) + 1;
        NvU64 levelSize  = numEntries * pLevel->entrySize;
        levelSize        = NV_ROUNDUP(levelSize, RM_PAGE_SIZE);

        // Stop accumulating size once we are beyond the specified level.
        if (mmuFmtLevelPageSize(pLevel) < pageSizeLockMask)
        {
            break;
        }

        size += levelSize;

        // If there's one sublevel choose that.
        if (pLevel->numSubLevels == 1)
        {
            pLevel = &(pLevel->subLevels[0]);
        }
        else
        {
            // Choose the 4K page size sublevel.
            pLevel = &(pLevel->subLevels[1]);
        }
        NV_ASSERT_OR_RETURN(pLevel != NULL, 0);

        // Stop accumulating size if we've exhausted all Page Dirs.
        if (pLevel->bPageTable && (pLevel->numSubLevels == 0))
        {
            break;
        }
    }

    return size;
}

/*
 * Fill comptag field in PTE.
 */
void kgmmuFieldSetKindCompTags_IMPL
(
    KernelGmmu          *pGmmu,
    const GMMU_FMT      *pFmt,
    const MMU_FMT_LEVEL *pLevel,
    const COMPR_INFO    *pCompr,
    NvU64                physAddr,
    NvU64                surfOffset,
    NvU32                pteIndex,
    NvU8                *pEntries
)
{
    OBJGPU         *pGpu      = ENG_GET_GPU(pGmmu);
    GMMU_COMPR_INFO comprInfo = {0};

    comprInfo.compressedKind = pCompr->kind;
    comprInfo.compPageShift  = pCompr->compPageShift;

    if (memmgrIsKind_HAL(GPU_GET_MEMORY_MANAGER(pGpu), FB_IS_KIND_COMPRESSIBLE, pCompr->kind))
    {
        const MEMORY_SYSTEM_STATIC_CONFIG *pMemorySystemConfig =
            kmemsysGetStaticConfig(pGpu, GPU_GET_KERNEL_MEMORY_SYSTEM(pGpu));

        if (pCompr->bPhysBasedComptags)
        {
            NvBool bCallingContextPlugin;

            NV_ASSERT(pMemorySystemConfig->bOneToOneComptagLineAllocation || pMemorySystemConfig->bUseRawModeComptaglineAllocation);

            NV_ASSERT_OR_RETURN_VOID(vgpuIsCallingContextPlugin(pGpu, &bCallingContextPlugin) == NV_OK);
            if (IS_VIRTUAL_WITH_SRIOV(pGpu) || bCallingContextPlugin ||
                pMemorySystemConfig->bUseRawModeComptaglineAllocation)
            {
                // In raw mode or when SR-IOV is enabled, HW handles compression tags
                comprInfo.compTagLineMin = 1;
            }
            else
            {
                comprInfo.compTagLineMin = memmgrDetermineComptag_HAL(pGpu, GPU_GET_MEMORY_MANAGER(pGpu), physAddr);
            }

            comprInfo.compPageIndexLo       = surfOffset >> pCompr->compPageShift;
            comprInfo.compPageIndexHi       = (surfOffset + mmuFmtLevelPageSize(pLevel) - 1) >> pCompr->compPageShift;
            comprInfo.compTagLineMultiplier = 1;
        }
        else
        {
            comprInfo.compPageIndexLo       = pCompr->compPageIndexLo;
            comprInfo.compPageIndexHi       = pCompr->compPageIndexHi;
            comprInfo.compTagLineMin        = pCompr->compTagLineMin;
            comprInfo.compTagLineMultiplier = pCompr->compTagLineMultiplier;
        }
    }

    gmmuFmtInitPteCompTags(pFmt, pLevel, &comprInfo, surfOffset, pteIndex, 1, pEntries);
}

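/*!
 * Determines the address space and cache attribute for a HW fault buffer.
 * With Hopper CC enabled the buffers must live in CPR vidmem; otherwise they
 * default to cached sysmem, subject to registry instance-location overrides.
 */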
NV_STATUS
kgmmuFaultBufferGetAddressSpace_IMPL
(
    OBJGPU     *pGpu,
    KernelGmmu *pKernelGmmu,
    NvU32       index,
    NvU32      *pFaultBufferAddrSpace,
    NvU32      *pFaultBufferAttr
)
{
    NvU32  faultBufferAddrSpace = ADDR_UNKNOWN;
    NvU32  faultBufferAttr      = 0;
    NvBool bAllocInVidmem       = NV_FALSE;

    bAllocInVidmem = gpuIsCCFeatureEnabled(pGpu);

    NV_ASSERT_OR_RETURN((index < NUM_FAULT_BUFFERS), NV_ERR_INVALID_ARGUMENT);

    if (index == NON_REPLAYABLE_FAULT_BUFFER)
    {
        faultBufferAddrSpace = bAllocInVidmem ? ADDR_FBMEM : ADDR_SYSMEM;
        faultBufferAttr      = bAllocInVidmem ? NV_MEMORY_UNCACHED : NV_MEMORY_CACHED;
        memdescOverrideInstLoc(DRF_VAL(_REG_STR_RM, _INST_LOC_3, _UVM_FAULT_BUFFER_NONREPLAYABLE, pGpu->instLocOverrides3),
                               "UVM non-replayable fault", &faultBufferAddrSpace, &faultBufferAttr);
    }
    else if (index == REPLAYABLE_FAULT_BUFFER)
    {
        faultBufferAddrSpace = bAllocInVidmem ? ADDR_FBMEM : ADDR_SYSMEM;
        faultBufferAttr      = bAllocInVidmem ? NV_MEMORY_UNCACHED : NV_MEMORY_CACHED;
        memdescOverrideInstLoc(DRF_VAL(_REG_STR_RM, _INST_LOC_4, _UVM_FAULT_BUFFER_REPLAYABLE, pGpu->instLocOverrides4),
                               "UVM replayable fault", &faultBufferAddrSpace, &faultBufferAttr);
    }

    //
    // Whenever Hopper CC is enabled, HW requires both replayable and non-replayable
    // fault buffers to be in CPR vidmem. It would be illegal to allocate the buffers
    // in any other aperture.
    //
    if (bAllocInVidmem && (faultBufferAddrSpace == ADDR_SYSMEM))
    {
        NV_PRINTF(LEVEL_ERROR, "Fault buffers must be in CPR vidmem when HCC is enabled\n");
        NV_ASSERT(0);
        return NV_ERR_INVALID_ARGUMENT;
    }

    if (pFaultBufferAddrSpace != NULL)
    {
        *pFaultBufferAddrSpace = faultBufferAddrSpace;
    }

    if (pFaultBufferAttr != NULL)
    {
        *pFaultBufferAttr = faultBufferAttr;
    }

    return NV_OK;
}

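/*!
 * Creates (but does not allocate) the memory descriptor backing a HW fault
 * buffer. Contiguity is forced for SR-IOV heavy and Hopper CC configurations,
 * and sysmem buffers are marked GPU-uncached so MMU fault writes bypass L2.
 */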
NV_STATUS
kgmmuFaultBufferCreateMemDesc_IMPL
(
    OBJGPU             *pGpu,
    KernelGmmu         *pKernelGmmu,
    NvU32               index,
    NvU32               faultBufferSize,
    NvU64               memDescFlags,
    MEMORY_DESCRIPTOR **ppMemDesc
)
{
    NV_STATUS          status;
    MEMORY_DESCRIPTOR *pMemDesc             = NULL;
    NvU32              faultBufferAddrSpace = ADDR_UNKNOWN;
    NvU32              faultBufferAttr      = 0;
    NvBool             isContiguous         = NV_FALSE;

    NV_ASSERT_OR_RETURN((index < NUM_FAULT_BUFFERS), NV_ERR_INVALID_ARGUMENT);

    status = kgmmuFaultBufferGetAddressSpace(pGpu, pKernelGmmu, index,
                                             &faultBufferAddrSpace, &faultBufferAttr);
    if (status != NV_OK)
    {
        return status;
    }

    if ((IS_VIRTUAL(pGpu) && gpuIsWarBug200577889SriovHeavyEnabled(pGpu)) ||
        gpuIsCCFeatureEnabled(pGpu))
    {
        // Allocate contiguous fault buffers for SR-IOV Heavy.
        // Fault buffers get allocated in CPR vidmem when Hopper CC is enabled;
        // we're almost assured to get contiguous allocations in vidmem.
        isContiguous = NV_TRUE;
    }

    status = memdescCreate(&pMemDesc, pGpu,
                           RM_PAGE_ALIGN_UP(faultBufferSize), 0, isContiguous,
                           faultBufferAddrSpace, faultBufferAttr,
                           (memDescFlags | MEMDESC_FLAGS_LOST_ON_SUSPEND));
    if (status != NV_OK)
    {
        return status;
    }

    //
    // GPU doesn't read faultbuffer memory, so if faultBuffers are in sysmem, ensure that GpuCacheAttr
    // is set to UNCACHED as having a vol bit set in PTEs will ensure HUB uses L2Bypass mode and it will
    // save extra cycles to cache in L2 while MMU will write fault packets.
    //
    if (faultBufferAddrSpace == ADDR_SYSMEM &&
        pKernelGmmu->getProperty(pKernelGmmu, PDB_PROP_KGMMU_SYSMEM_FAULT_BUFFER_GPU_UNCACHED))
    {
        memdescSetGpuCacheAttrib(pMemDesc, NV_MEMORY_UNCACHED);
    }

    memdescSetPageSize(pMemDesc, AT_GPU, RM_PAGE_SIZE);

    *ppMemDesc = pMemDesc;

    return NV_OK;
}

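/*!
 * Detaches the memory descriptor from a HW fault buffer slot and destroys it.
 * The backing memory itself is expected to have been freed already (see
 * kgmmuFaultBufferFree_IMPL) or to be owned elsewhere.
 */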
NV_STATUS
kgmmuFaultBufferUnregister_IMPL
(
    OBJGPU     *pGpu,
    KernelGmmu *pKernelGmmu,
    NvU32       index
)
{
    struct HW_FAULT_BUFFER *pFaultBuffer;
    MEMORY_DESCRIPTOR      *pMemDesc;

    pFaultBuffer = &pKernelGmmu->mmuFaultBuffer[GPU_GFID_PF].hwFaultBuffers[index];
    pMemDesc     = pFaultBuffer->pFaultBufferMemDesc;

    pFaultBuffer->faultBufferSize     = 0;
    pFaultBuffer->pFaultBufferMemDesc = NULL;

    memdescDestroy(pMemDesc);

    return NV_OK;
}

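/*!
 * Allocates backing memory for a HW fault buffer and records it in the
 * per-GFID fault buffer state. A no-op when fault buffers are disabled.
 */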
NV_STATUS
kgmmuFaultBufferAlloc_IMPL
(
    OBJGPU     *pGpu,
    KernelGmmu *pKernelGmmu,
    NvU32       index,
    NvU32       faultBufferSize
)
{
    NV_STATUS               status;
    MEMORY_DESCRIPTOR      *pMemDesc = NULL;
    struct HW_FAULT_BUFFER *pFaultBuffer;
    const char             *name = (index == REPLAYABLE_FAULT_BUFFER ? NV_RM_SURF_NAME_REPLAYABLE_FAULT_BUFFER : NV_RM_SURF_NAME_NONREPLAYABLE_FAULT_BUFFER);

    NV_ASSERT_OR_RETURN((index < NUM_FAULT_BUFFERS), NV_ERR_INVALID_ARGUMENT);

    if (pKernelGmmu->getProperty(pKernelGmmu, PDB_PROP_KGMMU_FAULT_BUFFER_DISABLED))
        return NV_OK;

    pFaultBuffer = &pKernelGmmu->mmuFaultBuffer[GPU_GFID_PF].hwFaultBuffers[index];

    status = kgmmuFaultBufferCreateMemDesc(pGpu, pKernelGmmu, index, faultBufferSize,
                                           MEMDESC_FLAGS_NONE, &pMemDesc);
    if (status != NV_OK)
    {
        return status;
    }

    memdescTagAlloc(status, NV_FB_ALLOC_RM_INTERNAL_OWNER_UNNAMED_TAG_31,
                    pMemDesc);
    if (status != NV_OK)
    {
        memdescDestroy(pMemDesc);
        return status;
    }

    memdescSetName(pGpu, pMemDesc, name, NULL);

    pFaultBuffer->faultBufferSize     = faultBufferSize;
    pFaultBuffer->pFaultBufferMemDesc = pMemDesc;

    return status;
}

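/*!
 * Frees the backing memory of a HW fault buffer and unregisters it.
 * A no-op when fault buffers are disabled.
 */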
NV_STATUS
kgmmuFaultBufferFree_IMPL
(
    OBJGPU     *pGpu,
    KernelGmmu *pKernelGmmu,
    NvU32       index
)
{
    struct HW_FAULT_BUFFER *pFaultBuffer;

    NV_ASSERT_OR_RETURN((index < NUM_FAULT_BUFFERS), NV_ERR_INVALID_ARGUMENT);

    if (pKernelGmmu->getProperty(pKernelGmmu, PDB_PROP_KGMMU_FAULT_BUFFER_DISABLED))
        return NV_OK;

    pFaultBuffer = &pKernelGmmu->mmuFaultBuffer[GPU_GFID_PF].hwFaultBuffers[index];

    memdescFree(pFaultBuffer->pFaultBufferMemDesc);

    kgmmuFaultBufferUnregister(pGpu, pKernelGmmu, index);

    return NV_OK;
}

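/*!
 * Allocates the replayable fault buffer on behalf of a client and either
 * registers it with GSP-RM (GSP client builds) or loads it into HW directly.
 *
 * @param[in] pGpu
 * @param[in] pKernelGmmu
 * @param[in] hClient  Client handle that owns the buffer
 * @param[in] hObject  Object handle for the buffer
 */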
NV_STATUS
kgmmuFaultBufferReplayableAllocate_IMPL
(
    OBJGPU     *pGpu,
    KernelGmmu *pKernelGmmu,
    NvHandle    hClient,
    NvHandle    hObject
)
{
    NV_STATUS               status;
    struct HW_FAULT_BUFFER *pFaultBuffer;
    NvU32                   faultBufferSize;
    NvU32                   numBufferPages;
    const NV2080_CTRL_INTERNAL_GMMU_GET_STATIC_INFO_PARAMS *pStaticInfo = kgmmuGetStaticInfo(pGpu, pKernelGmmu);

    if (IS_VIRTUAL_WITHOUT_SRIOV(pGpu) ||
        pKernelGmmu->getProperty(pKernelGmmu, PDB_PROP_KGMMU_FAULT_BUFFER_DISABLED))
    {
        return NV_OK;
    }

    pFaultBuffer = &pKernelGmmu->mmuFaultBuffer[GPU_GFID_PF].hwFaultBuffers[REPLAYABLE_FAULT_BUFFER];
    if (pFaultBuffer->pFaultBufferMemDesc != NULL)
    {
        return NV_ERR_NOT_SUPPORTED;
    }

    faultBufferSize = pStaticInfo->replayableFaultBufferSize;

    status = kgmmuFaultBufferAlloc(pGpu, pKernelGmmu,
                                   REPLAYABLE_FAULT_BUFFER,
                                   faultBufferSize);
    if (status != NV_OK)
    {
        return status;
    }

    if (IS_GSP_CLIENT(pGpu))
    {
        RM_API *pRmApi = GPU_GET_PHYSICAL_RMAPI(pGpu);
        NV2080_CTRL_INTERNAL_GMMU_REGISTER_FAULT_BUFFER_PARAMS *pParams;

        pParams = portMemAllocNonPaged(sizeof(*pParams));
        if (pParams == NULL)
        {
            kgmmuFaultBufferFree(pGpu, pKernelGmmu, REPLAYABLE_FAULT_BUFFER);
            return NV_ERR_NO_MEMORY;
        }
        portMemSet(pParams, 0, sizeof(*pParams));

        numBufferPages = RM_PAGE_ALIGN_UP(faultBufferSize) / RM_PAGE_SIZE;
        if (numBufferPages > NV_ARRAY_ELEMENTS(pParams->faultBufferPteArray))
        {
            portMemFree(pParams);
            kgmmuFaultBufferFree(pGpu, pKernelGmmu, REPLAYABLE_FAULT_BUFFER);
            return NV_ERR_BUFFER_TOO_SMALL;
        }

        memdescGetPhysAddrs(pFaultBuffer->pFaultBufferMemDesc,
                            AT_GPU, 0, RM_PAGE_SIZE,
                            numBufferPages, pParams->faultBufferPteArray);

        pParams->hClient         = hClient;
        pParams->hObject         = hObject;
        pParams->faultBufferSize = faultBufferSize;

        status = pRmApi->Control(pRmApi,
                                 pGpu->hInternalClient,
                                 pGpu->hInternalSubdevice,
                                 NV2080_CTRL_CMD_INTERNAL_GMMU_REGISTER_FAULT_BUFFER,
                                 pParams, sizeof(*pParams));

        portMemFree(pParams);
        if (status != NV_OK)
        {
            kgmmuFaultBufferFree(pGpu, pKernelGmmu, REPLAYABLE_FAULT_BUFFER);
            return status;
        }
    }
    else
    {
        status = kgmmuFaultBufferLoad_HAL(pGpu, pKernelGmmu, REPLAYABLE_FAULT_BUFFER, GPU_GFID_PF);

        if (status != NV_OK)
        {
            kgmmuFaultBufferFree(pGpu, pKernelGmmu, REPLAYABLE_FAULT_BUFFER);
            return status;
        }

        // for non-gsp builds, set the pdb property here.
        pKernelGmmu->setProperty(pKernelGmmu,
                                 PDB_PROP_KGMMU_REPLAYABLE_FAULT_BUFFER_IN_USE,
                                 NV_TRUE);
    }

    pKernelGmmu->mmuFaultBuffer[GPU_GFID_PF].hFaultBufferClient = hClient;
    pKernelGmmu->mmuFaultBuffer[GPU_GFID_PF].hFaultBufferObject = hObject;
    pKernelGmmu->mmuFaultBuffer[GPU_GFID_PF].faultBufferGenerationCounter = 0;

    return NV_OK;
}

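/*!
 * Destroys the replayable fault buffer: unregisters it from GSP-RM or unloads
 * it from HW, then releases the backing memory. Errors are logged and
 * tolerated so teardown can proceed.
 */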
NV_STATUS
kgmmuFaultBufferReplayableDestroy_IMPL
(
    OBJGPU     *pGpu,
    KernelGmmu *pKernelGmmu
)
{
    NV_STATUS               status = NV_OK;
    struct HW_FAULT_BUFFER *pFaultBuffer;

    if (IS_VIRTUAL_WITHOUT_SRIOV(pGpu) ||
        pKernelGmmu->getProperty(pKernelGmmu, PDB_PROP_KGMMU_FAULT_BUFFER_DISABLED))
    {
        return NV_OK;
    }

    pFaultBuffer = &pKernelGmmu->mmuFaultBuffer[GPU_GFID_PF].hwFaultBuffers[REPLAYABLE_FAULT_BUFFER];
    if (pFaultBuffer->pFaultBufferMemDesc == NULL)
    {
        return NV_OK;
    }

    pKernelGmmu->mmuFaultBuffer[GPU_GFID_PF].hFaultBufferClient = 0;
    pKernelGmmu->mmuFaultBuffer[GPU_GFID_PF].hFaultBufferObject = 0;

    if (IS_GSP_CLIENT(pGpu))
    {
        RM_API *pRmApi = GPU_GET_PHYSICAL_RMAPI(pGpu);
        status = pRmApi->Control(pRmApi,
                                 pGpu->hInternalClient,
                                 pGpu->hInternalSubdevice,
                                 NV2080_CTRL_CMD_INTERNAL_GMMU_UNREGISTER_FAULT_BUFFER,
                                 NULL, 0);
        if (status != NV_OK)
        {
            NV_PRINTF(LEVEL_ERROR,
                      "Unregistering Replayable Fault buffer failed (status=0x%08x), proceeding...\n",
                      status);
        }
    }
    else
    {
        status = kgmmuFaultBufferUnload_HAL(pGpu, pKernelGmmu, REPLAYABLE_FAULT_BUFFER, GPU_GFID_PF);
        if (status != NV_OK)
        {
            NV_PRINTF(LEVEL_ERROR,
                      "Unloading Replayable Fault buffer failed (status=0x%08x), proceeding...\n",
                      status);
        }
        else
        {
            // for non-gsp builds, reset the pdb property here.
            pKernelGmmu->setProperty(pKernelGmmu,
                                     PDB_PROP_KGMMU_REPLAYABLE_FAULT_BUFFER_IN_USE,
                                     NV_FALSE);
        }
    }

    if (RMCFG_FEATURE_PLATFORM_GSP)
    {
        status = kgmmuFaultBufferUnregister(pGpu, pKernelGmmu, REPLAYABLE_FAULT_BUFFER);
    }
    else
    {
        status = kgmmuFaultBufferFree(pGpu, pKernelGmmu, REPLAYABLE_FAULT_BUFFER);
    }

    if (status != NV_OK)
    {
        NV_PRINTF(LEVEL_ERROR,
                  "Destroying Replayable Fault buffer failed (status=0x%08x), proceeding...\n",
                  status);
    }

    return NV_OK;
}

/*!
 * @brief Encodes peer addresses to support NVSwitch systems.
 *
 * This function prepends the fabricBaseAddress to a physical address in order
 * to generate a unique peer address from the global fabric address space.
 *
 * @param[in] pAddresses        : Array of physical addresses to be encoded.
 * @param[in] fabricBaseAddress : Unique fabric base address.
 * @param[in] count             : Count of physical addresses.
 */
static void
_kgmmuEncodePeerAddrs
(
    NvU64 *pAddresses,
    NvU64  fabricBaseAddress,
    NvU64  count
)
{
    NvU64 i;

    //
    // If there is no fabric address, it should be a NOP. Note, this acts as an
    // early complete path for other PEER addressing.
    //
    if (fabricBaseAddress == NVLINK_INVALID_FABRIC_ADDR)
    {
        return;
    }

    for (i = 0; i < count; i++)
    {
        pAddresses[i] = fabricBaseAddress + pAddresses[i];
    }
}

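/*!
 * Encodes an array of physical addresses for the given GMMU aperture:
 * sysmem addresses go through the HAL encoder, peer addresses have the
 * fabric base address applied, and other apertures are left untouched.
 */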
void
kgmmuEncodePhysAddrs_IMPL
(
    KernelGmmu          *pKernelGmmu,
    const GMMU_APERTURE  aperture,
    NvU64               *pAddresses,
    NvU64                fabricBaseAddress,
    NvU64                count
)
{
    NV_ASSERT(aperture != GMMU_APERTURE_INVALID);

    if (aperture == GMMU_APERTURE_SYS_COH ||
        aperture == GMMU_APERTURE_SYS_NONCOH)
    {
        kgmmuEncodeSysmemAddrs_HAL(pKernelGmmu, pAddresses, count);
    }
    else if (aperture == GMMU_APERTURE_PEER)
    {
        _kgmmuEncodePeerAddrs(pAddresses, fabricBaseAddress, count);
    }
    else
    {
        return;
    }
}

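/*!
 * Single-address convenience wrapper around kgmmuEncodePhysAddrs.
 */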
NvU64
kgmmuEncodePhysAddr_IMPL
(
    KernelGmmu          *pKernelGmmu,
    const GMMU_APERTURE  aperture,
    NvU64                physAddr,
    NvU64                fabricBaseAddress
)
{
    kgmmuEncodePhysAddrs(pKernelGmmu, aperture, &physAddr, fabricBaseAddress, 1);
    return physAddr;
}

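/*!
 * Queue copy callback for the client shadow fault buffer: copies fault
 * packets between the caller's buffer and the shadow buffer backing store,
 * in the direction selected by bCopyIn.
 */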
static void
_kgmmuClientShadowBufferQueueCopyData
(
    NvLength      msgSize,
    NvLength      opIdx,
    QueueContext *pCtx,
    void         *pData,
    NvLength      count,
    NvBool        bCopyIn
)
{
    NvLength size;
    GMMU_CLIENT_SHADOW_FAULT_BUFFER *pClientShadowFaultBuffer = pCtx->pData;
    NvU8 *pQueueData, *pClientData = pData;
    void *pDst, *pSrc;

    if (count == 0)
        return;

    size = count * msgSize;
    pQueueData = KERNEL_POINTER_FROM_NvP64(NvU8 *, pClientShadowFaultBuffer->pBufferAddress);
    pQueueData = pQueueData + (opIdx * msgSize);

    pDst = bCopyIn ? pQueueData : pClientData;
    pSrc = bCopyIn ? pClientData : pQueueData;
    portMemCopy(pDst, size, pSrc, size);
}

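/*!
 * Allocates and maps the circular-queue control structure for a client
 * shadow fault buffer. Skipped when Hopper CC with GSP-owned fault buffers
 * is enabled, since shadow buffers are not queue-based in that mode.
 */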
static NV_STATUS
_kgmmuClientShadowFaultBufferQueueAllocate
(
    OBJGPU            *pGpu,
    KernelGmmu        *pKernelGmmu,
    FAULT_BUFFER_TYPE  index
)
{
    NV_STATUS status;
    GMMU_CLIENT_SHADOW_FAULT_BUFFER *pClientShadowFaultBuffer;
    MEMORY_DESCRIPTOR *pQueueMemDesc;
    NvU64 flags = MEMDESC_FLAGS_NONE;

    //
    // On systems with SEV enabled, the client shadow buffers should be allocated
    // in unprotected sysmem as GSP will be writing the fault packets to these
    // buffers. Since GSP will be encrypting the fault packets, we don't risk
    // leaking any information.
    //
    flags |= MEMDESC_FLAGS_ALLOC_IN_UNPROTECTED_MEMORY;

    //
    // Shadow fault buffers are not implemented using circular queues when
    // Hopper CC is enabled.
    //
    if (gpuIsCCFeatureEnabled(pGpu) && gpuIsGspOwnedFaultBuffersEnabled(pGpu))
        return NV_OK;

    pClientShadowFaultBuffer = &pKernelGmmu->mmuFaultBuffer[GPU_GFID_PF].clientShadowFaultBuffer[index];

    status = memdescCreate(&pQueueMemDesc, pGpu,
                           sizeof(GMMU_SHADOW_FAULT_BUF), RM_PAGE_SIZE,
                           NV_TRUE, ADDR_SYSMEM, NV_MEMORY_CACHED,
                           flags);
    if (status != NV_OK)
    {
        return status;
    }

    memdescTagAlloc(status, NV_FB_ALLOC_RM_INTERNAL_OWNER_UNNAMED_TAG_32,
                    pQueueMemDesc);
    if (status != NV_OK)
    {
        memdescDestroy(pQueueMemDesc);
        return status;
    }

    status = memdescMap(pQueueMemDesc, 0,
                        memdescGetSize(pQueueMemDesc),
                        NV_TRUE, NV_PROTECT_READ_WRITE,
                        &pClientShadowFaultBuffer->pQueueAddress,
                        &pClientShadowFaultBuffer->pQueuePriv);
    if (status != NV_OK)
    {
        memdescFree(pQueueMemDesc);
        memdescDestroy(pQueueMemDesc);
        return status;
    }

    pClientShadowFaultBuffer->queueContext.pCopyData = _kgmmuClientShadowBufferQueueCopyData;
    pClientShadowFaultBuffer->queueContext.pData     = pClientShadowFaultBuffer;
    pClientShadowFaultBuffer->pQueueMemDesc          = pQueueMemDesc;

    return NV_OK;
}

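/*!
 * Unmaps and optionally frees the circular-queue control structure of a
 * client shadow fault buffer, then destroys its memory descriptor.
 */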
void
kgmmuClientShadowFaultBufferQueueDestroy_IMPL
(
    OBJGPU            *pGpu,
    KernelGmmu        *pKernelGmmu,
    NvBool             bFreeQueue,
    FAULT_BUFFER_TYPE  index
)
{
    GMMU_CLIENT_SHADOW_FAULT_BUFFER *pClientShadowFaultBuffer;
    MEMORY_DESCRIPTOR *pQueueMemDesc;

    //
    // Shadow fault buffers are not implemented using circular queues when
    // Hopper CC is enabled. So, there is nothing to free here.
    //
    if (gpuIsCCFeatureEnabled(pGpu) && gpuIsGspOwnedFaultBuffersEnabled(pGpu))
        return;

    pClientShadowFaultBuffer = &pKernelGmmu->mmuFaultBuffer[GPU_GFID_PF].clientShadowFaultBuffer[index];

    pQueueMemDesc = pClientShadowFaultBuffer->pQueueMemDesc;

    pClientShadowFaultBuffer->pQueueMemDesc = NULL;
    pClientShadowFaultBuffer->pQueueAddress = NvP64_NULL;
    pClientShadowFaultBuffer->pQueuePriv    = NvP64_NULL;

    if (bFreeQueue)
    {
        memdescFree(pQueueMemDesc);
    }
    memdescDestroy(pQueueMemDesc);
}

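/*!
 * Allocates and maps the sysmem pages backing a client shadow fault buffer,
 * including the trailing metadata region, which is placed after the
 * page-aligned fault packet area.
 */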
static NV_STATUS
_kgmmuClientShadowFaultBufferPagesAllocate
(
    OBJGPU            *pGpu,
    KernelGmmu        *pKernelGmmu,
    NvU32              shadowFaultBufferSize,
    NvU32              shadowFaultBufferMetadataSize,
    FAULT_BUFFER_TYPE  index
)
{
    NV_STATUS status;
    GMMU_CLIENT_SHADOW_FAULT_BUFFER *pClientShadowFaultBuffer;
    MEMORY_DESCRIPTOR *pMemDesc;
    NvU64 flags = MEMDESC_FLAGS_NONE;
    NvU32 shadowFaultBufferSizeTotal;

    //
    // On systems with SEV enabled, the client shadow buffers should be allocated
    // in unprotected sysmem as GSP will be writing the fault packets to these
    // buffers. Since GSP will be encrypting the fault packets, we don't risk
    // leaking any information.
    //
    flags |= MEMDESC_FLAGS_ALLOC_IN_UNPROTECTED_MEMORY;

    pClientShadowFaultBuffer = &pKernelGmmu->mmuFaultBuffer[GPU_GFID_PF].clientShadowFaultBuffer[index];

    shadowFaultBufferSizeTotal = RM_PAGE_ALIGN_UP(shadowFaultBufferSize) + RM_PAGE_ALIGN_UP(shadowFaultBufferMetadataSize);

    status = memdescCreate(&pMemDesc, pGpu,
                           shadowFaultBufferSizeTotal, RM_PAGE_SIZE,
                           NV_FALSE, ADDR_SYSMEM, NV_MEMORY_CACHED,
                           flags);
    if (status != NV_OK)
    {
        return status;
    }

    memdescTagAlloc(status, NV_FB_ALLOC_RM_INTERNAL_OWNER_UNNAMED_TAG_33,
                    pMemDesc);
    if (status != NV_OK)
    {
        memdescDestroy(pMemDesc);
        return status;
    }

    status = memdescMap(pMemDesc, 0,
                        memdescGetSize(pMemDesc),
                        NV_TRUE, NV_PROTECT_READ_WRITE,
                        &pClientShadowFaultBuffer->pBufferAddress,
                        &pClientShadowFaultBuffer->pBufferPriv);
    if (status != NV_OK)
    {
        memdescFree(pMemDesc);
        memdescDestroy(pMemDesc);
        return status;
    }

    pClientShadowFaultBuffer->pFaultBufferMetadataAddress =
        ((NvP64)(((NvU64) pClientShadowFaultBuffer->pBufferAddress) +
                 RM_PAGE_ALIGN_UP(shadowFaultBufferSize)));
    pClientShadowFaultBuffer->pBufferMemDesc = pMemDesc;

    return NV_OK;
}

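/*!
 * Unmaps and optionally frees the pages backing a client shadow fault
 * buffer. When bFreePages is false, only the per-page mappings tracked in
 * pBufferPages are released; the memory descriptor is destroyed either way.
 */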
void
kgmmuClientShadowFaultBufferPagesDestroy_IMPL
(
    OBJGPU            *pGpu,
    KernelGmmu        *pKernelGmmu,
    NvBool             bFreePages,
    FAULT_BUFFER_TYPE  index
)
{
    MEMORY_DESCRIPTOR *pMemDesc;
    GMMU_CLIENT_SHADOW_FAULT_BUFFER *pClientShadowFaultBuffer;
    GMMU_FAULT_BUFFER_PAGE *pBufferPage;
    NvU32 i;

    pClientShadowFaultBuffer = &pKernelGmmu->mmuFaultBuffer[GPU_GFID_PF].clientShadowFaultBuffer[index];
    pMemDesc = pClientShadowFaultBuffer->pBufferMemDesc;

    if (bFreePages)
    {
        memdescUnmap(pMemDesc,
                     NV_TRUE, osGetCurrentProcess(),
                     pClientShadowFaultBuffer->pBufferAddress,
                     pClientShadowFaultBuffer->pBufferPriv);

        memdescFree(pMemDesc);
    }
    else
    {
        for (i = 0; i < pClientShadowFaultBuffer->numBufferPages; i++)
        {
            pBufferPage = &pClientShadowFaultBuffer->pBufferPages[i];

            memdescUnmap(pMemDesc, NV_TRUE, osGetCurrentProcess(),
                         pBufferPage->pAddress, pBufferPage->pPriv);
        }
        portMemFree(pClientShadowFaultBuffer->pBufferPages);
    }
    memdescDestroy(pMemDesc);
}

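/*!
 * Registers a client shadow fault buffer, either locally (non-GSP builds,
 * under the shadow fault buffer spinlock) or with GSP-RM via the internal
 * register control. Also initializes the non-managed circular queue when
 * Hopper CC with GSP-owned fault buffers is not in use.
 */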
NV_STATUS
kgmmuClientShadowFaultBufferRegister_IMPL
(
    OBJGPU            *pGpu,
    KernelGmmu        *pKernelGmmu,
    FAULT_BUFFER_TYPE  index
)
{
    NV_STATUS status = NV_OK;
    struct GMMU_FAULT_BUFFER *pFaultBuffer;
    GMMU_CLIENT_SHADOW_FAULT_BUFFER *pClientShadowFaultBuffer;
    GMMU_SHADOW_FAULT_BUF *pQueue;
    MEMORY_DESCRIPTOR *pBufferMemDesc;
    RmPhysAddr shadowFaultBufferQueuePhysAddr;
    NvU32 queueCapacity, numBufferPages;
    NvU32 faultBufferSize;
    NvU32 shadowFaultBufferMetadataSize;
    const NV2080_CTRL_INTERNAL_GMMU_GET_STATIC_INFO_PARAMS *pStaticInfo = kgmmuGetStaticInfo(pGpu, pKernelGmmu);
    NvBool bQueueAllocated = NV_FALSE;

    pFaultBuffer = &pKernelGmmu->mmuFaultBuffer[GPU_GFID_PF];
    pClientShadowFaultBuffer = &pFaultBuffer->clientShadowFaultBuffer[index];

    if (index == NON_REPLAYABLE_FAULT_BUFFER)
    {
        faultBufferSize = pStaticInfo->nonReplayableFaultBufferSize;
        shadowFaultBufferMetadataSize = pStaticInfo->nonReplayableShadowFaultBufferMetadataSize;
    }
    else if (index == REPLAYABLE_FAULT_BUFFER)
    {
        faultBufferSize = pStaticInfo->replayableFaultBufferSize;
        shadowFaultBufferMetadataSize = pStaticInfo->replayableShadowFaultBufferMetadataSize;
    }
    else
    {
        NV_ASSERT_OR_RETURN(0, NV_ERR_INVALID_ARGUMENT);
    }

    //
    // We don't use circular queues for shadow fault buffers when Hopper
    // CC is enabled.
    //
    if (!gpuIsCCFeatureEnabled(pGpu) || !gpuIsGspOwnedFaultBuffersEnabled(pGpu))
    {
        pQueue = KERNEL_POINTER_FROM_NvP64(GMMU_SHADOW_FAULT_BUF *,
                                           pClientShadowFaultBuffer->pQueueAddress);
        queueCapacity = faultBufferSize / NVC369_BUF_SIZE;

        status = queueInitNonManaged(pQueue, queueCapacity);
        if (status != NV_OK)
        {
            return status;
        }
        bQueueAllocated = NV_TRUE;
    }

    if (!IS_GSP_CLIENT(pGpu))
    {
        portSyncSpinlockAcquire(pFaultBuffer->pShadowFaultBufLock);

        if (pFaultBuffer->pClientShadowFaultBuffer[index] == NULL)
        {
            pFaultBuffer->pClientShadowFaultBuffer[index] = pClientShadowFaultBuffer;
        }
        else
        {
            status = NV_ERR_NOT_SUPPORTED;
        }

        portSyncSpinlockRelease(pFaultBuffer->pShadowFaultBufLock);

        if (status != NV_OK)
        {
            if (bQueueAllocated)
                queueDestroy(pQueue);
            return status;
        }
    }
    else
    {
        RM_API *pRmApi = GPU_GET_PHYSICAL_RMAPI(pGpu);
        NV2080_CTRL_INTERNAL_GMMU_REGISTER_CLIENT_SHADOW_FAULT_BUFFER_PARAMS *pParams;

        pParams = portMemAllocNonPaged(sizeof(*pParams));
        if (pParams == NULL)
        {
            if (bQueueAllocated)
                queueDestroy(pQueue);
            return NV_ERR_NO_MEMORY;
        }
        portMemSet(pParams, 0, sizeof(*pParams));

        pBufferMemDesc = pClientShadowFaultBuffer->pBufferMemDesc;
        numBufferPages = memdescGetSize(pBufferMemDesc) >> RM_PAGE_SHIFT;
        if (numBufferPages > NV_ARRAY_ELEMENTS(pParams->shadowFaultBufferPteArray))
        {
            portMemFree(pParams);
            if (bQueueAllocated)
                queueDestroy(pQueue);
            return NV_ERR_BUFFER_TOO_SMALL;
        }

        memdescGetPhysAddrs(pBufferMemDesc,
                            AT_GPU,
                            0, RM_PAGE_SIZE,
                            numBufferPages, pParams->shadowFaultBufferPteArray);

        if (!gpuIsCCFeatureEnabled(pGpu) || !gpuIsGspOwnedFaultBuffersEnabled(pGpu))
        {
            shadowFaultBufferQueuePhysAddr = memdescGetPhysAddr(pClientShadowFaultBuffer->pQueueMemDesc,
                                                                AT_GPU, 0);
            pParams->shadowFaultBufferQueuePhysAddr = shadowFaultBufferQueuePhysAddr;
        }
        pParams->shadowFaultBufferSize         = faultBufferSize;
        pParams->shadowFaultBufferMetadataSize = shadowFaultBufferMetadataSize;
        pParams->shadowFaultBufferType         = (index == NON_REPLAYABLE_FAULT_BUFFER) ?
                                                 NV2080_CTRL_FAULT_BUFFER_NON_REPLAYABLE :
                                                 NV2080_CTRL_FAULT_BUFFER_REPLAYABLE;

        if (gpuIsCCFeatureEnabled(pGpu) && gpuIsGspOwnedFaultBuffersEnabled(pGpu) && index == REPLAYABLE_FAULT_BUFFER)
        {
            pParams->faultBufferSharedMemoryPhysAddr = memdescGetPhysAddr(pClientShadowFaultBuffer->pFaultBufferSharedMemDesc,
                                                                          AT_GPU, 0);
        }

        status = pRmApi->Control(pRmApi,
                                 pGpu->hInternalClient,
                                 pGpu->hInternalSubdevice,
                                 NV2080_CTRL_CMD_INTERNAL_GMMU_REGISTER_CLIENT_SHADOW_FAULT_BUFFER,
                                 pParams, sizeof(*pParams));

        portMemFree(pParams);
        if (status != NV_OK)
        {
            if (bQueueAllocated)
                queueDestroy(pQueue);
            return status;
        }

        pFaultBuffer->pClientShadowFaultBuffer[index] = pClientShadowFaultBuffer;
    }

    return NV_OK;
}

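/*!
 * Unregisters a client shadow fault buffer from local state or from GSP-RM,
 * then destroys the associated circular queue when one was in use.
 */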
1825 void
kgmmuClientShadowFaultBufferUnregister_IMPL(OBJGPU * pGpu,KernelGmmu * pKernelGmmu,FAULT_BUFFER_TYPE index)1826 kgmmuClientShadowFaultBufferUnregister_IMPL
1827 (
1828 OBJGPU *pGpu,
1829 KernelGmmu *pKernelGmmu,
1830 FAULT_BUFFER_TYPE index
1831 )
1832 {
1833 NV_STATUS status = NV_OK;
1834 GMMU_CLIENT_SHADOW_FAULT_BUFFER *pClientShadowFaultBuffer;
1835 GMMU_SHADOW_FAULT_BUF *pQueue;
1836 struct GMMU_FAULT_BUFFER *pFaultBuffer;
1837
1838 pFaultBuffer = &pKernelGmmu->mmuFaultBuffer[GPU_GFID_PF];
1839
1840 if (!IS_GSP_CLIENT(pGpu))
1841 {
1842 portSyncSpinlockAcquire(pFaultBuffer->pShadowFaultBufLock);
1843
1844 pFaultBuffer->pClientShadowFaultBuffer[index] = NULL;
1845
1846 portSyncSpinlockRelease(pFaultBuffer->pShadowFaultBufLock);
1847 }
1848 else
1849 {
1850 RM_API *pRmApi = GPU_GET_PHYSICAL_RMAPI(pGpu);
1851 NV2080_CTRL_INTERNAL_GMMU_UNREGISTER_CLIENT_SHADOW_FAULT_BUFFER_PARAMS params;
1852
1853         portMemSet(&params, 0, sizeof(params));
1854
1855 params.shadowFaultBufferType = (index == NON_REPLAYABLE_FAULT_BUFFER) ?
1856 NV2080_CTRL_FAULT_BUFFER_NON_REPLAYABLE :
1857 NV2080_CTRL_FAULT_BUFFER_REPLAYABLE;
1858 status = pRmApi->Control(pRmApi,
1859 pGpu->hInternalClient,
1860 pGpu->hInternalSubdevice,
1861 NV2080_CTRL_CMD_INTERNAL_GMMU_UNREGISTER_CLIENT_SHADOW_FAULT_BUFFER,
1862                                  &params, sizeof(params));
1863 if (status != NV_OK)
1864 {
1865 NV_PRINTF(LEVEL_ERROR,
1866 "Unregistering %s fault buffer failed (status=0x%08x), proceeding...\n",
1867 (index == NON_REPLAYABLE_FAULT_BUFFER) ? "non-replayable" : "replayable",
1868 status);
1869 }
1870
1871 pFaultBuffer->pClientShadowFaultBuffer[index] = NULL;
1872 }
1873
1874 if (!gpuIsCCFeatureEnabled(pGpu) || !gpuIsGspOwnedFaultBuffersEnabled(pGpu))
1875 {
1876 pClientShadowFaultBuffer = &pFaultBuffer->clientShadowFaultBuffer[index];
1877 pQueue = KERNEL_POINTER_FROM_NvP64(GMMU_SHADOW_FAULT_BUF *,
1878 pClientShadowFaultBuffer->pQueueAddress);
1879 queueDestroy(pQueue);
1880 }
1881 }
1882
1883 /*!
1884  * @brief Creates a shadow fault buffer for client handling of replayable/non-replayable
1885  *        faults in the CPU-RM, and registers it with the GSP-RM.
1886 *
1887 * @param[in] pGpu
1888 * @param[in] pKernelGmmu
1889 * @param[in] index Replayable or non-replayable fault buffer
1890 *
1891  * @returns NV_OK on success, pertinent error code on failure.
1892 */
1893 NV_STATUS
1894 kgmmuClientShadowFaultBufferAllocate_IMPL
1895 (
1896 OBJGPU *pGpu,
1897 KernelGmmu *pKernelGmmu,
1898 FAULT_BUFFER_TYPE index
1899 )
1900 {
1901 NV_STATUS status;
1902 const NV2080_CTRL_INTERNAL_GMMU_GET_STATIC_INFO_PARAMS *pStaticInfo = kgmmuGetStaticInfo(pGpu, pKernelGmmu);
1903 NvU32 faultBufferSize;
1904 NvU32 shadowFaultBufferMetadataSize;
1905
1906 ct_assert((RM_PAGE_SIZE % sizeof(struct GMMU_FAULT_PACKET)) == 0);
1907
1908 NV_ASSERT_OR_RETURN(!pKernelGmmu->getProperty(pKernelGmmu, PDB_PROP_KGMMU_FAULT_BUFFER_DISABLED), NV_ERR_INVALID_STATE);
1909
1910 NV_ASSERT_OR_RETURN(pStaticInfo->nonReplayableFaultBufferSize != 0, NV_ERR_INVALID_STATE);
1911
1912 if (index == NON_REPLAYABLE_FAULT_BUFFER)
1913 {
1914 faultBufferSize = pStaticInfo->nonReplayableFaultBufferSize;
1915 shadowFaultBufferMetadataSize = pStaticInfo->nonReplayableShadowFaultBufferMetadataSize;
1916 }
1917 else if (index == REPLAYABLE_FAULT_BUFFER)
1918 {
1919 faultBufferSize = pStaticInfo->replayableFaultBufferSize;
1920 shadowFaultBufferMetadataSize = pStaticInfo->replayableShadowFaultBufferMetadataSize;
1921 }
1922 else
1923 {
1924 NV_ASSERT_OR_RETURN(0, NV_ERR_INVALID_ARGUMENT);
1925 }
1926
1927 status = _kgmmuClientShadowFaultBufferQueueAllocate(pGpu, pKernelGmmu, index);
1928 if (status != NV_OK)
1929 {
1930 return status;
1931 }
1932
1933 status = _kgmmuClientShadowFaultBufferPagesAllocate(pGpu, pKernelGmmu,
1934 faultBufferSize,
1935 shadowFaultBufferMetadataSize,
1936 index);
1937 if (status != NV_OK)
1938 {
1939 goto destroy_queue_and_exit;
1940 }
1941
1942 status = kgmmuFaultBufferAllocSharedMemory_HAL(pGpu, pKernelGmmu, index);
1943 if (status != NV_OK)
1944 {
1945 goto destroy_pages_and_exit;
1946 }
1947
1948 status = kgmmuClientShadowFaultBufferRegister(pGpu, pKernelGmmu,
1949 index);
1950 if (status != NV_OK)
1951 {
1952 goto destroy_shared_memory_and_exit;
1953 }
1954
1955 return NV_OK;
1956
1957 destroy_shared_memory_and_exit:
1958 kgmmuFaultBufferFreeSharedMemory_HAL(pGpu, pKernelGmmu, index);
1959 destroy_pages_and_exit:
1960 kgmmuClientShadowFaultBufferPagesDestroy(pGpu, pKernelGmmu, NV_TRUE,
1961 index);
1962 destroy_queue_and_exit:
1963 kgmmuClientShadowFaultBufferQueueDestroy(pGpu, pKernelGmmu, NV_TRUE,
1964 index);
1965 return status;
1966 }
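
//
// The allocate path above uses the cascading-label unwind idiom: each step
// that succeeds gains a matching cleanup label, and a later failure jumps to
// the label that tears down everything acquired so far, in reverse order.
// A minimal standalone sketch of the idiom (stepA/stepB/stepC and the undo
// helpers are hypothetical, not driver APIs):
//
#if 0
static NV_STATUS stepA(void);
static NV_STATUS stepB(void);
static NV_STATUS stepC(void);
static void undoA(void);
static void undoB(void);

static NV_STATUS
exampleCascadingUnwind(void)
{
    NV_STATUS status;

    status = stepA();          // acquires resource A
    if (status != NV_OK)
        return status;         // nothing to undo yet

    status = stepB();          // acquires resource B
    if (status != NV_OK)
        goto undo_a;           // undo A only

    status = stepC();          // last step; nothing past it to undo
    if (status != NV_OK)
        goto undo_b;           // undo B, then fall through to undo A

    return NV_OK;

undo_b:
    undoB();
undo_a:
    undoA();
    return status;
}
#endif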
1967
1968 /*!
1969  * @brief Unregisters the client shadow fault buffer from the GSP-RM, or destroys
1970  *        it in the CPU-RM.
1971 *
1972 * @param[in] pGpu
1973  * @param[in] pKernelGmmu
1974  * @param[in] index       Replayable or non-replayable fault buffer
1975  * @returns NV_OK
1976 */
1977 NV_STATUS
1978 kgmmuClientShadowFaultBufferDestroy_IMPL
1979 (
1980 OBJGPU *pGpu,
1981 KernelGmmu *pKernelGmmu,
1982 FAULT_BUFFER_TYPE index
1983 )
1984 {
1985 GMMU_CLIENT_SHADOW_FAULT_BUFFER *pClientShadowFaultBuffer;
1986 NvBool bFreeMemory = !RMCFG_FEATURE_PLATFORM_GSP;
1987
1988 pClientShadowFaultBuffer =
1989 pKernelGmmu->mmuFaultBuffer[GPU_GFID_PF].pClientShadowFaultBuffer[index];
1990
1991 if (pClientShadowFaultBuffer != NvP64_NULL)
1992 {
1993 kgmmuClientShadowFaultBufferUnregister(pGpu, pKernelGmmu,
1994 index);
1995
1996 kgmmuFaultBufferFreeSharedMemory_HAL(pGpu, pKernelGmmu, index);
1997
1998 kgmmuClientShadowFaultBufferPagesDestroy(pGpu, pKernelGmmu, bFreeMemory,
1999 index);
2000 kgmmuClientShadowFaultBufferQueueDestroy(pGpu, pKernelGmmu, bFreeMemory,
2001 index);
2002 }
2003
2004 return NV_OK;
2005 }
2006
2007 /*!
2008  * Returns the minimum allocation size, in bytes, at which an allocation is aligned to the big-page size
2009 *
2010 * @param[in] pKernelGmmu
2011 *
2012  * @return NvU64
2013 */
2014 NvU64
2015 kgmmuGetMinBigPageSize_IMPL(KernelGmmu *pKernelGmmu)
2016 {
2017 //
2018     // Set the minimum size in the heap that we will round up to a big page instead
2019     // of just 4KB. HW doesn't like 4KB pages in video memory, but SW wants to pack
2020     // physical memory sometimes. Typically UMDs that really care about perf use
2021     // suballocation for larger RM allocations anyway.
2022     //
2023     // Promote allocations bigger than half the big page size.
2024     // (this is a policy change for big page sizes/VASpace)
2025 //
2026 return RM_PAGE_SIZE_64K >> 1;
2027 }
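
//
// Illustrative caller of the policy above (hypothetical helper; assumes the
// usual NVOC wrapper name kgmmuGetMinBigPageSize): requests at or above half
// of the 64K big page are promoted to big-page alignment, smaller ones stay
// on 4KB pages.
//
#if 0
static NvU64
examplePickPageSize(KernelGmmu *pKernelGmmu, NvU64 allocSize)
{
    // With the policy above, the threshold is RM_PAGE_SIZE_64K >> 1 (32KB).
    if (allocSize >= kgmmuGetMinBigPageSize(pKernelGmmu))
        return RM_PAGE_SIZE_64K;   // promote to the big page size
    return RM_PAGE_SIZE;           // keep small allocations on 4KB pages
}
#endif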
2028
2029 /*!
2030 * @brief Initializes the init block for an engine
2031 *
2032 * @param[in] pKernelGmmu
2033 * @param[in] pInstBlkDesc Memory descriptor for the instance block of the engine
2034 * @param[in] pVAS OBJVASPACE pointer of the engine
2035  * @param[in] subctxId Subcontext ID value
2036 * @param[in] pInstBlkParams Pointer to the structure storing the parameters passed by the caller
2037 *
2038 * @returns NV_STATUS
2039 */
2040 NV_STATUS
2041 kgmmuInstBlkInit_IMPL
2042 (
2043 KernelGmmu *pKernelGmmu,
2044 MEMORY_DESCRIPTOR *pInstBlkDesc,
2045 OBJVASPACE *pVAS,
2046 NvU32 subctxId,
2047 INST_BLK_INIT_PARAMS *pInstBlkParams
2048 )
2049 {
2050 OBJGPU *pGpu = ENG_GET_GPU(pKernelGmmu);
2051 KernelBus *pKernelBus = GPU_GET_KERNEL_BUS(pGpu);
2052 NvU8 *pInstBlk; // CPU VA of instance block.
2053 NvU64 vaLimitData;
2054 NvU32 vaLimitOffset;
2055 NvU32 dirBaseHiOffset;
2056 NvU32 dirBaseHiData;
2057 NvU32 dirBaseLoOffset;
2058 NvU32 dirBaseLoData;
2059 NvU32 atsOffset = 0;
2060 NvU32 atsData = 0;
2061 NvU32 pasid;
2062 NvU32 magicValueOffset;
2063 NvU32 magicValueData;
2064 NV_STATUS status = NV_OK;
2065
2066 NV_ASSERT(!gpumgrGetBcEnabledStatus(pGpu));
2067
2068 // Get VA limit
2069 status = kgmmuInstBlkVaLimitGet_HAL(pKernelGmmu, pVAS, subctxId, pInstBlkParams, &vaLimitOffset, &vaLimitData);
2070 NV_ASSERT_OR_RETURN((status == NV_OK), status);
2071
2072 // Get page dir base
2073 NV_ASSERT_OK_OR_RETURN(kgmmuInstBlkPageDirBaseGet_HAL(pGpu, pKernelGmmu,
2074 pVAS, pInstBlkParams, subctxId,
2075 &dirBaseLoOffset, &dirBaseLoData, &dirBaseHiOffset, &dirBaseHiData));
2076
2077 //
2078 // Enable ATS in instance block only when both ATS is enabled in the
2079 // vaspace and a valid PASID is provisioned through
2080 // NV0080_CTRL_CMD_DMA_SET_PAGE_DIRECTORY.
2081 //
2082 if ((pVAS != NULL) && vaspaceIsAtsEnabled(pVAS))
2083 {
2084 if ((status = vaspaceGetPasid(pVAS, &pasid)) == NV_OK)
2085 {
2086 // Coherent link ATS parameters are only set on the new VMM path.
2087 status = kgmmuInstBlkAtsGet_HAL(pKernelGmmu, pVAS, subctxId,
2088 &atsOffset, &atsData);
2089 NV_ASSERT_OR_RETURN((status == NV_OK), status);
2090 }
2091 else
2092 {
2093 // Proceed with ATS disabled in instance block if PASID is not yet provisioned
2094 NV_ASSERT_OR_RETURN((status == NV_ERR_NOT_READY), status);
2095 }
2096 }
2097
2098 status = kgmmuInstBlkMagicValueGet_HAL(pKernelGmmu, &magicValueOffset, &magicValueData);
2099
2100 // Write the fields out
2101 pInstBlk = pInstBlkParams->pInstBlk;
2102
2103 if (pInstBlk != NULL)
2104 {
2105 if (vaLimitOffset != 0)
2106 {
2107 // TO DO: FMODEL fails with MEM_WR64
2108 if (IS_SIMULATION(pGpu))
2109 {
2110 MEM_WR32(pInstBlk + vaLimitOffset + 0, NvU64_LO32(vaLimitData));
2111 MEM_WR32(pInstBlk + vaLimitOffset + 4, NvU64_HI32(vaLimitData));
2112 }
2113 else
2114 {
2115 MEM_WR64(pInstBlk + vaLimitOffset, vaLimitData);
2116 }
2117 }
2118
2119 MEM_WR32(pInstBlk + dirBaseHiOffset, dirBaseHiData);
2120 MEM_WR32(pInstBlk + dirBaseLoOffset, dirBaseLoData);
2121
2122 if (atsOffset != 0)
2123 MEM_WR32(pInstBlk + atsOffset, atsData);
2124
2125 if (status == NV_OK)
2126 MEM_WR32(pInstBlk + magicValueOffset, magicValueData);
2127 }
2128 else
2129 {
2130 MemoryManager *pMemoryManager = GPU_GET_MEMORY_MANAGER(pGpu);
2131
2132 pInstBlk = memmgrMemDescBeginTransfer(pMemoryManager, pInstBlkDesc,
2133 TRANSFER_FLAGS_SHADOW_ALLOC);
2134 if (pInstBlk == NULL)
2135 {
2136 return NV_ERR_INSUFFICIENT_RESOURCES;
2137 }
2138
2139 if (vaLimitOffset != 0)
2140 {
2141 // TO DO: FMODEL fails with MEM_WR64
2142 if (IS_SIMULATION(pGpu))
2143 {
2144 MEM_WR32(pInstBlk + vaLimitOffset + 0, NvU64_LO32(vaLimitData));
2145 MEM_WR32(pInstBlk + vaLimitOffset + 4, NvU64_HI32(vaLimitData));
2146 }
2147 else
2148 {
2149 MEM_WR64(pInstBlk + vaLimitOffset, vaLimitData);
2150 }
2151 }
2152
2153 MEM_WR32(pInstBlk + dirBaseHiOffset, dirBaseHiData);
2154 MEM_WR32(pInstBlk + dirBaseLoOffset, dirBaseLoData);
2155
2156 if (atsOffset != 0)
2157 MEM_WR32(pInstBlk + atsOffset, atsData);
2158
2159 if (status == NV_OK)
2160 MEM_WR32(pInstBlk + magicValueOffset, magicValueData);
2161
2162 memmgrMemDescEndTransfer(pMemoryManager, pInstBlkDesc,
2163 TRANSFER_FLAGS_SHADOW_ALLOC);
2164 }
2165
2166 if (!pInstBlkParams->bDeferFlush)
2167 {
2168 kbusFlush_HAL(pGpu, pKernelBus, kbusGetFlushAperture(pKernelBus, memdescGetAddressSpace(pInstBlkDesc)));
2169 }
2170
2171 return NV_OK;
2172 }
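
//
// Both write-out paths above split the 64-bit VA-limit write into two 32-bit
// writes when running on FMODEL simulation. The same pattern in isolation
// (exampleWr64 is a hypothetical helper, not a driver API):
//
#if 0
static void
exampleWr64(OBJGPU *pGpu, NvU8 *pBase, NvU32 offset, NvU64 data)
{
    if (IS_SIMULATION(pGpu))
    {
        // FMODEL rejects a single 64-bit write; emit low word, then high.
        MEM_WR32(pBase + offset + 0, NvU64_LO32(data));
        MEM_WR32(pBase + offset + 4, NvU64_HI32(data));
    }
    else
    {
        MEM_WR64(pBase + offset, data);
    }
}
#endif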
2173
2174 GMMU_APERTURE
2175 kgmmuGetExternalAllocAperture_IMPL
2176 (
2177 NvU32 addressSpace
2178 )
2179 {
2180 switch (addressSpace)
2181 {
2182 case ADDR_FBMEM:
2183 return GMMU_APERTURE_VIDEO;
2184 case ADDR_FABRIC_V2:
2185 case ADDR_FABRIC_MC:
2186 return GMMU_APERTURE_PEER;
2187 case ADDR_SYSMEM:
2188 case ADDR_VIRTUAL:
2189 return GMMU_APERTURE_SYS_COH;
2190 default:
2191 NV_PRINTF(LEVEL_ERROR, "Unexpected addressSpace (%u) when mapping to GMMU_APERTURE.\n",
2192 addressSpace);
2193 NV_ASSERT(0);
2194 return GMMU_APERTURE_SYS_COH;
2195 }
2196 }
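
//
// Usage sketch for the translation above (hypothetical caller; assumes the
// usual NVOC wrapper name kgmmuGetExternalAllocAperture): derive the PTE
// aperture from a memory descriptor's address space.
//
#if 0
static GMMU_APERTURE
exampleApertureForMemDesc(MEMORY_DESCRIPTOR *pMemDesc)
{
    // ADDR_FBMEM -> VIDEO, ADDR_FABRIC_* -> PEER, ADDR_SYSMEM -> SYS_COH
    return kgmmuGetExternalAllocAperture(memdescGetAddressSpace(pMemDesc));
}
#endif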
2197
2198 /*!
2199  * @brief Changes ownership of the access counter notification interrupt between RM and UVM.
2200 *
2201 * @param pGpu
2202 * @param pKernelGmmu
2203 * @param bOwnedByRm
2204 */
2205 void
2206 kgmmuAccessCntrChangeIntrOwnership_IMPL
2207 (
2208 OBJGPU *pGpu,
2209 KernelGmmu *pKernelGmmu,
2210 NvBool bOwnedByRm
2211 )
2212 {
2213 //
2214     // Disable the interrupt when RM loses ownership and re-enable it when RM
2215     // regains it. nvUvmInterfaceOwnAccessCntIntr() relies on this behavior.
2216 //
2217 if (bOwnedByRm)
2218 pKernelGmmu->uvmSharedIntrRmOwnsMask |= RM_UVM_SHARED_INTR_MASK_HUB_ACCESS_COUNTER_NOTIFY;
2219 else
2220 pKernelGmmu->uvmSharedIntrRmOwnsMask &= ~RM_UVM_SHARED_INTR_MASK_HUB_ACCESS_COUNTER_NOTIFY;
2221 }
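
//
// The ownership flip above is a single set/clear of one bit in
// uvmSharedIntrRmOwnsMask. Equivalent standalone sketch (hypothetical
// helper):
//
#if 0
static NvU32
exampleToggleRmOwnership(NvU32 ownsMask, NvBool bOwnedByRm)
{
    if (bOwnedByRm)
        ownsMask |= RM_UVM_SHARED_INTR_MASK_HUB_ACCESS_COUNTER_NOTIFY;  // RM services it
    else
        ownsMask &= ~RM_UVM_SHARED_INTR_MASK_HUB_ACCESS_COUNTER_NOTIFY; // UVM owns it
    return ownsMask;
}
#endif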
2222
2223 /**
2224  * @brief Provides an opportunity to register IntrService handlers during intrStateInit.
2225 */
2226 void
2227 kgmmuRegisterIntrService_IMPL
2228 (
2229 OBJGPU *pGpu,
2230 KernelGmmu *pKernelGmmu,
2231 IntrServiceRecord pRecords[MC_ENGINE_IDX_MAX]
2232 )
2233 {
2234 NvU32 engineIdx;
2235 NvU16 *pEngineIdxList;
2236 NvU32 listSize;
2237
2238 static NvU16 engineIdxList[] = {
2239 MC_ENGINE_IDX_REPLAYABLE_FAULT,
2240 MC_ENGINE_IDX_REPLAYABLE_FAULT_ERROR,
2241 };
2242
2243 static NvU16 engineIdxListForCC[] = {
2244 MC_ENGINE_IDX_REPLAYABLE_FAULT_CPU,
2245 MC_ENGINE_IDX_NON_REPLAYABLE_FAULT_CPU,
2246 };
2247
2248 if (IS_GSP_CLIENT(pGpu) && gpuIsCCFeatureEnabled(pGpu) && gpuIsGspOwnedFaultBuffersEnabled(pGpu))
2249 {
2250 pEngineIdxList = engineIdxListForCC;
2251 listSize = NV_ARRAY_ELEMENTS(engineIdxListForCC);
2252 }
2253 else
2254 {
2255 pEngineIdxList = engineIdxList;
2256 listSize = NV_ARRAY_ELEMENTS(engineIdxList);
2257 }
2258
2259 for (NvU32 tableIdx = 0; tableIdx < listSize; tableIdx++)
2260 {
2261 engineIdx = (pEngineIdxList)[tableIdx];
2262 NV_ASSERT(pRecords[engineIdx].pInterruptService == NULL);
2263 pRecords[engineIdx].pInterruptService = staticCast(pKernelGmmu, IntrService);
2264 }
2265
2266 if (!IS_GSP_CLIENT(pGpu))
2267 {
2268 engineIdx = MC_ENGINE_IDX_GMMU;
2269 NV_ASSERT(pRecords[engineIdx].pInterruptService == NULL);
2270 pRecords[engineIdx].pInterruptService = staticCast(pKernelGmmu, IntrService);
2271
2272 NV_ASSERT(pRecords[engineIdx].pNotificationService == NULL);
2273 pRecords[engineIdx].bFifoWaiveNotify = NV_FALSE;
2274 pRecords[engineIdx].pNotificationService = staticCast(pKernelGmmu, IntrService);
2275
2276 static NvU16 physicalEngineIdxList[] = {
2277 MC_ENGINE_IDX_NON_REPLAYABLE_FAULT,
2278 MC_ENGINE_IDX_NON_REPLAYABLE_FAULT_ERROR,
2279 MC_ENGINE_IDX_INFO_FAULT
2280 };
2281
2282 for (NvU32 tableIdx = 0; tableIdx < NV_ARRAY_ELEMENTS(physicalEngineIdxList); tableIdx++)
2283 {
2284 engineIdx = physicalEngineIdxList[tableIdx];
2285 NV_ASSERT(pRecords[engineIdx].pInterruptService == NULL);
2286 pRecords[engineIdx].pInterruptService = staticCast(pKernelGmmu, IntrService);
2287 }
2288 }
2289 }
2290
2291 /**
2292  * @brief Clears the stall interrupt leaf vector and returns whether to call ServiceInterrupt.
2293 * @details Normally there's no need to override this function as its default is used by almost all handlers,
2294 * but MC_ENGINE_IDX_NON_REPLAYABLE_FAULT is cleared in the top half.
2295 *
2296 * @returns NV_TRUE indicating that the interrupt should be handled.
2297 */
2298 NvBool
2299 kgmmuClearInterrupt_IMPL
2300 (
2301 OBJGPU *pGpu,
2302 KernelGmmu *pKernelGmmu,
2303 IntrServiceClearInterruptArguments *pParams)
2304 {
2305 NV_ASSERT_OR_RETURN(pParams != NULL, 0);
2306 if (pParams->engineIdx == MC_ENGINE_IDX_NON_REPLAYABLE_FAULT)
2307 {
2308 // Skip clearing the interrupt; just return success.
2309 return NV_TRUE;
2310 }
2311 else
2312 {
2313 // Fallthrough to default handler, which will clear the interrupt.
2314 return intrservClearInterrupt_IMPL(pGpu, staticCast(pKernelGmmu, IntrService), pParams);
2315 }
2316 }
2317
2318 /**
2319 * @brief Service stall interrupts.
2320 *
2321 * @returns Zero, or any implementation-chosen nonzero value. If the same nonzero value is returned enough
2322 * times the interrupt is considered stuck.
2323 */
2324 NvU32
2325 kgmmuServiceInterrupt_IMPL
2326 (
2327 OBJGPU *pGpu,
2328 KernelGmmu *pKernelGmmu,
2329 IntrServiceServiceInterruptArguments *pParams
2330 )
2331 {
2332 NV_STATUS status;
2333
2334 NV_ASSERT_OR_RETURN(pParams != NULL, 0);
2335
2336 switch (pParams->engineIdx)
2337 {
2338 case MC_ENGINE_IDX_GMMU:
2339 {
2340 return kgmmuService_HAL(pGpu, pKernelGmmu);
2341 }
2342 case MC_ENGINE_IDX_NON_REPLAYABLE_FAULT:
2343 {
2344
2345 //
2346 // This interrupt was already cleared in the top half and "serviced"
2347 // in the top half since copy from HW fault buffer always happens
2348 // in the top half. This servicing is merely copying from the SW
2349 // fault buffer, so doesn't need interrupt clearing. Also, we will
2350 // only copy from the SW fault buffer if the fatalFaultIntrPending
2351 // cache tells us that there is something to copy. Else, we'll just
2352 // return early and rely on another interrupt to fire that will
2353 // eventually update this state. In the top half, we will
2354 // unconditionally write GET back, which will force HW to send us a
2355 // new pulse as long as GET != PUT and we'd be eventually guaranteed
2356 // to copy something into the SW fault buffer.
2357 //
2358 if (portAtomicCompareAndSwapS32(&pKernelGmmu->mmuFaultBuffer[GPU_GFID_PF].fatalFaultIntrPending, 0, 1))
2359 {
2360 status = kgmmuServiceNonReplayableFault_HAL(pGpu, pKernelGmmu);
2361 if (status != NV_OK)
2362 {
2363 NV_ASSERT_OK_FAILED(
2364                     "Failed to service non-replayable MMU fault",
2365 status);
2366 }
2367 }
2368
2369 break;
2370 }
2371 case MC_ENGINE_IDX_NON_REPLAYABLE_FAULT_ERROR:
2372 {
2373 status = kgmmuReportFaultBufferOverflow_HAL(pGpu, pKernelGmmu);
2374 if (status != NV_OK)
2375 {
2376 NV_ASSERT_OK_FAILED(
2377 "Failed to report non-replayable MMU fault buffer overflow error",
2378 status);
2379 }
2380 break;
2381 }
2382 case MC_ENGINE_IDX_REPLAYABLE_FAULT:
2383 {
2384 NV_STATUS status = kgmmuServiceReplayableFault_HAL(pGpu, pKernelGmmu);
2385 if (status != NV_OK)
2386 {
2387                 NV_ASSERT_OK_FAILED("Failed to service replayable MMU fault",
2388 status);
2389 }
2390 break;
2391 }
2392 case MC_ENGINE_IDX_REPLAYABLE_FAULT_ERROR:
2393 {
2394 status = kgmmuReportFaultBufferOverflow_HAL(pGpu, pKernelGmmu);
2395 if (status != NV_OK)
2396 {
2397 NV_ASSERT_OK_FAILED(
2398 "Failed to report replayable MMU fault buffer overflow error",
2399 status);
2400 }
2401 break;
2402 }
2403 case MC_ENGINE_IDX_NON_REPLAYABLE_FAULT_CPU:
2404 {
2405 //
2406 // This interrupt vector is used to enqueue the UVM top half so any outstanding Non-Replayable
2407 // faults can get processed by UVM. However, since the GSP notification mechanism is interrupt based
2408 // and the top half of the RM interrupt routine will always call into UVM's top half, it is safe to NOP here
2409 // knowing that UVM handling already gets invoked whenever the RM top half is executed.
2410 //
2411 status = 0;
2412 break;
2413 }
2414 case MC_ENGINE_IDX_REPLAYABLE_FAULT_CPU:
2415 {
2416 NV_PRINTF(LEVEL_ERROR, "Unexpected replayable interrupt routed to RM. Verify UVM took ownership.\n");
2417 status = NV_ERR_INVALID_STATE;
2418 break;
2419 }
2420 case MC_ENGINE_IDX_INFO_FAULT:
2421 {
2422 status = kgmmuServicePriFaults_HAL(pGpu, pKernelGmmu);
2423 if (status != NV_OK)
2424 {
2425                 NV_ASSERT_OK_FAILED("Failed to service PRI fault", status);
2426 }
2427 break;
2428 }
2429 default:
2430 {
2431 NV_ASSERT_FAILED("Invalid engineIdx");
2432 break;
2433 }
2434 }
2435
2436 return 0;
2437 }
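
//
// The MC_ENGINE_IDX_NON_REPLAYABLE_FAULT case above gates servicing on an
// atomic 1 -> 0 handoff of fatalFaultIntrPending: the top half publishes the
// flag and exactly one bottom-half invocation claims it. Standalone sketch of
// the pair (examplePending and both functions are hypothetical; assumes
// nvport's portAtomicSetS32/portAtomicCompareAndSwapS32):
//
#if 0
static volatile NvS32 examplePending = 0;

static void
exampleTopHalf(void)
{
    portAtomicSetS32(&examplePending, 1);   // publish "fault data available"
}

static void
exampleBottomHalf(void)
{
    // Swap in 0 only if the current value is 1; the single winner drains
    // the SW fault buffer, every other caller returns early.
    if (portAtomicCompareAndSwapS32(&examplePending, 0, 1))
    {
        // ... copy packets out of the SW fault buffer here ...
    }
}
#endif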
2438
2439 /*!
2440 * @brief Extract the PTE FIELDS from the PTE and
2441 * set the corresponding flags/fields in pParams.
2442 *
2443 * @param[in] pKernelGmmu
2444 * @param[in] pPte Pointer to the PTE contents
2445  * @param[out] pPteInfo NV0080_CTRL_DMA_PTE_INFO_PTE_BLOCK pointer to cmd params
2446  * @param[in] pFmt Pointer to the GMMU format used to interpret the PTE
2447 * @param[in] pLevelFmt Format of the level
2448 *
2449 *
2450 * @returns none
2451 */
2452 void
2453 kgmmuExtractPteInfo_IMPL
2454 (
2455 KernelGmmu *pKernelGmmu,
2456 GMMU_ENTRY_VALUE *pPte,
2457 NV0080_CTRL_DMA_PTE_INFO_PTE_BLOCK *pPteInfo,
2458 const GMMU_FMT *pFmt,
2459 const MMU_FMT_LEVEL *pLevelFmt
2460 )
2461 {
2462 OBJGPU *pGpu = ENG_GET_GPU(pKernelGmmu);
2463 MemoryManager *pMemoryManager = GPU_GET_MEMORY_MANAGER(pGpu);
2464 const GMMU_FMT_PTE *pFmtPte = pFmt->pPte;
2465 NvBool bPteValid;
2466
2467 bPteValid = nvFieldGetBool(&pFmtPte->fldValid, pPte->v8);
2468
2469 pPteInfo->pteFlags = FLD_SET_DRF_NUM(0080_CTRL, _DMA_PTE_INFO, _PARAMS_FLAGS_VALID,
2470 bPteValid, pPteInfo->pteFlags);
2471
2472 if (pFmtPte->version != GMMU_FMT_VERSION_3)
2473 {
2474 pPteInfo->pteFlags = FLD_SET_DRF_NUM(0080_CTRL, _DMA_PTE_INFO, _PARAMS_FLAGS_ENCRYPTED,
2475 nvFieldGetBool(&pFmtPte->fldEncrypted, pPte->v8), pPteInfo->pteFlags);
2476 }
2477
2478 switch (gmmuFieldGetAperture(&pFmtPte->fldAperture, pPte->v8))
2479 {
2480 case GMMU_APERTURE_VIDEO:
2481 pPteInfo->pteFlags = FLD_SET_DRF(0080_CTRL, _DMA_PTE_INFO, _PARAMS_FLAGS_APERTURE,
2482 _VIDEO_MEMORY, pPteInfo->pteFlags);
2483 break;
2484 case GMMU_APERTURE_PEER:
2485 pPteInfo->pteFlags = FLD_SET_DRF(0080_CTRL, _DMA_PTE_INFO, _PARAMS_FLAGS_APERTURE,
2486 _PEER_MEMORY, pPteInfo->pteFlags);
2487 break;
2488 case GMMU_APERTURE_SYS_COH:
2489 pPteInfo->pteFlags = FLD_SET_DRF(0080_CTRL, _DMA_PTE_INFO, _PARAMS_FLAGS_APERTURE,
2490 _SYSTEM_COHERENT_MEMORY, pPteInfo->pteFlags);
2491 break;
2492 case GMMU_APERTURE_SYS_NONCOH:
2493 pPteInfo->pteFlags = FLD_SET_DRF(0080_CTRL, _DMA_PTE_INFO, _PARAMS_FLAGS_APERTURE,
2494 _SYSTEM_NON_COHERENT_MEMORY, pPteInfo->pteFlags);
2495 break;
2496 case GMMU_APERTURE_INVALID:
2497 default:
2498 NV_ASSERT(0);
2499 break;
2500 }
2501
2502 if (pFmtPte->version == GMMU_FMT_VERSION_3)
2503 {
2504 KernelGmmu *pKernelGmmu = GPU_GET_KERNEL_GMMU(pGpu);
2505 NvU32 ptePcfHw;
2506 NvU32 ptePcfSw = 0;
2507
2508 // In Version 3, parse the PCF bits and return those
2509 ptePcfHw = nvFieldGet32(&pFmtPte->fldPtePcf, pPte->v8);
2510 NV_ASSERT(kgmmuTranslatePtePcfFromHw_HAL(pKernelGmmu, ptePcfHw, bPteValid, &ptePcfSw) == NV_OK);
2511
2512 // Valid 2MB PTEs follow the same format as 64K and 4K PTEs
2513 if (bPteValid)
2514 {
2515 if (!(ptePcfSw & (1 << SW_MMU_PCF_UNCACHED_IDX)))
2516 {
2517 pPteInfo->pteFlags = FLD_SET_DRF(0080_CTRL, _DMA_PTE_INFO,
2518 _PARAMS_FLAGS_GPU_CACHED, _TRUE, pPteInfo->pteFlags);
2519 }
2520 if (ptePcfSw & (1 << SW_MMU_PCF_RO_IDX))
2521 {
2522 pPteInfo->pteFlags = FLD_SET_DRF(0080_CTRL, _DMA_PTE_INFO,
2523 _PARAMS_FLAGS_READ_ONLY, _TRUE, pPteInfo->pteFlags);
2524 }
2525 if (ptePcfSw & (1 << SW_MMU_PCF_NOATOMIC_IDX))
2526 {
2527 pPteInfo->pteFlags = FLD_SET_DRF(0080_CTRL, _DMA_PTE_INFO,
2528 _PARAMS_FLAGS_ATOMIC, _DISABLE, pPteInfo->pteFlags);
2529 }
2530 if (ptePcfSw & (1 << SW_MMU_PCF_REGULAR_IDX))
2531 {
2532 pPteInfo->pteFlags = FLD_SET_DRF(0080_CTRL, _DMA_PTE_INFO,
2533 _PARAMS_FLAGS_PRIVILEGED, _FALSE, pPteInfo->pteFlags);
2534 }
2535 if (ptePcfSw & (1 << SW_MMU_PCF_ACE_IDX))
2536 {
2537 pPteInfo->pteFlags = FLD_SET_DRF(0080_CTRL, _DMA_PTE_INFO,
2538 _PARAMS_FLAGS_ACCESS_COUNTING, _ENABLE, pPteInfo->pteFlags);
2539 }
2540 }
2541 else
2542 {
2543 if (pLevelFmt->numSubLevels == 0)
2544 {
2545 if (ptePcfSw & (1 << SW_MMU_PCF_SPARSE_IDX))
2546 {
2547 pPteInfo->pteFlags = FLD_SET_DRF(0080_CTRL, _DMA_PTE_INFO,
2548 _PARAMS_FLAGS_GPU_CACHED, _FALSE, pPteInfo->pteFlags);
2549 }
2550 else
2551 {
2552 pPteInfo->pteFlags = FLD_SET_DRF(0080_CTRL, _DMA_PTE_INFO,
2553 _PARAMS_FLAGS_GPU_CACHED, _TRUE, pPteInfo->pteFlags);
2554 }
2555 }
2556 else
2557 {
2558 NvU32 pdePcfHw = 0;
2559 NvU32 pdePcfSw = 0;
2560
2561 pdePcfHw = nvFieldGet32(&pFmt->pPde->fldPdePcf, pPte->v8);
2562 NV_ASSERT(kgmmuTranslatePdePcfFromHw_HAL(pKernelGmmu, pdePcfHw, GMMU_APERTURE_INVALID, &pdePcfSw) == NV_OK);
2563 if (pdePcfSw & (1 << SW_MMU_PCF_SPARSE_IDX))
2564 {
2565 pPteInfo->pteFlags = FLD_SET_DRF(0080_CTRL, _DMA_PTE_INFO,
2566 _PARAMS_FLAGS_GPU_CACHED, _FALSE, pPteInfo->pteFlags);
2567 }
2568 else
2569 {
2570 pPteInfo->pteFlags = FLD_SET_DRF(0080_CTRL, _DMA_PTE_INFO,
2571 _PARAMS_FLAGS_GPU_CACHED, _TRUE, pPteInfo->pteFlags);
2572 }
2573
2574 }
2575 }
2576 }
2577 else
2578 {
2579 pPteInfo->pteFlags = FLD_SET_DRF_NUM(0080_CTRL, _DMA_PTE_INFO, _PARAMS_FLAGS_GPU_CACHED,
2580 !nvFieldGetBool(&pFmtPte->fldVolatile, pPte->v8), pPteInfo->pteFlags);
2581
2582 if (nvFieldIsValid32(&pFmtPte->fldReadDisable.desc) &&
2583 nvFieldIsValid32(&pFmtPte->fldWriteDisable.desc))
2584 {
2585 if (nvFieldGetBool(&pFmtPte->fldWriteDisable, pPte->v8))
2586 {
2587 pPteInfo->pteFlags = FLD_SET_DRF(0080_CTRL, _DMA_PTE_INFO,
2588 _PARAMS_FLAGS_SHADER_ACCESS, _READ_ONLY, pPteInfo->pteFlags);
2589 }
2590 else if (nvFieldGetBool(&pFmtPte->fldReadDisable, pPte->v8))
2591 {
2592 pPteInfo->pteFlags = FLD_SET_DRF(0080_CTRL, _DMA_PTE_INFO,
2593 _PARAMS_FLAGS_SHADER_ACCESS, _WRITE_ONLY, pPteInfo->pteFlags);
2594 }
2595 else
2596 {
2597 pPteInfo->pteFlags = FLD_SET_DRF(0080_CTRL, _DMA_PTE_INFO,
2598 _PARAMS_FLAGS_SHADER_ACCESS, _READ_WRITE, pPteInfo->pteFlags);
2599 }
2600 }
2601 else
2602 {
2603 pPteInfo->pteFlags = FLD_SET_DRF(0080_CTRL, _DMA_PTE_INFO, _PARAMS_FLAGS_SHADER_ACCESS,
2604 _NOT_SUPPORTED, pPteInfo->pteFlags);
2605 }
2606
2607 pPteInfo->pteFlags = FLD_SET_DRF_NUM(0080_CTRL, _DMA_PTE_INFO, _PARAMS_FLAGS_READ_ONLY,
2608 nvFieldGetBool(&pFmtPte->fldReadOnly, pPte->v8), pPteInfo->pteFlags);
2609
2610 // Get comptagline
2611 pPteInfo->comptagLine = nvFieldGet32(&pFmtPte->fldCompTagLine, pPte->v8);
2612 }
2613
2614 // Get kind
2615 pPteInfo->kind = nvFieldGet32(&pFmtPte->fldKind, pPte->v8);
2616
2617 //
2618     // Decode the comptags value from kind. GF100 only supports 2 bits per ROP tile,
2619 // but future chips will use the other layouts.
2620 //
2621 if (memmgrIsKind_HAL(pMemoryManager, FB_IS_KIND_COMPRESSIBLE_1, pPteInfo->kind))
2622 {
2623 pPteInfo->pteFlags = FLD_SET_DRF(0080_CTRL, _DMA_PTE_INFO, _PARAMS_FLAGS_COMPTAGS, _1, pPteInfo->pteFlags);
2624 }
2625 else if (memmgrIsKind_HAL(pMemoryManager, FB_IS_KIND_COMPRESSIBLE_2, pPteInfo->kind))
2626 {
2627 pPteInfo->pteFlags = FLD_SET_DRF(0080_CTRL, _DMA_PTE_INFO, _PARAMS_FLAGS_COMPTAGS, _2, pPteInfo->pteFlags);
2628 }
2629 else if (memmgrIsKind_HAL(pMemoryManager, FB_IS_KIND_COMPRESSIBLE_4, pPteInfo->kind))
2630 {
2631 pPteInfo->pteFlags = FLD_SET_DRF(0080_CTRL, _DMA_PTE_INFO, _PARAMS_FLAGS_COMPTAGS, _4, pPteInfo->pteFlags);
2632 }
2633 else
2634 {
2635 pPteInfo->pteFlags = FLD_SET_DRF(0080_CTRL, _DMA_PTE_INFO, _PARAMS_FLAGS_COMPTAGS, _NONE, pPteInfo->pteFlags);
2636 }
2637 }
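
//
// In the GMMU_FMT_VERSION_3 branch above, the SW PCF is a bitmask indexed by
// the SW_MMU_PCF_*_IDX constants. Minimal sketch of the decode pattern
// (hypothetical helper):
//
#if 0
static NvBool
examplePcfBitSet(NvU32 ptePcfSw, NvU32 bitIdx)
{
    // e.g. examplePcfBitSet(ptePcfSw, SW_MMU_PCF_RO_IDX) -> PTE is read-only
    return (ptePcfSw & (1 << bitIdx)) != 0;
}
#endif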
2638
2639 NvS32*
2640 kgmmuGetFatalFaultIntrPendingState_IMPL
2641 (
2642 KernelGmmu *pKernelGmmu,
2643 NvU8 gfid
2644 )
2645 {
2646 return &pKernelGmmu->mmuFaultBuffer[gfid].fatalFaultIntrPending;
2647 }
2648
2649 struct HW_FAULT_BUFFER*
2650 kgmmuGetHwFaultBufferPtr_IMPL
2651 (
2652 KernelGmmu *pKernelGmmu,
2653 NvU8 gfid,
2654 NvU8 faultBufferIndex
2655 )
2656 {
2657 return &pKernelGmmu->mmuFaultBuffer[gfid].hwFaultBuffers[faultBufferIndex];
2658 }
2659
2660 NvU64
2661 kgmmuGetFaultBufferGenCnt_IMPL
2662 (
2663 OBJGPU *pGpu,
2664 KernelGmmu *pKernelGmmu,
2665 NvU8 gfid
2666 )
2667 {
2668 return pKernelGmmu->mmuFaultBuffer[gfid].faultBufferGenerationCounter;
2669 }
2670
2671 void *
2672 kgmmuGetShadowFaultBufferCslContext_IMPL
2673 (
2674 OBJGPU *pGpu,
2675 KernelGmmu *pKernelGmmu,
2676 FAULT_BUFFER_TYPE type
2677 )
2678 {
2679 ConfidentialCompute *pConfCompute = GPU_GET_CONF_COMPUTE(pGpu);
2680
2681 if (!gpuIsCCFeatureEnabled(pGpu))
2682 {
2683 return NULL;
2684 }
2685
2686 switch (type)
2687 {
2688 case NON_REPLAYABLE_FAULT_BUFFER:
2689 return pConfCompute->pNonReplayableFaultCcslCtx;
2690 case REPLAYABLE_FAULT_BUFFER:
2691 return pConfCompute->pReplayableFaultCcslCtx;
2692 default:
2693 break;
2694 }
2695
2696 return NULL;
2697 }
2698
2699 NV_STATUS
2700 kgmmuFaultBufferMap_IMPL
2701 (
2702 OBJGPU *pGpu,
2703 KernelGmmu *pKernelGmmu,
2704 NvU32 index,
2705 NvU32 gfid
2706 )
2707 {
2708 MEMORY_DESCRIPTOR *pMemDesc;
2709 struct HW_FAULT_BUFFER *pFaultBuffer;
2710 MemoryManager *pMemoryManager = GPU_GET_MEMORY_MANAGER(pGpu);
2711
2712 NvU64 vaddr;
2713 NV_STATUS status = NV_OK;
2714
2715 // Return early if fault buffer is disabled
2716 if (pKernelGmmu->getProperty(pKernelGmmu, PDB_PROP_KGMMU_FAULT_BUFFER_DISABLED))
2717 return NV_OK;
2718
2719 NV_ASSERT_OR_RETURN((index < NUM_FAULT_BUFFERS), NV_ERR_INVALID_ARGUMENT);
2720 NV_ASSERT_OR_RETURN(!IS_GSP_CLIENT(pGpu), NV_ERR_INVALID_STATE);
2721
2722 pFaultBuffer = &pKernelGmmu->mmuFaultBuffer[gfid].hwFaultBuffers[index];
2723 pMemDesc = pFaultBuffer->pFaultBufferMemDesc;
2724
2725 memmgrSetMemDescPageSize_HAL(pGpu, pMemoryManager, pMemDesc, AT_GPU, RM_ATTR_PAGE_SIZE_4KB);
2726
2727 {
2728 status = kbusMapCpuInvisibleBar2Aperture_HAL(pGpu, GPU_GET_KERNEL_BUS(pGpu), pMemDesc,
2729 &vaddr, pMemDesc->Size, 0, gfid);
2730 if (status != NV_OK)
2731 {
2732 return status;
2733 }
2734
2735 NV_ASSERT(pFaultBuffer->bar2FaultBufferAddr == 0);
2736 pFaultBuffer->bar2FaultBufferAddr = vaddr;
2737 }
2738
2739 if (IS_GFID_PF(gfid))
2740 {
2741 if (pMemDesc->_addressSpace == ADDR_FBMEM && !RMCFG_FEATURE_PLATFORM_GSP)
2742 {
2743 //
2744             // For MODS and testing, the buffer can be allocated in FB. In such cases map it via BAR2, as
2745             // only RM will own this buffer. BAR1 mappings need code refactoring, as the BAR1 vaSpace is
2746             // allocated lazily.
2747 //
2748 pFaultBuffer->kernelVaddr = NV_PTR_TO_NvP64(kbusMapRmAperture_HAL(pGpu, pMemDesc));
2749 if (!pFaultBuffer->kernelVaddr)
2750 {
2751 NV_ASSERT(0);
2752 return NV_ERR_INVALID_ADDRESS;
2753 }
2754 }
2755 else
2756 {
2757 if (memdescGetContiguity(pMemDesc, AT_GPU))
2758 {
2759 status = memdescMap(pMemDesc, 0, pMemDesc->Size, NV_TRUE, NV_PROTECT_READ_WRITE,
2760 &pFaultBuffer->kernelVaddr, &pFaultBuffer->hCpuFaultBuffer);
2761 if (status != NV_OK)
2762 {
2763 return status;
2764 }
2765 }
2766 else
2767 {
2768 NvU32 i, j;
2769 NvU32 numPages = NV_ROUNDUP(pMemDesc->Size, RM_PAGE_SIZE) / RM_PAGE_SIZE;
2770 GMMU_FAULT_BUFFER_PAGE *pBufferPage;
2771
2772 pFaultBuffer->pBufferPages = portMemAllocNonPaged(numPages * sizeof(GMMU_FAULT_BUFFER_PAGE));
2773 if (pFaultBuffer->pBufferPages == NULL)
2774 {
2775 return NV_ERR_NO_MEMORY;
2776 }
2777
2778 for (i = 0; i < numPages; i++)
2779 {
2780 pBufferPage = &pFaultBuffer->pBufferPages[i];
2781
2782                     status = memdescMap(pMemDesc, i * RM_PAGE_SIZE, RM_PAGE_SIZE, NV_TRUE, NV_PROTECT_READ_WRITE,
2783 &pBufferPage->pAddress, &pBufferPage->pPriv);
2784 if (status != NV_OK)
2785 {
2786 break;
2787 }
2788 }
2789
2790 if (status != NV_OK)
2791 {
2792 for (j = 0; j < i; j++)
2793 {
2794 pBufferPage = &pFaultBuffer->pBufferPages[j];
2795
2796 memdescUnmap(pMemDesc, NV_TRUE, osGetCurrentProcess(),
2797 pBufferPage->pAddress, pBufferPage->pPriv);
2798 }
2799
2800 portMemFree(pFaultBuffer->pBufferPages);
2801
2802 return status;
2803 }
2804 }
2805 }
2806
2807 if (memdescGetContiguity(pMemDesc, AT_GPU))
2808 {
2809 portMemSet(NvP64_VALUE(pFaultBuffer->kernelVaddr), 0, (NvLength)pMemDesc->Size);
2810 }
2811 else
2812 {
2813 NvU32 i;
2814 for (i = 0; i * RM_PAGE_SIZE < pMemDesc->Size; i++)
2815 {
2816 GMMU_FAULT_BUFFER_PAGE *page = &pFaultBuffer->pBufferPages[i];
2817 portMemSet(NvP64_VALUE(page->pAddress), 0, RM_PAGE_SIZE);
2818 }
2819 }
2820 }
2821
2822 return status;
2823 }
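
//
// The discontiguous branch above maps the buffer page-by-page and, on a
// partial failure, unwinds only the pages that were already mapped (indices
// [0, i)). The same bounded-unwind pattern in isolation (exampleMapOne and
// exampleUnmapOne are hypothetical, not driver APIs):
//
#if 0
static NV_STATUS exampleMapOne(NvU32 pageIdx);
static void exampleUnmapOne(NvU32 pageIdx);

static NV_STATUS
exampleMapAllOrNothing(NvU32 numPages)
{
    NvU32 i, j;
    NV_STATUS status = NV_OK;

    for (i = 0; i < numPages; i++)
    {
        status = exampleMapOne(i);
        if (status != NV_OK)
            break;                 // pages [0, i) are mapped at this point
    }

    if (status != NV_OK)
    {
        for (j = 0; j < i; j++)
            exampleUnmapOne(j);    // unwind only what actually succeeded
    }

    return status;
}
#endif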
2824
2825 NV_STATUS
2826 kgmmuFaultBufferUnmap_IMPL
2827 (
2828 OBJGPU *pGpu,
2829 KernelGmmu *pKernelGmmu,
2830 NvU32 index,
2831 NvU32 gfid
2832 )
2833 {
2834 struct HW_FAULT_BUFFER *pFaultBuffer;
2835
2836 NV_ASSERT_OR_RETURN((index < NUM_FAULT_BUFFERS), NV_ERR_INVALID_ARGUMENT);
2837
2838 // Return early if fault buffer is disabled
2839 if (pKernelGmmu->getProperty(pKernelGmmu, PDB_PROP_KGMMU_FAULT_BUFFER_DISABLED))
2840 return NV_OK;
2841
2842 pFaultBuffer = &pKernelGmmu->mmuFaultBuffer[gfid].hwFaultBuffers[index];
2843
2844 if (pFaultBuffer == NULL)
2845 {
2846 return NV_OK;
2847 }
2848
2849 if (pFaultBuffer->pFaultBufferMemDesc != NULL)
2850 {
2851 if (IS_GFID_PF(gfid))
2852 {
2853 // kbusUnmapRmAperture cannot handle discontiguous allocations on GSP-RM
2854 if (pFaultBuffer->pFaultBufferMemDesc->_addressSpace == ADDR_FBMEM &&
2855 !RMCFG_FEATURE_PLATFORM_GSP)
2856 {
2857 kbusUnmapRmAperture_HAL(pGpu,
2858 pFaultBuffer->pFaultBufferMemDesc,
2859 (NvU8 **)&pFaultBuffer->kernelVaddr,
2860 NV_TRUE);
2861 }
2862 else
2863 {
2864 if (memdescGetContiguity(pFaultBuffer->pFaultBufferMemDesc, AT_GPU))
2865 {
2866 memdescUnmap(pFaultBuffer->pFaultBufferMemDesc, NV_TRUE, osGetCurrentProcess(),
2867 pFaultBuffer->kernelVaddr, pFaultBuffer->hCpuFaultBuffer);
2868 }
2869 else
2870 {
2871 if (pFaultBuffer->pBufferPages != NULL)
2872 {
2873 NvU32 i;
2874 NvU32 numPages = NV_ROUNDUP(pFaultBuffer->pFaultBufferMemDesc->Size, RM_PAGE_SIZE) / RM_PAGE_SIZE;
2875
2876 for (i = 0; i < numPages; i++)
2877 {
2878 GMMU_FAULT_BUFFER_PAGE *pBufferPage;
2879
2880 pBufferPage = &pFaultBuffer->pBufferPages[i];
2881
2882 memdescUnmap(pFaultBuffer->pFaultBufferMemDesc, NV_TRUE, osGetCurrentProcess(),
2883 pBufferPage->pAddress, pBufferPage->pPriv);
2884 }
2885
2886 portMemFree(pFaultBuffer->pBufferPages);
2887 }
2888 }
2889 }
2890 }
2891
2892 {
2893 kbusUnmapCpuInvisibleBar2Aperture_HAL(pGpu, GPU_GET_KERNEL_BUS(pGpu),
2894 pFaultBuffer->pFaultBufferMemDesc, pFaultBuffer->bar2FaultBufferAddr, gfid);
2895 }
2896 }
2897
2898 pFaultBuffer->pBufferPages = NULL;
2899 pFaultBuffer->kernelVaddr = NvP64_NULL;
2900 pFaultBuffer->bar2FaultBufferAddr = 0;
2901 return NV_OK;
2902 }
2903
2904 NV_STATUS
2905 kgmmuServiceVfPriFaults_IMPL
2906 (
2907 OBJGPU *pGpu,
2908 KernelGmmu *pKernelGmmu,
2909 NvU32 faultType
2910 )
2911 {
2912 NV_STATUS status = NV_OK;
2913 NV2080_CTRL_CMD_GPU_HANDLE_VF_PRI_FAULT_PARAMS params;
2914
2915 NV_ASSERT_OR_RETURN(IS_VIRTUAL_WITH_SRIOV(pGpu), NV_ERR_INVALID_ARGUMENT);
2916
2917 if (faultType == NV2080_CTRL_CMD_GPU_HANDLE_VF_PRI_FAULT_TYPE_INVALID)
2918 return NV_ERR_INVALID_PARAMETER;
2919
2920     portMemSet(&params, 0, sizeof(params));
2921 params.faultType = faultType;
2922
2923 NV_RM_RPC_CONTROL(pGpu, pGpu->hDefaultClientShare, pGpu->hDefaultClientShareSubDevice,
2924                       NV2080_CTRL_CMD_GPU_HANDLE_VF_PRI_FAULT, &params, sizeof(params), status);
2925
2926 return status;
2927 }
2928
2929 NV_STATUS
2930 kgmmuFaultCancelTargeted_VF
2931 (
2932 OBJGPU *pGpu,
2933 KernelGmmu *pKernelGmmu,
2934 GMMU_FAULT_CANCEL_INFO *pCancelInfo
2935 )
2936 {
2937 TLB_INVALIDATE_PARAMS params;
2938
2939 // Clear struct before use.
2940     portMemSet(&params, 0, sizeof(TLB_INVALIDATE_PARAMS));
2941     gpuSetTimeout(pGpu, GPU_TIMEOUT_DEFAULT, &params.timeout, 0);
2942
2943 params.gfid = GPU_GFID_PF;
2944
2945 // Bug 2029506 fix will remove kgmmuFaultCancelIssueInvalidate call here
2946 return kgmmuFaultCancelIssueInvalidate_HAL(pGpu, pKernelGmmu, pCancelInfo,
2947                                                &params, NV_FALSE);
2948 }
2949
2950 NvU32
2951 kgmmuGetFaultBufferReservedFbSpaceSize_IMPL
2952 (
2953 OBJGPU *pGpu,
2954 KernelGmmu *pKernelGmmu
2955 )
2956 {
2957 NvU32 reservedBytes = 0;
2958 NvU32 faultBufferAddrSpace;
2959 NvU32 faultBufferSize;
2960 NvU32 i;
2961 NV_STATUS status;
2962
2963 if (pKernelGmmu->getProperty(pKernelGmmu, PDB_PROP_KGMMU_FAULT_BUFFER_DISABLED))
2964 return 0;
2965
2966 for (i = 0; i < NUM_FAULT_BUFFERS; i++)
2967 {
2968 status = kgmmuFaultBufferGetAddressSpace(pGpu, pKernelGmmu, i,
2969 &faultBufferAddrSpace, NULL);
2970 NV_ASSERT(status == NV_OK);
2971 if (status != NV_OK || faultBufferAddrSpace != ADDR_FBMEM)
2972 {
2973 continue;
2974 }
2975
2976 faultBufferSize = kgmmuSetAndGetDefaultFaultBufferSize_HAL(pGpu, pKernelGmmu, i, GPU_GFID_PF);
2977 reservedBytes += RM_PAGE_ALIGN_UP(faultBufferSize);
2978 }
2979
2980 return reservedBytes;
2981 }
2982
2983 NV_STATUS
2984 subdeviceCtrlCmdGmmuGetStaticInfo_IMPL
2985 (
2986 Subdevice *pSubdevice,
2987 NV2080_CTRL_INTERNAL_GMMU_GET_STATIC_INFO_PARAMS *pParams
2988 )
2989 {
2990 OBJGPU *pGpu = GPU_RES_GET_GPU(pSubdevice);
2991 KernelGmmu *pKernelGmmu = GPU_GET_KERNEL_GMMU(pGpu);
2992
2993 pParams->replayableFaultBufferSize = kgmmuSetAndGetDefaultFaultBufferSize_HAL(pGpu, pKernelGmmu,
2994 REPLAYABLE_FAULT_BUFFER,
2995 GPU_GFID_PF);
2996 pParams->nonReplayableFaultBufferSize = kgmmuSetAndGetDefaultFaultBufferSize_HAL(pGpu, pKernelGmmu,
2997 NON_REPLAYABLE_FAULT_BUFFER,
2998 GPU_GFID_PF);
2999
3000 if (gpuIsCCFeatureEnabled(pGpu) && gpuIsGspOwnedFaultBuffersEnabled(pGpu))
3001 {
3002 NvU32 maxNumPacketsReplayable = pParams->replayableFaultBufferSize / sizeof(struct GMMU_FAULT_PACKET);
3003 NvU32 maxNumPacketsNonReplayable = pParams->nonReplayableFaultBufferSize / sizeof(struct GMMU_FAULT_PACKET);
3004
3005 pParams->replayableShadowFaultBufferMetadataSize = sizeof(struct GMMU_FAULT_PACKET_METADATA) * maxNumPacketsReplayable;
3006 pParams->nonReplayableShadowFaultBufferMetadataSize = sizeof(struct GMMU_FAULT_PACKET_METADATA) * maxNumPacketsNonReplayable;
3007 }
3008
3009 return NV_OK;
3010 }
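
//
// The metadata sizing above is direct proportionality: one
// GMMU_FAULT_PACKET_METADATA entry per fault packet that fits in the buffer.
// Worked sketch of the same arithmetic (hypothetical helper):
//
#if 0
static NvU32
exampleShadowMetadataSize(NvU32 faultBufferSize)
{
    // How many packets fit, then one metadata record per packet.
    NvU32 maxNumPackets = (NvU32)(faultBufferSize / sizeof(struct GMMU_FAULT_PACKET));

    return (NvU32)(maxNumPackets * sizeof(struct GMMU_FAULT_PACKET_METADATA));
}
#endif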
3011
3012 static NV_STATUS
3013 _kgmmuFaultBufferDescribe
3014 (
3015 OBJGPU *pGpu,
3016 KernelGmmu *pKernelGmmu,
3017 NvU32 index,
3018 NvU64 *pFaultBufferPages,
3019 NvU32 faultBufferSize
3020 )
3021 {
3022 NV_STATUS status;
3023 MEMORY_DESCRIPTOR *pMemDesc = NULL;
3024 struct HW_FAULT_BUFFER *pFaultBuffer;
3025 NvU32 faultBufferAddrSpace = ADDR_UNKNOWN;
3026
3027 NV_ASSERT_OR_RETURN((index < NUM_FAULT_BUFFERS), NV_ERR_INVALID_ARGUMENT);
3028
3029 status = kgmmuFaultBufferCreateMemDesc(pGpu, pKernelGmmu, index, faultBufferSize,
3030 (MEMDESC_FLAGS_GUEST_ALLOCATED |
3031 MEMDESC_FLAGS_EXT_PAGE_ARRAY_MEM),
3032 &pMemDesc);
3033 if (status != NV_OK)
3034 {
3035 return status;
3036 }
3037
3038 pFaultBuffer = &pKernelGmmu->mmuFaultBuffer[GPU_GFID_PF].hwFaultBuffers[index];
3039
3040 pFaultBuffer->faultBufferSize = faultBufferSize;
3041 pFaultBuffer->pFaultBufferMemDesc = NULL;
3042
3043 {
3044 NvBool bIsContiguous = memdescGetContiguity(pMemDesc, AT_GPU);
3045
3046 if (bIsContiguous)
3047 {
3048 status = kgmmuFaultBufferGetAddressSpace(pGpu, pKernelGmmu, index, &faultBufferAddrSpace, NULL);
3049 if (status != NV_OK)
3050 {
3051 memdescDestroy(pMemDesc);
3052 return status;
3053 }
3054
3055 memdescDescribe(pMemDesc, faultBufferAddrSpace,
3056 pFaultBufferPages[0], faultBufferSize);
3057 }
3058 else
3059 {
3060 memdescFillPages(pMemDesc, 0, pFaultBufferPages,
3061 RM_PAGE_ALIGN_UP(faultBufferSize)/RM_PAGE_SIZE,
3062 RM_PAGE_SIZE);
3063 }
3064 }
3065
3066 pFaultBuffer->pFaultBufferMemDesc = pMemDesc;
3067
3068 return NV_OK;
3069 }
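
//
// _kgmmuFaultBufferDescribe above picks between two ways of populating the
// memdesc: a physically contiguous buffer is described by its base address
// alone, while a scattered one is filled from the caller's page array.
// Sketch of that choice (assumes the memdesc APIs used above):
//
#if 0
static void
exampleDescribeBuffer(MEMORY_DESCRIPTOR *pMemDesc, NV_ADDRESS_SPACE addrSpace,
                      NvU64 *pPages, NvU32 size)
{
    if (memdescGetContiguity(pMemDesc, AT_GPU))
    {
        // One contiguous run: base address + size is enough.
        memdescDescribe(pMemDesc, addrSpace, pPages[0], size);
    }
    else
    {
        // Scattered pages: hand over the per-page physical addresses.
        memdescFillPages(pMemDesc, 0, pPages,
                         RM_PAGE_ALIGN_UP(size) / RM_PAGE_SIZE, RM_PAGE_SIZE);
    }
}
#endif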
3070
3071 NV_STATUS
3072 kgmmuFaultBufferReplayableSetup_IMPL
3073 (
3074 OBJGPU *pGpu,
3075 KernelGmmu *pKernelGmmu,
3076 NvHandle hClient,
3077 NvHandle hObject,
3078 NvU32 faultBufferSize,
3079 NvU64 *pFaultBufferPages
3080 )
3081 {
3082 NV_STATUS status;
3083 struct HW_FAULT_BUFFER *pFaultBuffer;
3084
3085 if (IS_VIRTUAL_WITHOUT_SRIOV(pGpu) ||
3086 pKernelGmmu->getProperty(pKernelGmmu, PDB_PROP_KGMMU_FAULT_BUFFER_DISABLED))
3087 {
3088 return NV_OK;
3089 }
3090
3091 pFaultBuffer = &pKernelGmmu->mmuFaultBuffer[GPU_GFID_PF].hwFaultBuffers[REPLAYABLE_FAULT_BUFFER];
3092 if (pFaultBuffer->pFaultBufferMemDesc != NULL)
3093 {
3094 return NV_ERR_NOT_SUPPORTED;
3095 }
3096
3097 status = _kgmmuFaultBufferDescribe(pGpu, pKernelGmmu, REPLAYABLE_FAULT_BUFFER,
3098 pFaultBufferPages, faultBufferSize);
3099
3100 if (status != NV_OK)
3101 {
3102 return status;
3103 }
3104
3105 status = kgmmuFaultBufferLoad_HAL(pGpu, pKernelGmmu, REPLAYABLE_FAULT_BUFFER, GPU_GFID_PF);
3106 if (status != NV_OK)
3107 {
3108 kgmmuFaultBufferUnregister(pGpu, pKernelGmmu, REPLAYABLE_FAULT_BUFFER);
3109 return status;
3110 }
3111
3112 pKernelGmmu->mmuFaultBuffer[GPU_GFID_PF].hFaultBufferClient = hClient;
3113 pKernelGmmu->mmuFaultBuffer[GPU_GFID_PF].hFaultBufferObject = hObject;
3114
3115 pKernelGmmu->setProperty(pKernelGmmu,
3116 PDB_PROP_KGMMU_REPLAYABLE_FAULT_BUFFER_IN_USE,
3117 NV_TRUE);
3118
3119 return NV_OK;
3120 }
3121
3122 NV_STATUS
3123 subdeviceCtrlCmdInternalGmmuRegisterFaultBuffer_IMPL
3124 (
3125 Subdevice *pSubdevice,
3126 NV2080_CTRL_INTERNAL_GMMU_REGISTER_FAULT_BUFFER_PARAMS *pParams
3127 )
3128 {
3129 OBJGPU *pGpu = GPU_RES_GET_GPU(pSubdevice);
3130 KernelGmmu *pKernelGmmu = GPU_GET_KERNEL_GMMU(pGpu);
3131
3132 NV_PRINTF(LEVEL_INFO, "GMMU_REGISTER_FAULT_BUFFER\n");
3133 return kgmmuFaultBufferReplayableSetup(pGpu, pKernelGmmu,
3134 pParams->hClient,
3135 pParams->hObject,
3136 pParams->faultBufferSize,
3137 pParams->faultBufferPteArray);
3138 }
3139
3140 NV_STATUS
3141 subdeviceCtrlCmdInternalGmmuUnregisterFaultBuffer_IMPL
3142 (
3143 Subdevice *pSubdevice
3144 )
3145 {
3146 OBJGPU *pGpu = GPU_RES_GET_GPU(pSubdevice);
3147 KernelGmmu *pKernelGmmu = GPU_GET_KERNEL_GMMU(pGpu);
3148 NV_STATUS status;
3149
3150 NV_PRINTF(LEVEL_INFO, "GMMU_UNREGISTER_FAULT_BUFFER\n");
3151 status = kgmmuFaultBufferReplayableDestroy(pGpu, pKernelGmmu);
3152 if (status == NV_OK)
3153 {
3154 pKernelGmmu->setProperty(pKernelGmmu,
3155 PDB_PROP_KGMMU_REPLAYABLE_FAULT_BUFFER_IN_USE,
3156 NV_FALSE);
3157 }
3158 return status;
3159 }
3160