/*
 * SPDX-FileCopyrightText: Copyright (c) 2021-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 * SPDX-License-Identifier: MIT
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 */

#define NVOC_KERN_GMMU_H_PRIVATE_ACCESS_ALLOWED

#include "gpu/mmu/kern_gmmu.h"
#include "gpu/mem_mgr/mem_mgr.h"
#include "vgpu/vgpu_events.h"
#include "nv_sriov_defines.h"
#include "kernel/gpu/intr/intr.h"
#include "kernel/gpu/gsp/kernel_gsp.h"
#include "kernel/gpu/conf_compute/ccsl.h"

#include "mmu/gmmu_fmt.h"
#include "published/hopper/gh100/dev_mmu.h"
#include "published/hopper/gh100/dev_fault.h"
#include "published/hopper/gh100/dev_vm.h"
#include "published/hopper/gh100/dev_vm_addendum.h"

/*!
 * Check if a specific GMMU format version is supported.
 */
NvBool
kgmmuFmtIsVersionSupported_GH10X(KernelGmmu *pKernelGmmu, NvU32 version)
{
    return (version == GMMU_FMT_VERSION_3);
}

/*!
 * Initialize the GMMU format families.
 */
NV_STATUS
kgmmuFmtFamiliesInit_GH100(OBJGPU *pGpu, KernelGmmu *pKernelGmmu)
{
    NvU32 i;
    NvU32 pdePcfHw = 0;
    NvU32 pdePcfSw = 0;
    NvU32 ptePcfHw = 0;
    NvU32 ptePcfSw = 0;

    // Initialize the sparse encoding in the PDE PCF field for V3
    GMMU_FMT_FAMILY *pFam = pKernelGmmu->pFmtFamilies[GMMU_FMT_VERSION_3 - 1];

    if (pFam != NULL)
    {
        // 1. Initialize sparsePde
        pdePcfSw |= (1 << SW_MMU_PCF_SPARSE_IDX);
        pdePcfSw |= (1 << SW_MMU_PCF_ATS_ALLOWED_IDX);
        NV_ASSERT_OR_RETURN((kgmmuTranslatePdePcfFromSw_HAL(pKernelGmmu, pdePcfSw, &pdePcfHw) == NV_OK),
                            NV_ERR_INVALID_ARGUMENT);
        gmmuFieldSetAperture(&pFam->pde.fldAperture, GMMU_APERTURE_INVALID,
                             pFam->sparsePde.v8);
        nvFieldSet32(&pFam->pde.fldPdePcf, pdePcfHw, pFam->sparsePde.v8);

        // 2. Initialize sparsePdeMulti
        for (i = 0; i < MMU_FMT_MAX_SUB_LEVELS; ++i)
        {
            const GMMU_FMT_PDE *pPdeFmt = &pFam->pdeMulti.subLevels[i];
            gmmuFieldSetAperture(&pPdeFmt->fldAperture, GMMU_APERTURE_INVALID,
                                 pFam->sparsePdeMulti.v8);
            // Set the PDE PCF sparse bit only for sub-level 0 of PdeMulti
            if (i == 0)
            {
                nvFieldSet32(&pPdeFmt->fldPdePcf, pdePcfHw, pFam->sparsePdeMulti.v8);
            }
        }

        // 3. Initialize nv4kPte
        ptePcfSw |= (1 << SW_MMU_PCF_NV4K_IDX);
        nvFieldSetBool(&pFam->pte.fldValid, NV_FALSE, pFam->nv4kPte.v8);
        NV_ASSERT_OR_RETURN((kgmmuTranslatePtePcfFromSw_HAL(pKernelGmmu, ptePcfSw, &ptePcfHw) == NV_OK),
                            NV_ERR_INVALID_ARGUMENT);
        nvFieldSet32(&pFam->pte.fldPtePcf, ptePcfHw, pFam->nv4kPte.v8);

        // 4. Initialize sparsePte
        ptePcfSw = (1 << SW_MMU_PCF_SPARSE_IDX);
        nvFieldSetBool(&pFam->pte.fldValid, NV_FALSE, pFam->sparsePte.v8);
        NV_ASSERT_OR_RETURN((kgmmuTranslatePtePcfFromSw_HAL(pKernelGmmu, ptePcfSw, &ptePcfHw) == NV_OK),
                            NV_ERR_INVALID_ARGUMENT);
        nvFieldSet32(&pFam->pte.fldPtePcf, ptePcfHw, pFam->sparsePte.v8);
    }

    return NV_OK;
}

#define PTE_PCF_INVALID_LIST(fn) \
        fn(INVALID) \
        fn(NO_VALID_4KB_PAGE) \
        fn(SPARSE) \
        fn(MAPPING_NOWHERE)

#define PTE_PCF_VALID_LIST(fn) \
        fn(PRIVILEGE_RW_ATOMIC_CACHED_ACD) \
        fn(PRIVILEGE_RW_ATOMIC_CACHED_ACE) \
        fn(PRIVILEGE_RW_ATOMIC_UNCACHED_ACD) \
        fn(PRIVILEGE_RW_ATOMIC_UNCACHED_ACE) \
        fn(PRIVILEGE_RW_NO_ATOMIC_UNCACHED_ACE) \
        fn(PRIVILEGE_RW_NO_ATOMIC_CACHED_ACE) \
        fn(PRIVILEGE_RO_ATOMIC_UNCACHED_ACE) \
        fn(PRIVILEGE_RO_NO_ATOMIC_UNCACHED_ACE) \
        fn(PRIVILEGE_RO_NO_ATOMIC_CACHED_ACE) \
        fn(REGULAR_RW_ATOMIC_CACHED_ACD) \
        fn(REGULAR_RW_ATOMIC_CACHED_ACE) \
        fn(REGULAR_RW_ATOMIC_UNCACHED_ACD) \
        fn(REGULAR_RW_ATOMIC_UNCACHED_ACE) \
        fn(REGULAR_RW_NO_ATOMIC_CACHED_ACD) \
        fn(REGULAR_RW_NO_ATOMIC_CACHED_ACE) \
        fn(REGULAR_RW_NO_ATOMIC_UNCACHED_ACD) \
        fn(REGULAR_RW_NO_ATOMIC_UNCACHED_ACE) \
        fn(REGULAR_RO_ATOMIC_CACHED_ACD) \
        fn(REGULAR_RO_ATOMIC_CACHED_ACE) \
        fn(REGULAR_RO_ATOMIC_UNCACHED_ACD) \
        fn(REGULAR_RO_ATOMIC_UNCACHED_ACE) \
        fn(REGULAR_RO_NO_ATOMIC_CACHED_ACD) \
        fn(REGULAR_RO_NO_ATOMIC_CACHED_ACE) \
        fn(REGULAR_RO_NO_ATOMIC_UNCACHED_ACD) \
        fn(REGULAR_RO_NO_ATOMIC_UNCACHED_ACE)

#define PTE_PCF_HW_FROM_SW(name) \
        case (SW_MMU_PTE_PCF_##name): \
        { \
            *pPtePcfHw = NV_MMU_VER3_PTE_PCF_##name; \
            break; \
        }

#define PTE_PCF_SW_FROM_HW(name) \
        case (NV_MMU_VER3_PTE_PCF_##name): \
        { \
            *pPtePcfSw = SW_MMU_PTE_PCF_##name; \
            break; \
        }

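//
// The PTE_PCF_*_LIST macros above use the X-macro pattern: each list applies
// `fn` to every PCF name, so the same list generates both translation
// directions below. As an illustrative expansion (not part of the build),
// PTE_PCF_HW_FROM_SW(SPARSE) produces:
//
//     case (SW_MMU_PTE_PCF_SPARSE):
//     {
//         *pPtePcfHw = NV_MMU_VER3_PTE_PCF_SPARSE;
//         break;
//     }
//
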
//
// Takes a SW PTE PCF and translates it to a HW PTE PCF.
// If a bit pattern is not supported by HW, returns NV_ERR_NOT_SUPPORTED.
//
NV_STATUS
kgmmuTranslatePtePcfFromSw_GH100
(
    KernelGmmu *pKernelGmmu,
    NvU32       ptePcfSw,
    NvU32      *pPtePcfHw
)
{
    switch (ptePcfSw)
    {
        PTE_PCF_INVALID_LIST(PTE_PCF_HW_FROM_SW)
        PTE_PCF_VALID_LIST(PTE_PCF_HW_FROM_SW)

        default:
        {
            NV_PRINTF(LEVEL_ERROR, "Unsupported SW PTE PCF pattern requested : %x\n", ptePcfSw);
            return NV_ERR_NOT_SUPPORTED;
        }
    }

    return NV_OK;
}

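//
// Hypothetical usage sketch (illustration only): a caller composing a sparse
// PTE would translate the SW PCF bits before programming the PTE field, much
// like kgmmuFmtFamiliesInit_GH100 above:
//
//     NvU32 ptePcfHw;
//     if (kgmmuTranslatePtePcfFromSw_HAL(pKernelGmmu, SW_MMU_PTE_PCF_SPARSE,
//                                        &ptePcfHw) == NV_OK)
//     {
//         nvFieldSet32(&pFam->pte.fldPtePcf, ptePcfHw, pFam->sparsePte.v8);
//     }
//
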
NV_STATUS
kgmmuTranslatePtePcfFromHw_GH100
(
    KernelGmmu *pKernelGmmu,
    NvU32       ptePcfHw,
    NvBool      bPteValid,
    NvU32      *pPtePcfSw
)
{
    if (!bPteValid)
    {
        switch (ptePcfHw)
        {
            PTE_PCF_INVALID_LIST(PTE_PCF_SW_FROM_HW)

            default: return NV_ERR_NOT_SUPPORTED;
        }
    }
    else
    {
        switch (ptePcfHw)
        {
            PTE_PCF_VALID_LIST(PTE_PCF_SW_FROM_HW)

            default:
            {
                NV_PRINTF(LEVEL_ERROR, "Unsupported HW PTE PCF pattern requested : %x\n", ptePcfHw);
                return NV_ERR_NOT_SUPPORTED;
            }
        }
    }

    return NV_OK;
}

#define PDE_PCF_INVALID_LIST(fn) \
        fn(INVALID_ATS_ALLOWED) \
        fn(SPARSE_ATS_ALLOWED) \
        fn(INVALID_ATS_NOT_ALLOWED) \
        fn(SPARSE_ATS_NOT_ALLOWED)

#define PDE_PCF_VALID_LIST(fn) \
        fn(VALID_CACHED_ATS_ALLOWED) \
        fn(VALID_CACHED_ATS_NOT_ALLOWED) \
        fn(VALID_UNCACHED_ATS_ALLOWED) \
        fn(VALID_UNCACHED_ATS_NOT_ALLOWED)

#define PDE_PCF_HW_FROM_SW(name) \
        case (SW_MMU_PDE_PCF_##name): \
        { \
            *pPdePcfHw = NV_MMU_VER3_PDE_PCF_##name; \
            break; \
        }

#define PDE_PCF_SW_FROM_HW(name) \
        case (NV_MMU_VER3_PDE_PCF_##name): \
        { \
            *pPdePcfSw = SW_MMU_PDE_PCF_##name; \
            break; \
        }

//
// Takes a SW PDE PCF and translates it to a HW PDE PCF.
// If a bit pattern is not supported by HW, returns NV_ERR_NOT_SUPPORTED.
//
NV_STATUS
kgmmuTranslatePdePcfFromSw_GH100
(
    KernelGmmu *pKernelGmmu,
    NvU32       pdePcfSw,
    NvU32      *pPdePcfHw
)
{
    switch (pdePcfSw)
    {
        PDE_PCF_INVALID_LIST(PDE_PCF_HW_FROM_SW)
        PDE_PCF_VALID_LIST(PDE_PCF_HW_FROM_SW)

        default: return NV_ERR_NOT_SUPPORTED;
    }

    return NV_OK;
}

//
// Takes a HW PDE PCF and translates it to a SW PDE PCF.
// If a bit pattern is not supported by SW, returns NV_ERR_NOT_SUPPORTED.
//
NV_STATUS
kgmmuTranslatePdePcfFromHw_GH100
(
    KernelGmmu    *pKernelGmmu,
    NvU32          pdePcfHw,
    GMMU_APERTURE  aperture,
    NvU32         *pPdePcfSw
)
{
    if (aperture == GMMU_APERTURE_INVALID)
    {
        switch (pdePcfHw)
        {
            PDE_PCF_INVALID_LIST(PDE_PCF_SW_FROM_HW)

            default: return NV_ERR_NOT_SUPPORTED;
        }
    }
    else
    {
        switch (pdePcfHw)
        {
            PDE_PCF_VALID_LIST(PDE_PCF_SW_FROM_HW)

            default: return NV_ERR_NOT_SUPPORTED;
        }
    }

    return NV_OK;
}

/*
 * @brief Validates the fabric base address.
 *
 * @param pKernelGmmu
 * @param fabricBaseAddr
 *
 * @returns On success, NV_OK.
 *          On failure, returns NV_ERR_XXX.
 */
NV_STATUS
kgmmuValidateFabricBaseAddress_GH100
(
    KernelGmmu *pKernelGmmu,
    NvU64       fabricBaseAddr
)
{
    OBJGPU        *pGpu           = ENG_GET_GPU(pKernelGmmu);
    MemoryManager *pMemoryManager = GPU_GET_MEMORY_MANAGER(pGpu);
    NvU64          fbSizeBytes;

    fbSizeBytes = pMemoryManager->Ram.fbTotalMemSizeMb << 20;

    //
    // Hopper SKUs will be paired with NVSwitches (Laguna Seca) supporting 2K
    // mapslots that can cover 512GB each. Make sure that the fabric base
    // address being used is valid to cover the whole frame buffer.
    //

    // Check if the fabric address is aligned to the mapslot size.
    if (fabricBaseAddr & (NVBIT64(39) - 1))
    {
        return NV_ERR_INVALID_ARGUMENT;
    }

    // Align fbSize to the mapslot size.
    fbSizeBytes = RM_ALIGN_UP(fbSizeBytes, NVBIT64(39));

    // Make sure the address range doesn't go beyond the limit (2K * 512GB).
    if ((fabricBaseAddr + fbSizeBytes) > NVBIT64(50))
    {
        return NV_ERR_INVALID_ARGUMENT;
    }

    return NV_OK;
}

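//
// Worked example of the checks above: each mapslot covers 512GB = 2^39
// bytes, so a valid fabric base address must have its low 39 bits clear.
// With 2K (2^11) mapslots, total fabric coverage is 2^11 * 2^39 = 2^50
// bytes, which is the limit the range check enforces.
//
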
/*!
 * @brief Get the engine ID associated with the Graphics Engine
 */
NvU32
kgmmuGetGraphicsEngineId_GH100
(
    KernelGmmu *pKernelGmmu
)
{
    return NV_PFAULT_MMU_ENG_ID_GRAPHICS;
}

NV_STATUS
kgmmuGetFaultRegisterMappings_GH100
(
    OBJGPU     *pGpu,
    KernelGmmu *pKernelGmmu,
    NvU32       index,
    NvP64      *pFaultBufferGet,
    NvP64      *pFaultBufferPut,
    NvP64      *pFaultBufferInfo,
    NvP64      *pHubIntr,
    NvP64      *pHubIntrEnSet,
    NvP64      *pHubIntrEnClear,
    NvU32      *faultMask,
    NvP64      *pPrefetchCtrl
)
{
    DEVICE_MAPPING *pMapping    = gpuGetDeviceMapping(pGpu, DEVICE_INDEX_GPU, 0);
    NvP64           bar0Mapping = NV_PTR_TO_NvP64(pMapping->gpuNvAddr);

    NV_ASSERT_OR_RETURN((index < NUM_FAULT_BUFFERS), NV_ERR_INVALID_ARGUMENT);

    //
    // If Hopper CC is not enabled, or GSP doesn't entirely own the HW fault
    // buffers, use the Turing HAL.
    //
    if (!gpuIsCCFeatureEnabled(pGpu) || !gpuIsGspOwnedFaultBuffersEnabled(pGpu))
    {
        return kgmmuGetFaultRegisterMappings_TU102(pGpu, pKernelGmmu, index,
                                                   pFaultBufferGet, pFaultBufferPut,
                                                   pFaultBufferInfo, pHubIntr,
                                                   pHubIntrEnSet, pHubIntrEnClear,
                                                   faultMask, pPrefetchCtrl);
    }

    *pFaultBufferGet = 0;
    *pFaultBufferInfo = 0;
    *pHubIntr = 0;
    *pHubIntrEnSet = 0;
    *pHubIntrEnClear = 0;
    *faultMask = 0;
    *pPrefetchCtrl = 0;

    //
    // When Hopper CC is enabled, we repurpose the access counter registers to
    // hold the PUT pointer of the shadow buffers. Only GSP-RM can write the
    // PUT pointer to these PRIs. The CPU has read-only access to these PRIs.
    //
    if (index == REPLAYABLE_FAULT_BUFFER)
    {
        Intr *pIntr = GPU_GET_INTR(pGpu);
        NvU32 intrVector = intrGetVectorFromEngineId(pGpu, pIntr, MC_ENGINE_IDX_REPLAYABLE_FAULT_CPU, NV_FALSE);
        struct GMMU_FAULT_BUFFER *pFaultBuffer;
        GMMU_CLIENT_SHADOW_FAULT_BUFFER *pClientShadowFaultBuf;
        FAULT_BUFFER_SHARED_MEMORY *pFaultBufSharedMem;
        NvU32 leafReg;
        NvU32 leafBit;

        leafReg = NV_CTRL_INTR_GPU_VECTOR_TO_LEAF_REG(intrVector);
        leafBit = NV_CTRL_INTR_GPU_VECTOR_TO_LEAF_BIT(intrVector);

        pFaultBuffer = &pKernelGmmu->mmuFaultBuffer[GPU_GFID_PF];
        pClientShadowFaultBuf =
            KERNEL_POINTER_FROM_NvP64(GMMU_CLIENT_SHADOW_FAULT_BUFFER *,
                                      pFaultBuffer->pClientShadowFaultBuffer[index]);

        pFaultBufSharedMem =
            KERNEL_POINTER_FROM_NvP64(FAULT_BUFFER_SHARED_MEMORY *,
                                      pClientShadowFaultBuf->pFaultBufferSharedMemoryAddress);

        *pHubIntr = NvP64_PLUS_OFFSET(bar0Mapping,
                        GPU_GET_VREG_OFFSET(pGpu, NV_VIRTUAL_FUNCTION_PRIV_CPU_INTR_LEAF(leafReg)));
        *pHubIntrEnSet = NvP64_PLUS_OFFSET(bar0Mapping,
                        GPU_GET_VREG_OFFSET(pGpu, NV_VIRTUAL_FUNCTION_PRIV_CPU_INTR_LEAF_EN_SET(leafReg)));
        *pHubIntrEnClear = NvP64_PLUS_OFFSET(bar0Mapping,
                        GPU_GET_VREG_OFFSET(pGpu, NV_VIRTUAL_FUNCTION_PRIV_CPU_INTR_LEAF_EN_CLEAR(leafReg)));
        *faultMask = NVBIT(leafBit);
        *pFaultBufferGet = (NvU32*) &(pFaultBufSharedMem->swGetIndex);
        *pFaultBufferPut = NvP64_PLUS_OFFSET(bar0Mapping,
                        GPU_GET_VREG_OFFSET(pGpu, NV_VIRTUAL_FUNCTION_PRIV_ACCESS_COUNTER_NOTIFY_BUFFER_HI));
    }
    else if (index == NON_REPLAYABLE_FAULT_BUFFER)
    {
        *pFaultBufferPut = NvP64_PLUS_OFFSET(bar0Mapping,
                        GPU_GET_VREG_OFFSET(pGpu, NV_VIRTUAL_FUNCTION_PRIV_ACCESS_COUNTER_NOTIFY_BUFFER_LO));
    }
    else
    {
        NV_ASSERT_OR_RETURN(0, NV_ERR_INVALID_ARGUMENT);
    }

    return NV_OK;
}

NV_STATUS
kgmmuFaultBufferAllocSharedMemory_GH100
(
    OBJGPU            *pGpu,
    KernelGmmu        *pKernelGmmu,
    FAULT_BUFFER_TYPE  index
)
{
    NV_STATUS status;
    GMMU_CLIENT_SHADOW_FAULT_BUFFER *pClientShadowFaultBuffer;
    MEMORY_DESCRIPTOR *pMemDesc;
    NvU64 flags = MEMDESC_FLAGS_NONE;

    if (pKernelGmmu->getProperty(pKernelGmmu, PDB_PROP_KGMMU_FAULT_BUFFER_DISABLED))
    {
        NV_PRINTF(LEVEL_ERROR, "Fault-Buffer is disabled. Flush Seq memory cannot be created\n");
        NV_ASSERT_OR_RETURN(0, NV_ERR_INVALID_STATE);
    }

    if (index != REPLAYABLE_FAULT_BUFFER)
    {
        return NV_OK;
    }

    if (!gpuIsCCFeatureEnabled(pGpu) || !gpuIsGspOwnedFaultBuffersEnabled(pGpu))
    {
        return NV_OK;
    }

    //
    // On systems with SEV enabled, the fault buffer flush sequence memory should
    // be allocated in unprotected sysmem, as GSP will be reading this location to
    // check whether the replayable buffer is full.
    //
    flags |= MEMDESC_FLAGS_ALLOC_IN_UNPROTECTED_MEMORY;

    pClientShadowFaultBuffer = &pKernelGmmu->mmuFaultBuffer[GPU_GFID_PF].clientShadowFaultBuffer[index];
    status = memdescCreate(&pMemDesc, pGpu,
                           sizeof(FAULT_BUFFER_SHARED_MEMORY), RM_PAGE_SIZE,
                           NV_FALSE, ADDR_SYSMEM, NV_MEMORY_UNCACHED,
                           flags);
    if (status != NV_OK)
    {
        return status;
    }

    memdescTagAlloc(status, NV_FB_ALLOC_RM_INTERNAL_OWNER_UNNAMED_TAG_131,
                    pMemDesc);
    if (status != NV_OK)
    {
        goto destroy_memdesc;
    }

    status = memdescMap(pMemDesc, 0,
                        memdescGetSize(pMemDesc),
                        NV_TRUE, NV_PROTECT_READ_WRITE,
                        &pClientShadowFaultBuffer->pFaultBufferSharedMemoryAddress,
                        &pClientShadowFaultBuffer->pFaultBufferSharedMemoryPriv);
    if (status != NV_OK)
    {
        goto free_memory;
    }

    pClientShadowFaultBuffer->pFaultBufferSharedMemDesc = pMemDesc;

    return NV_OK;

free_memory:
    memdescFree(pMemDesc);

destroy_memdesc:
    memdescDestroy(pMemDesc);

    return status;
}

void
kgmmuFaultBufferFreeSharedMemory_GH100
(
    OBJGPU            *pGpu,
    KernelGmmu        *pKernelGmmu,
    FAULT_BUFFER_TYPE  index
)
{
    MEMORY_DESCRIPTOR *pMemDesc;
    GMMU_CLIENT_SHADOW_FAULT_BUFFER *pClientShadowFaultBuffer;

    if (index != REPLAYABLE_FAULT_BUFFER)
    {
        return;
    }

    if (!gpuIsCCFeatureEnabled(pGpu) || !gpuIsGspOwnedFaultBuffersEnabled(pGpu))
    {
        return;
    }

    pClientShadowFaultBuffer = &pKernelGmmu->mmuFaultBuffer[GPU_GFID_PF].clientShadowFaultBuffer[index];
    pMemDesc = pClientShadowFaultBuffer->pFaultBufferSharedMemDesc;

    memdescUnmap(pMemDesc,
                 NV_TRUE, osGetCurrentProcess(),
                 pClientShadowFaultBuffer->pFaultBufferSharedMemoryAddress,
                 pClientShadowFaultBuffer->pFaultBufferSharedMemoryPriv);

    memdescFree(pMemDesc);
    memdescDestroy(pMemDesc);
    return;
}

/*
 * @brief A GSP client can use this function to initiate a replayable fault
 *        buffer flush when the HW fault buffer is owned by GSP.
 */
NV_STATUS
kgmmuIssueReplayableFaultBufferFlush_GH100
(
    OBJGPU     *pGpu,
    KernelGmmu *pKernelGmmu,
    NvBool      bCopyAndFlush
)
{
    KernelGsp *pKernelGsp = GPU_GET_KERNEL_GSP(pGpu);
    NvU32      arg        = !!bCopyAndFlush;

    if (!gpuIsCCFeatureEnabled(pGpu) || !gpuIsGspOwnedFaultBuffersEnabled(pGpu) || !IS_GSP_CLIENT(pGpu))
    {
        return NV_ERR_NOT_SUPPORTED;
    }

    return kgspIssueNotifyOp_HAL(pGpu, pKernelGsp,
                                 GSP_NOTIFY_OP_FLUSH_REPLAYABLE_FAULT_BUFFER_OPCODE,
                                 &arg,
                                 GSP_NOTIFY_OP_FLUSH_REPLAYABLE_FAULT_BUFFER_VALID_ARGC);
}

/*
 * @brief The GSP client can use this function to toggle the prefetch ctrl
 *        register state. The write of the register will be performed by GSP.
 *
 * @param[in] pGpu        OBJGPU pointer
 * @param[in] pKernelGmmu KernelGmmu pointer
 * @param[in] bEnable     Enable/disable fault on prefetch.
 */
NV_STATUS
kgmmuToggleFaultOnPrefetch_GH100
(
    OBJGPU     *pGpu,
    KernelGmmu *pKernelGmmu,
    NvBool      bEnable
)
{
    KernelGsp *pKernelGsp = GPU_GET_KERNEL_GSP(pGpu);
    NvU32      arg        = !!bEnable;

    if (!IS_GSP_CLIENT(pGpu))
    {
        return NV_ERR_NOT_SUPPORTED;
    }

    return kgspIssueNotifyOp_HAL(pGpu, pKernelGsp, GSP_NOTIFY_OP_TOGGLE_FAULT_ON_PREFETCH_OPCODE, &arg, 1 /* argc */);
}

/*
 * @brief When Hopper Confidential Compute is enabled, the put index of the
 *        client replayable/non-replayable shadow buffers gets stored in the
 *        access counter PRIs. This function is used by Kernel RM to read the
 *        put index.
 *
 * @param[in] pGpu        OBJGPU pointer
 * @param[in] pKernelGmmu KernelGmmu pointer
 * @param[in] type        Replayable/Non-replayable fault buffer
 *
 * @returns NvU32
 */
NvU32
kgmmuReadShadowBufPutIndex_GH100
(
    OBJGPU            *pGpu,
    KernelGmmu        *pKernelGmmu,
    FAULT_BUFFER_TYPE  type
)
{
    NvU32 val;

    if (type == REPLAYABLE_FAULT_BUFFER)
    {
        val = GPU_VREG_RD32(pGpu, NV_VIRTUAL_FUNCTION_PRIV_REPLAYABLE_FAULT_SHADOW_BUFFER_PUT);
    }
    else
    {
        val = GPU_VREG_RD32(pGpu, NV_VIRTUAL_FUNCTION_PRIV_NON_REPLAYABLE_FAULT_SHADOW_BUFFER_PUT);
        val = DRF_VAL(_VIRTUAL_FUNCTION_PRIV, _NON_REPLAYABLE_FAULT_SHADOW_BUFFER_PUT, _PTR, val);
    }

    return val;
}

/*!
 * @brief Check if the given engineID is BAR1
 *
 * @param[in] pKernelGmmu KernelGmmu object
 * @param[in] engineID    Engine ID
 *
 * @return True if BAR1
 */
NvBool
kgmmuIsFaultEngineBar1_GH100
(
    KernelGmmu *pKernelGmmu,
    NvU32       engineID
)
{
    return (engineID == NV_PFAULT_MMU_ENG_ID_BAR1);
}

/*!
 * @brief Check if the given engineID is BAR2
 *
 * @param[in] pKernelGmmu KernelGmmu object
 * @param[in] engineID    Engine ID
 *
 * @return True if BAR2
 */
NvBool
kgmmuIsFaultEngineBar2_GH100
(
    KernelGmmu *pKernelGmmu,
    NvU32       engineID
)
{
    return (engineID == NV_PFAULT_MMU_ENG_ID_BAR2);
}

/*!
 * @brief Check if the given engineID is PHYSICAL
 *
 * @param[in] pKernelGmmu KernelGmmu object
 * @param[in] engineID    Engine ID
 *
 * @return True if PHYSICAL
 */
NvBool
kgmmuIsFaultEnginePhysical_GH100
(
    KernelGmmu *pKernelGmmu,
    NvU32       engineID
)
{
    return (engineID == NV_PFAULT_MMU_ENG_ID_PHYSICAL);
}

NvU32
kgmmuReadClientShadowBufPutIndex_GH100
(
    OBJGPU            *pGpu,
    KernelGmmu        *pKernelGmmu,
    NvU32              gfid,
    FAULT_BUFFER_TYPE  type
)
{
    return 0;
}

void
kgmmuWriteClientShadowBufPutIndex_GH100
(
    OBJGPU            *pGpu,
    KernelGmmu        *pKernelGmmu,
    NvU32              gfid,
    FAULT_BUFFER_TYPE  type,
    NvU32              putIndex
)
{
}

/*
 * @brief Copies a single fault packet from the replayable/non-replayable
 *        HW fault buffer to the corresponding client shadow buffer
 *
 * @param[in]  pFaultBuffer      Pointer to GMMU_FAULT_BUFFER
 * @param[in]  type              Replayable/Non-replayable fault type
 * @param[in]  getIndex          Get pointer of the HW fault buffer
 * @param[in]  shadowBufPutIndex Put pointer of the shadow buffer
 * @param[in]  maxBufferEntries  Maximum possible entries in the HW buffer
 * @param[in]  pThreadState      Pointer to THREAD_STATE_NODE
 * @param[out] pFaultsCopied     Number of fault packets copied by the function
 *
 * @returns NV_STATUS
 */
NV_STATUS
kgmmuCopyFaultPacketToClientShadowBuffer_GH100
(
    OBJGPU                   *pGpu,
    KernelGmmu               *pKernelGmmu,
    struct GMMU_FAULT_BUFFER *pFaultBuffer,
    FAULT_BUFFER_TYPE         type,
    NvU32                     getIndex,
    NvU32                     shadowBufPutIndex,
    NvU32                     maxBufferEntries,
    THREAD_STATE_NODE        *pThreadState,
    NvU32                    *pFaultsCopied
)
{
    struct HW_FAULT_BUFFER *pHwFaultBuffer = NULL;
    GMMU_CLIENT_SHADOW_FAULT_BUFFER *pClientShadowFaultBuf = NULL;
    GMMU_FAULT_PACKET faultPacket;
    NvU32 faultPacketsPerPage;
    NvU32 faultPacketPageIndex;
    NvU32 faultPacketPageOffset;
    void *pSrc;
    NvU8 *pDst;
    ConfidentialCompute *pConfCompute = GPU_GET_CONF_COMPUTE(pGpu);
    NV_STATUS status;
    NvU8 *pDstMetadata;
    NvU32 metadataStartIndex;
    NvU32 metadataPerPage;
    NvU32 metadataPageIndex;
    NvU32 metadataPageOffset;
    NvU8 validBit = 1;
    void *pCslCtx = NULL;

    if (!gpuIsCCFeatureEnabled(pGpu) || !gpuIsGspOwnedFaultBuffersEnabled(pGpu))
    {
        return kgmmuCopyFaultPacketToClientShadowBuffer_GV100(pGpu, pKernelGmmu,
                                                              pFaultBuffer,
                                                              type,
                                                              getIndex,
                                                              shadowBufPutIndex,
                                                              maxBufferEntries,
                                                              pThreadState,
                                                              pFaultsCopied);
    }

    *pFaultsCopied = 0;

    pHwFaultBuffer = &pFaultBuffer->hwFaultBuffers[type];
    pClientShadowFaultBuf = pFaultBuffer->pClientShadowFaultBuffer[type];

    // Read the fault packet from the HW buffer
    pSrc = kgmmuFaultBufferGetFault_HAL(pGpu, pKernelGmmu, pHwFaultBuffer, getIndex);
    portMemCopy(&faultPacket, sizeof(GMMU_FAULT_PACKET), pSrc, sizeof(GMMU_FAULT_PACKET));

    //
    // The following is the sequence to be followed for replayable faults
    // as per the production design when Hopper CC is enabled
    //
    if (type == REPLAYABLE_FAULT_BUFFER)
    {
        NvU32 nextGetIndex;

        kgmmuFaultBufferClearPackets_HAL(pGpu, pKernelGmmu, pHwFaultBuffer, getIndex, 1);

        //
        // Ensure all writes to the current entry are completed before updating the
        // GET pointer.
        //
        portAtomicMemoryFenceStore();

        nextGetIndex = (getIndex + 1) % maxBufferEntries;

        // Update cached GET to a valid value.
        pHwFaultBuffer->cachedGetIndex = nextGetIndex;

        // Increment the GET pointer to enable HW to write new fault packets
        kgmmuWriteFaultBufferGetPtr_HAL(pGpu, pKernelGmmu, type, pHwFaultBuffer->cachedGetIndex, pThreadState);

        // Check if there is space in the shadow buffer
        if (kgmmuIsReplayableShadowFaultBufferFull_HAL(pGpu, pKernelGmmu,
                                                       pClientShadowFaultBuf,
                                                       shadowBufPutIndex,
                                                       maxBufferEntries))
        {
            // The design allows the SW replayable shadow fault buffer to overflow.
            return NV_OK;
        }
    }

    faultPacketsPerPage = RM_PAGE_SIZE / sizeof(GMMU_FAULT_PACKET);
    faultPacketPageIndex = shadowBufPutIndex / faultPacketsPerPage;
    faultPacketPageOffset = shadowBufPutIndex % faultPacketsPerPage;

    pDst = KERNEL_POINTER_FROM_NvP64(NvU8 *,
               pClientShadowFaultBuf->pBufferPages[faultPacketPageIndex].pAddress);
    pDst += (faultPacketPageOffset * sizeof(GMMU_FAULT_PACKET));

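    //
    // Illustrative index math (assuming sizeof(GMMU_FAULT_PACKET) == 32):
    // with RM_PAGE_SIZE = 4096, faultPacketsPerPage = 128, so a
    // shadowBufPutIndex of 130 lands in pBufferPages[1] at packet offset 2,
    // i.e. byte 64 within that page.
    //
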
    //
    // Metadata is packed at the end of the buffer.
    // Calculate the page index and offset at which RM needs to fill the metadata
    // and copy it over.
    //
    metadataStartIndex = pClientShadowFaultBuf->metadataStartIndex;
    metadataPerPage = RM_PAGE_SIZE / sizeof(GMMU_FAULT_PACKET_METADATA);
    metadataPageIndex = shadowBufPutIndex / metadataPerPage;
    metadataPageOffset = shadowBufPutIndex % metadataPerPage;

    pDstMetadata = KERNEL_POINTER_FROM_NvP64(NvU8 *,
                       pClientShadowFaultBuf->pBufferPages[metadataStartIndex + metadataPageIndex].pAddress);
    pDstMetadata += (metadataPageOffset * sizeof(GMMU_FAULT_PACKET_METADATA));

    // Sanity check that the client reset the valid bit.
    if (pDstMetadata[GMMU_FAULT_PACKET_METADATA_VALID_IDX] != 0)
    {
        NV_PRINTF(LEVEL_ERROR, "Plaintext valid bit not reset by client.\n");
        return NV_ERR_INVALID_STATE;
    }

    pCslCtx = kgmmuGetShadowFaultBufferCslContext(pGpu, pKernelGmmu, type);
    if (pCslCtx == NULL)
    {
        NV_PRINTF(LEVEL_ERROR, "CSL context for type 0x%x unexpectedly NULL\n", type);
        return NV_ERR_INVALID_STATE;
    }

    status = ccslEncryptWithRotationChecks(pCslCtx,
                                           sizeof(GMMU_FAULT_PACKET),
                                           (NvU8*) &faultPacket,
                                           &validBit,
                                           GMMU_FAULT_PACKET_METADATA_VALID_SIZE,
                                           pDst,
                                           &pDstMetadata[GMMU_FAULT_PACKET_METADATA_AUTHTAG_IDX]);
    if (status != NV_OK)
    {
        if (status == NV_ERR_INSUFFICIENT_RESOURCES)
        {
            // IV overflow is considered fatal.
            NV_PRINTF(LEVEL_ERROR, "Fatal error detected in fault buffer packet encryption: IV overflow!\n");
            confComputeSetErrorState(pGpu, pConfCompute);
        }
        else
        {
            NV_PRINTF(LEVEL_ERROR, "Error detected in fault buffer packet encryption: 0x%x\n", status);
        }
        return status;
    }

    //
    // Ensure that the encrypted packet and authTag have reached the point of
    // coherence before writing the plaintext valid bit.
    //
    portAtomicMemoryFenceStore();

    // Write the valid bit and increment the number of faults copied.
    portMemCopy((void*)&pDstMetadata[GMMU_FAULT_PACKET_METADATA_VALID_IDX],
                GMMU_FAULT_PACKET_METADATA_VALID_SIZE,
                &validBit,
                GMMU_FAULT_PACKET_METADATA_VALID_SIZE);

    *pFaultsCopied = 1;

    return NV_OK;
}

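//
// Consumer-side sketch (an assumption for illustration; the actual protocol
// lives in the UVM client): the client polls the plaintext valid byte in the
// metadata slot, and once it is set, decrypts and authenticates the packet
// with the stored auth tag, clears the valid byte (the producer-side sanity
// check above depends on this), and advances swGetIndex in the shared memory
// so GSP-RM can observe the freed space.
//
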
/*
 * @brief Checks if the client shadow buffer has space
 *
 * @param[in] pClientShadowFaultBuf Pointer to the shadow buffer
 * @param[in] shadowBufPutIndex     Put index inside the shadow buffer
 * @param[in] maxBufferEntries      Maximum possible entries in the HW buffer
 *
 * @returns NV_TRUE/NV_FALSE
 */
NvBool
kgmmuIsReplayableShadowFaultBufferFull_GH100
(
    OBJGPU                          *pGpu,
    KernelGmmu                      *pKernelGmmu,
    GMMU_CLIENT_SHADOW_FAULT_BUFFER *pClientShadowFaultBuf,
    NvU32                            shadowBufPutIndex,
    NvU32                            maxBufferEntries
)
{
    FAULT_BUFFER_SHARED_MEMORY *pFaultBufSharedMem;

    pFaultBufSharedMem =
        KERNEL_POINTER_FROM_NvP64(FAULT_BUFFER_SHARED_MEMORY *,
                                  pClientShadowFaultBuf->pFaultBufferSharedMemoryAddress);

    return (pFaultBufSharedMem->swGetIndex ==
            ((shadowBufPutIndex + 1) % maxBufferEntries)) ? NV_TRUE : NV_FALSE;
}

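//
// Example of the full condition above: with maxBufferEntries = 8,
// swGetIndex = 3 and shadowBufPutIndex = 2, the buffer is full because one
// more PUT advance ((2 + 1) % 8 == 3) would collide with GET; one slot is
// deliberately left unused so that full and empty states are distinguishable.
//
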
/*!
 * @brief Get the engine ID associated with the min CE
 *
 * @param[in] pKernelGmmu KernelGmmu object
 *
 * @return engine ID of the min CE
 */
NvU32
kgmmuGetMinCeEngineId_GH100
(
    KernelGmmu *pKernelGmmu
)
{
    return NV_PFAULT_MMU_ENG_ID_CE0;
}

/*!
 * @brief Get the engine ID associated with the max CE
 *
 * @param[in] pGpu        OBJGPU object
 * @param[in] pKernelGmmu KernelGmmu object
 *
 * @return engine ID of the max CE
 */
NvU32
kgmmuGetMaxCeEngineId_GH100
(
    OBJGPU     *pGpu,
    KernelGmmu *pKernelGmmu
)
{
    return NV_PFAULT_MMU_ENG_ID_CE9;
}

/**
 * @brief Sign extend a fault address to a supported width as per UVM requirements
 */
void
kgmmuSignExtendFaultAddress_GH100
(
    OBJGPU     *pGpu,
    KernelGmmu *pKernelGmmu,
    NvU64      *pMmuFaultAddress
)
{
    NvU32 cpuAddrShift   = osGetCpuVaAddrShift();
    NvU32 gpuVaAddrShift = portUtilCountTrailingZeros64(pKernelGmmu->maxVASize);

    // Sign extend VA to ensure it's in canonical form if required
    if (gpuVaAddrShift >= cpuAddrShift)
    {
        switch (pGpu->busInfo.oorArch)
        {
            case OOR_ARCH_X86_64:
            case OOR_ARCH_ARM:
            case OOR_ARCH_AARCH64:
                *pMmuFaultAddress = (NvU64)(((NvS64)*pMmuFaultAddress << (64 - 57)) >>
                                            (64 - 57));
                break;
            case OOR_ARCH_PPC64LE:
                break;
            case OOR_ARCH_NONE:
                NV_ASSERT_FAILED("Invalid oor address mode type.");
                break;
        }
    }
    else
    {
        NV_PRINTF(LEVEL_ERROR, "UVM has not defined what to do here, doing nothing\n");
        NV_ASSERT(0);
    }
}

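//
// Worked example of the sign extension above (the shifts assume a 57-bit
// GPU VA space, so bit 56 is the sign bit): shifting the fault address left
// by (64 - 57) = 7 and arithmetic-shifting it back replicates bit 56 into
// bits 63:57. For instance, 0x0100000000000000 (bit 56 set) becomes
// 0xFF00000000000000, the canonical form expected on x86-64 and AArch64.
//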