/*
 * SPDX-FileCopyrightText: Copyright (c) 2021-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 * SPDX-License-Identifier: MIT
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 */

#define NVOC_KERN_GMMU_H_PRIVATE_ACCESS_ALLOWED

#include "gpu/mmu/kern_gmmu.h"
#include "gpu/mem_mgr/mem_mgr.h"
#include "vgpu/vgpu_events.h"
#include "nv_sriov_defines.h"
#include "kernel/gpu/intr/intr.h"
#include "kernel/gpu/gsp/kernel_gsp.h"
#include "kernel/gpu/conf_compute/ccsl.h"

#include "mmu/gmmu_fmt.h"
#include "published/hopper/gh100/dev_mmu.h"
#include "published/hopper/gh100/dev_fault.h"
#include "published/hopper/gh100/dev_vm.h"
#include "published/hopper/gh100/dev_vm_addendum.h"

/*!
 * Check if a specific GMMU format version is supported.
 */
NvBool
kgmmuFmtIsVersionSupported_GH10X(KernelGmmu *pKernelGmmu, NvU32 version)
{
    return (version == GMMU_FMT_VERSION_3);
}
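/*
 * Illustrative usage sketch (not part of the original file): a caller that
 * wants the Hopper page table formats would typically gate on the supported
 * version before indexing the format-family table, as the init routine below
 * does. The HAL wrapper name is assumed from the usual _HAL convention.
 *
 *     if (!kgmmuFmtIsVersionSupported_HAL(pKernelGmmu, GMMU_FMT_VERSION_3))
 *         return NV_ERR_NOT_SUPPORTED;
 *     pFam = pKernelGmmu->pFmtFamilies[GMMU_FMT_VERSION_3 - 1];
 */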
/*!
 * Initialize the GMMU format families.
 */
NV_STATUS
kgmmuFmtFamiliesInit_GH100(OBJGPU *pGpu, KernelGmmu *pKernelGmmu)
{
    NvU32 i;
    NvU32 pdePcfHw = 0;
    NvU32 pdePcfSw = 0;
    NvU32 ptePcfHw = 0;
    NvU32 ptePcfSw = 0;

    // Initialize the sparse encoding in the PDE PCF field for V3
    GMMU_FMT_FAMILY *pFam = pKernelGmmu->pFmtFamilies[GMMU_FMT_VERSION_3 - 1];

    if (pFam != NULL)
    {
        // 1. Initialize sparsePde
        pdePcfSw |= (1 << SW_MMU_PCF_SPARSE_IDX);
        pdePcfSw |= (1 << SW_MMU_PCF_ATS_ALLOWED_IDX);
        NV_ASSERT_OR_RETURN((kgmmuTranslatePdePcfFromSw_HAL(pKernelGmmu, pdePcfSw, &pdePcfHw) == NV_OK),
                            NV_ERR_INVALID_ARGUMENT);
        gmmuFieldSetAperture(&pFam->pde.fldAperture, GMMU_APERTURE_INVALID,
                             pFam->sparsePde.v8);
        nvFieldSet32(&pFam->pde.fldPdePcf, pdePcfHw, pFam->sparsePde.v8);

        // 2. Initialize sparsePdeMulti
        for (i = 0; i < MMU_FMT_MAX_SUB_LEVELS; ++i)
        {
            const GMMU_FMT_PDE *pPdeFmt = &pFam->pdeMulti.subLevels[i];
            gmmuFieldSetAperture(&pPdeFmt->fldAperture, GMMU_APERTURE_INVALID,
                                 pFam->sparsePdeMulti.v8);
            // Set PDE PCF sparse bit only for sub-level 0 for PdeMulti
            if (i == 0)
            {
                nvFieldSet32(&pPdeFmt->fldPdePcf, pdePcfHw, pFam->sparsePdeMulti.v8);
            }
        }

        // 3. Initialize nv4kPte
        ptePcfSw |= (1 << SW_MMU_PCF_NV4K_IDX);
        nvFieldSetBool(&pFam->pte.fldValid, NV_FALSE, pFam->nv4kPte.v8);
        NV_ASSERT_OR_RETURN((kgmmuTranslatePtePcfFromSw_HAL(pKernelGmmu, ptePcfSw, &ptePcfHw) == NV_OK),
                            NV_ERR_INVALID_ARGUMENT);
        nvFieldSet32(&pFam->pte.fldPtePcf, ptePcfHw, pFam->nv4kPte.v8);

        // 4. Initialize sparsePte
        ptePcfSw = (1 << SW_MMU_PCF_SPARSE_IDX);
        nvFieldSetBool(&pFam->pte.fldValid, NV_FALSE, pFam->sparsePte.v8);
        NV_ASSERT_OR_RETURN((kgmmuTranslatePtePcfFromSw_HAL(pKernelGmmu, ptePcfSw, &ptePcfHw) == NV_OK),
                            NV_ERR_INVALID_ARGUMENT);
        nvFieldSet32(&pFam->pte.fldPtePcf, ptePcfHw, pFam->sparsePte.v8);
    }

    return NV_OK;
}

#define PTE_PCF_INVALID_LIST(fn) \
        fn(INVALID) \
        fn(NO_VALID_4KB_PAGE) \
        fn(SPARSE) \
        fn(MAPPING_NOWHERE)

#define PTE_PCF_VALID_LIST(fn) \
        fn(PRIVILEGE_RW_ATOMIC_CACHED_ACD) \
        fn(PRIVILEGE_RW_ATOMIC_CACHED_ACE) \
        fn(PRIVILEGE_RW_ATOMIC_UNCACHED_ACD) \
        fn(PRIVILEGE_RW_ATOMIC_UNCACHED_ACE) \
        fn(PRIVILEGE_RW_NO_ATOMIC_UNCACHED_ACE) \
        fn(PRIVILEGE_RW_NO_ATOMIC_CACHED_ACE) \
        fn(PRIVILEGE_RO_ATOMIC_UNCACHED_ACE) \
        fn(PRIVILEGE_RO_NO_ATOMIC_UNCACHED_ACE) \
        fn(PRIVILEGE_RO_NO_ATOMIC_CACHED_ACE) \
        fn(REGULAR_RW_ATOMIC_CACHED_ACD) \
        fn(REGULAR_RW_ATOMIC_CACHED_ACE) \
        fn(REGULAR_RW_ATOMIC_UNCACHED_ACD) \
        fn(REGULAR_RW_ATOMIC_UNCACHED_ACE) \
        fn(REGULAR_RW_NO_ATOMIC_CACHED_ACD) \
        fn(REGULAR_RW_NO_ATOMIC_CACHED_ACE) \
        fn(REGULAR_RW_NO_ATOMIC_UNCACHED_ACD) \
        fn(REGULAR_RW_NO_ATOMIC_UNCACHED_ACE) \
        fn(REGULAR_RO_ATOMIC_CACHED_ACD) \
        fn(REGULAR_RO_ATOMIC_CACHED_ACE) \
        fn(REGULAR_RO_ATOMIC_UNCACHED_ACD) \
        fn(REGULAR_RO_ATOMIC_UNCACHED_ACE) \
        fn(REGULAR_RO_NO_ATOMIC_CACHED_ACD) \
        fn(REGULAR_RO_NO_ATOMIC_CACHED_ACE) \
        fn(REGULAR_RO_NO_ATOMIC_UNCACHED_ACD) \
        fn(REGULAR_RO_NO_ATOMIC_UNCACHED_ACE)

#define PTE_PCF_HW_FROM_SW(name) \
        case (SW_MMU_PTE_PCF_##name): \
        { \
            *pPtePcfHw = NV_MMU_VER3_PTE_PCF_##name; \
            break; \
        }

#define PTE_PCF_SW_FROM_HW(name) \
        case (NV_MMU_VER3_PTE_PCF_##name): \
        { \
            *pPtePcfSw = SW_MMU_PTE_PCF_##name; \
            break; \
        }
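/*
 * Illustrative expansion (derived from the macros above): inside the switch
 * statements below, each list entry contributes one case. For example,
 * PTE_PCF_HW_FROM_SW(SPARSE) expands to
 *
 *     case (SW_MMU_PTE_PCF_SPARSE):
 *     {
 *         *pPtePcfHw = NV_MMU_VER3_PTE_PCF_SPARSE;
 *         break;
 *     }
 *
 * so adding a new PCF encoding only requires extending the appropriate list.
 */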
//
// Takes a SW PTE PCF and translates to HW PTE PCF
// If a bit pattern is not supported by HW, return NV_ERR_NOT_SUPPORTED
//
NV_STATUS
kgmmuTranslatePtePcfFromSw_GH100
(
    KernelGmmu *pKernelGmmu,
    NvU32       ptePcfSw,
    NvU32      *pPtePcfHw
)
{
    switch (ptePcfSw)
    {
        PTE_PCF_INVALID_LIST(PTE_PCF_HW_FROM_SW)
        PTE_PCF_VALID_LIST(PTE_PCF_HW_FROM_SW)

        default:
        {
            NV_PRINTF(LEVEL_ERROR, "Unsupported SW PTE PCF pattern requested : %x\n", ptePcfSw);
            return NV_ERR_NOT_SUPPORTED;
        }
    }

    return NV_OK;
}

NV_STATUS
kgmmuTranslatePtePcfFromHw_GH100
(
    KernelGmmu *pKernelGmmu,
    NvU32       ptePcfHw,
    NvBool      bPteValid,
    NvU32      *pPtePcfSw
)
{
    if (!bPteValid)
    {
        switch (ptePcfHw)
        {
            PTE_PCF_INVALID_LIST(PTE_PCF_SW_FROM_HW)

            default: return NV_ERR_NOT_SUPPORTED;
        }
    }
    else
    {
        switch (ptePcfHw)
        {
            PTE_PCF_VALID_LIST(PTE_PCF_SW_FROM_HW)

            default:
            {
                NV_PRINTF(LEVEL_ERROR, "Unsupported HW PTE PCF pattern requested : %x\n", ptePcfHw);
                return NV_ERR_NOT_SUPPORTED;
            }
        }
    }

    return NV_OK;
}

#define PDE_PCF_INVALID_LIST(fn) \
        fn(INVALID_ATS_ALLOWED) \
        fn(SPARSE_ATS_ALLOWED) \
        fn(INVALID_ATS_NOT_ALLOWED) \
        fn(SPARSE_ATS_NOT_ALLOWED)

#define PDE_PCF_VALID_LIST(fn) \
        fn(VALID_CACHED_ATS_ALLOWED) \
        fn(VALID_CACHED_ATS_NOT_ALLOWED) \
        fn(VALID_UNCACHED_ATS_ALLOWED) \
        fn(VALID_UNCACHED_ATS_NOT_ALLOWED)

#define PDE_PCF_HW_FROM_SW(name) \
        case (SW_MMU_PDE_PCF_##name): \
        { \
            *pPdePcfHw = NV_MMU_VER3_PDE_PCF_##name; \
            break; \
        }

#define PDE_PCF_SW_FROM_HW(name) \
        case (NV_MMU_VER3_PDE_PCF_##name): \
        { \
            *pPdePcfSw = SW_MMU_PDE_PCF_##name; \
            break; \
        }

//
// Takes a SW PDE PCF and translates to HW PDE PCF
// If a bit pattern is not supported by HW, return NV_ERR_NOT_SUPPORTED
//
NV_STATUS
kgmmuTranslatePdePcfFromSw_GH100
(
    KernelGmmu *pKernelGmmu,
    NvU32       pdePcfSw,
    NvU32      *pPdePcfHw
)
{
    switch (pdePcfSw)
    {
        PDE_PCF_INVALID_LIST(PDE_PCF_HW_FROM_SW)
        PDE_PCF_VALID_LIST(PDE_PCF_HW_FROM_SW)

        default: return NV_ERR_NOT_SUPPORTED;
    }

    return NV_OK;
}

//
// Takes a HW PDE PCF and translates to SW PDE PCF
// If a bit pattern is not supported by SW, return NV_ERR_NOT_SUPPORTED
//
NV_STATUS
kgmmuTranslatePdePcfFromHw_GH100
(
    KernelGmmu   *pKernelGmmu,
    NvU32         pdePcfHw,
    GMMU_APERTURE aperture,
    NvU32        *pPdePcfSw
)
{
    if (aperture == GMMU_APERTURE_INVALID)
    {
        switch (pdePcfHw)
        {
            PDE_PCF_INVALID_LIST(PDE_PCF_SW_FROM_HW)

            default: return NV_ERR_NOT_SUPPORTED;
        }
    }
    else
    {
        switch (pdePcfHw)
        {
            PDE_PCF_VALID_LIST(PDE_PCF_SW_FROM_HW)

            default: return NV_ERR_NOT_SUPPORTED;
        }
    }

    return NV_OK;
}
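/*
 * Usage sketch (illustrative; this mirrors how kgmmuFmtFamiliesInit_GH100
 * above consumes the SW->HW direction, and adds no new functionality):
 *
 *     NvU32 pdePcfSw = (1 << SW_MMU_PCF_SPARSE_IDX) | (1 << SW_MMU_PCF_ATS_ALLOWED_IDX);
 *     NvU32 pdePcfHw;
 *
 *     NV_ASSERT_OR_RETURN((kgmmuTranslatePdePcfFromSw_HAL(pKernelGmmu, pdePcfSw, &pdePcfHw) == NV_OK),
 *                         NV_ERR_INVALID_ARGUMENT);
 *     nvFieldSet32(&pFam->pde.fldPdePcf, pdePcfHw, pFam->sparsePde.v8);
 */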
/*
 * @brief Validates fabric base address.
 *
 * @param pKernelGmmu
 * @param fabricBaseAddr
 *
 * @returns On success, NV_OK.
 *          On failure, returns NV_ERR_XXX.
 */
NV_STATUS
kgmmuValidateFabricBaseAddress_GH100
(
    KernelGmmu *pKernelGmmu,
    NvU64       fabricBaseAddr
)
{
    OBJGPU        *pGpu = ENG_GET_GPU(pKernelGmmu);
    MemoryManager *pMemoryManager = GPU_GET_MEMORY_MANAGER(pGpu);
    NvU64          fbSizeBytes;

    fbSizeBytes = pMemoryManager->Ram.fbTotalMemSizeMb << 20;

    //
    // Hopper SKUs will be paired with NVSwitches (Laguna Seca) supporting 2K
    // mapslots that can cover 512GB each. Make sure that the fabric base
    // address being used is valid to cover the whole frame buffer.
    //

    // Check if the fabric address is aligned to the mapslot size.
    if (fabricBaseAddr & (NVBIT64(39) - 1))
    {
        return NV_ERR_INVALID_ARGUMENT;
    }

    // Align fbSize to the mapslot size.
    fbSizeBytes = RM_ALIGN_UP(fbSizeBytes, NVBIT64(39));

    return NV_OK;
}
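/*
 * Worked example (illustrative): NVBIT64(39) is 2^39 bytes = 512 GB, the
 * NVSwitch mapslot size assumed above. A fabric base address of
 * 0x80_0000_0000 (512 GB) passes the alignment check because its low 39 bits
 * are zero, while 0x40_0000_0000 (256 GB) fails because
 * (0x40_0000_0000 & (NVBIT64(39) - 1)) != 0.
 */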
/*!
 * @brief Get the engine ID associated with the Graphics Engine
 */
NvU32
kgmmuGetGraphicsEngineId_GH100
(
    KernelGmmu *pKernelGmmu
)
{
    return NV_PFAULT_MMU_ENG_ID_GRAPHICS;
}

NV_STATUS
kgmmuGetFaultRegisterMappings_GH100
(
    OBJGPU     *pGpu,
    KernelGmmu *pKernelGmmu,
    NvU32       index,
    NvP64      *pFaultBufferGet,
    NvP64      *pFaultBufferPut,
    NvP64      *pFaultBufferInfo,
    NvP64      *pHubIntr,
    NvP64      *pHubIntrEnSet,
    NvP64      *pHubIntrEnClear,
    NvU32      *faultMask,
    NvP64      *pPrefetchCtrl
)
{
    DEVICE_MAPPING *pMapping    = gpuGetDeviceMapping(pGpu, DEVICE_INDEX_GPU, 0);
    NvP64           bar0Mapping = NV_PTR_TO_NvP64(pMapping->gpuNvAddr);

    NV_ASSERT_OR_RETURN((index < NUM_FAULT_BUFFERS), NV_ERR_INVALID_ARGUMENT);

    //
    // If Hopper CC is not enabled or GSP doesn't entirely own the HW fault buffers,
    // use the Turing HAL
    //
    if (!gpuIsCCFeatureEnabled(pGpu) || !gpuIsGspOwnedFaultBuffersEnabled(pGpu))
    {
        return kgmmuGetFaultRegisterMappings_TU102(pGpu, pKernelGmmu, index,
                                                   pFaultBufferGet, pFaultBufferPut,
                                                   pFaultBufferInfo, pHubIntr,
                                                   pHubIntrEnSet, pHubIntrEnClear,
                                                   faultMask, pPrefetchCtrl);
    }

    *pFaultBufferGet  = 0;
    *pFaultBufferInfo = 0;
    *pHubIntr         = 0;
    *pHubIntrEnSet    = 0;
    *pHubIntrEnClear  = 0;
    *faultMask        = 0;
    *pPrefetchCtrl    = 0;

    //
    // When Hopper CC is enabled, we repurpose the access counter registers to
    // hold the PUT pointer of the shadow buffers. Only GSP-RM can write the
    // PUT pointer to these PRIs. CPU has read-only access to these PRIs.
    //
    if (index == REPLAYABLE_FAULT_BUFFER)
    {
        Intr *pIntr      = GPU_GET_INTR(pGpu);
        NvU32 intrVector = intrGetVectorFromEngineId(pGpu, pIntr, MC_ENGINE_IDX_REPLAYABLE_FAULT_CPU, NV_FALSE);
        struct GMMU_FAULT_BUFFER *pFaultBuffer;
        GMMU_CLIENT_SHADOW_FAULT_BUFFER *pClientShadowFaultBuf;
        FAULT_BUFFER_SHARED_MEMORY *pFaultBufSharedMem;
        NvU32 leafReg;
        NvU32 leafBit;

        leafReg = NV_CTRL_INTR_GPU_VECTOR_TO_LEAF_REG(intrVector);
        leafBit = NV_CTRL_INTR_GPU_VECTOR_TO_LEAF_BIT(intrVector);

        pFaultBuffer = &pKernelGmmu->mmuFaultBuffer[GPU_GFID_PF];
        pClientShadowFaultBuf =
            KERNEL_POINTER_FROM_NvP64(GMMU_CLIENT_SHADOW_FAULT_BUFFER *,
                                      pFaultBuffer->pClientShadowFaultBuffer[index]);

        pFaultBufSharedMem =
            KERNEL_POINTER_FROM_NvP64(FAULT_BUFFER_SHARED_MEMORY *,
                                      pClientShadowFaultBuf->pFaultBufferSharedMemoryAddress);

        *pHubIntr = NvP64_PLUS_OFFSET(bar0Mapping,
                        GPU_GET_VREG_OFFSET(pGpu, NV_VIRTUAL_FUNCTION_PRIV_CPU_INTR_LEAF(leafReg)));
        *pHubIntrEnSet = NvP64_PLUS_OFFSET(bar0Mapping,
                             GPU_GET_VREG_OFFSET(pGpu, NV_VIRTUAL_FUNCTION_PRIV_CPU_INTR_LEAF_EN_SET(leafReg)));
        *pHubIntrEnClear = NvP64_PLUS_OFFSET(bar0Mapping,
                               GPU_GET_VREG_OFFSET(pGpu, NV_VIRTUAL_FUNCTION_PRIV_CPU_INTR_LEAF_EN_CLEAR(leafReg)));
        *faultMask = NVBIT(leafBit);
        *pFaultBufferGet = (NvU32 *) &(pFaultBufSharedMem->swGetIndex);
        *pFaultBufferPut = NvP64_PLUS_OFFSET(bar0Mapping,
                               GPU_GET_VREG_OFFSET(pGpu, NV_VIRTUAL_FUNCTION_PRIV_ACCESS_COUNTER_NOTIFY_BUFFER_HI));
    }
    else if (index == NON_REPLAYABLE_FAULT_BUFFER)
    {
        *pFaultBufferPut = NvP64_PLUS_OFFSET(bar0Mapping,
                               GPU_GET_VREG_OFFSET(pGpu, NV_VIRTUAL_FUNCTION_PRIV_ACCESS_COUNTER_NOTIFY_BUFFER_LO));
    }
    else
    {
        NV_ASSERT_OR_RETURN(0, NV_ERR_INVALID_ARGUMENT);
    }

    return NV_OK;
}
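/*
 * Illustrative note (hypothetical caller, not part of this file): with Hopper
 * CC enabled, the replayable GET returned above is a CPU pointer into the
 * fault buffer shared memory (swGetIndex) rather than a BAR0 register, so a
 * client would advance it with a plain store, e.g.
 *
 *     volatile NvU32 *pGet = KERNEL_POINTER_FROM_NvP64(volatile NvU32 *, *pFaultBufferGet);
 *     *pGet = newGetIndex;   // newGetIndex is hypothetical
 *
 * while PUT is still read from the repurposed access-counter PRI.
 */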
NV_STATUS
kgmmuFaultBufferAllocSharedMemory_GH100
(
    OBJGPU *pGpu,
    KernelGmmu *pKernelGmmu,
    FAULT_BUFFER_TYPE index
)
{
    NV_STATUS status;
    GMMU_CLIENT_SHADOW_FAULT_BUFFER *pClientShadowFaultBuffer;
    MEMORY_DESCRIPTOR *pMemDesc;
    NvU64 flags = MEMDESC_FLAGS_NONE;

    if (pKernelGmmu->getProperty(pKernelGmmu, PDB_PROP_KGMMU_FAULT_BUFFER_DISABLED))
    {
        NV_PRINTF(LEVEL_ERROR, "Fault-Buffer is disabled. Flush Seq memory cannot be created\n");
        NV_ASSERT_OR_RETURN(0, NV_ERR_INVALID_STATE);
    }

    if (index != REPLAYABLE_FAULT_BUFFER)
    {
        return NV_OK;
    }

    if (!gpuIsCCFeatureEnabled(pGpu) || !gpuIsGspOwnedFaultBuffersEnabled(pGpu))
    {
        return NV_OK;
    }

    //
    // On systems with SEV enabled, the fault buffer flush sequence memory should
    // be allocated in unprotected sysmem as GSP will be reading this location to
    // check whether the Replayable buffer is full.
    //
    flags |= MEMDESC_FLAGS_ALLOC_IN_UNPROTECTED_MEMORY;

    pClientShadowFaultBuffer = &pKernelGmmu->mmuFaultBuffer[GPU_GFID_PF].clientShadowFaultBuffer[index];
    status = memdescCreate(&pMemDesc, pGpu,
                           sizeof(FAULT_BUFFER_SHARED_MEMORY), RM_PAGE_SIZE,
                           NV_FALSE, ADDR_SYSMEM, NV_MEMORY_UNCACHED,
                           flags);
    if (status != NV_OK)
    {
        return status;
    }

    memdescTagAlloc(status, NV_FB_ALLOC_RM_INTERNAL_OWNER_UNNAMED_TAG_131,
                    pMemDesc);
    if (status != NV_OK)
    {
        goto destroy_memdesc;
    }

    status = memdescMap(pMemDesc, 0,
                        memdescGetSize(pMemDesc),
                        NV_TRUE, NV_PROTECT_READ_WRITE,
                        &pClientShadowFaultBuffer->pFaultBufferSharedMemoryAddress,
                        &pClientShadowFaultBuffer->pFaultBufferSharedMemoryPriv);
    if (status != NV_OK)
    {
        goto free_memory;
    }

    pClientShadowFaultBuffer->pFaultBufferSharedMemDesc = pMemDesc;

    return NV_OK;

free_memory:
    memdescFree(pMemDesc);

destroy_memdesc:
    memdescDestroy(pMemDesc);

    return status;
}

void
kgmmuFaultBufferFreeSharedMemory_GH100
(
    OBJGPU *pGpu,
    KernelGmmu *pKernelGmmu,
    FAULT_BUFFER_TYPE index
)
{
    MEMORY_DESCRIPTOR *pMemDesc;
    GMMU_CLIENT_SHADOW_FAULT_BUFFER *pClientShadowFaultBuffer;

    if (index != REPLAYABLE_FAULT_BUFFER)
    {
        return;
    }

    if (!gpuIsCCFeatureEnabled(pGpu) || !gpuIsGspOwnedFaultBuffersEnabled(pGpu))
    {
        return;
    }

    pClientShadowFaultBuffer = &pKernelGmmu->mmuFaultBuffer[GPU_GFID_PF].clientShadowFaultBuffer[index];
    pMemDesc = pClientShadowFaultBuffer->pFaultBufferSharedMemDesc;

    memdescUnmap(pMemDesc,
                 NV_TRUE, osGetCurrentProcess(),
                 pClientShadowFaultBuffer->pFaultBufferSharedMemoryAddress,
                 pClientShadowFaultBuffer->pFaultBufferSharedMemoryPriv);

    memdescFree(pMemDesc);
    memdescDestroy(pMemDesc);
    return;
}

/*
 * @brief GSP client can use this function to initiate a replayable fault buffer
 *        flush when the HW fault buffer is owned by GSP.
 */
NV_STATUS
kgmmuIssueReplayableFaultBufferFlush_GH100
(
    OBJGPU *pGpu,
    KernelGmmu *pKernelGmmu
)
{
    KernelGsp *pKernelGsp = GPU_GET_KERNEL_GSP(pGpu);

    if (!gpuIsCCFeatureEnabled(pGpu) || !gpuIsGspOwnedFaultBuffersEnabled(pGpu) || !IS_GSP_CLIENT(pGpu))
    {
        return NV_ERR_NOT_SUPPORTED;
    }

    return kgspIssueNotifyOp_HAL(pGpu, pKernelGsp, GSP_NOTIFY_OP_FLUSH_REPLAYABLE_FAULT_BUFFER_OPCODE, NULL, 0);
}

/*
 * @brief The GSP client can use this function to toggle the prefetch ctrl register state.
 *        The write of the register will be performed by GSP.
 *
 * @param[in]  pGpu         OBJGPU pointer
 * @param[in]  pKernelGmmu  KernelGmmu pointer
 * @param[in]  bEnable      Enable/Disable fault on prefetch.
 */
NV_STATUS
kgmmuToggleFaultOnPrefetch_GH100
(
    OBJGPU *pGpu,
    KernelGmmu *pKernelGmmu,
    NvBool bEnable
)
{
    KernelGsp *pKernelGsp = GPU_GET_KERNEL_GSP(pGpu);
    NvU32 arg = !!bEnable;

    if (!IS_GSP_CLIENT(pGpu))
    {
        return NV_ERR_NOT_SUPPORTED;
    }

    return kgspIssueNotifyOp_HAL(pGpu, pKernelGsp, GSP_NOTIFY_OP_TOGGLE_FAULT_ON_PREFETCH_OPCODE, &arg, 1 /* argc */);
}
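/*
 * Usage sketch (illustrative, hypothetical caller): both helpers above funnel
 * through the GSP notify-op interface, so a CPU-RM control path that wants
 * faults raised on speculative prefetches would simply call the HAL wrapper
 * (name assumed from the usual _HAL convention)
 *
 *     status = kgmmuToggleFaultOnPrefetch_HAL(pGpu, pKernelGmmu, NV_TRUE);
 *
 * and GSP-RM performs the actual PRI write on its behalf.
 */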
/*
 * @brief When Hopper Confidential Compute is enabled, the put index of the
 *        client replayable/non-replayable shadow buffers gets stored in the
 *        access counter PRIs. This function is used by Kernel RM to read the put index.
 *
 * @param[in]  pGpu         OBJGPU pointer
 * @param[in]  pKernelGmmu  KernelGmmu pointer
 * @param[in]  type         Replayable/Non-replayable fault buffer
 *
 * @returns NvU32
 */
NvU32
kgmmuReadShadowBufPutIndex_GH100
(
    OBJGPU            *pGpu,
    KernelGmmu        *pKernelGmmu,
    FAULT_BUFFER_TYPE  type
)
{
    NvU32 val;
    if (type == REPLAYABLE_FAULT_BUFFER)
    {
        val = GPU_VREG_RD32(pGpu, NV_VIRTUAL_FUNCTION_PRIV_REPLAYABLE_FAULT_SHADOW_BUFFER_PUT);
    }
    else
    {
        val = GPU_VREG_RD32(pGpu, NV_VIRTUAL_FUNCTION_PRIV_NON_REPLAYABLE_FAULT_SHADOW_BUFFER_PUT);
        val = DRF_VAL(_VIRTUAL_FUNCTION_PRIV, _NON_REPLAYABLE_FAULT_SHADOW_BUFFER_PUT, _PTR, val);
    }
    return val;
}

/*!
 * @brief Check if the given engineID is BAR1
 *
 * @param[in]  pKernelGmmu  KernelGmmu object
 * @param[in]  engineID     Engine ID
 *
 * @return True if BAR1
 */
NvBool
kgmmuIsFaultEngineBar1_GH100
(
    KernelGmmu *pKernelGmmu,
    NvU32       engineID
)
{
    return (engineID == NV_PFAULT_MMU_ENG_ID_BAR1);
}

/*!
 * @brief Check if the given engineID is BAR2
 *
 * @param[in]  pKernelGmmu  KernelGmmu object
 * @param[in]  engineID     Engine ID
 *
 * @return True if BAR2
 */
NvBool
kgmmuIsFaultEngineBar2_GH100
(
    KernelGmmu *pKernelGmmu,
    NvU32       engineID
)
{
    return (engineID == NV_PFAULT_MMU_ENG_ID_BAR2);
}

/*!
 * @brief Check if the given engineID is PHYSICAL
 *
 * @param[in]  pKernelGmmu  KernelGmmu object
 * @param[in]  engineID     Engine ID
 *
 * @return True if PHYSICAL
 */
NvBool
kgmmuIsFaultEnginePhysical_GH100
(
    KernelGmmu *pKernelGmmu,
    NvU32       engineID
)
{
    return (engineID == NV_PFAULT_MMU_ENG_ID_PHYSICAL);
}

NvU32
kgmmuReadClientShadowBufPutIndex_GH100
(
    OBJGPU            *pGpu,
    KernelGmmu        *pKernelGmmu,
    NvU32              gfid,
    FAULT_BUFFER_TYPE  type
)
{
    return 0;
}

void
kgmmuWriteClientShadowBufPutIndex_GH100
(
    OBJGPU            *pGpu,
    KernelGmmu        *pKernelGmmu,
    NvU32              gfid,
    FAULT_BUFFER_TYPE  type,
    NvU32              putIndex
)
{
}
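/*
 * Layout note (descriptive, inferred from the index math in the copy routine
 * below): the client shadow buffer is a run of pages holding GMMU_FAULT_PACKET
 * entries followed, starting at metadataStartIndex, by pages holding the
 * corresponding GMMU_FAULT_PACKET_METADATA entries. For a given
 * shadowBufPutIndex the packet and its metadata therefore live at
 * independently computed (pageIndex, pageOffset) pairs:
 *
 *     packetPage   = putIndex / (RM_PAGE_SIZE / sizeof(GMMU_FAULT_PACKET))
 *     metadataPage = metadataStartIndex + putIndex / (RM_PAGE_SIZE / sizeof(GMMU_FAULT_PACKET_METADATA))
 */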
/*
 * @brief Copies a single fault packet from the replayable/non-replayable
 *        HW fault buffer to the corresponding client shadow buffer
 *
 * @param[in]  pFaultBuffer       Pointer to GMMU_FAULT_BUFFER
 * @param[in]  type               Replayable/Non-replayable fault type
 * @param[in]  getIndex           Get pointer of the HW fault buffer
 * @param[in]  shadowBufPutIndex  Put pointer of the shadow buffer
 * @param[in]  maxBufferEntries   Maximum possible entries in the HW buffer
 * @param[in]  pThreadState       Pointer to THREAD_STATE_NODE
 * @param[out] pFaultsCopied      Number of fault packets copied by the function
 *
 * @returns NV_STATUS
 */
NV_STATUS
kgmmuCopyFaultPacketToClientShadowBuffer_GH100
(
    OBJGPU                   *pGpu,
    KernelGmmu               *pKernelGmmu,
    struct GMMU_FAULT_BUFFER *pFaultBuffer,
    FAULT_BUFFER_TYPE         type,
    NvU32                     getIndex,
    NvU32                     shadowBufPutIndex,
    NvU32                     maxBufferEntries,
    THREAD_STATE_NODE        *pThreadState,
    NvU32                    *pFaultsCopied
)
{
    struct HW_FAULT_BUFFER *pHwFaultBuffer = NULL;
    GMMU_CLIENT_SHADOW_FAULT_BUFFER *pClientShadowFaultBuf = NULL;
    GMMU_FAULT_PACKET faultPacket;
    NvU32 faultPacketsPerPage;
    NvU32 faultPacketPageIndex;
    NvU32 faultPacketPageOffset;
    void *pSrc;
    NvU8 *pDst;
    NV_STATUS status;
    NvU8 *pDstMetadata;
    NvU32 metadataStartIndex;
    NvU32 metadataPerPage;
    NvU32 metadataPageIndex;
    NvU32 metadataPageOffset;
    NvU8 validBit = 1;
    void *pCslCtx = NULL;

    if (!gpuIsCCFeatureEnabled(pGpu) || !gpuIsGspOwnedFaultBuffersEnabled(pGpu))
    {
        return kgmmuCopyFaultPacketToClientShadowBuffer_GV100(pGpu, pKernelGmmu,
                                                              pFaultBuffer,
                                                              type,
                                                              getIndex,
                                                              shadowBufPutIndex,
                                                              maxBufferEntries,
                                                              pThreadState,
                                                              pFaultsCopied);
    }

    *pFaultsCopied = 0;

    pHwFaultBuffer = &pFaultBuffer->hwFaultBuffers[type];
    pClientShadowFaultBuf = pFaultBuffer->pClientShadowFaultBuffer[type];

    // Read the fault packet from HW buffer
    pSrc = kgmmuFaultBufferGetFault_HAL(pGpu, pKernelGmmu, pHwFaultBuffer, getIndex);
    portMemCopy(&faultPacket, sizeof(GMMU_FAULT_PACKET), pSrc, sizeof(GMMU_FAULT_PACKET));

    //
    // The following is the sequence to be followed for replayable faults
    // as per production design when Hopper CC is enabled
    //
    if (type == REPLAYABLE_FAULT_BUFFER)
    {
        NvU32 nextGetIndex;

        kgmmuFaultBufferClearPackets_HAL(pGpu, pKernelGmmu, pHwFaultBuffer, getIndex, 1);

        //
        // Ensure all writes to the current entry are completed before updating the
        // GET pointer.
        //
        portAtomicMemoryFenceStore();

        nextGetIndex = (getIndex + 1) % maxBufferEntries;

        // Update cached GET to a valid value.
        pHwFaultBuffer->cachedGetIndex = nextGetIndex;

        // Increment the GET pointer to enable HW to write new fault packets
        kgmmuWriteFaultBufferGetPtr_HAL(pGpu, pKernelGmmu, type, pHwFaultBuffer->cachedGetIndex, pThreadState);

        // Check if there is space in the shadow buffer
        if (kgmmuIsReplayableShadowFaultBufferFull_HAL(pGpu, pKernelGmmu,
                                                       pClientShadowFaultBuf,
                                                       shadowBufPutIndex,
                                                       maxBufferEntries))
        {
            // The design allows the SW Replayable shadow fault buffer to overflow.
            return NV_OK;
        }
    }

    faultPacketsPerPage = RM_PAGE_SIZE / sizeof(GMMU_FAULT_PACKET);
    faultPacketPageIndex = shadowBufPutIndex / faultPacketsPerPage;
    faultPacketPageOffset = shadowBufPutIndex % faultPacketsPerPage;

    pDst = KERNEL_POINTER_FROM_NvP64(NvU8 *,
               pClientShadowFaultBuf->pBufferPages[faultPacketPageIndex].pAddress);
    pDst += (faultPacketPageOffset * sizeof(GMMU_FAULT_PACKET));

    //
    // Metadata is packed at the end of the buffer.
    // Calculate the page index and offset at which RM needs to fill the metadata
    // and copy it over.
    //
    metadataStartIndex = pClientShadowFaultBuf->metadataStartIndex;
    metadataPerPage = RM_PAGE_SIZE / sizeof(GMMU_FAULT_PACKET_METADATA);
    metadataPageIndex = shadowBufPutIndex / metadataPerPage;
    metadataPageOffset = shadowBufPutIndex % faultPacketsPerPage;

    pDstMetadata = KERNEL_POINTER_FROM_NvP64(NvU8 *,
                       pClientShadowFaultBuf->pBufferPages[metadataStartIndex + metadataPageIndex].pAddress);
    pDstMetadata += (metadataPageOffset * sizeof(GMMU_FAULT_PACKET_METADATA));
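    //
    // The plaintext valid byte in the metadata acts as the publish flag for the
    // client: the encrypted packet and authTag are written first, and the valid
    // byte is only set afterwards (past the store fence below), so a client
    // polling the valid byte never observes a partially written packet.
    //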
    // Sanity check that the client reset the Valid bit.
    if (pDstMetadata[GMMU_FAULT_PACKET_METADATA_VALID_IDX] != 0)
    {
        NV_PRINTF(LEVEL_ERROR, "Plaintext valid bit not reset by client.\n");
        return NV_ERR_INVALID_STATE;
    }

    pCslCtx = kgmmuGetShadowFaultBufferCslContext(pGpu, pKernelGmmu, type);
    if (pCslCtx == NULL)
    {
        NV_PRINTF(LEVEL_ERROR, "CSL context for type 0x%x unexpectedly NULL\n", type);
        return NV_ERR_INVALID_STATE;
    }

    status = ccslEncrypt(pCslCtx,
                         sizeof(GMMU_FAULT_PACKET),
                         (NvU8 *) &faultPacket,
                         &validBit,
                         GMMU_FAULT_PACKET_METADATA_VALID_SIZE,
                         pDst,
                         &pDstMetadata[GMMU_FAULT_PACKET_METADATA_AUTHTAG_IDX]);
    if (status != NV_OK)
    {
        if (status == NV_ERR_INSUFFICIENT_RESOURCES)
        {
            // IV overflow is considered fatal.
            NV_PRINTF(LEVEL_ERROR, "Fatal error detected in fault buffer packet encryption: IV overflow!\n");
            confComputeSetErrorState(pGpu, GPU_GET_CONF_COMPUTE(pGpu));
        }
        else
        {
            NV_PRINTF(LEVEL_ERROR, "Error detected in fault buffer packet encryption: 0x%x\n", status);
        }
        return status;
    }

    //
    // Ensure that the encrypted packet and authTag have reached the point of
    // coherence before writing the plaintext valid bit.
    //
    portAtomicMemoryFenceStore();

    // Write the valid bit and increment the number of faults copied.
    portMemCopy((void *)&pDstMetadata[GMMU_FAULT_PACKET_METADATA_VALID_IDX],
                GMMU_FAULT_PACKET_METADATA_VALID_SIZE,
                &validBit,
                GMMU_FAULT_PACKET_METADATA_VALID_SIZE);

    *pFaultsCopied = 1;

    return NV_OK;
}

/*
 * @brief Checks if the client shadow buffer has space
 *
 * @param[in]  pClientShadowFaultBuf  Pointer to the shadow buffer
 * @param[in]  shadowBufPutIndex      Put index inside shadow buffer
 * @param[in]  maxBufferEntries       Maximum possible entries in the HW buffer
 *
 * @returns NV_TRUE/NV_FALSE
 */
NvBool
kgmmuIsReplayableShadowFaultBufferFull_GH100
(
    OBJGPU                          *pGpu,
    KernelGmmu                      *pKernelGmmu,
    GMMU_CLIENT_SHADOW_FAULT_BUFFER *pClientShadowFaultBuf,
    NvU32                            shadowBufPutIndex,
    NvU32                            maxBufferEntries
)
{
    FAULT_BUFFER_SHARED_MEMORY *pFaultBufSharedMem;

    pFaultBufSharedMem =
        KERNEL_POINTER_FROM_NvP64(FAULT_BUFFER_SHARED_MEMORY *,
                                  pClientShadowFaultBuf->pFaultBufferSharedMemoryAddress);

    return (pFaultBufSharedMem->swGetIndex ==
            ((shadowBufPutIndex + 1) % maxBufferEntries)) ? NV_TRUE : NV_FALSE;
}

/*!
 * @brief Get the engine ID associated with the min CE
 *
 * @param[in] pKernelGmmu  KernelGmmu object
 *
 * @return Engine ID of the min CE
 */
NvU32
kgmmuGetMinCeEngineId_GH100
(
    KernelGmmu *pKernelGmmu
)
{
    return NV_PFAULT_MMU_ENG_ID_CE0;
}
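/*
 * Illustrative use (hypothetical caller, not part of this file; the _HAL
 * wrapper names are assumed): the min CE helper above and the max CE helper
 * below bound the contiguous NV_PFAULT_MMU_ENG_ID_CE0..CE9 range, so a fault
 * handler can classify a faulting engine as a copy engine with
 *
 *     NvBool bIsCe = (engineID >= kgmmuGetMinCeEngineId_HAL(pKernelGmmu)) &&
 *                    (engineID <= kgmmuGetMaxCeEngineId_HAL(pGpu, pKernelGmmu));
 */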
/*!
 * @brief Get the engine ID associated with the max CE
 *
 * @param[in] pGpu         OBJGPU object
 * @param[in] pKernelGmmu  KernelGmmu object
 *
 * @return Engine ID of the max CE
 */
NvU32
kgmmuGetMaxCeEngineId_GH100
(
    OBJGPU     *pGpu,
    KernelGmmu *pKernelGmmu
)
{
    return NV_PFAULT_MMU_ENG_ID_CE9;
}

/**
 * @brief Sign extend a fault address to a supported width as per UVM requirements
 */
void
kgmmuSignExtendFaultAddress_GH100
(
    OBJGPU     *pGpu,
    KernelGmmu *pKernelGmmu,
    NvU64      *pMmuFaultAddress
)
{
    NvU32 cpuAddrShift   = osGetCpuVaAddrShift();
    NvU32 gpuVaAddrShift = portUtilCountTrailingZeros64(pKernelGmmu->maxVASize);

    // Sign extend VA to ensure it's in canonical form if required
    if (gpuVaAddrShift >= cpuAddrShift)
    {
        switch (pGpu->busInfo.oorArch)
        {
            case OOR_ARCH_X86_64:
            case OOR_ARCH_ARM:
            case OOR_ARCH_AARCH64:
                *pMmuFaultAddress = (NvU64)(((NvS64)*pMmuFaultAddress << (64 - 57)) >>
                                            (64 - 57));
                break;
            case OOR_ARCH_PPC64LE:
                break;
            case OOR_ARCH_NONE:
                NV_ASSERT_FAILED("Invalid oor address mode type.");
                break;
        }
    }
    else
    {
        NV_PRINTF(LEVEL_ERROR, "UVM has not defined what to do here, doing nothing\n");
        NV_ASSERT(0);
    }
}
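/*
 * Worked example for the sign extension above (illustrative): the shift pair
 * uses (64 - 57), so bit 56 of the 57-bit fault address is treated as the sign
 * bit. For *pMmuFaultAddress = 0x0100000000000000 (bit 56 set), shifting left
 * by 7 gives 0x8000000000000000 and the arithmetic shift right by 7 yields
 * 0xFF00000000000000, i.e. bits 63:57 are filled with copies of bit 56,
 * producing a canonical address.
 */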