1 /* 2 * SPDX-FileCopyrightText: Copyright (c) 2021-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 3 * SPDX-License-Identifier: MIT 4 * 5 * Permission is hereby granted, free of charge, to any person obtaining a 6 * copy of this software and associated documentation files (the "Software"), 7 * to deal in the Software without restriction, including without limitation 8 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 9 * and/or sell copies of the Software, and to permit persons to whom the 10 * Software is furnished to do so, subject to the following conditions: 11 * 12 * The above copyright notice and this permission notice shall be included in 13 * all copies or substantial portions of the Software. 14 * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 21 * DEALINGS IN THE SOFTWARE. 22 */ 23 24 /****************************************************************************** 25 * 26 * Kernel GMMU module header 27 * Defines and structures used on CPU RM for the GMMU object. 28 * 29 ******************************************************************************/ 30 31 #define NVOC_KERN_GMMU_H_PRIVATE_ACCESS_ALLOWED 32 33 #include "gpu/bif/kernel_bif.h" 34 #include "gpu/mmu/kern_gmmu.h" 35 #include "gpu/bus/kern_bus.h" 36 #include "gpu/nvlink/kernel_nvlink.h" 37 #include "gpu/mem_sys/kern_mem_sys.h" 38 #include "gpu/mem_mgr/mem_mgr.h" 39 #include "vgpu/vgpu_events.h" 40 #include "gpu/mem_mgr/mem_desc.h" 41 #include "os/os.h" 42 #include "rmapi/rmapi.h" 43 #include "gpu/gpu.h" 44 #include "nvRmReg.h" 45 #include "vgpu/rpc.h" 46 #include "kernel/gpu/intr/engine_idx.h" 47 48 #include "kernel/gpu/conf_compute/ccsl.h" 49 50 static void _kgmmuInitRegistryOverrides(OBJGPU *pGpu, KernelGmmu *pKernelGmmu); 51 52 /*! 53 * KERNEL_GMMU constructor 54 * 55 * @param[in] pGpu 56 * @param[in] pKernelGmmu 57 * @param[in] engDesc Engine descriptor 58 * 59 * @return NV_OK on success, pertinent error code on failure. 60 */ 61 NV_STATUS 62 kgmmuConstructEngine_IMPL(OBJGPU *pGpu, KernelGmmu *pKernelGmmu, ENGDESCRIPTOR engDesc) 63 { 64 NvU32 v; 65 66 kgmmuDetermineMaxVASize_HAL(pGpu, pKernelGmmu); 67 68 if (gpuIsCacheOnlyModeEnabled(pGpu)) 69 { 70 pKernelGmmu->bHugePageSupported = NV_FALSE; 71 pKernelGmmu->bPageSize512mbSupported = NV_FALSE; 72 } 73 74 // Allocate and init MMU format families. 75 kgmmuFmtInitPdeApertures_HAL(pKernelGmmu, pKernelGmmu->pdeApertures); 76 kgmmuFmtInitPteApertures_HAL(pKernelGmmu, pKernelGmmu->pteApertures); 77 78 for (v = 0; v < GMMU_FMT_MAX_VERSION_COUNT; ++v) 79 { 80 const NvU32 ver = g_gmmuFmtVersions[v]; 81 if (kgmmuFmtIsVersionSupported_HAL(pKernelGmmu, ver)) 82 { 83 GMMU_FMT_FAMILY *pFam = NULL; 84 85 // Alloc version struct. 86 pFam = portMemAllocNonPaged(sizeof(*pFam)); 87 NV_ASSERT_OR_RETURN((pFam != NULL), NV_ERR_NO_MEMORY); 88 portMemSet(pFam, 0, sizeof(*pFam)); 89 pKernelGmmu->pFmtFamilies[v] = pFam; 90 91 // Init PDE/PTE formats. 
92 kgmmuFmtInitPdeMulti_HAL(pKernelGmmu, &pFam->pdeMulti, ver, pKernelGmmu->pdeApertures); 93 kgmmuFmtInitPde_HAL(pKernelGmmu, &pFam->pde, ver, pKernelGmmu->pdeApertures); 94 kgmmuFmtInitPte_HAL(pKernelGmmu, &pFam->pte, ver, pKernelGmmu->pteApertures, 95 gpuIsUnifiedMemorySpaceEnabled(pGpu)); 96 97 kgmmuFmtInitPteComptagLine_HAL(pKernelGmmu, &pFam->pte, ver); 98 } 99 else 100 { 101 pKernelGmmu->pFmtFamilies[v] = NULL; 102 } 103 } 104 105 NV_ASSERT_OK_OR_RETURN(kgmmuFmtInit(pKernelGmmu)); 106 107 portMemSet(&pKernelGmmu->mmuFaultBuffer, 0, sizeof(pKernelGmmu->mmuFaultBuffer)); 108 109 // Default placement for PDEs is in vidmem. 110 pKernelGmmu->PDEAperture = ADDR_FBMEM; 111 pKernelGmmu->PDEAttr = NV_MEMORY_WRITECOMBINED; 112 pKernelGmmu->PDEBAR1Aperture = ADDR_FBMEM; 113 pKernelGmmu->PDEBAR1Attr = NV_MEMORY_WRITECOMBINED; 114 115 // Default placement for PTEs is in vidmem. 116 pKernelGmmu->PTEAperture = ADDR_FBMEM; 117 pKernelGmmu->PTEAttr = NV_MEMORY_WRITECOMBINED; 118 pKernelGmmu->PTEBAR1Aperture = ADDR_FBMEM; 119 pKernelGmmu->PTEBAR1Attr = NV_MEMORY_WRITECOMBINED; 120 121 _kgmmuInitRegistryOverrides(pGpu, pKernelGmmu); 122 123 return NV_OK; 124 } 125 126 static NV_STATUS 127 _kgmmuInitStaticInfo 128 ( 129 OBJGPU *pGpu, 130 KernelGmmu *pKernelGmmu 131 ) 132 { 133 NV2080_CTRL_INTERNAL_GMMU_GET_STATIC_INFO_PARAMS *pStaticInfo; 134 NV_STATUS status; 135 136 // 137 // On vGPU, all hardware management is done by the host except for full SR-IOV. 138 // Thus, only do any further HW initialization on the host. 139 // 140 if (!(IS_VIRTUAL_WITHOUT_SRIOV(pGpu) || 141 (IS_VIRTUAL_WITH_SRIOV(pGpu) && gpuIsWarBug200577889SriovHeavyEnabled(pGpu)))) 142 { 143 // Init HAL specific features. 144 NV_ASSERT_OK_OR_RETURN(kgmmuFmtFamiliesInit_HAL(pGpu, pKernelGmmu)); 145 } 146 147 pStaticInfo = portMemAllocNonPaged(sizeof(*pStaticInfo)); 148 NV_CHECK_OR_RETURN(LEVEL_ERROR, pStaticInfo != NULL, NV_ERR_INSUFFICIENT_RESOURCES); 149 portMemSet(pStaticInfo, 0, sizeof(*pStaticInfo)); 150 151 NV_CHECK_OK_OR_GOTO(status, LEVEL_ERROR, 152 kgmmuInitStaticInfo_HAL(pGpu, pKernelGmmu, pStaticInfo), 153 fail); 154 155 pKernelGmmu->pStaticInfo = pStaticInfo; 156 157 fail: 158 if (status != NV_OK) 159 { 160 portMemFree(pStaticInfo); 161 } 162 163 return status; 164 } 165 166 /* 167 * Initialize the Kernel GMMU state. 168 * 169 * @param pGpu 170 * @param pKernelGmmu 171 */ 172 NV_STATUS kgmmuStateInitLocked_IMPL 173 ( 174 OBJGPU *pGpu, 175 KernelGmmu *pKernelGmmu 176 ) 177 { 178 KernelBif *pKernelBif = GPU_GET_KERNEL_BIF(pGpu); 179 NV_STATUS status; 180 181 if (pKernelBif != NULL) 182 { 183 // This value shouldn't change after initialization, so cache it now 184 pKernelGmmu->sysmemBaseAddress = pKernelBif->dmaWindowStartAddress; 185 } 186 187 status = _kgmmuInitStaticInfo(pGpu, pKernelGmmu); 188 if (status != NV_OK) 189 { 190 return status; 191 } 192 193 return status; 194 } 195 196 static NV_STATUS 197 _kgmmuCreateGlobalVASpace 198 ( 199 OBJGPU *pGpu, 200 KernelGmmu *pKernelGmmu, 201 NvU32 flags 202 ) 203 { 204 NvU32 constructFlags = VASPACE_FLAGS_NONE; 205 OBJVASPACE *pGlobalVAS = NULL; 206 NV_STATUS rmStatus; 207 OBJGPUGRP *pGpuGrp = NULL; 208 209 // Bail out early on sleep/suspend cases 210 if (flags & GPU_STATE_FLAGS_PRESERVING) 211 return NV_OK; 212 if (!gpumgrIsParentGPU(pGpu)) 213 return NV_OK; 214 215 // 216 // We create the device vaspace at this point. Assemble the flags needed 217 // for construction. 
218 // 219 220 // Allow PTE in SYS 221 constructFlags |= VASPACE_FLAGS_RETRY_PTE_ALLOC_IN_SYS; 222 223 constructFlags |= VASPACE_FLAGS_DEFAULT_PARAMS; 224 constructFlags |= VASPACE_FLAGS_DEFAULT_SIZE; 225 constructFlags |= DRF_DEF(_VASPACE, _FLAGS, _BIG_PAGE_SIZE, _DEFAULT); 226 227 pGpuGrp = gpumgrGetGpuGrpFromGpu(pGpu); 228 NV_ASSERT_OR_RETURN(pGpuGrp != NULL, NV_ERR_INVALID_DATA); 229 230 rmStatus = gpugrpCreateGlobalVASpace(pGpuGrp, pGpu, 231 FERMI_VASPACE_A, 232 0, 0, 233 constructFlags, 234 &pGlobalVAS); 235 NV_ASSERT_OR_RETURN((NV_OK == rmStatus), rmStatus); 236 237 return NV_OK; 238 } 239 240 static NV_STATUS 241 _kgmmuDestroyGlobalVASpace 242 ( 243 OBJGPU *pGpu, 244 KernelGmmu *pKernelGmmu, 245 NvU32 flags 246 ) 247 { 248 OBJGPUGRP *pGpuGrp = NULL; 249 250 if (flags & GPU_STATE_FLAGS_PRESERVING) 251 return NV_OK; 252 253 pGpuGrp = gpumgrGetGpuGrpFromGpu(pGpu); 254 return gpugrpDestroyGlobalVASpace(pGpuGrp, pGpu); 255 } 256 257 /* 258 * Helper function to enable ComputePeerMode 259 */ 260 NV_STATUS 261 kgmmuEnableComputePeerAddressing_IMPL 262 ( 263 OBJGPU *pGpu, 264 KernelGmmu *pKernelGmmu, 265 NvU32 flags 266 ) 267 { 268 KernelBus *pKernelBus = GPU_GET_KERNEL_BUS(pGpu); 269 OBJSYS *pSys = SYS_GET_INSTANCE(); 270 NV_STATUS status = NV_OK; 271 RM_API *pRmApi = GPU_GET_PHYSICAL_RMAPI(pGpu); 272 NvBool bComputePeerMode = NV_FALSE; 273 274 if (pSys->getProperty(pSys, PDB_PROP_SYS_NVSWITCH_IS_PRESENT) || 275 kbusIsFlaSupported(pKernelBus)) 276 { 277 bComputePeerMode = NV_TRUE; 278 } 279 280 if (bComputePeerMode) 281 { 282 status = kgmmuEnableNvlinkComputePeerAddressing_HAL(pKernelGmmu); 283 if (status != NV_OK) 284 { 285 NV_PRINTF(LEVEL_ERROR, 286 "Failed to enable GMMU property compute addressing for GPU %x , status:%x\n", 287 pGpu->gpuInstance, status); 288 return status; 289 } 290 291 status = pRmApi->Control(pRmApi, 292 pGpu->hInternalClient, 293 pGpu->hInternalSubdevice, 294 NV2080_CTRL_CMD_INTERNAL_NVLINK_ENABLE_COMPUTE_PEER_ADDR, 295 NULL, 0); 296 } 297 return status; 298 } 299 300 /* 301 * State Post Load 302 */ 303 NV_STATUS kgmmuStatePostLoad_IMPL 304 ( 305 OBJGPU *pGpu, 306 KernelGmmu *pKernelGmmu, 307 NvU32 flags 308 ) 309 { 310 NV_STATUS status = NV_OK; 311 312 status = _kgmmuCreateGlobalVASpace(pGpu, pKernelGmmu, flags); 313 314 if (status != NV_OK) 315 { 316 NV_PRINTF(LEVEL_ERROR, 317 "Failed to create GVASpace, status:%x\n", 318 status); 319 return status; 320 } 321 322 status = kgmmuEnableComputePeerAddressing(pGpu, pKernelGmmu, flags); 323 324 if (status != NV_OK) 325 { 326 NV_PRINTF(LEVEL_ERROR, 327 "Failed to enable compute peer addressing, status:%x\n", 328 status); 329 return status; 330 } 331 332 return status; 333 } 334 335 /* 336 * State Pre Unload 337 */ 338 NV_STATUS 339 kgmmuStatePreUnload_IMPL 340 ( 341 OBJGPU *pGpu, 342 KernelGmmu *pKernelGmmu, 343 NvU32 flags 344 ) 345 { 346 NV_STATUS status = NV_OK; 347 348 status = _kgmmuDestroyGlobalVASpace(pGpu, pKernelGmmu, flags); 349 350 if (status != NV_OK) 351 { 352 NV_PRINTF(LEVEL_ERROR, 353 "Failed to destory GVASpace, status:%x\n", 354 status); 355 return status; 356 } 357 return status; 358 } 359 360 /*! 361 * KernelGmmu destructor 362 * 363 * @param[in] pKernelGmmu KernelGmmu object pointer 364 */ 365 void 366 kgmmuDestruct_IMPL(KernelGmmu *pKernelGmmu) 367 { 368 NvU32 v; 369 NvU32 b; 370 371 // Free per big page size format and format-family storage. 
372 for (v = 0; v < GMMU_FMT_MAX_VERSION_COUNT; ++v) 373 { 374 if (NULL != pKernelGmmu->pFmtFamilies[v]) 375 { 376 for (b = 0; b < GMMU_FMT_MAX_BIG_PAGE_SIZES; ++b) 377 { 378 portMemFree(pKernelGmmu->pFmtFamilies[v]->pFmts[b]); 379 pKernelGmmu->pFmtFamilies[v]->pFmts[b] = NULL; 380 } 381 portMemFree(pKernelGmmu->pFmtFamilies[v]); 382 } 383 } 384 } 385 386 void 387 kgmmuStateDestroy_IMPL(OBJGPU *pGpu, KernelGmmu *pKernelGmmu) 388 { 389 if (NULL != pKernelGmmu->pStaticInfo) 390 { 391 portMemFree((void *)pKernelGmmu->pStaticInfo); 392 pKernelGmmu->pStaticInfo = NULL; 393 } 394 if (NULL != pKernelGmmu->pWarSmallPageTable) 395 { 396 memdescFree(pKernelGmmu->pWarSmallPageTable); 397 memdescDestroy(pKernelGmmu->pWarSmallPageTable); 398 pKernelGmmu->pWarSmallPageTable = NULL; 399 } 400 if (NULL != pKernelGmmu->pWarPageDirectory0) 401 { 402 memdescFree(pKernelGmmu->pWarPageDirectory0); 403 memdescDestroy(pKernelGmmu->pWarPageDirectory0); 404 pKernelGmmu->pWarPageDirectory0 = NULL; 405 } 406 } 407 408 /*! 409 * Initializes KERN_GMMU state based on registry key overrides 410 * 411 * @param[in] pGpu 412 * @param[in] pKernelGmmu 413 */ 414 static void 415 _kgmmuInitRegistryOverrides(OBJGPU *pGpu, KernelGmmu *pKernelGmmu) 416 { 417 NvU32 data; 418 419 memdescOverrideInstLoc(DRF_VAL(_REG_STR_RM, _INST_LOC, _PDE, pGpu->instLocOverrides), 420 "GMMU PDE", 421 &pKernelGmmu->PDEAperture, 422 &pKernelGmmu->PDEAttr); 423 memdescOverrideInstLoc(DRF_VAL(_REG_STR_RM, _INST_LOC, _BAR_PDE, pGpu->instLocOverrides), 424 "BAR1 PDE", 425 &pKernelGmmu->PDEBAR1Aperture, 426 &pKernelGmmu->PDEBAR1Attr); 427 memdescOverrideInstLoc(DRF_VAL(_REG_STR_RM, _INST_LOC, _PTE, pGpu->instLocOverrides), 428 "GMMU PTE", 429 &pKernelGmmu->PTEAperture, 430 &pKernelGmmu->PTEAttr); 431 memdescOverrideInstLoc(DRF_VAL(_REG_STR_RM, _INST_LOC, _BAR_PTE, pGpu->instLocOverrides), 432 "BAR1 PTE", 433 &pKernelGmmu->PTEBAR1Aperture, 434 &pKernelGmmu->PTEBAR1Attr); 435 436 // 437 // Check if we want to disable big page size per address space 438 // 439 pKernelGmmu->bEnablePerVaspaceBigPage = IsGM20X(pGpu); 440 if (NV_OK == osReadRegistryDword(pGpu, 441 NV_REG_STR_RM_DISABLE_BIG_PAGE_PER_ADDRESS_SPACE, &data)) 442 { 443 pKernelGmmu->bEnablePerVaspaceBigPage = !data; 444 } 445 446 if (NV_OK == osReadRegistryDword(pGpu, 447 NV_REG_STR_FERMI_BIG_PAGE_SIZE, &data)) 448 { 449 if (pGpu->optimizeUseCaseOverride != 450 NV_REG_STR_RM_OPTIMIZE_COMPUTE_OR_SPARSE_TEX_DEFAULT) 451 { 452 NV_PRINTF(LEVEL_ERROR, 453 "The %s regkey cannot be used with the %s regkey!\n", 454 NV_REG_STR_FERMI_BIG_PAGE_SIZE, 455 NV_REG_STR_RM_OPTIMIZE_COMPUTE_OR_SPARSE_TEX); 456 return; 457 } 458 else 459 { 460 switch (data) 461 { 462 case NV_REG_STR_FERMI_BIG_PAGE_SIZE_64KB: 463 case NV_REG_STR_FERMI_BIG_PAGE_SIZE_128KB: 464 pKernelGmmu->overrideBigPageSize = data; 465 break; 466 default: 467 break; 468 } 469 } 470 } 471 else if (pGpu->optimizeUseCaseOverride != 472 NV_REG_STR_RM_OPTIMIZE_COMPUTE_OR_SPARSE_TEX_DEFAULT) 473 { 474 switch (pGpu->optimizeUseCaseOverride) 475 { 476 case NV_REG_STR_RM_OPTIMIZE_COMPUTE_OR_SPARSE_TEX_SPARSE_TEX: 477 pKernelGmmu->overrideBigPageSize = RM_PAGE_SIZE_64K; 478 break; 479 case NV_REG_STR_RM_OPTIMIZE_COMPUTE_OR_SPARSE_TEX_COMPUTE: 480 pKernelGmmu->overrideBigPageSize = RM_PAGE_SIZE_128K; 481 break; 482 default: 483 break; 484 } 485 } 486 487 // Check if HW fault buffer is disabled 488 if (NV_OK == osReadRegistryDword(pGpu, 489 NV_REG_STR_RM_DISABLE_HW_FAULT_BUFFER, &data)) 490 { 491 NV_PRINTF(LEVEL_ERROR, 492 "Overriding HW Fault buffer state to 0x%x 
due to regkey!\n", 493 data); 494 pKernelGmmu->setProperty(pKernelGmmu, PDB_PROP_KGMMU_FAULT_BUFFER_DISABLED, data); 495 } 496 497 } 498 499 GMMU_APERTURE 500 kgmmuGetMemAperture_IMPL 501 ( 502 KernelGmmu *pKernelGmmu, 503 MEMORY_DESCRIPTOR *pMemDesc 504 ) 505 { 506 switch (memdescGetAddressSpace(pMemDesc)) 507 { 508 case ADDR_FBMEM: 509 return GMMU_APERTURE_VIDEO; 510 case ADDR_SYSMEM: 511 if (NV_MEMORY_CACHED == memdescGetCpuCacheAttrib(pMemDesc)) 512 { 513 return GMMU_APERTURE_SYS_COH; 514 } 515 return GMMU_APERTURE_SYS_NONCOH; 516 default: 517 NV_ASSERT(0); 518 return GMMU_APERTURE_INVALID; 519 } 520 } 521 522 /*! 523 * Initialize GMMU format structures dependent on big page size. 524 */ 525 NV_STATUS 526 kgmmuFmtInit_IMPL(KernelGmmu *pKernelGmmu) 527 { 528 NvU32 v; 529 NvU32 b; 530 531 // Allocate and init MMU formats for the supported big page sizes. 532 for (v = 0; v < GMMU_FMT_MAX_VERSION_COUNT; ++v) 533 { 534 const NvU32 ver = g_gmmuFmtVersions[v]; 535 GMMU_FMT_FAMILY *pFam = pKernelGmmu->pFmtFamilies[v]; 536 if (NULL != pFam) 537 { 538 for (b = 0; b < GMMU_FMT_MAX_BIG_PAGE_SIZES; ++b) 539 { 540 const NvU32 bigPageShift = g_gmmuFmtBigPageShifts[b]; 541 542 // Allocate +1 level for the last dual-level. 543 const NvU32 numLevels = GMMU_FMT_MAX_LEVELS + 1; 544 const NvU32 size = sizeof(GMMU_FMT) + sizeof(MMU_FMT_LEVEL) * numLevels; 545 MMU_FMT_LEVEL *pLvls; 546 547 // Allocate format and levels in one chunk. 548 pFam->pFmts[b] = portMemAllocNonPaged(size); 549 NV_ASSERT_OR_RETURN((pFam->pFmts[b] != NULL), NV_ERR_NO_MEMORY); 550 portMemSet(pFam->pFmts[b], 0, size); 551 552 // Levels stored contiguously after the format struct. 553 pLvls = (MMU_FMT_LEVEL *)(pFam->pFmts[b] + 1); 554 555 // Common init. 556 pFam->pFmts[b]->version = ver; 557 pFam->pFmts[b]->pRoot = pLvls; 558 pFam->pFmts[b]->pPdeMulti = &pFam->pdeMulti; 559 pFam->pFmts[b]->pPde = &pFam->pde; 560 pFam->pFmts[b]->pPte = &pFam->pte; 561 562 kgmmuFmtInitLevels_HAL(pKernelGmmu, pLvls, numLevels, ver, bigPageShift); 563 kgmmuFmtInitCaps_HAL(pKernelGmmu, pFam->pFmts[b]); 564 } 565 } 566 } 567 568 return NV_OK; 569 } 570 571 /*! 572 * Retrieve GMMU format family based on version. 573 */ 574 const GMMU_FMT_FAMILY * 575 kgmmuFmtGetFamily_IMPL(KernelGmmu *pKernelGmmu, NvU32 version) 576 { 577 NvU32 v; 578 579 // Find a matching format. 580 for (v = GMMU_FMT_MAX_VERSION_COUNT; v > 0; --v) 581 { 582 if (0 == version) 583 { 584 // Pick newest default version if none requested. 585 if (NULL != pKernelGmmu->pFmtFamilies[v - 1]) 586 { 587 return pKernelGmmu->pFmtFamilies[v - 1]; 588 } 589 } 590 else if (g_gmmuFmtVersions[v - 1] == version) 591 { 592 return pKernelGmmu->pFmtFamilies[v - 1]; 593 } 594 } 595 596 return NULL; 597 } 598 599 /*! 600 * Returns GMMU settings that are static after GPU state init/load is 601 * finished. 602 */ 603 const NV2080_CTRL_INTERNAL_GMMU_GET_STATIC_INFO_PARAMS * 604 kgmmuGetStaticInfo_IMPL 605 ( 606 OBJGPU *pGpu, 607 KernelGmmu *pKernelGmmu 608 ) 609 { 610 // check if state Init has not completed. 611 NV_ASSERT_OR_ELSE(pKernelGmmu != NULL, return NULL); 612 613 return pKernelGmmu->pStaticInfo; 614 } 615 616 /*! 617 * @brief Initializes static info data from the Physical side. 618 * 619 * @param pGpu 620 * @param pKernelGmmu 621 * @param[out] pStaticInfo pointer to the static info init on Physical driver. 
622 */ 623 NV_STATUS 624 kgmmuInitStaticInfo_KERNEL 625 ( 626 OBJGPU *pGpu, 627 KernelGmmu *pKernelGmmu, 628 NV2080_CTRL_INTERNAL_GMMU_GET_STATIC_INFO_PARAMS *pStaticInfo 629 ) 630 { 631 RM_API *pRmApi = GPU_GET_PHYSICAL_RMAPI(pGpu); 632 NV_STATUS status; 633 634 status = pRmApi->Control(pRmApi, pGpu->hInternalClient, pGpu->hInternalSubdevice, 635 NV2080_CTRL_CMD_INTERNAL_GMMU_GET_STATIC_INFO, 636 pStaticInfo, sizeof(*pStaticInfo)); 637 638 return status; 639 } 640 641 /*! 642 * Retrieve GMMU format based on version and big page size. 643 */ 644 const GMMU_FMT * 645 kgmmuFmtGet_IMPL(KernelGmmu *pKernelGmmu, NvU32 version, NvU64 bigPageSize) 646 { 647 const GMMU_FMT_FAMILY *pFmtFamily = kgmmuFmtGetFamily(pKernelGmmu, version); 648 649 if (NULL != pFmtFamily) 650 { 651 NvU32 b; 652 653 // Pick default big page size if none requested. 654 if (0 == bigPageSize) 655 { 656 // 657 // Retrieve Big Page Size. If it is not yet set, set it to 64K. 658 // Useful when this method is invoked before big page size is set. 659 // 660 if (0 == (bigPageSize = kgmmuGetBigPageSize_HAL(pKernelGmmu))) 661 bigPageSize = NVBIT64(16); 662 } 663 664 // Find a matching format. 665 for (b = 0; b < GMMU_FMT_MAX_BIG_PAGE_SIZES; ++b) 666 { 667 if (NVBIT64(g_gmmuFmtBigPageShifts[b]) == bigPageSize) 668 { 669 return pFmtFamily->pFmts[b]; 670 } 671 } 672 } 673 674 return NULL; 675 } 676 677 /*! 678 * Check if a big page size is supported. 679 */ 680 NvBool 681 kgmmuFmtIsBigPageSizeSupported_IMPL(KernelGmmu *pKernelGmmu, NvU64 bigPageSize) 682 { 683 if (kgmmuIsPerVaspaceBigPageEn(pKernelGmmu)) 684 { 685 return NV_TRUE; 686 } 687 return kgmmuGetBigPageSize_HAL(pKernelGmmu) == bigPageSize; 688 } 689 690 /*! 691 * @bried Returns the latest supported MMU fmt. 692 * 693 * @param[in] pGpu OBJGPU pointer 694 * @param[in] pKernelGmmu KernelGmmu pointer 695 * 696 * @returns const GMMU_FMT* 697 */ 698 const GMMU_FMT* 699 kgmmuFmtGetLatestSupportedFormat_IMPL(OBJGPU *pGpu, KernelGmmu *pKernelGmmu) 700 { 701 NvU32 v; 702 NvU32 maxFmtVersionSupported = 0; 703 704 for (v = 0; v < GMMU_FMT_MAX_VERSION_COUNT; ++v) 705 { 706 const NvU32 ver = g_gmmuFmtVersions[v]; 707 if (kgmmuFmtIsVersionSupported_HAL(pKernelGmmu, ver)) 708 { 709 maxFmtVersionSupported = maxFmtVersionSupported < ver ? ver : maxFmtVersionSupported; 710 } 711 } 712 713 return kgmmuFmtGet(pKernelGmmu, maxFmtVersionSupported, 0); 714 } 715 716 /*! 717 * @brief Used for calculating total memory required for page tables 718 required for translating a given VA range. 719 * 720 * @param pGpu 721 * @param pKernelGmmu 722 * @param[in] pFmt Pointer to GMMU format 723 * @param[in] vaBase Start VA 724 * @param[in] vaLimit End VA 725 * @param[in] pageSizeLockMask Mask of page sizes locked down at VA reservation 726 * 727 * @returns total size of page tables. 728 */ 729 NvU64 730 kgmmuGetSizeOfPageTables_IMPL 731 ( 732 OBJGPU *pGpu, 733 KernelGmmu *pKernelGmmu, 734 const GMMU_FMT *pFmt, 735 NvU64 vaBase, 736 NvU64 vaLimit, 737 NvU64 pageSizeLockMask 738 ) 739 { 740 const MMU_FMT_LEVEL *pPgTbl = NULL; 741 NvU64 pgTblSize = 0; 742 NvU64 numPgTblsCeil; 743 NvU64 numPgTblsFloor; 744 NvU64 numEntries; 745 NvU32 pageShift; 746 747 // Loop over all page table sizes in mask 748 FOR_EACH_INDEX_IN_MASK(64, pageShift, pageSizeLockMask) 749 { 750 pPgTbl = mmuFmtFindLevelWithPageShift(pFmt->pRoot, pageShift); 751 752 // 753 // Do not consider page directories. They are handled by 754 // @ref kgmmuGetSizeOfPageDirs. 
755 // 756 if (!pPgTbl->bPageTable || (pPgTbl->numSubLevels != 0)) 757 { 758 continue; 759 } 760 761 numPgTblsCeil = NV_DIV_AND_CEIL(vaLimit, NVBIT64(pPgTbl->virtAddrBitHi + 1)) - 762 (vaBase / NVBIT64(pPgTbl->virtAddrBitHi + 1)) + 1; 763 numPgTblsFloor = vaLimit / NVBIT64(pPgTbl->virtAddrBitHi + 1); 764 765 // If full page tables are not used, allocate only as much as needed. 766 if (numPgTblsFloor == 0) 767 { 768 numEntries = mmuFmtVirtAddrToEntryIndex(pPgTbl, vaLimit) - 769 mmuFmtVirtAddrToEntryIndex(pPgTbl, vaBase) + 1; 770 pgTblSize += numEntries * pPgTbl->entrySize; 771 } 772 else 773 { 774 pgTblSize += numPgTblsCeil * mmuFmtLevelSize(pPgTbl); 775 } 776 } 777 FOR_EACH_INDEX_IN_MASK_END 778 779 return pgTblSize; 780 } 781 782 /*! 783 * @brief Used for calculating total memory required for page directories 784 required for translating a given VA range. 785 * 786 * @param pGpu 787 * @param pKernelGmmu 788 * @param[in] pFmt Pointer to GMMU format 789 * @param[in] vaBase Start VA 790 * @param[in] vaLimit End VA 791 * 792 * @returns total size of page directories 793 */ 794 NvU64 795 kgmmuGetSizeOfPageDirs_IMPL 796 ( 797 OBJGPU *pGpu, 798 KernelGmmu *pKernelGmmu, 799 const GMMU_FMT *pFmt, 800 NvU64 vaBase, 801 NvU64 vaLimit, 802 NvU64 pageSizeLockMask 803 ) 804 { 805 const MMU_FMT_LEVEL *pLevel = NULL; 806 NvU64 size = 0; 807 NvU16 i; 808 809 NV_ASSERT_OR_RETURN(pFmt != NULL, 0); 810 811 pLevel = pFmt->pRoot; 812 813 // 814 // Retain only the lowest set bit 815 // 816 // If the lowest set bit corresponds to a leaf page table (4K or 64K), we"ll 817 // calculate memory for all upper level page directories and if the set bit 818 // corresponds to an upper level page directory we"ll factor in all levels 819 // from the root upto that level. 820 // 821 pageSizeLockMask = pageSizeLockMask & -((NvS64)pageSizeLockMask); 822 823 // Accumulate size for all Page Directories. 824 for (i = 0; i < GMMU_FMT_MAX_LEVELS - 1; i++) 825 { 826 NvU64 vaPerEntry = mmuFmtEntryVirtAddrMask(pLevel) + 1; 827 NvU64 numEntries = NV_DIV_AND_CEIL(vaLimit, vaPerEntry) - 828 (vaBase / vaPerEntry) + 1; 829 NvU64 levelSize = numEntries * pLevel->entrySize; 830 levelSize = NV_ROUNDUP(levelSize, RM_PAGE_SIZE); 831 832 // Stop accumulating size once we are beyond the specified level. 833 if (mmuFmtLevelPageSize(pLevel) < pageSizeLockMask) 834 { 835 break; 836 } 837 838 size += levelSize; 839 840 // If there's one sublevel choose that. 841 if (pLevel->numSubLevels == 1) 842 { 843 pLevel = &(pLevel->subLevels[0]); 844 } 845 else 846 { 847 // Choose the 4K page size sublevel. 848 pLevel = &(pLevel->subLevels[1]); 849 } 850 NV_ASSERT_OR_RETURN(pLevel != NULL, 0); 851 852 // Stop accumulating size if we've exhausted all Page Dirs. 853 if (pLevel->bPageTable && (pLevel->numSubLevels == 0)) 854 { 855 break; 856 } 857 } 858 859 return size; 860 } 861 862 /* 863 * Fill comptag field in PTE. 
864 */ 865 void kgmmuFieldSetKindCompTags_IMPL 866 ( 867 KernelGmmu *pGmmu, 868 const GMMU_FMT *pFmt, 869 const MMU_FMT_LEVEL *pLevel, 870 const COMPR_INFO *pCompr, 871 NvU64 physAddr, 872 NvU64 surfOffset, 873 NvU32 pteIndex, 874 NvU8 *pEntries 875 ) 876 { 877 OBJGPU *pGpu = ENG_GET_GPU(pGmmu); 878 GMMU_COMPR_INFO comprInfo = {0}; 879 880 comprInfo.compressedKind = pCompr->kind; 881 comprInfo.compPageShift = pCompr->compPageShift; 882 883 if (memmgrIsKind_HAL(GPU_GET_MEMORY_MANAGER(pGpu), FB_IS_KIND_COMPRESSIBLE, pCompr->kind)) 884 { 885 const MEMORY_SYSTEM_STATIC_CONFIG *pMemorySystemConfig = 886 kmemsysGetStaticConfig(pGpu, GPU_GET_KERNEL_MEMORY_SYSTEM(pGpu)); 887 888 if (pCompr->bPhysBasedComptags) 889 { 890 NvBool bCallingContextPlugin; 891 892 NV_ASSERT(pMemorySystemConfig->bOneToOneComptagLineAllocation || pMemorySystemConfig->bUseRawModeComptaglineAllocation); 893 894 NV_ASSERT_OR_RETURN_VOID(vgpuIsCallingContextPlugin(pGpu, &bCallingContextPlugin) == NV_OK); 895 if (IS_VIRTUAL_WITH_SRIOV(pGpu) || bCallingContextPlugin || 896 pMemorySystemConfig->bUseRawModeComptaglineAllocation) 897 { 898 // In raw mode or when SR-IOV is enabled, HW handles compression tags 899 comprInfo.compTagLineMin = 1; 900 } 901 else 902 { 903 comprInfo.compTagLineMin = memmgrDetermineComptag_HAL(pGpu, GPU_GET_MEMORY_MANAGER(pGpu), physAddr); 904 } 905 906 comprInfo.compPageIndexLo = surfOffset >> pCompr->compPageShift; 907 comprInfo.compPageIndexHi = (surfOffset + mmuFmtLevelPageSize(pLevel) - 1) >> pCompr->compPageShift; 908 comprInfo.compTagLineMultiplier = 1; 909 } 910 else 911 { 912 comprInfo.compPageIndexLo = pCompr->compPageIndexLo; 913 comprInfo.compPageIndexHi = pCompr->compPageIndexHi; 914 comprInfo.compTagLineMin = pCompr->compTagLineMin; 915 comprInfo.compTagLineMultiplier = pCompr->compTagLineMultiplier; 916 } 917 } 918 919 gmmuFmtInitPteCompTags(pFmt, pLevel, &comprInfo, surfOffset, pteIndex, 1, pEntries); 920 } 921 922 NV_STATUS 923 kgmmuFaultBufferGetAddressSpace_IMPL 924 ( 925 OBJGPU *pGpu, 926 KernelGmmu *pKernelGmmu, 927 NvU32 index, 928 NvU32 *pFaultBufferAddrSpace, 929 NvU32 *pFaultBufferAttr 930 ) 931 { 932 NvU32 faultBufferAddrSpace = ADDR_UNKNOWN; 933 NvU32 faultBufferAttr = 0; 934 NvBool bAllocInVidmem = NV_FALSE; 935 936 bAllocInVidmem = gpuIsCCFeatureEnabled(pGpu); 937 938 NV_ASSERT_OR_RETURN((index < NUM_FAULT_BUFFERS), NV_ERR_INVALID_ARGUMENT); 939 940 if (index == NON_REPLAYABLE_FAULT_BUFFER) 941 { 942 faultBufferAddrSpace = bAllocInVidmem ? ADDR_FBMEM : ADDR_SYSMEM; 943 faultBufferAttr = bAllocInVidmem ? NV_MEMORY_UNCACHED : NV_MEMORY_CACHED; 944 memdescOverrideInstLoc(DRF_VAL(_REG_STR_RM, _INST_LOC_3, _UVM_FAULT_BUFFER_NONREPLAYABLE, pGpu->instLocOverrides3), 945 "UVM non-replayable fault", &faultBufferAddrSpace, &faultBufferAttr); 946 } 947 else if (index == REPLAYABLE_FAULT_BUFFER) 948 { 949 faultBufferAddrSpace = bAllocInVidmem ? ADDR_FBMEM : ADDR_SYSMEM; 950 faultBufferAttr = bAllocInVidmem ? NV_MEMORY_UNCACHED : NV_MEMORY_CACHED; 951 memdescOverrideInstLoc(DRF_VAL(_REG_STR_RM, _INST_LOC_4, _UVM_FAULT_BUFFER_REPLAYABLE, pGpu->instLocOverrides4), 952 "UVM replayable fault", &faultBufferAddrSpace, &faultBufferAttr); 953 } 954 // 955 // Whenever Hopper CC is enabled, HW requires both replayable and non-replayable 956 // fault buffers to be in CPR vidmem. 
It would be illegal to allocate the buffers 957 // in any other aperture 958 // 959 if (bAllocInVidmem && (faultBufferAddrSpace == ADDR_SYSMEM)) 960 { 961 NV_PRINTF(LEVEL_ERROR, "Fault buffers must be in CPR vidmem when HCC is enabled\n"); 962 NV_ASSERT(0); 963 return NV_ERR_INVALID_ARGUMENT; 964 } 965 966 if (pFaultBufferAddrSpace != NULL) 967 { 968 *pFaultBufferAddrSpace = faultBufferAddrSpace; 969 } 970 971 if (pFaultBufferAttr != NULL) 972 { 973 *pFaultBufferAttr = faultBufferAttr; 974 } 975 976 return NV_OK; 977 } 978 979 NV_STATUS 980 kgmmuFaultBufferCreateMemDesc_IMPL 981 ( 982 OBJGPU *pGpu, 983 KernelGmmu *pKernelGmmu, 984 NvU32 index, 985 NvU32 faultBufferSize, 986 NvU64 memDescFlags, 987 MEMORY_DESCRIPTOR **ppMemDesc 988 ) 989 { 990 NV_STATUS status; 991 MEMORY_DESCRIPTOR *pMemDesc = NULL; 992 NvU32 faultBufferAddrSpace = ADDR_UNKNOWN; 993 NvU32 faultBufferAttr = 0; 994 NvBool isContiguous = NV_FALSE; 995 996 NV_ASSERT_OR_RETURN((index < NUM_FAULT_BUFFERS), NV_ERR_INVALID_ARGUMENT); 997 998 status = kgmmuFaultBufferGetAddressSpace(pGpu, pKernelGmmu, index, 999 &faultBufferAddrSpace, &faultBufferAttr); 1000 if (status != NV_OK) 1001 { 1002 return status; 1003 } 1004 1005 if ((IS_VIRTUAL(pGpu) && gpuIsWarBug200577889SriovHeavyEnabled(pGpu)) 1006 || gpuIsCCFeatureEnabled(pGpu) 1007 ) 1008 { 1009 // Allocate contiguous fault buffers for SR-IOV Heavy 1010 // Fault buffers get allocated in CPR vidmem when Hopper CC is enabled 1011 // We're almost assured to get contiguous allocations in vidmem 1012 isContiguous = NV_TRUE; 1013 } 1014 1015 status = memdescCreate(&pMemDesc, pGpu, 1016 RM_PAGE_ALIGN_UP(faultBufferSize), 0, isContiguous, 1017 faultBufferAddrSpace, faultBufferAttr, 1018 (memDescFlags | MEMDESC_FLAGS_LOST_ON_SUSPEND)); 1019 if (status != NV_OK) 1020 { 1021 return status; 1022 } 1023 1024 // 1025 // GPU doesn't read faultbuffer memory, so if faultBuffers are in sysmem, ensure that GpuCacheAttr 1026 // is set to UNCACHED as having a vol bit set in PTEs will ensure HUB uses L2Bypass mode and it will 1027 // save extra cycles to cache in L2 while MMU will write fault packets. 1028 // 1029 if (faultBufferAddrSpace == ADDR_SYSMEM && 1030 pKernelGmmu->getProperty(pKernelGmmu, PDB_PROP_KGMMU_SYSMEM_FAULT_BUFFER_GPU_UNCACHED)) 1031 { 1032 memdescSetGpuCacheAttrib(pMemDesc, NV_MEMORY_UNCACHED); 1033 } 1034 1035 memdescSetPageSize(pMemDesc, AT_GPU, RM_PAGE_SIZE); 1036 1037 *ppMemDesc = pMemDesc; 1038 1039 return NV_OK; 1040 } 1041 1042 NV_STATUS 1043 kgmmuFaultBufferUnregister_IMPL 1044 ( 1045 OBJGPU *pGpu, 1046 KernelGmmu *pKernelGmmu, 1047 NvU32 index 1048 ) 1049 { 1050 struct HW_FAULT_BUFFER *pFaultBuffer; 1051 MEMORY_DESCRIPTOR *pMemDesc; 1052 1053 pFaultBuffer = &pKernelGmmu->mmuFaultBuffer[GPU_GFID_PF].hwFaultBuffers[index]; 1054 pMemDesc = pFaultBuffer->pFaultBufferMemDesc; 1055 1056 pFaultBuffer->faultBufferSize = 0; 1057 pFaultBuffer->pFaultBufferMemDesc = NULL; 1058 1059 memdescDestroy(pMemDesc); 1060 1061 return NV_OK; 1062 } 1063 1064 NV_STATUS 1065 kgmmuFaultBufferAlloc_IMPL 1066 ( 1067 OBJGPU *pGpu, 1068 KernelGmmu *pKernelGmmu, 1069 NvU32 index, 1070 NvU32 faultBufferSize 1071 ) 1072 { 1073 NV_STATUS status; 1074 MEMORY_DESCRIPTOR *pMemDesc = NULL; 1075 struct HW_FAULT_BUFFER *pFaultBuffer; 1076 const char *name = (index == REPLAYABLE_FAULT_BUFFER ? 
NV_RM_SURF_NAME_REPLAYABLE_FAULT_BUFFER : NV_RM_SURF_NAME_NONREPLAYABLE_FAULT_BUFFER); 1077 1078 NV_ASSERT_OR_RETURN((index < NUM_FAULT_BUFFERS), NV_ERR_INVALID_ARGUMENT); 1079 1080 if (pKernelGmmu->getProperty(pKernelGmmu, PDB_PROP_KGMMU_FAULT_BUFFER_DISABLED)) 1081 return NV_OK; 1082 1083 pFaultBuffer = &pKernelGmmu->mmuFaultBuffer[GPU_GFID_PF].hwFaultBuffers[index]; 1084 1085 status = kgmmuFaultBufferCreateMemDesc(pGpu, pKernelGmmu, index, faultBufferSize, 1086 MEMDESC_FLAGS_NONE, &pMemDesc); 1087 if (status != NV_OK) 1088 { 1089 return status; 1090 } 1091 1092 status = memdescAlloc(pMemDesc); 1093 if (status != NV_OK) 1094 { 1095 memdescDestroy(pMemDesc); 1096 return status; 1097 } 1098 1099 memdescSetName(pGpu, pMemDesc, name, NULL); 1100 1101 pFaultBuffer->faultBufferSize = faultBufferSize; 1102 pFaultBuffer->pFaultBufferMemDesc = pMemDesc; 1103 1104 return status; 1105 } 1106 1107 NV_STATUS 1108 kgmmuFaultBufferFree_IMPL 1109 ( 1110 OBJGPU *pGpu, 1111 KernelGmmu *pKernelGmmu, 1112 NvU32 index 1113 ) 1114 { 1115 struct HW_FAULT_BUFFER *pFaultBuffer; 1116 1117 NV_ASSERT_OR_RETURN((index < NUM_FAULT_BUFFERS), NV_ERR_INVALID_ARGUMENT); 1118 1119 if (pKernelGmmu->getProperty(pKernelGmmu, PDB_PROP_KGMMU_FAULT_BUFFER_DISABLED)) 1120 return NV_OK; 1121 1122 pFaultBuffer = &pKernelGmmu->mmuFaultBuffer[GPU_GFID_PF].hwFaultBuffers[index]; 1123 1124 memdescFree(pFaultBuffer->pFaultBufferMemDesc); 1125 1126 kgmmuFaultBufferUnregister(pGpu, pKernelGmmu, index); 1127 1128 return NV_OK; 1129 } 1130 1131 NV_STATUS 1132 kgmmuFaultBufferReplayableAllocate_IMPL 1133 ( 1134 OBJGPU *pGpu, 1135 KernelGmmu *pKernelGmmu, 1136 NvHandle hClient, 1137 NvHandle hObject 1138 ) 1139 { 1140 NV_STATUS status; 1141 struct HW_FAULT_BUFFER *pFaultBuffer; 1142 NvU32 faultBufferSize; 1143 NvU32 numBufferPages; 1144 const NV2080_CTRL_INTERNAL_GMMU_GET_STATIC_INFO_PARAMS *pStaticInfo = kgmmuGetStaticInfo(pGpu, pKernelGmmu); 1145 1146 if (IS_VIRTUAL_WITHOUT_SRIOV(pGpu) || 1147 pKernelGmmu->getProperty(pKernelGmmu, PDB_PROP_KGMMU_FAULT_BUFFER_DISABLED)) 1148 { 1149 return NV_OK; 1150 } 1151 1152 pFaultBuffer = &pKernelGmmu->mmuFaultBuffer[GPU_GFID_PF].hwFaultBuffers[REPLAYABLE_FAULT_BUFFER]; 1153 if (pFaultBuffer->pFaultBufferMemDesc != NULL) 1154 { 1155 return NV_ERR_NOT_SUPPORTED; 1156 } 1157 1158 faultBufferSize = pStaticInfo->replayableFaultBufferSize; 1159 1160 status = kgmmuFaultBufferAlloc(pGpu, pKernelGmmu, 1161 REPLAYABLE_FAULT_BUFFER, 1162 faultBufferSize); 1163 if (status != NV_OK) 1164 { 1165 return status; 1166 } 1167 1168 if (IS_GSP_CLIENT(pGpu)) 1169 { 1170 RM_API *pRmApi = GPU_GET_PHYSICAL_RMAPI(pGpu); 1171 NV2080_CTRL_INTERNAL_GMMU_REGISTER_FAULT_BUFFER_PARAMS *pParams; 1172 1173 pParams = portMemAllocNonPaged(sizeof(*pParams)); 1174 if (pParams == NULL) 1175 { 1176 kgmmuFaultBufferFree(pGpu, pKernelGmmu, REPLAYABLE_FAULT_BUFFER); 1177 return NV_ERR_NO_MEMORY; 1178 } 1179 portMemSet(pParams, 0, sizeof(*pParams)); 1180 1181 numBufferPages = RM_PAGE_ALIGN_UP(faultBufferSize) / RM_PAGE_SIZE; 1182 if (numBufferPages > NV_ARRAY_ELEMENTS(pParams->faultBufferPteArray)) 1183 { 1184 portMemFree(pParams); 1185 kgmmuFaultBufferFree(pGpu, pKernelGmmu, REPLAYABLE_FAULT_BUFFER); 1186 return NV_ERR_BUFFER_TOO_SMALL; 1187 } 1188 1189 memdescGetPhysAddrs(pFaultBuffer->pFaultBufferMemDesc, 1190 AT_GPU, 0, RM_PAGE_SIZE, 1191 numBufferPages, pParams->faultBufferPteArray); 1192 1193 pParams->hClient = hClient; 1194 pParams->hObject = hObject; 1195 pParams->faultBufferSize = faultBufferSize; 1196 1197 status = 
pRmApi->Control(pRmApi, 1198 pGpu->hInternalClient, 1199 pGpu->hInternalSubdevice, 1200 NV2080_CTRL_CMD_INTERNAL_GMMU_REGISTER_FAULT_BUFFER, 1201 pParams, sizeof(*pParams)); 1202 1203 portMemFree(pParams); 1204 if (status != NV_OK) 1205 { 1206 kgmmuFaultBufferFree(pGpu, pKernelGmmu, REPLAYABLE_FAULT_BUFFER); 1207 return status; 1208 } 1209 } 1210 1211 pKernelGmmu->mmuFaultBuffer[GPU_GFID_PF].hFaultBufferClient = hClient; 1212 pKernelGmmu->mmuFaultBuffer[GPU_GFID_PF].hFaultBufferObject = hObject; 1213 pKernelGmmu->mmuFaultBuffer[GPU_GFID_PF].faultBufferGenerationCounter = 0; 1214 1215 return NV_OK; 1216 } 1217 1218 NV_STATUS 1219 kgmmuFaultBufferReplayableDestroy_IMPL 1220 ( 1221 OBJGPU *pGpu, 1222 KernelGmmu *pKernelGmmu 1223 ) 1224 { 1225 NV_STATUS status = NV_OK; 1226 struct HW_FAULT_BUFFER *pFaultBuffer; 1227 1228 if (IS_VIRTUAL_WITHOUT_SRIOV(pGpu) || 1229 pKernelGmmu->getProperty(pKernelGmmu, PDB_PROP_KGMMU_FAULT_BUFFER_DISABLED)) 1230 { 1231 return NV_OK; 1232 } 1233 1234 pFaultBuffer = &pKernelGmmu->mmuFaultBuffer[GPU_GFID_PF].hwFaultBuffers[REPLAYABLE_FAULT_BUFFER]; 1235 if (pFaultBuffer->pFaultBufferMemDesc == NULL) 1236 { 1237 return NV_OK; 1238 } 1239 1240 pKernelGmmu->mmuFaultBuffer[GPU_GFID_PF].hFaultBufferClient = 0; 1241 pKernelGmmu->mmuFaultBuffer[GPU_GFID_PF].hFaultBufferObject = 0; 1242 1243 if (IS_GSP_CLIENT(pGpu)) 1244 { 1245 RM_API *pRmApi = GPU_GET_PHYSICAL_RMAPI(pGpu); 1246 status = pRmApi->Control(pRmApi, 1247 pGpu->hInternalClient, 1248 pGpu->hInternalSubdevice, 1249 NV2080_CTRL_CMD_INTERNAL_GMMU_UNREGISTER_FAULT_BUFFER, 1250 NULL, 0); 1251 if (status != NV_OK) 1252 { 1253 NV_PRINTF(LEVEL_ERROR, 1254 "Unregistering Replayable Fault buffer failed (status=0x%08x), proceeding...\n", 1255 status); 1256 } 1257 } 1258 1259 if (RMCFG_FEATURE_PLATFORM_GSP) 1260 { 1261 status = kgmmuFaultBufferUnregister(pGpu, pKernelGmmu, REPLAYABLE_FAULT_BUFFER); 1262 } 1263 else 1264 { 1265 status = kgmmuFaultBufferFree(pGpu, pKernelGmmu, REPLAYABLE_FAULT_BUFFER); 1266 } 1267 1268 if (status != NV_OK) 1269 { 1270 NV_PRINTF(LEVEL_ERROR, 1271 "Destroying Replayable Fault buffer failed (status=0x%08x), proceeding...\n", 1272 status); 1273 } 1274 1275 return NV_OK; 1276 } 1277 1278 /*! 1279 * @brief: Encodes peer addresses to support NVSwitch systems. 1280 * 1281 * This function prepends the fabricBaseAddress to a physical address in order 1282 * to generate a unique peer address from the global fabric address space. 1283 * 1284 * @param[in] pAddresses : Array of physical addresses to be encoded. 1285 * @param[in] fabricBaseAddress : Unique fabric base address. 1286 * @param[in] count : Count if physical addresses. 1287 */ 1288 static void 1289 _kgmmuEncodePeerAddrs 1290 ( 1291 NvU64 *pAddresses, 1292 NvU64 fabricBaseAddress, 1293 NvU64 count 1294 ) 1295 { 1296 NvU64 i; 1297 1298 // 1299 // If there is no fabric address, it should be a NOP. Note, this acts as an 1300 // early complete path for other PEER addressing. 
1301 // 1302 if (fabricBaseAddress == NVLINK_INVALID_FABRIC_ADDR) 1303 { 1304 return; 1305 } 1306 1307 for (i = 0; i < count; i++) 1308 { 1309 pAddresses[i] = fabricBaseAddress + pAddresses[i]; 1310 } 1311 } 1312 1313 void 1314 kgmmuEncodePhysAddrs_IMPL 1315 ( 1316 KernelGmmu *pKernelGmmu, 1317 const GMMU_APERTURE aperture, 1318 NvU64 *pAddresses, 1319 NvU64 fabricBaseAddress, 1320 NvU64 count 1321 ) 1322 { 1323 NV_ASSERT(aperture != GMMU_APERTURE_INVALID); 1324 1325 if (aperture == GMMU_APERTURE_SYS_COH || 1326 aperture == GMMU_APERTURE_SYS_NONCOH) 1327 { 1328 kgmmuEncodeSysmemAddrs_HAL(pKernelGmmu, pAddresses, count); 1329 } 1330 else if (aperture == GMMU_APERTURE_PEER) 1331 { 1332 _kgmmuEncodePeerAddrs(pAddresses, fabricBaseAddress, count); 1333 } 1334 else 1335 { 1336 return; 1337 } 1338 } 1339 1340 NvU64 1341 kgmmuEncodePhysAddr_IMPL 1342 ( 1343 KernelGmmu *pKernelGmmu, 1344 const GMMU_APERTURE aperture, 1345 NvU64 physAddr, 1346 NvU64 fabricBaseAddress 1347 ) 1348 { 1349 kgmmuEncodePhysAddrs(pKernelGmmu, aperture, &physAddr, fabricBaseAddress, 1); 1350 return physAddr; 1351 } 1352 1353 static void 1354 _kgmmuClientShadowBufferQueueCopyData 1355 ( 1356 NvLength msgSize, 1357 NvLength opIdx, 1358 QueueContext *pCtx, 1359 void *pData, 1360 NvLength count, 1361 NvBool bCopyIn 1362 ) 1363 { 1364 NvLength size; 1365 GMMU_CLIENT_SHADOW_FAULT_BUFFER *pClientShadowFaultBuffer = pCtx->pData; 1366 NvU8 *pQueueData, *pClientData = pData; 1367 void *pDst, *pSrc; 1368 1369 if (count == 0) 1370 return; 1371 1372 size = count * msgSize; 1373 pQueueData = KERNEL_POINTER_FROM_NvP64(NvU8 *, pClientShadowFaultBuffer->pBufferAddress); 1374 pQueueData = pQueueData + (opIdx * msgSize); 1375 1376 pDst = bCopyIn ? pQueueData : pClientData; 1377 pSrc = bCopyIn ? pClientData : pQueueData; 1378 portMemCopy(pDst, size, pSrc, size); 1379 } 1380 1381 static NV_STATUS 1382 _kgmmuClientShadowFaultBufferQueueAllocate 1383 ( 1384 OBJGPU *pGpu, 1385 KernelGmmu *pKernelGmmu, 1386 FAULT_BUFFER_TYPE index 1387 ) 1388 { 1389 NV_STATUS status; 1390 GMMU_CLIENT_SHADOW_FAULT_BUFFER *pClientShadowFaultBuffer; 1391 MEMORY_DESCRIPTOR *pQueueMemDesc; 1392 NvU64 flags = MEMDESC_FLAGS_NONE; 1393 1394 // 1395 // On systems with SEV enabled, the client shadow buffers should be allocated 1396 // in unprotected sysmem as GSP will be writing the fault packets to these 1397 // buffers. 
Since GSP will be encrypting the fault packets, we don't risk 1398 // leaking any information 1399 // 1400 flags |= MEMDESC_FLAGS_ALLOC_IN_UNPROTECTED_MEMORY; 1401 1402 // 1403 // Shadow fault buffers are not implemented using circular queues when 1404 // Hopper CC is enabled 1405 // 1406 if (gpuIsCCFeatureEnabled(pGpu) && gpuIsGspOwnedFaultBuffersEnabled(pGpu)) 1407 return NV_OK; 1408 1409 pClientShadowFaultBuffer = &pKernelGmmu->mmuFaultBuffer[GPU_GFID_PF].clientShadowFaultBuffer[index]; 1410 1411 status = memdescCreate(&pQueueMemDesc, pGpu, 1412 sizeof(GMMU_SHADOW_FAULT_BUF), RM_PAGE_SIZE, 1413 NV_TRUE, ADDR_SYSMEM, NV_MEMORY_CACHED, 1414 flags); 1415 if (status != NV_OK) 1416 { 1417 return status; 1418 } 1419 1420 status = memdescAlloc(pQueueMemDesc); 1421 if (status != NV_OK) 1422 { 1423 memdescDestroy(pQueueMemDesc); 1424 return status; 1425 } 1426 1427 status = memdescMap(pQueueMemDesc, 0, 1428 memdescGetSize(pQueueMemDesc), 1429 NV_TRUE, NV_PROTECT_READ_WRITE, 1430 &pClientShadowFaultBuffer->pQueueAddress, 1431 &pClientShadowFaultBuffer->pQueuePriv); 1432 if (status != NV_OK) 1433 { 1434 memdescFree(pQueueMemDesc); 1435 memdescDestroy(pQueueMemDesc); 1436 return status; 1437 } 1438 1439 pClientShadowFaultBuffer->queueContext.pCopyData = _kgmmuClientShadowBufferQueueCopyData; 1440 pClientShadowFaultBuffer->queueContext.pData = pClientShadowFaultBuffer; 1441 pClientShadowFaultBuffer->pQueueMemDesc = pQueueMemDesc; 1442 1443 return NV_OK; 1444 } 1445 1446 void 1447 kgmmuClientShadowFaultBufferQueueDestroy_IMPL 1448 ( 1449 OBJGPU *pGpu, 1450 KernelGmmu *pKernelGmmu, 1451 NvBool bFreeQueue, 1452 FAULT_BUFFER_TYPE index 1453 ) 1454 { 1455 GMMU_CLIENT_SHADOW_FAULT_BUFFER *pClientShadowFaultBuffer; 1456 MEMORY_DESCRIPTOR *pQueueMemDesc; 1457 1458 // 1459 // Shadow fault buffers are not implemented using circular queues when 1460 // Hopper CC is enabled. So, there is nothing to free here 1461 // 1462 if (gpuIsCCFeatureEnabled(pGpu) && gpuIsGspOwnedFaultBuffersEnabled(pGpu)) 1463 return; 1464 1465 pClientShadowFaultBuffer = &pKernelGmmu->mmuFaultBuffer[GPU_GFID_PF].clientShadowFaultBuffer[index]; 1466 1467 pQueueMemDesc = pClientShadowFaultBuffer->pQueueMemDesc; 1468 1469 pClientShadowFaultBuffer->pQueueMemDesc = NULL; 1470 pClientShadowFaultBuffer->pQueueAddress = NvP64_NULL; 1471 pClientShadowFaultBuffer->pQueuePriv = NvP64_NULL; 1472 1473 if (bFreeQueue) 1474 { 1475 memdescFree(pQueueMemDesc); 1476 } 1477 memdescDestroy(pQueueMemDesc); 1478 } 1479 1480 static NV_STATUS 1481 _kgmmuClientShadowFaultBufferPagesAllocate 1482 ( 1483 OBJGPU *pGpu, 1484 KernelGmmu *pKernelGmmu, 1485 NvU32 shadowFaultBufferSize, 1486 NvU32 shadowFaultBufferMetadataSize, 1487 FAULT_BUFFER_TYPE index 1488 ) 1489 { 1490 NV_STATUS status; 1491 GMMU_CLIENT_SHADOW_FAULT_BUFFER *pClientShadowFaultBuffer; 1492 MEMORY_DESCRIPTOR *pMemDesc; 1493 NvU64 flags = MEMDESC_FLAGS_NONE; 1494 NvU32 shadowFaultBufferSizeTotal; 1495 1496 // 1497 // On systems with SEV enabled, the client shadow buffers should be allocated 1498 // in unprotected sysmem as GSP will be writing the fault packets to these 1499 // buffers. 
Since GSP will be encrypting the fault packets, we don't risk 1500 // leaking any information 1501 // 1502 flags |= MEMDESC_FLAGS_ALLOC_IN_UNPROTECTED_MEMORY; 1503 1504 pClientShadowFaultBuffer = &pKernelGmmu->mmuFaultBuffer[GPU_GFID_PF].clientShadowFaultBuffer[index]; 1505 1506 shadowFaultBufferSizeTotal = RM_PAGE_ALIGN_UP(shadowFaultBufferSize) + RM_PAGE_ALIGN_UP(shadowFaultBufferMetadataSize); 1507 1508 status = memdescCreate(&pMemDesc, pGpu, 1509 shadowFaultBufferSizeTotal, RM_PAGE_SIZE, 1510 NV_FALSE, ADDR_SYSMEM, NV_MEMORY_CACHED, 1511 flags); 1512 if (status != NV_OK) 1513 { 1514 return status; 1515 } 1516 1517 status = memdescAlloc(pMemDesc); 1518 if (status != NV_OK) 1519 { 1520 memdescDestroy(pMemDesc); 1521 return status; 1522 } 1523 1524 status = memdescMap(pMemDesc, 0, 1525 memdescGetSize(pMemDesc), 1526 NV_TRUE, NV_PROTECT_READ_WRITE, 1527 &pClientShadowFaultBuffer->pBufferAddress, 1528 &pClientShadowFaultBuffer->pBufferPriv); 1529 if (status != NV_OK) 1530 { 1531 memdescFree(pMemDesc); 1532 memdescDestroy(pMemDesc); 1533 return status; 1534 } 1535 1536 pClientShadowFaultBuffer->pFaultBufferMetadataAddress = 1537 ((NvP64)(((NvU64) pClientShadowFaultBuffer->pBufferAddress) + 1538 RM_PAGE_ALIGN_UP(shadowFaultBufferSize))); 1539 pClientShadowFaultBuffer->pBufferMemDesc = pMemDesc; 1540 1541 return NV_OK; 1542 } 1543 1544 void 1545 kgmmuClientShadowFaultBufferPagesDestroy_IMPL 1546 ( 1547 OBJGPU *pGpu, 1548 KernelGmmu *pKernelGmmu, 1549 NvBool bFreePages, 1550 FAULT_BUFFER_TYPE index 1551 ) 1552 { 1553 MEMORY_DESCRIPTOR *pMemDesc; 1554 GMMU_CLIENT_SHADOW_FAULT_BUFFER *pClientShadowFaultBuffer; 1555 GMMU_FAULT_BUFFER_PAGE *pBufferPage; 1556 NvU32 i; 1557 1558 pClientShadowFaultBuffer = &pKernelGmmu->mmuFaultBuffer[GPU_GFID_PF].clientShadowFaultBuffer[index]; 1559 pMemDesc = pClientShadowFaultBuffer->pBufferMemDesc; 1560 1561 if (bFreePages) 1562 { 1563 memdescUnmap(pMemDesc, 1564 NV_TRUE, osGetCurrentProcess(), 1565 pClientShadowFaultBuffer->pBufferAddress, 1566 pClientShadowFaultBuffer->pBufferPriv); 1567 1568 memdescFree(pMemDesc); 1569 } 1570 else 1571 { 1572 for (i = 0; i < pClientShadowFaultBuffer->numBufferPages; i++) 1573 { 1574 pBufferPage = &pClientShadowFaultBuffer->pBufferPages[i]; 1575 1576 memdescUnmap(pMemDesc, NV_TRUE, osGetCurrentProcess(), 1577 pBufferPage->pAddress, pBufferPage->pPriv); 1578 } 1579 portMemFree(pClientShadowFaultBuffer->pBufferPages); 1580 } 1581 memdescDestroy(pMemDesc); 1582 } 1583 1584 NV_STATUS 1585 kgmmuClientShadowFaultBufferRegister_IMPL 1586 ( 1587 OBJGPU *pGpu, 1588 KernelGmmu *pKernelGmmu, 1589 FAULT_BUFFER_TYPE index 1590 ) 1591 { 1592 NV_STATUS status = NV_OK; 1593 struct GMMU_FAULT_BUFFER *pFaultBuffer; 1594 GMMU_CLIENT_SHADOW_FAULT_BUFFER *pClientShadowFaultBuffer; 1595 GMMU_SHADOW_FAULT_BUF *pQueue; 1596 MEMORY_DESCRIPTOR *pBufferMemDesc; 1597 RmPhysAddr shadowFaultBufferQueuePhysAddr; 1598 NvU32 queueCapacity, numBufferPages; 1599 NvU32 faultBufferSize; 1600 NvU32 shadowFaultBufferMetadataSize; 1601 const NV2080_CTRL_INTERNAL_GMMU_GET_STATIC_INFO_PARAMS *pStaticInfo = kgmmuGetStaticInfo(pGpu, pKernelGmmu); 1602 NvBool bQueueAllocated = NV_FALSE; 1603 1604 pFaultBuffer = &pKernelGmmu->mmuFaultBuffer[GPU_GFID_PF]; 1605 pClientShadowFaultBuffer = &pFaultBuffer->clientShadowFaultBuffer[index]; 1606 1607 if (index == NON_REPLAYABLE_FAULT_BUFFER) 1608 { 1609 faultBufferSize = pStaticInfo->nonReplayableFaultBufferSize; 1610 shadowFaultBufferMetadataSize = pStaticInfo->nonReplayableShadowFaultBufferMetadataSize; 1611 } 1612 else if (index 
== REPLAYABLE_FAULT_BUFFER) 1613 { 1614 faultBufferSize = pStaticInfo->replayableFaultBufferSize; 1615 shadowFaultBufferMetadataSize = pStaticInfo->replayableShadowFaultBufferMetadataSize; 1616 } 1617 else 1618 { 1619 NV_ASSERT_OR_RETURN(0, NV_ERR_INVALID_ARGUMENT); 1620 } 1621 1622 // 1623 // We don't use circular queues for shadow fault buffers when Hopper 1624 // CC is enabled 1625 // 1626 if (!gpuIsCCFeatureEnabled(pGpu) || !gpuIsGspOwnedFaultBuffersEnabled(pGpu)) 1627 { 1628 pQueue = KERNEL_POINTER_FROM_NvP64(GMMU_SHADOW_FAULT_BUF *, 1629 pClientShadowFaultBuffer->pQueueAddress); 1630 queueCapacity = faultBufferSize / NVC369_BUF_SIZE; 1631 1632 status = queueInitNonManaged(pQueue, queueCapacity); 1633 if (status != NV_OK) 1634 { 1635 return status; 1636 } 1637 bQueueAllocated = NV_TRUE; 1638 } 1639 1640 if (!IS_GSP_CLIENT(pGpu)) 1641 { 1642 portSyncSpinlockAcquire(pFaultBuffer->pShadowFaultBufLock); 1643 1644 if (pFaultBuffer->pClientShadowFaultBuffer[index] == NULL) 1645 { 1646 pFaultBuffer->pClientShadowFaultBuffer[index] = pClientShadowFaultBuffer; 1647 } 1648 else 1649 { 1650 status = NV_ERR_NOT_SUPPORTED; 1651 } 1652 1653 portSyncSpinlockRelease(pFaultBuffer->pShadowFaultBufLock); 1654 1655 if (status != NV_OK) 1656 { 1657 if (bQueueAllocated) 1658 queueDestroy(pQueue); 1659 return status; 1660 } 1661 } 1662 else 1663 { 1664 RM_API *pRmApi = GPU_GET_PHYSICAL_RMAPI(pGpu); 1665 NV2080_CTRL_INTERNAL_GMMU_REGISTER_CLIENT_SHADOW_FAULT_BUFFER_PARAMS *pParams; 1666 1667 pParams = portMemAllocNonPaged(sizeof(*pParams)); 1668 if (pParams == NULL) 1669 { 1670 if (bQueueAllocated) 1671 queueDestroy(pQueue); 1672 return NV_ERR_NO_MEMORY; 1673 } 1674 portMemSet(pParams, 0, sizeof(*pParams)); 1675 1676 pBufferMemDesc = pClientShadowFaultBuffer->pBufferMemDesc; 1677 numBufferPages = memdescGetSize(pBufferMemDesc) >> RM_PAGE_SHIFT; 1678 if (numBufferPages > NV_ARRAY_ELEMENTS(pParams->shadowFaultBufferPteArray)) 1679 { 1680 portMemFree(pParams); 1681 if (bQueueAllocated) 1682 queueDestroy(pQueue); 1683 return NV_ERR_BUFFER_TOO_SMALL; 1684 } 1685 1686 memdescGetPhysAddrs(pBufferMemDesc, 1687 AT_GPU, 1688 0, RM_PAGE_SIZE, 1689 numBufferPages, pParams->shadowFaultBufferPteArray); 1690 1691 if (!gpuIsCCFeatureEnabled(pGpu) || !gpuIsGspOwnedFaultBuffersEnabled(pGpu)) 1692 { 1693 shadowFaultBufferQueuePhysAddr = memdescGetPhysAddr(pClientShadowFaultBuffer->pQueueMemDesc, 1694 AT_GPU, 0); 1695 pParams->shadowFaultBufferQueuePhysAddr = shadowFaultBufferQueuePhysAddr; 1696 } 1697 pParams->shadowFaultBufferSize = faultBufferSize; 1698 pParams->shadowFaultBufferMetadataSize = shadowFaultBufferMetadataSize; 1699 pParams->shadowFaultBufferType = (index == NON_REPLAYABLE_FAULT_BUFFER) ? 
1700 NV2080_CTRL_FAULT_BUFFER_NON_REPLAYABLE : 1701 NV2080_CTRL_FAULT_BUFFER_REPLAYABLE; 1702 1703 if (gpuIsCCFeatureEnabled(pGpu) && gpuIsGspOwnedFaultBuffersEnabled(pGpu) && index == REPLAYABLE_FAULT_BUFFER) 1704 { 1705 pParams->faultBufferSharedMemoryPhysAddr = memdescGetPhysAddr(pClientShadowFaultBuffer->pFaultBufferSharedMemDesc, 1706 AT_GPU, 0); 1707 } 1708 1709 status = pRmApi->Control(pRmApi, 1710 pGpu->hInternalClient, 1711 pGpu->hInternalSubdevice, 1712 NV2080_CTRL_CMD_INTERNAL_GMMU_REGISTER_CLIENT_SHADOW_FAULT_BUFFER, 1713 pParams, sizeof(*pParams)); 1714 1715 portMemFree(pParams); 1716 if (status != NV_OK) 1717 { 1718 if (bQueueAllocated) 1719 queueDestroy(pQueue); 1720 return status; 1721 } 1722 1723 pFaultBuffer->pClientShadowFaultBuffer[index] = pClientShadowFaultBuffer; 1724 } 1725 1726 return NV_OK; 1727 } 1728 1729 void 1730 kgmmuClientShadowFaultBufferUnregister_IMPL 1731 ( 1732 OBJGPU *pGpu, 1733 KernelGmmu *pKernelGmmu, 1734 FAULT_BUFFER_TYPE index 1735 ) 1736 { 1737 NV_STATUS status = NV_OK; 1738 GMMU_CLIENT_SHADOW_FAULT_BUFFER *pClientShadowFaultBuffer; 1739 GMMU_SHADOW_FAULT_BUF *pQueue; 1740 struct GMMU_FAULT_BUFFER *pFaultBuffer; 1741 1742 pFaultBuffer = &pKernelGmmu->mmuFaultBuffer[GPU_GFID_PF]; 1743 1744 if (!IS_GSP_CLIENT(pGpu)) 1745 { 1746 portSyncSpinlockAcquire(pFaultBuffer->pShadowFaultBufLock); 1747 1748 pFaultBuffer->pClientShadowFaultBuffer[index] = NULL; 1749 1750 portSyncSpinlockRelease(pFaultBuffer->pShadowFaultBufLock); 1751 } 1752 else 1753 { 1754 RM_API *pRmApi = GPU_GET_PHYSICAL_RMAPI(pGpu); 1755 NV2080_CTRL_INTERNAL_GMMU_UNREGISTER_CLIENT_SHADOW_FAULT_BUFFER_PARAMS params; 1756 1757 portMemSet(¶ms, 0, sizeof(params)); 1758 1759 params.shadowFaultBufferType = (index == NON_REPLAYABLE_FAULT_BUFFER) ? 1760 NV2080_CTRL_FAULT_BUFFER_NON_REPLAYABLE : 1761 NV2080_CTRL_FAULT_BUFFER_REPLAYABLE; 1762 status = pRmApi->Control(pRmApi, 1763 pGpu->hInternalClient, 1764 pGpu->hInternalSubdevice, 1765 NV2080_CTRL_CMD_INTERNAL_GMMU_UNREGISTER_CLIENT_SHADOW_FAULT_BUFFER, 1766 ¶ms, sizeof(params)); 1767 if (status != NV_OK) 1768 { 1769 NV_PRINTF(LEVEL_ERROR, 1770 "Unregistering %s fault buffer failed (status=0x%08x), proceeding...\n", 1771 (index == NON_REPLAYABLE_FAULT_BUFFER) ? "non-replayable" : "replayable", 1772 status); 1773 } 1774 1775 pFaultBuffer->pClientShadowFaultBuffer[index] = NULL; 1776 } 1777 1778 if (!gpuIsCCFeatureEnabled(pGpu) || !gpuIsGspOwnedFaultBuffersEnabled(pGpu)) 1779 { 1780 pClientShadowFaultBuffer = &pFaultBuffer->clientShadowFaultBuffer[index]; 1781 pQueue = KERNEL_POINTER_FROM_NvP64(GMMU_SHADOW_FAULT_BUF *, 1782 pClientShadowFaultBuffer->pQueueAddress); 1783 queueDestroy(pQueue); 1784 } 1785 } 1786 1787 /*! 1788 * @brief Creates shadow fault buffer for client handling of replayable/non-replayable 1789 * faults in the CPU-RM, and registers it in the GSP-RM. 
1790 * 1791 * @param[in] pGpu 1792 * @param[in] pKernelGmmu 1793 * @param[in] index Replayable or non-replayable fault buffer 1794 * 1795 * @returns 1796 */ 1797 NV_STATUS 1798 kgmmuClientShadowFaultBufferAllocate_IMPL 1799 ( 1800 OBJGPU *pGpu, 1801 KernelGmmu *pKernelGmmu, 1802 FAULT_BUFFER_TYPE index 1803 ) 1804 { 1805 NV_STATUS status; 1806 const NV2080_CTRL_INTERNAL_GMMU_GET_STATIC_INFO_PARAMS *pStaticInfo = kgmmuGetStaticInfo(pGpu, pKernelGmmu); 1807 NvU32 faultBufferSize; 1808 NvU32 shadowFaultBufferMetadataSize; 1809 1810 ct_assert((RM_PAGE_SIZE % sizeof(struct GMMU_FAULT_PACKET)) == 0); 1811 1812 NV_ASSERT_OR_RETURN(!pKernelGmmu->getProperty(pKernelGmmu, PDB_PROP_KGMMU_FAULT_BUFFER_DISABLED), NV_ERR_INVALID_STATE); 1813 1814 NV_ASSERT_OR_RETURN(pStaticInfo->nonReplayableFaultBufferSize != 0, NV_ERR_INVALID_STATE); 1815 1816 if (index == NON_REPLAYABLE_FAULT_BUFFER) 1817 { 1818 faultBufferSize = pStaticInfo->nonReplayableFaultBufferSize; 1819 shadowFaultBufferMetadataSize = pStaticInfo->nonReplayableShadowFaultBufferMetadataSize; 1820 } 1821 else if (index == REPLAYABLE_FAULT_BUFFER) 1822 { 1823 faultBufferSize = pStaticInfo->replayableFaultBufferSize; 1824 shadowFaultBufferMetadataSize = pStaticInfo->replayableShadowFaultBufferMetadataSize; 1825 } 1826 else 1827 { 1828 NV_ASSERT_OR_RETURN(0, NV_ERR_INVALID_ARGUMENT); 1829 } 1830 1831 status = _kgmmuClientShadowFaultBufferQueueAllocate(pGpu, pKernelGmmu, index); 1832 if (status != NV_OK) 1833 { 1834 return status; 1835 } 1836 1837 status = _kgmmuClientShadowFaultBufferPagesAllocate(pGpu, pKernelGmmu, 1838 faultBufferSize, 1839 shadowFaultBufferMetadataSize, 1840 index); 1841 if (status != NV_OK) 1842 { 1843 goto destroy_queue_and_exit; 1844 } 1845 1846 status = kgmmuFaultBufferAllocSharedMemory_HAL(pGpu, pKernelGmmu, index); 1847 if (status != NV_OK) 1848 { 1849 goto destroy_pages_and_exit; 1850 } 1851 1852 status = kgmmuClientShadowFaultBufferRegister(pGpu, pKernelGmmu, 1853 index); 1854 if (status != NV_OK) 1855 { 1856 goto destroy_shared_memory_and_exit; 1857 } 1858 1859 return NV_OK; 1860 1861 destroy_shared_memory_and_exit: 1862 kgmmuFaultBufferFreeSharedMemory_HAL(pGpu, pKernelGmmu, index); 1863 destroy_pages_and_exit: 1864 kgmmuClientShadowFaultBufferPagesDestroy(pGpu, pKernelGmmu, NV_TRUE, 1865 index); 1866 destroy_queue_and_exit: 1867 kgmmuClientShadowFaultBufferQueueDestroy(pGpu, pKernelGmmu, NV_TRUE, 1868 index); 1869 return status; 1870 } 1871 1872 /*! 1873 * @brief Unregister client shadow fault buffer in the GSP-RM or destroy 1874 * it in the CPU-RM. 1875 * 1876 * @param[in] pGpu 1877 * @param[in] pKernelGmmu 1878 * 1879 * @returns 1880 */ 1881 NV_STATUS 1882 kgmmuClientShadowFaultBufferDestroy_IMPL 1883 ( 1884 OBJGPU *pGpu, 1885 KernelGmmu *pKernelGmmu, 1886 FAULT_BUFFER_TYPE index 1887 ) 1888 { 1889 GMMU_CLIENT_SHADOW_FAULT_BUFFER *pClientShadowFaultBuffer; 1890 NvBool bFreeMemory = !RMCFG_FEATURE_PLATFORM_GSP; 1891 1892 pClientShadowFaultBuffer = 1893 pKernelGmmu->mmuFaultBuffer[GPU_GFID_PF].pClientShadowFaultBuffer[index]; 1894 1895 if (pClientShadowFaultBuffer != NvP64_NULL) 1896 { 1897 kgmmuClientShadowFaultBufferUnregister(pGpu, pKernelGmmu, 1898 index); 1899 1900 kgmmuFaultBufferFreeSharedMemory_HAL(pGpu, pKernelGmmu, index); 1901 1902 kgmmuClientShadowFaultBufferPagesDestroy(pGpu, pKernelGmmu, bFreeMemory, 1903 index); 1904 kgmmuClientShadowFaultBufferQueueDestroy(pGpu, pKernelGmmu, bFreeMemory, 1905 index); 1906 } 1907 1908 return NV_OK; 1909 } 1910 1911 /*! 
1912 * Returns the minimum allocation size to align to big-page size in bytes 1913 * 1914 * @param[in] pKernelGmmu 1915 * 1916 * @return NvU32 1917 */ 1918 NvU64 1919 kgmmuGetMinBigPageSize_IMPL(KernelGmmu *pKernelGmmu) 1920 { 1921 // 1922 // Set the minimum size in the heap that we will round up to a big page instead 1923 // just 4KB. HW doesn't like 4KB pages in video memory, but SW wants to pack 1924 // physical memory sometimes. Typically UMDs that really care about perf use 1925 // suballocation for larger RM allocations anyway. 1926 // 1927 // Promote allocates bigger than half the big page size. 1928 // (this is a policy change for Big page sizes/VASpace) 1929 // 1930 return RM_PAGE_SIZE_64K >> 1; 1931 } 1932 1933 /*! 1934 * @brief Initializes the init block for an engine 1935 * 1936 * @param[in] pKernelGmmu 1937 * @param[in] pInstBlkDesc Memory descriptor for the instance block of the engine 1938 * @param[in] pVAS OBJVASPACE pointer of the engine 1939 * @param[in] subctxId subctxId Value 1940 * @param[in] pInstBlkParams Pointer to the structure storing the parameters passed by the caller 1941 * 1942 * @returns NV_STATUS 1943 */ 1944 NV_STATUS 1945 kgmmuInstBlkInit_IMPL 1946 ( 1947 KernelGmmu *pKernelGmmu, 1948 MEMORY_DESCRIPTOR *pInstBlkDesc, 1949 OBJVASPACE *pVAS, 1950 NvU32 subctxId, 1951 INST_BLK_INIT_PARAMS *pInstBlkParams 1952 ) 1953 { 1954 OBJGPU *pGpu = ENG_GET_GPU(pKernelGmmu); 1955 KernelBus *pKernelBus = GPU_GET_KERNEL_BUS(pGpu); 1956 NvU8 *pInstBlk; // CPU VA of instance block. 1957 NvU64 vaLimitData; 1958 NvU32 vaLimitOffset; 1959 NvU32 dirBaseHiOffset; 1960 NvU32 dirBaseHiData; 1961 NvU32 dirBaseLoOffset; 1962 NvU32 dirBaseLoData; 1963 NvU32 atsOffset; 1964 NvU32 atsData; 1965 NvU32 magicValueOffset; 1966 NvU32 magicValueData; 1967 NV_STATUS status = NV_OK; 1968 1969 NV_ASSERT(!gpumgrGetBcEnabledStatus(pGpu)); 1970 1971 // Get VA limit 1972 status = kgmmuInstBlkVaLimitGet_HAL(pKernelGmmu, pVAS, subctxId, pInstBlkParams, &vaLimitOffset, &vaLimitData); 1973 NV_ASSERT_OR_RETURN((status == NV_OK), status); 1974 1975 // Get page dir base 1976 NV_ASSERT_OK_OR_RETURN(kgmmuInstBlkPageDirBaseGet_HAL(pGpu, pKernelGmmu, 1977 pVAS, pInstBlkParams, subctxId, 1978 &dirBaseLoOffset, &dirBaseLoData, &dirBaseHiOffset, &dirBaseHiData)); 1979 1980 if ((pVAS != NULL) && vaspaceIsAtsEnabled(pVAS)) 1981 { 1982 // Coherent link ATS parameters are only set on the new VMM path. 
/*!
 * @brief Initializes the instance block for an engine
 *
 * @param[in] pKernelGmmu
 * @param[in] pInstBlkDesc    Memory descriptor for the instance block of the engine
 * @param[in] pVAS            OBJVASPACE pointer of the engine
 * @param[in] subctxId        subctxId value
 * @param[in] pInstBlkParams  Pointer to the structure storing the parameters passed by the caller
 *
 * @returns NV_STATUS
 */
NV_STATUS
kgmmuInstBlkInit_IMPL
(
    KernelGmmu           *pKernelGmmu,
    MEMORY_DESCRIPTOR    *pInstBlkDesc,
    OBJVASPACE           *pVAS,
    NvU32                 subctxId,
    INST_BLK_INIT_PARAMS *pInstBlkParams
)
{
    OBJGPU    *pGpu       = ENG_GET_GPU(pKernelGmmu);
    KernelBus *pKernelBus = GPU_GET_KERNEL_BUS(pGpu);
    NvU8      *pInstBlk;        // CPU VA of instance block.
    NvU64      vaLimitData;
    NvU32      vaLimitOffset;
    NvU32      dirBaseHiOffset;
    NvU32      dirBaseHiData;
    NvU32      dirBaseLoOffset;
    NvU32      dirBaseLoData;
    NvU32      atsOffset;
    NvU32      atsData;
    NvU32      magicValueOffset;
    NvU32      magicValueData;
    NV_STATUS  status = NV_OK;

    NV_ASSERT(!gpumgrGetBcEnabledStatus(pGpu));

    // Get VA limit
    status = kgmmuInstBlkVaLimitGet_HAL(pKernelGmmu, pVAS, subctxId, pInstBlkParams,
                                        &vaLimitOffset, &vaLimitData);
    NV_ASSERT_OR_RETURN((status == NV_OK), status);

    // Get page dir base
    NV_ASSERT_OK_OR_RETURN(kgmmuInstBlkPageDirBaseGet_HAL(pGpu, pKernelGmmu,
                               pVAS, pInstBlkParams, subctxId,
                               &dirBaseLoOffset, &dirBaseLoData,
                               &dirBaseHiOffset, &dirBaseHiData));

    if ((pVAS != NULL) && vaspaceIsAtsEnabled(pVAS))
    {
        // Coherent link ATS parameters are only set on the new VMM path.
        status = kgmmuInstBlkAtsGet_HAL(pKernelGmmu, pVAS, subctxId, &atsOffset, &atsData);
        NV_ASSERT_OR_RETURN((status == NV_OK), status);
    }
    else
    {
        atsOffset = 0;
        atsData = 0;
    }

    status = kgmmuInstBlkMagicValueGet_HAL(pKernelGmmu, &magicValueOffset, &magicValueData);

    // Write the fields out
    pInstBlk = pInstBlkParams->pInstBlk;

    if (pInstBlk != NULL)
    {
        if (vaLimitOffset != 0)
        {
            // TO DO: FMODEL fails with MEM_WR64
            if (IS_SIMULATION(pGpu))
            {
                MEM_WR32(pInstBlk + vaLimitOffset + 0, NvU64_LO32(vaLimitData));
                MEM_WR32(pInstBlk + vaLimitOffset + 4, NvU64_HI32(vaLimitData));
            }
            else
            {
                MEM_WR64(pInstBlk + vaLimitOffset, vaLimitData);
            }
        }

        MEM_WR32(pInstBlk + dirBaseHiOffset, dirBaseHiData);
        MEM_WR32(pInstBlk + dirBaseLoOffset, dirBaseLoData);

        if (atsOffset != 0)
            MEM_WR32(pInstBlk + atsOffset, atsData);

        if (status == NV_OK)
            MEM_WR32(pInstBlk + magicValueOffset, magicValueData);
    }
    else
    {
        MemoryManager *pMemoryManager = GPU_GET_MEMORY_MANAGER(pGpu);

        pInstBlk = memmgrMemDescBeginTransfer(pMemoryManager, pInstBlkDesc,
                                              TRANSFER_FLAGS_SHADOW_ALLOC);
        if (pInstBlk == NULL)
        {
            return NV_ERR_INSUFFICIENT_RESOURCES;
        }

        if (vaLimitOffset != 0)
        {
            // TO DO: FMODEL fails with MEM_WR64
            if (IS_SIMULATION(pGpu))
            {
                MEM_WR32(pInstBlk + vaLimitOffset + 0, NvU64_LO32(vaLimitData));
                MEM_WR32(pInstBlk + vaLimitOffset + 4, NvU64_HI32(vaLimitData));
            }
            else
            {
                MEM_WR64(pInstBlk + vaLimitOffset, vaLimitData);
            }
        }

        MEM_WR32(pInstBlk + dirBaseHiOffset, dirBaseHiData);
        MEM_WR32(pInstBlk + dirBaseLoOffset, dirBaseLoData);

        if (atsOffset != 0)
            MEM_WR32(pInstBlk + atsOffset, atsData);

        if (status == NV_OK)
            MEM_WR32(pInstBlk + magicValueOffset, magicValueData);

        memmgrMemDescEndTransfer(pMemoryManager, pInstBlkDesc,
                                 TRANSFER_FLAGS_SHADOW_ALLOC);
    }

    if (!pInstBlkParams->bDeferFlush)
    {
        kbusFlush_HAL(pGpu, pKernelBus, BUS_FLUSH_USE_PCIE_READ |
                      kbusGetFlushAperture(pKernelBus, memdescGetAddressSpace(pInstBlkDesc)));
    }

    return NV_OK;
}

GMMU_APERTURE
kgmmuGetExternalAllocAperture_IMPL
(
    NvU32 addressSpace
)
{
    switch (addressSpace)
    {
        case ADDR_FBMEM:
            return GMMU_APERTURE_VIDEO;
        case ADDR_FABRIC_V2:
        case ADDR_FABRIC_MC:
            return GMMU_APERTURE_PEER;
        case ADDR_SYSMEM:
        case ADDR_VIRTUAL:
            return GMMU_APERTURE_SYS_COH;
        default:
            NV_PRINTF(LEVEL_ERROR, "Unexpected addressSpace (%u) when mapping to GMMU_APERTURE.\n",
                      addressSpace);
            NV_ASSERT(0);
            return GMMU_APERTURE_SYS_COH;
    }
}
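
/*
 * Example mappings (illustrative; the non-_IMPL wrapper name is assumed):
 *
 *     kgmmuGetExternalAllocAperture(ADDR_FBMEM)  -> GMMU_APERTURE_VIDEO
 *     kgmmuGetExternalAllocAperture(ADDR_SYSMEM) -> GMMU_APERTURE_SYS_COH
 *
 * Unrecognized address spaces assert and fall back to GMMU_APERTURE_SYS_COH
 * rather than failing the mapping outright.
 */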
/*!
 * @brief Change the ownership of the access counter notification interrupt
 *        between RM and UVM.
 *
 * @param pGpu
 * @param pKernelGmmu
 * @param bOwnedByRm
 */
void
kgmmuAccessCntrChangeIntrOwnership_IMPL
(
    OBJGPU     *pGpu,
    KernelGmmu *pKernelGmmu,
    NvBool      bOwnedByRm
)
{
    //
    // Disable the interrupt when RM loses the ownership and enable it back when
    // RM regains it. nvUvmInterfaceOwnAccessCntIntr() will rely on this behavior.
    //
    if (bOwnedByRm)
        pKernelGmmu->uvmSharedIntrRmOwnsMask |= RM_UVM_SHARED_INTR_MASK_HUB_ACCESS_COUNTER_NOTIFY;
    else
        pKernelGmmu->uvmSharedIntrRmOwnsMask &= ~RM_UVM_SHARED_INTR_MASK_HUB_ACCESS_COUNTER_NOTIFY;
}

/**
 * @brief Provides an opportunity to register some IntrService during intrStateInit.
 */
void
kgmmuRegisterIntrService_IMPL
(
    OBJGPU            *pGpu,
    KernelGmmu        *pKernelGmmu,
    IntrServiceRecord  pRecords[MC_ENGINE_IDX_MAX]
)
{
    NvU32  engineIdx;
    NvU16 *pEngineIdxList;
    NvU32  listSize;

    static NvU16 engineIdxList[] = {
        MC_ENGINE_IDX_REPLAYABLE_FAULT,
        MC_ENGINE_IDX_REPLAYABLE_FAULT_ERROR,
    };

    static NvU16 engineIdxListForCC[] = {
        MC_ENGINE_IDX_REPLAYABLE_FAULT_CPU,
        MC_ENGINE_IDX_NON_REPLAYABLE_FAULT_CPU,
    };

    if (IS_GSP_CLIENT(pGpu) && gpuIsCCFeatureEnabled(pGpu) && gpuIsGspOwnedFaultBuffersEnabled(pGpu))
    {
        pEngineIdxList = engineIdxListForCC;
        listSize = NV_ARRAY_ELEMENTS(engineIdxListForCC);
    }
    else
    {
        pEngineIdxList = engineIdxList;
        listSize = NV_ARRAY_ELEMENTS(engineIdxList);
    }

    for (NvU32 tableIdx = 0; tableIdx < listSize; tableIdx++)
    {
        engineIdx = pEngineIdxList[tableIdx];
        NV_ASSERT(pRecords[engineIdx].pInterruptService == NULL);
        pRecords[engineIdx].pInterruptService = staticCast(pKernelGmmu, IntrService);
    }
}

/**
 * @brief Service stall interrupts.
 *
 * @returns Zero, or any implementation-chosen nonzero value. If the same nonzero value is returned enough
 *          times the interrupt is considered stuck.
 */
NvU32
kgmmuServiceInterrupt_IMPL
(
    OBJGPU     *pGpu,
    KernelGmmu *pKernelGmmu,
    IntrServiceServiceInterruptArguments *pParams
)
{
    NV_STATUS status;

    NV_ASSERT_OR_RETURN(pParams != NULL, 0);

    switch (pParams->engineIdx)
    {
        case MC_ENGINE_IDX_REPLAYABLE_FAULT:
        {
            status = kgmmuServiceReplayableFault_HAL(pGpu, pKernelGmmu);
            if (status != NV_OK)
            {
                NV_ASSERT_OK_FAILED("Failed to service replayable MMU fault",
                                    status);
            }
            break;
        }
        case MC_ENGINE_IDX_REPLAYABLE_FAULT_ERROR:
        {
            status = kgmmuReportFaultBufferOverflow_HAL(pGpu, pKernelGmmu);
            if (status != NV_OK)
            {
                NV_ASSERT_OK_FAILED(
                    "Failed to report replayable MMU fault buffer overflow error",
                    status);
            }
            break;
        }
        case MC_ENGINE_IDX_NON_REPLAYABLE_FAULT_CPU:
        {
            osQueueMMUFaultHandler(pGpu);
            status = NV_OK;
            break;
        }
        case MC_ENGINE_IDX_REPLAYABLE_FAULT_CPU:
        {
            NV_PRINTF(LEVEL_ERROR, "Unexpected replayable interrupt routed to RM. Verify UVM took ownership.\n");
            status = NV_ERR_INVALID_STATE;
            break;
        }
        default:
        {
            NV_ASSERT_FAILED("Invalid engineIdx");
            break;
        }
    }

    return 0;
}
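
/*
 * Illustrative dispatch sketch (assumptions, not the actual interrupt code):
 * once kgmmuRegisterIntrService() has populated pRecords[], a generic
 * dispatcher can route a pending engine index to the owner recorded there and
 * invoke its serviceInterrupt implementation, conceptually:
 *
 *     IntrService *pOwner = pRecords[MC_ENGINE_IDX_REPLAYABLE_FAULT].pInterruptService;
 *     if (pOwner != NULL)
 *     {
 *         IntrServiceServiceInterruptArguments args = {0};
 *         args.engineIdx = MC_ENGINE_IDX_REPLAYABLE_FAULT;
 *         // dispatch helper name assumed; the NVOC-generated entry point may differ
 *         intrservServiceInterrupt(pGpu, pOwner, &args);
 *     }
 */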
/*!
 * @brief Extract the PTE fields from the PTE and
 *        set the corresponding flags/fields in pPteInfo.
 *
 * @param[in]  pKernelGmmu
 * @param[in]  pPte       Pointer to the PTE contents
 * @param[out] pPteInfo   NV0080_CTRL_DMA_PTE_INFO_PTE_BLOCK pointer to the PTE info block of the cmd params
 * @param[in]  pFmt       GMMU format used to decode the PTE
 * @param[in]  pLevelFmt  Format of the level
 *
 * @returns none
 */
void
kgmmuExtractPteInfo_IMPL
(
    KernelGmmu                         *pKernelGmmu,
    GMMU_ENTRY_VALUE                   *pPte,
    NV0080_CTRL_DMA_PTE_INFO_PTE_BLOCK *pPteInfo,
    const GMMU_FMT                     *pFmt,
    const MMU_FMT_LEVEL                *pLevelFmt
)
{
    OBJGPU             *pGpu = ENG_GET_GPU(pKernelGmmu);
    MemoryManager      *pMemoryManager = GPU_GET_MEMORY_MANAGER(pGpu);
    const GMMU_FMT_PTE *pFmtPte = pFmt->pPte;
    NvBool              bPteValid;

    bPteValid = nvFieldGetBool(&pFmtPte->fldValid, pPte->v8);

    pPteInfo->pteFlags = FLD_SET_DRF_NUM(0080_CTRL, _DMA_PTE_INFO, _PARAMS_FLAGS_VALID,
                                         bPteValid, pPteInfo->pteFlags);

    if (pFmtPte->version != GMMU_FMT_VERSION_3)
    {
        pPteInfo->pteFlags = FLD_SET_DRF_NUM(0080_CTRL, _DMA_PTE_INFO, _PARAMS_FLAGS_ENCRYPTED,
                                             nvFieldGetBool(&pFmtPte->fldEncrypted, pPte->v8), pPteInfo->pteFlags);
    }

    switch (gmmuFieldGetAperture(&pFmtPte->fldAperture, pPte->v8))
    {
        case GMMU_APERTURE_VIDEO:
            pPteInfo->pteFlags = FLD_SET_DRF(0080_CTRL, _DMA_PTE_INFO, _PARAMS_FLAGS_APERTURE,
                                             _VIDEO_MEMORY, pPteInfo->pteFlags);
            break;
        case GMMU_APERTURE_PEER:
            pPteInfo->pteFlags = FLD_SET_DRF(0080_CTRL, _DMA_PTE_INFO, _PARAMS_FLAGS_APERTURE,
                                             _PEER_MEMORY, pPteInfo->pteFlags);
            break;
        case GMMU_APERTURE_SYS_COH:
            pPteInfo->pteFlags = FLD_SET_DRF(0080_CTRL, _DMA_PTE_INFO, _PARAMS_FLAGS_APERTURE,
                                             _SYSTEM_COHERENT_MEMORY, pPteInfo->pteFlags);
            break;
        case GMMU_APERTURE_SYS_NONCOH:
            pPteInfo->pteFlags = FLD_SET_DRF(0080_CTRL, _DMA_PTE_INFO, _PARAMS_FLAGS_APERTURE,
                                             _SYSTEM_NON_COHERENT_MEMORY, pPteInfo->pteFlags);
            break;
        case GMMU_APERTURE_INVALID:
        default:
            NV_ASSERT(0);
            break;
    }

    if (pFmtPte->version == GMMU_FMT_VERSION_3)
    {
        NvU32 ptePcfHw;
        NvU32 ptePcfSw = 0;

        // In Version 3, parse the PCF bits and return those
        ptePcfHw = nvFieldGet32(&pFmtPte->fldPtePcf, pPte->v8);
        NV_ASSERT(kgmmuTranslatePtePcfFromHw_HAL(pKernelGmmu, ptePcfHw, bPteValid, &ptePcfSw) == NV_OK);

        // Valid 2MB PTEs follow the same format as 64K and 4K PTEs
        if (bPteValid)
        {
            if (!(ptePcfSw & (1 << SW_MMU_PCF_UNCACHED_IDX)))
            {
                pPteInfo->pteFlags = FLD_SET_DRF(0080_CTRL, _DMA_PTE_INFO,
                                                 _PARAMS_FLAGS_GPU_CACHED, _TRUE, pPteInfo->pteFlags);
            }
            if (ptePcfSw & (1 << SW_MMU_PCF_RO_IDX))
            {
                pPteInfo->pteFlags = FLD_SET_DRF(0080_CTRL, _DMA_PTE_INFO,
                                                 _PARAMS_FLAGS_READ_ONLY, _TRUE, pPteInfo->pteFlags);
            }
            if (ptePcfSw & (1 << SW_MMU_PCF_NOATOMIC_IDX))
            {
                pPteInfo->pteFlags = FLD_SET_DRF(0080_CTRL, _DMA_PTE_INFO,
                                                 _PARAMS_FLAGS_ATOMIC, _DISABLE, pPteInfo->pteFlags);
            }
            if (ptePcfSw & (1 << SW_MMU_PCF_REGULAR_IDX))
            {
                pPteInfo->pteFlags = FLD_SET_DRF(0080_CTRL, _DMA_PTE_INFO,
                                                 _PARAMS_FLAGS_PRIVILEGED, _FALSE, pPteInfo->pteFlags);
            }
            if (ptePcfSw & (1 << SW_MMU_PCF_ACE_IDX))
            {
                pPteInfo->pteFlags = FLD_SET_DRF(0080_CTRL, _DMA_PTE_INFO,
                                                 _PARAMS_FLAGS_ACCESS_COUNTING, _ENABLE, pPteInfo->pteFlags);
            }
        }
        else
        {
            if (pLevelFmt->numSubLevels == 0)
            {
                if (ptePcfSw & (1 << SW_MMU_PCF_SPARSE_IDX))
                {
                    pPteInfo->pteFlags = FLD_SET_DRF(0080_CTRL, _DMA_PTE_INFO,
                                                     _PARAMS_FLAGS_GPU_CACHED, _FALSE, pPteInfo->pteFlags);
                }
                else
                {
                    pPteInfo->pteFlags = FLD_SET_DRF(0080_CTRL, _DMA_PTE_INFO,
                                                     _PARAMS_FLAGS_GPU_CACHED, _TRUE, pPteInfo->pteFlags);
                }
            }
            else
            {
                NvU32 pdePcfHw = 0;
                NvU32 pdePcfSw = 0;

                pdePcfHw = nvFieldGet32(&pFmt->pPde->fldPdePcf, pPte->v8);
                NV_ASSERT(kgmmuTranslatePdePcfFromHw_HAL(pKernelGmmu, pdePcfHw, GMMU_APERTURE_INVALID, &pdePcfSw) == NV_OK);
                if (pdePcfSw & (1 << SW_MMU_PCF_SPARSE_IDX))
                {
                    pPteInfo->pteFlags = FLD_SET_DRF(0080_CTRL, _DMA_PTE_INFO,
                                                     _PARAMS_FLAGS_GPU_CACHED, _FALSE, pPteInfo->pteFlags);
                }
                else
                {
                    pPteInfo->pteFlags = FLD_SET_DRF(0080_CTRL, _DMA_PTE_INFO,
                                                     _PARAMS_FLAGS_GPU_CACHED, _TRUE, pPteInfo->pteFlags);
                }
            }
        }
    }
    else
    {
        pPteInfo->pteFlags = FLD_SET_DRF_NUM(0080_CTRL, _DMA_PTE_INFO, _PARAMS_FLAGS_GPU_CACHED,
                                             !nvFieldGetBool(&pFmtPte->fldVolatile, pPte->v8), pPteInfo->pteFlags);

        if (nvFieldIsValid32(&pFmtPte->fldReadDisable.desc) &&
            nvFieldIsValid32(&pFmtPte->fldWriteDisable.desc))
        {
            if (nvFieldGetBool(&pFmtPte->fldWriteDisable, pPte->v8))
            {
                pPteInfo->pteFlags = FLD_SET_DRF(0080_CTRL, _DMA_PTE_INFO,
                                                 _PARAMS_FLAGS_SHADER_ACCESS, _READ_ONLY, pPteInfo->pteFlags);
            }
            else if (nvFieldGetBool(&pFmtPte->fldReadDisable, pPte->v8))
            {
                pPteInfo->pteFlags = FLD_SET_DRF(0080_CTRL, _DMA_PTE_INFO,
                                                 _PARAMS_FLAGS_SHADER_ACCESS, _WRITE_ONLY, pPteInfo->pteFlags);
            }
            else
            {
                pPteInfo->pteFlags = FLD_SET_DRF(0080_CTRL, _DMA_PTE_INFO,
                                                 _PARAMS_FLAGS_SHADER_ACCESS, _READ_WRITE, pPteInfo->pteFlags);
            }
        }
        else
        {
            pPteInfo->pteFlags = FLD_SET_DRF(0080_CTRL, _DMA_PTE_INFO, _PARAMS_FLAGS_SHADER_ACCESS,
                                             _NOT_SUPPORTED, pPteInfo->pteFlags);
        }

        pPteInfo->pteFlags = FLD_SET_DRF_NUM(0080_CTRL, _DMA_PTE_INFO, _PARAMS_FLAGS_READ_ONLY,
                                             nvFieldGetBool(&pFmtPte->fldReadOnly, pPte->v8), pPteInfo->pteFlags);

        // Get comptagline
        pPteInfo->comptagLine = nvFieldGet32(&pFmtPte->fldCompTagLine, pPte->v8);
    }

    // Get kind
    pPteInfo->kind = nvFieldGet32(&pFmtPte->fldKind, pPte->v8);

    //
    // Decode the comptags value from kind. GF100 only supports 2 bits per rop tile,
    // but future chips will use the other layouts.
    //
    if (memmgrIsKind_HAL(pMemoryManager, FB_IS_KIND_COMPRESSIBLE_1, pPteInfo->kind))
    {
        pPteInfo->pteFlags = FLD_SET_DRF(0080_CTRL, _DMA_PTE_INFO, _PARAMS_FLAGS_COMPTAGS, _1, pPteInfo->pteFlags);
    }
    else if (memmgrIsKind_HAL(pMemoryManager, FB_IS_KIND_COMPRESSIBLE_2, pPteInfo->kind))
    {
        pPteInfo->pteFlags = FLD_SET_DRF(0080_CTRL, _DMA_PTE_INFO, _PARAMS_FLAGS_COMPTAGS, _2, pPteInfo->pteFlags);
    }
    else if (memmgrIsKind_HAL(pMemoryManager, FB_IS_KIND_COMPRESSIBLE_4, pPteInfo->kind))
    {
        pPteInfo->pteFlags = FLD_SET_DRF(0080_CTRL, _DMA_PTE_INFO, _PARAMS_FLAGS_COMPTAGS, _4, pPteInfo->pteFlags);
    }
    else
    {
        pPteInfo->pteFlags = FLD_SET_DRF(0080_CTRL, _DMA_PTE_INFO, _PARAMS_FLAGS_COMPTAGS, _NONE, pPteInfo->pteFlags);
    }
}

NvS32*
kgmmuGetFatalFaultIntrPendingState_IMPL
(
    KernelGmmu *pKernelGmmu,
    NvU8        gfid
)
{
    return &pKernelGmmu->mmuFaultBuffer[gfid].fatalFaultIntrPending;
}

struct HW_FAULT_BUFFER*
kgmmuGetHwFaultBufferPtr_IMPL
(
    KernelGmmu *pKernelGmmu,
    NvU8        gfid,
    NvU8        faultBufferIndex
)
{
    return &pKernelGmmu->mmuFaultBuffer[gfid].hwFaultBuffers[faultBufferIndex];
}

NvU64
kgmmuGetFaultBufferGenCnt_IMPL
(
    OBJGPU     *pGpu,
    KernelGmmu *pKernelGmmu,
    NvU8        gfid
)
{
    return pKernelGmmu->mmuFaultBuffer[gfid].faultBufferGenerationCounter;
}

void *
kgmmuGetShadowFaultBufferCslContext_IMPL
(
    OBJGPU            *pGpu,
    KernelGmmu        *pKernelGmmu,
    FAULT_BUFFER_TYPE  type
)
{
    ConfidentialCompute *pConfCompute = GPU_GET_CONF_COMPUTE(pGpu);

    if (!gpuIsCCFeatureEnabled(pGpu))
    {
        return NULL;
    }

    NV_ASSERT_OR_RETURN(
        pConfCompute->getProperty(pConfCompute, PDB_PROP_CONFCOMPUTE_ENCRYPT_ENABLED),
        NULL);

    switch (type)
    {
        case NON_REPLAYABLE_FAULT_BUFFER:
            return pConfCompute->pNonReplayableFaultCcslCtx;
        case REPLAYABLE_FAULT_BUFFER:
            return pConfCompute->pReplayableFaultCcslCtx;
        default:
            break;
    }

    return NULL;
}
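
/*
 * Illustrative consumer sketch (not part of the driver; the non-_IMPL wrapper
 * name is assumed): under Confidential Computing, fault packets in the shadow
 * buffers are encrypted, so a consumer first fetches the per-buffer CSL
 * context and skips decryption when the context is NULL (CC disabled):
 *
 *     void *pCslCtx = kgmmuGetShadowFaultBufferCslContext(pGpu, pKernelGmmu,
 *                                                         NON_REPLAYABLE_FAULT_BUFFER);
 *     if (pCslCtx != NULL)
 *     {
 *         // decrypt the packet with the CCSL API (see ccsl.h) before parsing it
 *     }
 */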