1 /* 2 * SPDX-FileCopyrightText: Copyright (c) 2013-2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 3 * SPDX-License-Identifier: MIT 4 * 5 * Permission is hereby granted, free of charge, to any person obtaining a 6 * copy of this software and associated documentation files (the "Software"), 7 * to deal in the Software without restriction, including without limitation 8 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 9 * and/or sell copies of the Software, and to permit persons to whom the 10 * Software is furnished to do so, subject to the following conditions: 11 * 12 * The above copyright notice and this permission notice shall be included in 13 * all copies or substantial portions of the Software. 14 * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 21 * DEALINGS IN THE SOFTWARE. 22 */ 23 24 #include "gpu/gpu.h" 25 #include "gpu/mem_mgr/mem_mgr.h" 26 #include "mem_mgr/gpu_vaspace.h" 27 #include "gpu/mmu/kern_gmmu.h" 28 #include "kernel/gpu/nvlink/kernel_nvlink.h" 29 #include "gpu/mem_mgr/mem_desc.h" 30 #include "nvRmReg.h" // NV_REG_STR_RM_* 31 32 #include "mmu/gmmu_fmt.h" 33 #include "mmu/mmu_fmt.h" 34 35 /*! 36 * @file 37 * @brief struct MMU_WALK_CALLBACKS g_gmmuWalkCallbacks and the callback 38 * function implementations. 39 */ 40 41 /** 42 * See @ref MMU_WALK_FILL_STATE 43 */ 44 #if NV_PRINTF_STRINGS_ALLOWED 45 const char *g_gmmuFillStateStrings[] = { "INVALID", "SPARSE", "NV4K" }; 46 const char *g_gmmuUVMMirroringDirStrings[] = { "[User Root] ", "[Mirrored Root] " }; 47 #else // NV_PRINTF_STRINGS_ALLOWED 48 static const char _gmmuFillStateString[] = "XS4"; 49 static const char _gmmuUVMMirroringDirString[] = "UM"; 50 #endif // NV_PRINTF_STRINGS_ALLOWED 51 52 static PMEMORY_DESCRIPTOR 53 _gmmuMemDescCacheCreate(MMU_WALK_USER_CTX *pUserCtx, 54 MEMORY_DESCRIPTOR *pMemDesc, 55 NvU32 memSize); 56 57 static PMEMORY_DESCRIPTOR 58 _gmmuMemDescCacheAlloc(MMU_WALK_USER_CTX *pUserCtx); 59 60 /*! 61 * Utility function to decide if a level should be mirrored. 62 * Used by MMU callbacks. 63 */ 64 static NvBool NV_FORCEINLINE 65 _mirrorLevel 66 ( 67 MMU_WALK_USER_CTX *pUserCtx, 68 const MMU_FMT_LEVEL *pLevelFmt 69 ) 70 { 71 return (pLevelFmt == pUserCtx->pGpuState->pFmt->pRoot) && pUserCtx->pGVAS->bIsMirrored; 72 } 73 74 /*! 75 * Utility function to get the number of Page Dirs to loop over. 76 * Used by MMU callbacks. 77 */ 78 static NvU8 NV_FORCEINLINE 79 _getMaxPageDirs(NvBool bMirror) 80 { 81 return bMirror ? GMMU_MAX_PAGE_DIR_INDEX_COUNT : 82 GMMU_MAX_PAGE_DIR_INDEX_COUNT - 1; 83 } 84 85 static NV_STATUS 86 _gmmuScrubMemDesc 87 ( 88 OBJGPU *pGpu, 89 MEMORY_DESCRIPTOR *pMemDesc 90 ) 91 { 92 TRANSFER_SURFACE dest = {0}; 93 94 dest.pMemDesc = pMemDesc; 95 dest.offset = 0; 96 97 NV_ASSERT_OK_OR_RETURN(memmgrMemSet(GPU_GET_MEMORY_MANAGER(pGpu), &dest, 0, 98 (NvU32)memdescGetSize(pMemDesc), 99 TRANSFER_FLAGS_NONE)); 100 101 return NV_OK; 102 } 103 104 static NV_STATUS 105 _gmmuWalkCBLevelAlloc 106 ( 107 MMU_WALK_USER_CTX *pUserCtx, 108 const MMU_FMT_LEVEL *pLevelFmt, 109 const NvU64 vaBase, 110 const NvU64 vaLimit, 111 const NvBool bTarget, 112 MMU_WALK_MEMDESC **ppMemDesc, 113 NvU32 *pMemSize, 114 NvBool *pBChanged 115 ) 116 { 117 OBJGPU *pGpu = pUserCtx->pGpu; 118 KernelGmmu *pKernelGmmu = GPU_GET_KERNEL_GMMU(pGpu); 119 OBJGVASPACE *pGVAS = pUserCtx->pGVAS; 120 const GVAS_BLOCK *pBlock = pUserCtx->pBlock; 121 const GMMU_FMT *pFmt = pUserCtx->pGpuState->pFmt; 122 MEMORY_DESCRIPTOR *pMemDesc[GMMU_MAX_PAGE_DIR_INDEX_COUNT] = {NULL}; 123 const NvU32 minMemSize = (mmuFmtVirtAddrToEntryIndex(pLevelFmt, vaLimit) + 1) * 124 pLevelFmt->entrySize; 125 NvU32 newMemSize; 126 NV_STATUS status = NV_OK; 127 NvU32 alignment; 128 NvU32 aperture; 129 NvU32 attr; 130 NvU64 memDescFlags = MEMDESC_FLAGS_NONE; 131 NvU32 memPoolListCount = 0; 132 NvU32 memPoolList[4]; 133 NvBool bAllowSysmem; 134 NvBool bPacked = NV_FALSE; 135 NvBool bPartialTbl = NV_FALSE; 136 NvBool bPmaManaged = !!(pGVAS->flags & VASPACE_FLAGS_PTETABLE_PMA_MANAGED); 137 NvBool bMirror = _mirrorLevel(pUserCtx, pLevelFmt); 138 NvU8 maxPgDirs = _getMaxPageDirs(bMirror); 139 NvU8 i = 0, j = 0; 140 141 // Abort early if level is not targeted or already sufficiently sized. 142 if (((NULL == *ppMemDesc) && !bTarget) || 143 ((NULL != *ppMemDesc) && (minMemSize <= *pMemSize))) 144 { 145 return NV_OK; 146 } 147 148 // Check if this level is the root page directory. 149 if (pLevelFmt == pFmt->pRoot) 150 { 151 newMemSize = kgmmuGetPDBAllocSize_HAL(pKernelGmmu, pLevelFmt, pGVAS->vaLimitInternal); 152 153 // TODO: PDB alignemnt. 154 alignment = RM_PAGE_SIZE; 155 156 // Determine level aperture and memory attributes. 157 if (pGVAS->flags & VASPACE_FLAGS_BAR) 158 { 159 aperture = kgmmuGetPDEBAR1Aperture(pKernelGmmu); 160 attr = kgmmuGetPDEBAR1Attr(pKernelGmmu); 161 bAllowSysmem = !FLD_TEST_DRF(_REG_STR_RM, _INST_LOC, _BAR_PDE, _VID, 162 pGpu->instLocOverrides); 163 } 164 else 165 { 166 aperture = kgmmuGetPDEAperture(pKernelGmmu); 167 attr = kgmmuGetPDEAttr(pKernelGmmu); 168 bAllowSysmem = !FLD_TEST_DRF(_REG_STR_RM, _INST_LOC, _PDE, _VID, 169 pGpu->instLocOverrides); 170 } 171 172 // Default aperture. 173 memPoolList[memPoolListCount++] = aperture; 174 175 // Fallback to sysmem if allowed. 176 if (bAllowSysmem && 177 (aperture != ADDR_SYSMEM) && !(pGVAS->flags & VASPACE_FLAGS_BAR)) 178 { 179 memPoolList[memPoolListCount++] = ADDR_SYSMEM; 180 } 181 } 182 else 183 { 184 const MMU_FMT_LEVEL *pParent; 185 const GMMU_FMT_PDE_MULTI *pPdeMulti = pFmt->pPdeMulti; 186 const GMMU_FMT_PDE *pPde; 187 NvU32 subLevel; 188 189 // Find the level's parent format. 190 pParent = mmuFmtFindLevelParent(pFmt->pRoot, pLevelFmt, &subLevel); 191 NV_ASSERT_OR_RETURN(NULL != pParent, NV_ERR_INVALID_ARGUMENT); 192 193 // Get the alignment from the parent PDE address shift. 194 pPde = gmmuFmtGetPde(pFmt, pParent, subLevel); 195 196 if (pPde->version == GMMU_FMT_VERSION_3) 197 { 198 alignment = NVBIT(pPde->fldAddr.shift); 199 } 200 else 201 { 202 alignment = NVBIT(pPde->fldAddrSysmem.shift); 203 } 204 205 // Initially assume full size. 206 newMemSize = mmuFmtLevelSize(pLevelFmt); 207 208 // Shrink size if partial page tables are supported. 209 if ((pGVAS->flags & VASPACE_FLAGS_MINIMIZE_PTETABLE_SIZE) && 210 (pParent->numSubLevels > 1) && 211 nvFieldIsValid32(&pPdeMulti->fldSizeRecipExp)) 212 { 213 NvU32 i; 214 // 215 // Only a fixed set of PDE ranges are allowed to have partial size. 216 // Partial VA holes of these PDEs are blocked at VAS creation time. 217 // See @ref gvaspaceConstructHal_IMPL for details. 218 // 219 for (i = 0; i < pGVAS->numPartialPtRanges; ++i) 220 { 221 if ((vaBase >= pGVAS->partialPtVaRangeBase[i]) && 222 (vaBase <= (pGVAS->partialPtVaRangeBase[i] + 223 pGVAS->partialPtVaRangeSize - 1))) 224 { 225 const NvU32 recipExpMax = pPdeMulti->fldSizeRecipExp.maskPos >> 226 pPdeMulti->fldSizeRecipExp.shift; 227 const NvU32 fracMemSize = nvNextPow2_U32(minMemSize); 228 const NvU32 recipExpTgt = BIT_IDX_32(newMemSize / fracMemSize); 229 const NvU32 recipExp = NV_MIN(recipExpMax, recipExpTgt); 230 newMemSize >>= recipExp; 231 bPartialTbl = NV_TRUE; 232 break; 233 } 234 } 235 } 236 237 // New size must satisfy the minimum size. 238 NV_ASSERT(newMemSize >= minMemSize); 239 // New size must be larger than old size, otherwise should have aborted earlier. 240 NV_ASSERT(newMemSize > *pMemSize); 241 242 // Determine level aperture and memory attributes. 243 if (pGVAS->flags & VASPACE_FLAGS_BAR) 244 { 245 aperture = kgmmuGetPTEBAR1Aperture(pKernelGmmu); 246 attr = kgmmuGetPTEBAR1Attr(pKernelGmmu); 247 bAllowSysmem = !FLD_TEST_DRF(_REG_STR_RM, _INST_LOC, _BAR_PTE, _VID, 248 pGpu->instLocOverrides); 249 } 250 else 251 { 252 aperture = kgmmuGetPTEAperture(pKernelGmmu); 253 attr = kgmmuGetPTEAttr(pKernelGmmu); 254 bAllowSysmem = !FLD_TEST_DRF(_REG_STR_RM, _INST_LOC, _PTE, _VID, 255 pGpu->instLocOverrides); 256 } 257 258 // 259 // BAR PDEs/PTEs are not allowed in sysmem since it can cause deadlock 260 // during PCIE transactions. 261 // PMU PDEs/PTEs must be in vidmem so that PMU can access virtually mapped 262 // memory during GC6 exit. 263 // 264 bAllowSysmem = bAllowSysmem && 265 !(pGVAS->flags & VASPACE_FLAGS_BAR) && 266 !(pGVAS->flags & VASPACE_FLAGS_PMU); 267 268 // Prefer sysmem if requested and allowed. 269 if (bAllowSysmem && 270 (NULL != pBlock && pBlock->flags.bPreferSysmemPageTables)) 271 { 272 memPoolList[memPoolListCount++] = ADDR_SYSMEM; 273 } 274 275 // Default aperture. 276 memPoolList[memPoolListCount++] = aperture; 277 278 // Fallback to sysmem if requested and allowed. 279 if (bAllowSysmem && 280 (pGVAS->flags & VASPACE_FLAGS_RETRY_PTE_ALLOC_IN_SYS)) 281 { 282 memPoolList[memPoolListCount++] = ADDR_SYSMEM; 283 } 284 } 285 286 // Add memList end entry. 287 memPoolList[memPoolListCount++] = ADDR_UNKNOWN; 288 NV_ASSERT(memPoolListCount <= NV_ARRAY_ELEMENTS(memPoolList)); 289 290 // MEMDESC flags 291 memDescFlags = MEMDESC_FLAGS_LOCKLESS_SYSMEM_ALLOC | 292 MEMDESC_FLAGS_PAGE_SIZE_ALIGN_IGNORE; 293 294 if (pGVAS->flags & VASPACE_FLAGS_ALLOW_PAGES_IN_PHYS_MEM_SUBALLOCATOR) 295 { 296 memDescFlags |= MEMDESC_FLAGS_OWNED_BY_CURRENT_DEVICE; 297 } 298 299 // Create the level memdesc. 300 for (i = 0; i < maxPgDirs; i++) 301 { 302 MEMORY_DESCRIPTOR *pMemDescTemp; 303 304 status = memdescCreate(&pMemDescTemp, pGpu, 305 (((newMemSize < RM_PAGE_SIZE) && !bPartialTbl && !bPmaManaged) ? 306 RM_PAGE_SIZE : newMemSize), 307 alignment, 308 NV_TRUE, 309 ADDR_UNKNOWN, 310 attr, 311 memDescFlags); 312 NV_ASSERT_OR_GOTO(NV_OK == status, done); 313 314 // Page levels always use 4KB swizzle. 315 memdescSetPageSize(pMemDescTemp, AT_GPU, RM_PAGE_SIZE); 316 317 // 318 // Allocate the page level memory from reserved pool if aperture is vidmem 319 // and PMA is enabled. Otherwise, allocate the same way on both vidmem and 320 // sysmem. 321 // 322 while (memPoolList[j] != ADDR_UNKNOWN) 323 { 324 memdescSetAddressSpace(pMemDescTemp, memPoolList[j]); 325 switch (memPoolList[j]) 326 { 327 case ADDR_FBMEM: 328 if (RMCFG_FEATURE_PMA && 329 (pGVAS->flags & VASPACE_FLAGS_PTETABLE_PMA_MANAGED) && 330 (pGVAS->pPageTableMemPool != NULL)) 331 { 332 pMemDescTemp->ActualSize = RM_ALIGN_UP(newMemSize, alignment); 333 status = rmMemPoolAllocate(pGVAS->pPageTableMemPool, 334 (RM_POOL_ALLOC_MEMDESC*)pMemDescTemp); 335 break; 336 } 337 case ADDR_SYSMEM: 338 status = memdescAlloc(pMemDescTemp); 339 break; 340 default: 341 NV_ASSERT_OR_GOTO(0, done); 342 } 343 if (NV_OK == status) 344 { 345 // 346 // Always scrub the allocation for the PDB allocation in case 347 // GMMU prefetches some uninitialized entries 348 // 349 if (pLevelFmt == pFmt->pRoot) 350 { 351 status = _gmmuScrubMemDesc(pGpu, pMemDescTemp); 352 } 353 354 memdescSetName(pGpu, pMemDescTemp, NV_RM_SURF_NAME_PAGE_TABLE, mmuFmtConvertLevelIdToSuffix(pLevelFmt)); 355 break; 356 } 357 j++; 358 } 359 360 if (NV_OK != status) 361 { 362 memdescDestroy(pMemDescTemp); 363 goto done; 364 } 365 366 // 367 // The packing optimization is only needed for allocations in vidmem since 368 // the 4K granularity is not applicable to allocations in sysmem. 369 // 370 bPacked = ((memdescGetAddressSpace(pMemDescTemp) == ADDR_FBMEM) && 371 (alignment < RM_PAGE_SIZE) && !bPmaManaged); 372 373 if (bPacked) 374 { 375 // Try to allocate from the free list of packed memdescs 376 pMemDesc[i] = _gmmuMemDescCacheAlloc(pUserCtx); 377 if (NULL != pMemDesc[i]) 378 { 379 // Free this if we have already allocated from the list. 380 memdescFree(pMemDescTemp); 381 memdescDestroy(pMemDescTemp); 382 } 383 else 384 { 385 // Add another page to the cache and then alloc. 386 pMemDesc[i] = _gmmuMemDescCacheCreate(pUserCtx, 387 pMemDescTemp, 388 newMemSize); 389 if (NULL == pMemDesc[i]) 390 { 391 memdescFree(pMemDescTemp); 392 memdescDestroy(pMemDescTemp); 393 goto done; 394 } 395 } 396 } 397 else 398 { 399 pMemDesc[i] = pMemDescTemp; 400 } 401 402 #if NV_PRINTF_STRINGS_ALLOWED 403 NV_PRINTF(LEVEL_INFO, 404 "[GPU%u]: [%s] %sPA 0x%llX (0x%X bytes) for VA 0x%llX-0x%llX\n", 405 pUserCtx->pGpu->gpuInstance, 406 bPacked ? "Packed" : "Unpacked", 407 bMirror ? g_gmmuUVMMirroringDirStrings[i] : "", 408 memdescGetPhysAddr(pMemDesc[i], AT_GPU, 0), newMemSize, 409 mmuFmtLevelVirtAddrLo(pLevelFmt, vaBase), 410 mmuFmtLevelVirtAddrHi(pLevelFmt, vaLimit)); 411 #else // NV_PRINTF_STRINGS_ALLOWED 412 NV_PRINTF(LEVEL_INFO, 413 "[GPU%u]: [Packed: %c] %sPA 0x%llX (0x%X bytes) for VA 0x%llX-0x%llX\n", 414 pUserCtx->pGpu->gpuInstance, 415 bPacked ? 'Y' : 'N', 416 bMirror ? _gmmuUVMMirroringDirString[i] : ' ', 417 memdescGetPhysAddr(pMemDesc[i], AT_GPU, 0), newMemSize, 418 mmuFmtLevelVirtAddrLo(pLevelFmt, vaBase), 419 mmuFmtLevelVirtAddrHi(pLevelFmt, vaLimit)); 420 #endif // NV_PRINTF_STRINGS_ALLOWED 421 } 422 423 // Commit return values. 424 *ppMemDesc = (MMU_WALK_MEMDESC*)pMemDesc[GMMU_USER_PAGE_DIR_INDEX]; 425 *pMemSize = newMemSize; 426 *pBChanged = NV_TRUE; 427 428 done: 429 if (NV_OK == status) 430 { 431 // Commit mirrored root desc. 432 if (bMirror) 433 { 434 pUserCtx->pGpuState->pMirroredRoot = 435 (MMU_WALK_MEMDESC*)pMemDesc[GMMU_KERNEL_PAGE_DIR_INDEX]; 436 } 437 } 438 else 439 { 440 for (i = 0; i < maxPgDirs; i++) 441 { 442 memdescFree(pMemDesc[i]); 443 memdescDestroy(pMemDesc[i]); 444 } 445 } 446 return status; 447 } 448 449 static PMEMORY_DESCRIPTOR 450 _gmmuMemDescCacheCreate 451 ( 452 MMU_WALK_USER_CTX *pUserCtx, 453 MEMORY_DESCRIPTOR *pMemDesc, 454 NvU32 memSize 455 ) 456 { 457 NV_STATUS status = NV_OK; 458 MEMORY_DESCRIPTOR* pMemDescTmp; 459 NvU32 i; 460 461 NV_ASSERT_OR_RETURN((NULL != pMemDesc), NULL); 462 NV_ASSERT_OR_RETURN((memSize <= pMemDesc->ActualSize), NULL); 463 464 if (pMemDesc->pSubMemDescList == NULL) 465 { 466 pMemDesc->pSubMemDescList = portMemAllocNonPaged(sizeof(MEMORY_DESCRIPTOR_LIST)); 467 NV_ASSERT_OR_RETURN(pMemDesc->pSubMemDescList != NULL, NULL); 468 } 469 470 // Initialize the list head of the unpacked memdesc 471 listInitIntrusive(pMemDesc->pSubMemDescList); 472 473 // Form the list of submemdescs with the parent memdesc as the head 474 for (i = 0; i < (pMemDesc->ActualSize / memSize); i++) 475 { 476 MEMORY_DESCRIPTOR *pSubMemDesc = NULL; 477 status = memdescCreateSubMem(&pSubMemDesc, 478 pMemDesc, 479 pUserCtx->pGpu, 480 i * memSize, 481 memSize); 482 NV_ASSERT_OR_RETURN((NV_OK == status), NULL); 483 listAppendExisting(pMemDesc->pSubMemDescList, pSubMemDesc); 484 } 485 486 // Add the parent memdesc to the per VAS/per GPU list of unpacked memdescs 487 listAppendExisting(&pUserCtx->pGpuState->unpackedMemDescList, pMemDesc); 488 489 // Pop the free list of packed memdescs and return one 490 pMemDescTmp = listTail(pMemDesc->pSubMemDescList); 491 listRemove(pMemDesc->pSubMemDescList, pMemDescTmp); 492 return pMemDescTmp; 493 } 494 495 static PMEMORY_DESCRIPTOR 496 _gmmuMemDescCacheAlloc 497 ( 498 MMU_WALK_USER_CTX *pUserCtx 499 ) 500 { 501 MEMORY_DESCRIPTOR *pParentMemDesc; 502 MEMORY_DESCRIPTOR *pParentMemDescNext; 503 504 for (pParentMemDesc = listHead(&pUserCtx->pGpuState->unpackedMemDescList); 505 pParentMemDesc != NULL; 506 pParentMemDesc = pParentMemDescNext) 507 { 508 pParentMemDescNext = listNext(&pUserCtx->pGpuState->unpackedMemDescList, pParentMemDesc); 509 MEMORY_DESCRIPTOR *pChild; 510 pChild = listTail(pParentMemDesc->pSubMemDescList); 511 listRemove(pParentMemDesc->pSubMemDescList, pChild); 512 if (NULL != pChild) 513 { 514 return pChild; 515 } 516 } 517 return NULL; 518 } 519 520 void 521 gmmuMemDescCacheFree 522 ( 523 GVAS_GPU_STATE *pGpuState 524 ) 525 { 526 NV_ASSERT_OR_RETURN_VOID(NULL != pGpuState); 527 528 while (listCount(&pGpuState->unpackedMemDescList) > 0) 529 { 530 MEMORY_DESCRIPTOR *pTmp; 531 MEMORY_DESCRIPTOR *pParentMemDesc; 532 pParentMemDesc = listTail(&pGpuState->unpackedMemDescList); 533 534 // Assert if all submemdescs have not been returned to the parent. 535 NV_ASSERT(pParentMemDesc->RefCount - listCount(pParentMemDesc->pSubMemDescList) == 1); 536 537 while(listCount(pParentMemDesc->pSubMemDescList) > 0) 538 { 539 pTmp = listTail(pParentMemDesc->pSubMemDescList); 540 listRemove(pParentMemDesc->pSubMemDescList, pTmp); 541 memdescDestroy(pTmp); 542 } 543 listRemove(&pGpuState->unpackedMemDescList, pParentMemDesc); 544 memdescFree(pParentMemDesc); 545 memdescDestroy(pParentMemDesc); 546 } 547 } 548 549 static void 550 _gmmuWalkCBLevelFree 551 ( 552 MMU_WALK_USER_CTX *pUserCtx, 553 const MMU_FMT_LEVEL *pLevelFmt, 554 const NvU64 vaBase, 555 MMU_WALK_MEMDESC *pOldMem 556 ) 557 { 558 NvU8 i; 559 NvBool bMirror = _mirrorLevel(pUserCtx, pLevelFmt); 560 NvU8 maxPgDirs = _getMaxPageDirs(bMirror); 561 MEMORY_DESCRIPTOR *pMemDesc[GMMU_MAX_PAGE_DIR_INDEX_COUNT] = {NULL}; 562 563 pMemDesc[GMMU_USER_PAGE_DIR_INDEX] = (MEMORY_DESCRIPTOR*)pOldMem; 564 if (bMirror) 565 { 566 pMemDesc[GMMU_KERNEL_PAGE_DIR_INDEX] = 567 (MEMORY_DESCRIPTOR*)pUserCtx->pGpuState->pMirroredRoot; 568 pUserCtx->pGpuState->pMirroredRoot = NULL; 569 } 570 571 for (i = 0; i < maxPgDirs; i++) 572 { 573 if (NULL == pMemDesc[i]) 574 { 575 continue; 576 } 577 578 #if NV_PRINTF_STRINGS_ALLOWED 579 NV_PRINTF(LEVEL_INFO, 580 "[GPU%u]: %sPA 0x%llX for VA 0x%llX-0x%llX\n", 581 pUserCtx->pGpu->gpuInstance, 582 bMirror ? g_gmmuUVMMirroringDirStrings[i] : "", 583 memdescGetPhysAddr(pMemDesc[i], AT_GPU, 0), 584 mmuFmtLevelVirtAddrLo(pLevelFmt, vaBase), 585 mmuFmtLevelVirtAddrHi(pLevelFmt, vaBase)); 586 #else // NV_PRINTF_STRINGS_ALLOWED 587 NV_PRINTF(LEVEL_INFO, 588 "[GPU%u]: %cPA 0x%llX for VA 0x%llX-0x%llX\n", 589 pUserCtx->pGpu->gpuInstance, 590 bMirror ? _gmmuUVMMirroringDirString[i] : ' ', 591 memdescGetPhysAddr(pMemDesc[i], AT_GPU, 0), 592 mmuFmtLevelVirtAddrLo(pLevelFmt, vaBase), 593 mmuFmtLevelVirtAddrHi(pLevelFmt, vaBase)); 594 #endif // NV_PRINTF_STRINGS_ALLOWED 595 596 // 597 // If this is a submemdesc, return it to its free list only when 598 // the refcount is 1. A refcount greater than 1 implies that 2 or 599 // more GPUs in SLI are using it. GPUs in SLI can share a page level 600 // instance. 601 // 602 if (memdescIsSubMemoryMemDesc(pMemDesc[i]) && 603 (pMemDesc[i]->RefCount == 1)) 604 { 605 // Return this to the free list from which it was borrowed 606 listAppendExisting(memdescGetParentDescriptor(pMemDesc[i])->pSubMemDescList, pMemDesc[i]); 607 } 608 else 609 { 610 if (RMCFG_FEATURE_PMA && 611 (pUserCtx->pGVAS->flags & VASPACE_FLAGS_PTETABLE_PMA_MANAGED) && 612 (pMemDesc[i]->pPageHandleList != NULL) && 613 (listCount(pMemDesc[i]->pPageHandleList) != 0) && 614 (pUserCtx->pGVAS->pPageTableMemPool != NULL)) 615 { 616 rmMemPoolFree(pUserCtx->pGVAS->pPageTableMemPool, 617 (RM_POOL_ALLOC_MEMDESC*)pMemDesc[i], 618 pUserCtx->pGVAS->flags); 619 } 620 621 if (!memdescIsSubMemoryMemDesc(pMemDesc[i])) 622 { 623 memdescFree(pMemDesc[i]); 624 } 625 memdescDestroy(pMemDesc[i]); 626 } 627 } 628 } 629 630 static NvBool 631 _gmmuWalkCBUpdatePdb 632 ( 633 MMU_WALK_USER_CTX *pUserCtx, 634 const MMU_FMT_LEVEL *pRootFmt, 635 const MMU_WALK_MEMDESC *pRootMem, 636 const NvBool bIgnoreChannelBusy 637 ) 638 { 639 OBJGPU *pGpu = pUserCtx->pGpu; 640 MEMORY_DESCRIPTOR *pPDB = (MEMORY_DESCRIPTOR*)pRootMem; 641 642 NV_PRINTF(LEVEL_INFO, "[GPU%u]: PA 0x%llX (%s)\n", 643 pUserCtx->pGpu->gpuInstance, 644 (NULL != pPDB) ? memdescGetPhysAddr(pPDB, AT_GPU, 0) : 0, 645 (NULL != pPDB) ? "valid" : "null"); 646 647 if (pUserCtx->pGVAS->flags & VASPACE_FLAGS_BAR_BAR1) 648 { 649 // 650 // Do nothing, as BAR1 pdb is static and is only created and 651 // destroyed along with the vaspace itself. Since the bar1 652 // instance memory is appropriately updated then, we do not 653 // do anything inside update pdb for bar1 which will be invoked 654 // for mmuwalksparsify and mmuwalkunmap. 655 // 656 return NV_TRUE; 657 } 658 else if ((pUserCtx->pGVAS->flags & VASPACE_FLAGS_HDA)) 659 { 660 // Instance Block set up once by caller. 661 return NV_TRUE; 662 } 663 else if (IS_VIRTUAL_WITH_SRIOV(pGpu) || IS_GSP_CLIENT(pGpu)) 664 { 665 // Noop inside a guest or CPU RM. 666 return NV_TRUE; 667 } 668 return NV_TRUE; 669 } 670 671 static NvBool 672 _gmmuWalkCBUpdatePde 673 ( 674 MMU_WALK_USER_CTX *pUserCtx, 675 const MMU_FMT_LEVEL *pLevelFmt, 676 const MMU_WALK_MEMDESC *pLevelMem, 677 const NvU32 entryIndex, 678 const MMU_WALK_MEMDESC **pSubLevels 679 ) 680 { 681 NvU32 i; 682 GMMU_ENTRY_VALUE entry; 683 NvBool bMirror = _mirrorLevel(pUserCtx, pLevelFmt); 684 NvU8 maxPgDirs = _getMaxPageDirs(bMirror); 685 OBJGPU *pGpu = pUserCtx->pGpu; 686 OBJGVASPACE *pGVAS = pUserCtx->pGVAS; 687 KernelGmmu *pKernelGmmu = GPU_GET_KERNEL_GMMU(pGpu); 688 const GMMU_FMT *pFmt = pUserCtx->pGpuState->pFmt; 689 MEMORY_DESCRIPTOR *pMemDesc[GMMU_MAX_PAGE_DIR_INDEX_COUNT] = {NULL}; 690 NvU32 recipExp = NV_U32_MAX; 691 const GMMU_FMT_PDE_MULTI *pPdeMulti = pFmt->pPdeMulti; 692 693 pMemDesc[GMMU_USER_PAGE_DIR_INDEX] = (MEMORY_DESCRIPTOR*)pLevelMem; 694 if (bMirror) 695 { 696 pMemDesc[GMMU_KERNEL_PAGE_DIR_INDEX] = 697 (MEMORY_DESCRIPTOR*)pUserCtx->pGpuState->pMirroredRoot; 698 } 699 700 for (i = 0; i < maxPgDirs; i++) 701 { 702 #if NV_PRINTF_STRINGS_ALLOWED 703 NV_PRINTF(LEVEL_INFO, "[GPU%u]: %sPA 0x%llX, Entry 0x%X\n", 704 pUserCtx->pGpu->gpuInstance, 705 bMirror ? g_gmmuUVMMirroringDirStrings[i] : "", 706 memdescGetPhysAddr(pMemDesc[i], AT_GPU, 0), entryIndex); 707 #else // NV_PRINTF_STRINGS_ALLOWED 708 NV_PRINTF(LEVEL_INFO, "[GPU%u]: %cPA 0x%llX, Entry 0x%X\n", 709 pUserCtx->pGpu->gpuInstance, 710 bMirror ? _gmmuUVMMirroringDirString[i] : ' ', 711 memdescGetPhysAddr(pMemDesc[i], AT_GPU, 0), entryIndex); 712 #endif // NV_PRINTF_STRINGS_ALLOWED 713 } 714 715 portMemSet(entry.v8, 0, pLevelFmt->entrySize); 716 717 for (i = 0; i < pLevelFmt->numSubLevels; ++i) 718 { 719 const GMMU_FMT_PDE *pPde = gmmuFmtGetPde(pFmt, pLevelFmt, i); 720 MEMORY_DESCRIPTOR *pSubMemDesc = (MEMORY_DESCRIPTOR*)pSubLevels[i]; 721 722 if (NULL != pSubMemDesc) 723 { 724 const GMMU_APERTURE aperture = kgmmuGetMemAperture(pKernelGmmu, pSubMemDesc); 725 const GMMU_FIELD_ADDRESS *pFldAddr = gmmuFmtPdePhysAddrFld(pPde, aperture); 726 const NvU64 physAddr = memdescGetPhysAddr(pSubMemDesc, AT_GPU, 0); 727 728 if (pFmt->version == GMMU_FMT_VERSION_3) 729 { 730 NvU32 pdePcfHw = 0; 731 NvU32 pdePcfSw = 0; 732 733 pdePcfSw |= gvaspaceIsAtsEnabled(pGVAS) ? (1 << SW_MMU_PCF_ATS_ALLOWED_IDX) : 0; 734 pdePcfSw |= memdescGetVolatility(pSubMemDesc) ? (1 << SW_MMU_PCF_UNCACHED_IDX) : 0; 735 736 NV_ASSERT_OR_RETURN((kgmmuTranslatePdePcfFromSw_HAL(pKernelGmmu, pdePcfSw, &pdePcfHw) == NV_OK), 737 NV_ERR_INVALID_ARGUMENT); 738 nvFieldSet32(&pPde->fldPdePcf, pdePcfHw, entry.v8); 739 } 740 else 741 { 742 nvFieldSetBool(&pPde->fldVolatile, memdescGetVolatility(pSubMemDesc), entry.v8); 743 } 744 745 gmmuFieldSetAperture(&pPde->fldAperture, aperture, entry.v8); 746 gmmuFieldSetAddress(pFldAddr, 747 kgmmuEncodePhysAddr(pKernelGmmu, aperture, physAddr, 748 NVLINK_INVALID_FABRIC_ADDR), 749 entry.v8); 750 751 // Calculate partial page table size if supported. 752 if ((pGVAS->flags & VASPACE_FLAGS_MINIMIZE_PTETABLE_SIZE) && 753 (pLevelFmt->numSubLevels > 1) && 754 nvFieldIsValid32(&pPdeMulti->fldSizeRecipExp)) 755 { 756 const NvU32 maxMemSize = mmuFmtLevelSize(&pLevelFmt->subLevels[i]); 757 const NvU32 curMemSize = (NvU32)pSubMemDesc->Size; 758 const NvU32 minRecipExp = BIT_IDX_32(maxMemSize / curMemSize); 759 760 // We should have allocated on a fractional (pow2) boundary. 761 NV_ASSERT(ONEBITSET(curMemSize)); 762 763 if (recipExp == NV_U32_MAX) 764 { 765 // Save exponent if not set yet. 766 recipExp = minRecipExp; 767 } 768 else 769 { 770 // Otherwise ensure parallel sub-levels match. 771 NV_ASSERT(recipExp == minRecipExp); 772 } 773 } 774 775 NV_PRINTF(LEVEL_INFO, " SubLevel %u = PA 0x%llX\n", i, 776 physAddr); 777 } 778 else 779 { 780 NV_PRINTF(LEVEL_INFO, " SubLevel %u = INVALID\n", i); 781 } 782 } 783 784 // Set partial page table size exponent if needed. 785 if (recipExp != NV_U32_MAX) 786 { 787 nvFieldSet32(&pPdeMulti->fldSizeRecipExp, recipExp, entry.v8); 788 } 789 790 for (i = 0; i < maxPgDirs; i++) 791 { 792 TRANSFER_SURFACE dest = {0}; 793 794 dest.pMemDesc = pMemDesc[i]; 795 dest.offset = entryIndex * pLevelFmt->entrySize; 796 NV_ASSERT_OK(memmgrMemWrite(GPU_GET_MEMORY_MANAGER(pGpu), &dest, 797 entry.v8, pLevelFmt->entrySize, 798 TRANSFER_FLAGS_NONE)); 799 } 800 801 return NV_TRUE; 802 } 803 804 static void 805 _gmmuWalkCBFillEntries 806 ( 807 MMU_WALK_USER_CTX *pUserCtx, 808 const MMU_FMT_LEVEL *pLevelFmt, 809 const MMU_WALK_MEMDESC *pLevelMem, 810 const NvU32 entryIndexLo, 811 const NvU32 entryIndexHi, 812 const MMU_WALK_FILL_STATE fillState, 813 NvU32 *pProgress 814 ) 815 { 816 NvU32 i; 817 NvU32 j; 818 OBJGPU *pGpu = pUserCtx->pGpu; 819 KernelGmmu *pKernelGmmu = GPU_GET_KERNEL_GMMU(pGpu); 820 MemoryManager *pMemoryManager = GPU_GET_MEMORY_MANAGER(pGpu); 821 KernelBus *pKernelBus = GPU_GET_KERNEL_BUS(pGpu); 822 const GMMU_FMT *pFmt = pUserCtx->pGpuState->pFmt; 823 NvBool bMirror = _mirrorLevel(pUserCtx, pLevelFmt); 824 NvU8 maxPgDirs = _getMaxPageDirs(bMirror); 825 MEMORY_DESCRIPTOR *pMemDesc[GMMU_MAX_PAGE_DIR_INDEX_COUNT] = {NULL}; 826 NvU32 sizeOfEntries = (entryIndexHi - entryIndexLo + 1) * 827 pLevelFmt->entrySize; 828 NvU8 *pEntries; 829 830 pMemDesc[GMMU_USER_PAGE_DIR_INDEX] = (MEMORY_DESCRIPTOR*)pLevelMem; 831 if (bMirror) 832 { 833 pMemDesc[GMMU_KERNEL_PAGE_DIR_INDEX] = 834 (MEMORY_DESCRIPTOR*)pUserCtx->pGpuState->pMirroredRoot; 835 } 836 837 for (j = 0; j < maxPgDirs; j++) 838 { 839 TRANSFER_SURFACE dest = {0}; 840 841 dest.pMemDesc = pMemDesc[j]; 842 dest.offset = entryIndexLo * pLevelFmt->entrySize; 843 844 // 845 // A shadow buffer is allocated to store the PTEs in case of writes 846 // using CE and GSP DMA task. This code gets called in a high IRQL 847 // path on Windows and shadow buffer allocation may fail there. 848 // 849 pEntries = memmgrMemBeginTransfer(pMemoryManager, &dest, sizeOfEntries, 850 TRANSFER_FLAGS_SHADOW_ALLOC); 851 NV_ASSERT_OR_RETURN_VOID(pEntries != NULL); 852 853 #if NV_PRINTF_STRINGS_ALLOWED 854 NV_PRINTF(LEVEL_INFO, 855 "[GPU%u]: %sPA 0x%llX, Entries 0x%X-0x%X = %s\n", 856 pUserCtx->pGpu->gpuInstance, 857 bMirror ? g_gmmuUVMMirroringDirStrings[j] : "", 858 memdescGetPhysAddr(pMemDesc[j], AT_GPU, 0), 859 entryIndexLo, entryIndexHi, 860 g_gmmuFillStateStrings[fillState]); 861 #else // NV_PRINTF_STRINGS_ALLOWED 862 NV_PRINTF(LEVEL_INFO, 863 "[GPU%u] %cPA 0x%llX, Entries 0x%X-0x%X = %c\n", 864 pUserCtx->pGpu->gpuInstance, 865 bMirror ? _gmmuUVMMirroringDirString[j] : ' ', 866 memdescGetPhysAddr(pMemDesc[j], AT_GPU, 0), 867 entryIndexLo, entryIndexHi, 868 _gmmuFillStateString[fillState]); 869 #endif // NV_PRINTF_STRINGS_ALLOWED 870 871 switch (fillState) 872 { 873 case MMU_WALK_FILL_INVALID: 874 portMemSet(pEntries, 0, sizeOfEntries); 875 break; 876 case MMU_WALK_FILL_SPARSE: 877 { 878 const GMMU_FMT_FAMILY *pFam = kgmmuFmtGetFamily(pKernelGmmu, pFmt->version); 879 const GMMU_ENTRY_VALUE *pSparseEntry; 880 881 // Select sparse entry template based on number of sub-levels. 882 if (pLevelFmt->numSubLevels > 1) 883 { 884 pSparseEntry = &pFam->sparsePdeMulti; 885 } 886 else if (pLevelFmt->numSubLevels == 1) 887 { 888 pSparseEntry = &pFam->sparsePde; 889 } 890 else 891 { 892 if (kbusIsFlaDummyPageEnabled(pKernelBus) && 893 (pUserCtx->pGVAS->flags & VASPACE_FLAGS_FLA)) 894 pSparseEntry = &pUserCtx->pGpuState->flaDummyPage.pte; 895 else 896 pSparseEntry = &pFam->sparsePte; 897 } 898 899 // Copy sparse template to each entry. 900 for (i = entryIndexLo; i <= entryIndexHi; ++i) 901 { 902 NvU32 entryIndex = (i - entryIndexLo) * pLevelFmt->entrySize; 903 portMemCopy(&pEntries[entryIndex], 904 pLevelFmt->entrySize, 905 pSparseEntry->v8, 906 pLevelFmt->entrySize); 907 } 908 break; 909 } 910 case MMU_WALK_FILL_NV4K: 911 { 912 const GMMU_FMT_FAMILY *pFam = 913 kgmmuFmtGetFamily(pKernelGmmu, pFmt->version); 914 const GMMU_ENTRY_VALUE *pNv4kEntry = &pFam->nv4kPte; 915 916 // debug print - to remove when the code is robust enough 917 if (!gvaspaceIsAtsEnabled(pUserCtx->pGVAS) || 918 mmuFmtLevelPageSize(pLevelFmt) != RM_PAGE_SIZE_64K) 919 { 920 #if NV_PRINTF_STRINGS_ALLOWED 921 NV_PRINTF(LEVEL_ERROR, 922 "[GPU%u]: %sPA 0x%llX, Entries 0x%X-0x%X = %s FAIL\n", 923 pUserCtx->pGpu->gpuInstance, 924 bMirror ? g_gmmuUVMMirroringDirStrings[j] : "", 925 memdescGetPhysAddr(pMemDesc[j], AT_GPU, 0), 926 entryIndexLo, entryIndexHi, 927 g_gmmuFillStateStrings[fillState]); 928 #else // NV_PRINTF_STRINGS_ALLOWED 929 NV_PRINTF(LEVEL_ERROR, 930 "[GPU%u]: %cPA 0x%llX, Entries 0x%X-0x%X = %c FAIL\n", 931 pUserCtx->pGpu->gpuInstance, 932 bMirror ? _gmmuUVMMirroringDirString[j] : ' ', 933 memdescGetPhysAddr(pMemDesc[j], AT_GPU, 0), 934 entryIndexLo, entryIndexHi, 935 _gmmuFillStateString[fillState]); 936 #endif // NV_PRINTF_STRINGS_ALLOWED 937 938 DBG_BREAKPOINT(); 939 return; 940 } 941 942 // Copy nv4k template to each entry 943 for (i = entryIndexLo; i <= entryIndexHi; ++i) 944 { 945 NvU32 entryIndex = (i - entryIndexLo) * pLevelFmt->entrySize; 946 portMemCopy(&pEntries[entryIndex], 947 pLevelFmt->entrySize, 948 pNv4kEntry->v8, 949 pLevelFmt->entrySize); 950 } 951 break; 952 } 953 default: 954 NV_ASSERT(0); 955 break; 956 } 957 958 memmgrMemEndTransfer(pMemoryManager, &dest, sizeOfEntries, 959 TRANSFER_FLAGS_SHADOW_ALLOC); 960 } 961 962 *pProgress = entryIndexHi - entryIndexLo + 1; 963 } 964 965 static void 966 _gmmuWalkCBCopyEntries 967 ( 968 MMU_WALK_USER_CTX *pUserCtx, 969 const MMU_FMT_LEVEL *pLevelFmt, 970 const MMU_WALK_MEMDESC *pSrcMem, 971 const MMU_WALK_MEMDESC *pDstMem, 972 const NvU32 entryIndexLo, 973 const NvU32 entryIndexHi, 974 NvU32 *pProgress 975 ) 976 { 977 MEMORY_DESCRIPTOR *pSrcDesc = (MEMORY_DESCRIPTOR *)pSrcMem; 978 MEMORY_DESCRIPTOR *pDstDesc = (MEMORY_DESCRIPTOR *)pDstMem; 979 TRANSFER_SURFACE src = {0}; 980 TRANSFER_SURFACE dest = {0}; 981 982 src.pMemDesc = pSrcDesc; 983 src.offset = entryIndexLo * pLevelFmt->entrySize; 984 dest.pMemDesc = pDstDesc; 985 dest.offset = entryIndexLo * pLevelFmt->entrySize; 986 987 // Only copy if different source and destination memory. 988 if (!memdescDescIsEqual(pSrcDesc, pDstDesc)) 989 { 990 OBJGPU *pGpu = pUserCtx->pGpu; 991 NvU32 sizeOfEntries = (entryIndexHi - entryIndexLo + 1) * 992 pLevelFmt->entrySize; 993 994 NV_PRINTF(LEVEL_INFO, 995 "[GPU%u]: GVAS(%p) PA 0x%llX -> PA 0x%llX, Entries 0x%X-0x%X\n", 996 pGpu->gpuInstance, pUserCtx->pGVAS, 997 memdescGetPhysAddr(pSrcDesc, AT_GPU, 0), 998 memdescGetPhysAddr(pDstDesc, AT_GPU, 0), entryIndexLo, 999 entryIndexHi); 1000 1001 NV_ASSERT_OK(memmgrMemCopy(GPU_GET_MEMORY_MANAGER(pGpu), &dest, &src, 1002 sizeOfEntries, TRANSFER_FLAGS_NONE)); 1003 } 1004 1005 // Report full range complete. 1006 *pProgress = entryIndexHi - entryIndexLo + 1; 1007 } 1008 1009 const MMU_WALK_CALLBACKS g_gmmuWalkCallbacks = 1010 { 1011 _gmmuWalkCBLevelAlloc, 1012 _gmmuWalkCBLevelFree, 1013 _gmmuWalkCBUpdatePdb, 1014 _gmmuWalkCBUpdatePde, 1015 _gmmuWalkCBFillEntries, 1016 _gmmuWalkCBCopyEntries, 1017 NULL, 1018 }; 1019