/*
 * SPDX-FileCopyrightText: Copyright (c) 1993-2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 * SPDX-License-Identifier: MIT
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 */

#include "gpu/mem_mgr/mem_mgr.h"
#include "gpu/mem_mgr/heap.h"
#include "gpu/mem_sys/kern_mem_sys.h"
#include "gpu/mem_mgr/mem_utils.h"
#include "mem_mgr/video_mem.h"
#include "gpu/mem_mgr/fbsr.h"
#include "gpu/mmu/kern_gmmu.h"
#include "gpu/bus/kern_bus.h"
#include "core/locks.h"
#include "virtualization/kernel_vgpu_mgr.h"
#include "vgpu/rpc.h"
#include "core/thread_state.h"
#include "nvRmReg.h"
#include "gpu/fsp/kern_fsp.h"
#include "gpu/mem_mgr/phys_mem_allocator/numa.h"
#include "kernel/gpu/mig_mgr/kernel_mig_manager.h"
#include "kernel/rmapi/rs_utils.h"
#include "rmapi/rmapi_utils.h"
#include "mmu/gmmu_fmt.h"
#include "class/cl0040.h" // NV01_MEMORY_LOCAL_USER
#include "class/cl503c.h"
#include "class/cl906f.h" // GF100_CHANNEL_GPFIFO
#include "os/os.h"

static NV_STATUS _memmgrCreateFBSR(MemoryManager *pMemoryManager, NvU32);
static NV_STATUS _memmgrCreateChildObjects(MemoryManager *pMemoryManager);
static void _memmgrInitRegistryOverrides(OBJGPU *pGpu, MemoryManager *pMemoryManager);
static NV_STATUS _memmgrInitMIGMemoryPartitionHeap(OBJGPU *pGpu, MemoryManager *pMemoryManager,
                                                   NvU32 swizzId, NV_RANGE *pAddrRange,
                                                   Heap **ppMemoryPartitionHeap);
static NV_STATUS _memmgrAllocInternalClientObjects(OBJGPU *pGpu,
                                                   MemoryManager *pMemoryManager);
static void _memmgrFreeInternalClientObjects(MemoryManager *pMemoryManager);

#define MEMUTILS_CHANNEL_GPFIFO_SIZE (NV906F_GP_ENTRY__SIZE * MEMUTILS_NUM_GPFIFIO_ENTRIES)

NV_STATUS
memmgrConstructEngine_IMPL
(
    OBJGPU        *pGpu,
    MemoryManager *pMemoryManager,
    ENGDESCRIPTOR  engDesc
)
{
    NV_STATUS rmStatus;

    pMemoryManager->overrideInitHeapMin = 0;
    pMemoryManager->overrideHeapMax     = ~0ULL;

    // Create the children
    rmStatus = _memmgrCreateChildObjects(pMemoryManager);
    if (rmStatus != NV_OK)
        return rmStatus;

    pMemoryManager->MIGMemoryPartitioningInfo.hClient    = NV01_NULL_OBJECT;
    pMemoryManager->MIGMemoryPartitioningInfo.hDevice    = NV01_NULL_OBJECT;
    pMemoryManager->MIGMemoryPartitioningInfo.hSubdevice = NV01_NULL_OBJECT;
    pMemoryManager->MIGMemoryPartitioningInfo.partitionableMemoryRange = NV_RANGE_EMPTY;

    return NV_OK;
}
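/*!
 * @brief Tear down MemoryManager state: delete the per-type FBSR objects
 *        and the heap, and clear the MIG partitionable memory range.
 */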
void
memmgrDestruct_IMPL
(
    MemoryManager *pMemoryManager
)
{
    NvU32 i;

    for (i = 0; i < NUM_FBSR_TYPES; i++)
    {
        objDelete(pMemoryManager->pFbsr[i]);
        pMemoryManager->pFbsr[i] = NULL;
    }

    objDelete(pMemoryManager->pHeap);
    pMemoryManager->pHeap = NULL;

    pMemoryManager->MIGMemoryPartitioningInfo.partitionableMemoryRange = NV_RANGE_EMPTY;
}
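/*!
 * @brief Apply registry overrides to MemoryManager state: FB size, scrub
 *        on free, sysmem page size, FBSR modes, PMA enablement, and the
 *        PMA address tree selection.
 */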
static void
_memmgrInitRegistryOverrides(OBJGPU *pGpu, MemoryManager *pMemoryManager)
{
    NvU32 data32;

    // Check for ram size override.
    pMemoryManager->Ram.fbOverrideSizeMb = (NvU64)~0;
    if ((osReadRegistryDword(pGpu, NV_REG_STR_OVERRIDE_FB_SIZE, &data32) == NV_OK) &&
        (data32 != 0))
    {
        NV_PRINTF(LEVEL_WARNING, "Regkey %s = %dM\n",
                  NV_REG_STR_OVERRIDE_FB_SIZE, data32);
        // Used to override heap sizing at create
        pMemoryManager->Ram.fbOverrideSizeMb = data32;
    }

    //
    // Scrub on Free is enabled by default for GK110+.
    // This regkey is used to disable scrub on free.
    //
    if ((osReadRegistryDword(pGpu, NV_REG_STR_RM_DISABLE_SCRUB_ON_FREE,
                             &data32) == NV_OK) && data32)
    {
        pMemoryManager->bScrubOnFreeEnabled = NV_FALSE;
    }

    if ((osReadRegistryDword(pGpu, NV_REG_STR_RM_DISABLE_FAST_SCRUBBER,
                             &data32) == NV_OK) && data32)
    {
        pMemoryManager->bFastScrubberEnabled = NV_FALSE;
    }

    if (NV_OK == osReadRegistryDword(pGpu, NV_REG_STR_RM_SYSMEM_PAGE_SIZE, &data32))
    {
        switch (data32)
        {
            case RM_PAGE_SIZE:
            case RM_PAGE_SIZE_64K:
            case RM_PAGE_SIZE_HUGE:
            case RM_PAGE_SIZE_512M:
                break;
            default:
                NV_ASSERT(0);
                NV_PRINTF(LEVEL_ERROR,
                          "Sysmem page size 0x%x not supported! Defaulting to 4KB\n",
                          data32);
                data32 = RM_PAGE_SIZE;
        }
        pMemoryManager->sysmemPageSize = data32;
    }
    else
    {
        pMemoryManager->sysmemPageSize = RM_PAGE_SIZE;
    }

    if (osReadRegistryDword(pGpu, NV_REG_STR_RM_ALLOW_SYSMEM_LARGE_PAGES, &data32) == NV_OK)
    {
        pMemoryManager->bAllowSysmemHugePages = data32 ? NV_TRUE : NV_FALSE;
    }
    else
    {
        pMemoryManager->bAllowSysmemHugePages = NV_FALSE;
    }

    // This key should not be used on physical (GSP) RM.
    if (!RMCFG_FEATURE_PLATFORM_GSP)
    {
        // Allow the user to increase the size of the RM reserved heap via a regkey.
        if (osReadRegistryDword(pGpu, NV_REG_STR_RM_INCREASE_RSVD_MEMORY_SIZE_MB,
                                &data32) == NV_OK)
        {
            pMemoryManager->rsvdMemorySizeIncrement = (NvU64)data32 << 20;
            NV_PRINTF(LEVEL_ERROR,
                      "User specified increase in reserved size = %d MBs\n",
                      data32);
        }
    }

    if (osReadRegistryDword(pGpu,
                            NV_REG_STR_RM_DISABLE_NONCONTIGUOUS_ALLOCATION,
                            &data32) == NV_OK)
    {
        if (data32 == NV_REG_STR_RM_DISABLE_NONCONTIGUOUS_ALLOCATION_TRUE)
        {
            pMemoryManager->bAllowNoncontiguousAllocation = NV_FALSE;
        }
    }

    if (osReadRegistryDword(pGpu, NV_REG_STR_RM_FBSR_PAGED_DMA, &data32) == NV_OK)
    {
        pMemoryManager->bEnableFbsrPagedDma = !!data32;
    }

    if (osReadRegistryDword(pGpu, NV_REG_STR_RM_FBSR_FILE_MODE, &data32) == NV_OK)
    {
        if (data32 && RMCFG_FEATURE_PLATFORM_UNIX)
        {
            pMemoryManager->bEnableFbsrFileMode = NV_TRUE;
        }
    }

    //
    // Override PMA enable. PDB_PROP_FB_PMA_ENABLED is reconciled with
    // PDB_PROP_FB_PLATFORM_PMA_SUPPORT to decide whether to enable PMA.
    //
    if (osReadRegistryDword(pGpu, NV_REG_STR_RM_ENABLE_PMA, &data32) == NV_OK)
    {
        if (data32 == NV_REG_STR_RM_ENABLE_PMA_YES)
        {
            pMemoryManager->bPmaEnabled = NV_TRUE;
        }
        else
        {
            pMemoryManager->bPmaEnabled = NV_FALSE;
        }
    }

    if (RMCFG_FEATURE_PLATFORM_WINDOWS_LDDM && !pGpu->getProperty(pGpu, PDB_PROP_GPU_IN_TCC_MODE))
    {
        pMemoryManager->bFbsrWddmModeEnabled = NV_TRUE;
    }

    if (osReadRegistryDword(pGpu, NV_REG_STR_RM_FBSR_WDDM_MODE, &data32) == NV_OK)
    {
        pMemoryManager->bFbsrWddmModeEnabled = !!data32;
    }

    //
    // Override PMA-managed client page tables.
    // NOTE: This is a WAR for bugs 1946145 and 1971628.
    // This should be removed as part of heap removal and the PMA refactor.
    //
    if (osReadRegistryDword(pGpu, NV_REG_STR_RM_ENABLE_PMA_MANAGED_PTABLES,
                            &data32) == NV_OK)
    {
        if (data32 == NV_REG_STR_RM_ENABLE_PMA_MANAGED_PTABLES_NO)
        {
            memmgrSetClientPageTablesPmaManaged(pMemoryManager, NV_FALSE);
        }
    }

    if (osReadRegistryDword(pGpu, NV_REG_STR_RM_ENABLE_ADDRTREE, &data32) == NV_OK)
    {
        if (data32 == NV_REG_STR_RM_ENABLE_ADDRTREE_YES)
        {
            pMemoryManager->bPmaAddrTree = NV_TRUE;
            NV_PRINTF(LEVEL_ERROR, "Enabled address tree for PMA via regkey.\n");
        }
    }
    else if (RMCFG_FEATURE_PLATFORM_MODS)
    {
        pMemoryManager->bPmaAddrTree = NV_TRUE;
        NV_PRINTF(LEVEL_ERROR, "Enabled address tree for PMA for MODS.\n");
    }
}

NV_STATUS
memmgrStatePreInitLocked_IMPL
(
    OBJGPU        *pGpu,
    MemoryManager *pMemoryManager
)
{
    if (IS_GSP_CLIENT(pGpu))
    {
        //
        // Temporary hack to get OpenRM working without breaking SLI.
        // After fixing CORERM-4078, the memmgrInitFbRegions() call should be
        // removed from memsysStateInitLocked() and only left here.
        //
        NV_ASSERT_OK_OR_RETURN(memmgrInitFbRegions(pGpu, pMemoryManager));
    }

    // Determine the size of reserved memory
    NV_ASSERT_OK_OR_RETURN(memmgrPreInitReservedMemory_HAL(pGpu, pMemoryManager));

    return NV_OK;
}
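/*!
 * @brief Locked state-init: set up reserved memory, apply registry
 *        overrides, create the heap and the page level pools, initialize
 *        the supported FBSR schemes, and allocate the internal RM client.
 */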
NV_STATUS
memmgrStateInitLocked_IMPL
(
    OBJGPU        *pGpu,
    MemoryManager *pMemoryManager
)
{
    NV_STATUS status = NV_OK;
    NvU32     i;
    NvBool    bDynamicPageOffliningDisable = NV_FALSE;

    NV_ASSERT_OK_OR_RETURN(memmgrInitReservedMemory_HAL(pGpu, pMemoryManager, pMemoryManager->Ram.fbAddrSpaceSizeMb << 20));

    _memmgrInitRegistryOverrides(pGpu, pMemoryManager);

    //
    // Enable dynamic page blacklisting at this point, before we call CreateHeap,
    // since CreateHeap internally calls heapGetBlacklistPages, which depends on
    // this property.
    //
    if (!bDynamicPageOffliningDisable)
        memmgrEnableDynamicPageOfflining_HAL(pGpu, pMemoryManager);

    memmgrScrubRegistryOverrides_HAL(pGpu, pMemoryManager);
    memmgrScrubInit_HAL(pGpu, pMemoryManager);

    //
    // Allocate framebuffer heap. All memory must be allocated from here to keep the world
    // consistent (N.B. the heap size has been reduced by the amount of instance memory).
    //
    status = memmgrCreateHeap(pMemoryManager);
    if (status != NV_OK)
    {
        return status;
    }

    //
    // Just set up the memory pool now (basic init stuff). Actual physical
    // frames are *NOT* added to the pool at this stage.
    //
    status = memmgrPageLevelPoolsCreate(pGpu, pMemoryManager);
    if (status != NV_OK)
    {
        return status;
    }

    // RMCONFIG: only if FBSR engine is enabled
    if (RMCFG_MODULE_FBSR)
    {
        //
        // If a configuration is not supported, do not initialize
        // the corresponding fbsr engine.
        //
        if (pMemoryManager->bFbsrWddmModeEnabled)
        {
            pMemoryManager->fbsrStartMode = FBSR_TYPE_WDDM_FAST_DMA_DEFERRED_NONPAGED;
        }
        else if (pMemoryManager->bEnableFbsrPagedDma)
        {
            pMemoryManager->fbsrStartMode = FBSR_TYPE_PAGED_DMA;
        }
        else if (pMemoryManager->bEnableFbsrFileMode)
        {
            pMemoryManager->fbsrStartMode = FBSR_TYPE_FILE;
        }
        else
        {
            pMemoryManager->fbsrStartMode = FBSR_TYPE_PERSISTENT;
        }

        for (i = pMemoryManager->fbsrStartMode; i < NUM_FBSR_TYPES; i++)
        {
            if (!pMemoryManager->bPersistentStandbyBuffer &&
                (i == FBSR_TYPE_PERSISTENT))
            {
                continue;
            }

            if (pGpu->getProperty(pGpu, PDB_PROP_GPU_BROKEN_FB) &&
                (i == FBSR_TYPE_PAGED_DMA || i == FBSR_TYPE_DMA))
            {
                continue;
            }

            status = fbsrInit_HAL(pGpu, pMemoryManager->pFbsr[i]);

            //
            // If one fbsr scheme failed, proceed to initializing the other
            // fallback options.
            //
            if (status != NV_OK)
            {
                NV_PRINTF(LEVEL_WARNING,
                          "fbsrInit failed for supported type %d suspend-resume scheme\n",
                          i);
                continue;
            }
        }
    }

    status = _memmgrAllocInternalClientObjects(pGpu, pMemoryManager);
    if (status != NV_OK)
    {
        //
        // TODO: Bug 3482892: Need a way to roll back StateInit
        // steps in case of a failure.
        // WAR for now is to clean up with memmgrStateDestroy().
        //
        memmgrStateDestroy(pGpu, pMemoryManager);
        return status;
    }

    return NV_OK;
}

NV_STATUS
memmgrStateLoad_IMPL
(
    OBJGPU        *pGpu,
    MemoryManager *pMemoryManager,
    NvU32          flags
)
{
    // If fbOverrideSizeMb is set, finish setting up the FB parameters now that state init has finished
    memmgrFinishHandleSizeOverrides_HAL(pGpu, pMemoryManager);

    if ((flags & GPU_STATE_FLAGS_PRESERVING) &&
        !(flags & GPU_STATE_FLAGS_GC6_TRANSITION))
    {
        //
        // Only do initialization scrubs (i.e. RM reserved region) on
        // non-GC6 transitions since GC6 cycles leave FB powered.
        //
        memmgrScrubInit_HAL(pGpu, pMemoryManager);
    }

    // Dump FB regions
    memmgrDumpFbRegions(pGpu, pMemoryManager);

    return NV_OK;
}

NV_STATUS
memmgrStatePreUnload_IMPL
(
    OBJGPU        *pGpu,
    MemoryManager *pMemoryManager,
    NvU32          flags
)
{
    NV_ASSERT((flags & GPU_STATE_FLAGS_PRESERVING) || pMemoryManager->zbcSurfaces == 0);

    return NV_OK;
}
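/*!
 * @brief State unload: destroy the scrubber on non-GC6 transitions,
 *        mirroring the initialization scrub done in memmgrStateLoad().
 */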
NV_STATUS
memmgrStateUnload_IMPL
(
    OBJGPU        *pGpu,
    MemoryManager *pMemoryManager,
    NvU32          flags
)
{
    if ((flags & GPU_STATE_FLAGS_PRESERVING) &&
        !(flags & GPU_STATE_FLAGS_GC6_TRANSITION))
    {
        //
        // Initialization scrubs only happen during StateLoad on non-GC6
        // transitions.
        //
        memmgrScrubDestroy_HAL(pGpu, pMemoryManager);
    }

    return NV_OK;
}

void
memmgrStateDestroy_IMPL
(
    OBJGPU        *pGpu,
    MemoryManager *pMemoryManager
)
{
    KernelMemorySystem *pKernelMemorySystem = GPU_GET_KERNEL_MEMORY_SYSTEM(pGpu);
    Heap               *pHeap               = MEMORY_MANAGER_GET_HEAP(pMemoryManager);
    NvU32               i;

    _memmgrFreeInternalClientObjects(pMemoryManager);

    // Destroy the SW state of the page level pools
    memmgrPageLevelPoolsDestroy(pGpu, pMemoryManager);

    // Destroy the heap entirely, and all associated structures
    if (pHeap)
    {
        kmemsysPreHeapDestruct_HAL(pGpu, pKernelMemorySystem);

        objDelete(pHeap);
        pMemoryManager->pHeap = NULL;
    }

    // RMCONFIG: only if FBSR engine is enabled
    if (RMCFG_MODULE_FBSR)
    {
        // Clean up fbsrReservedRanges
        if (pMemoryManager->fbsrReservedRanges[FBSR_RESERVED_INST_MEMORY_BEFORE_BAR2PTE] != NULL)
            memdescDestroy(pMemoryManager->fbsrReservedRanges[FBSR_RESERVED_INST_MEMORY_BEFORE_BAR2PTE]);

        if (pMemoryManager->fbsrReservedRanges[FBSR_RESERVED_INST_MEMORY_AFTER_BAR2PTE] != NULL)
            memdescDestroy(pMemoryManager->fbsrReservedRanges[FBSR_RESERVED_INST_MEMORY_AFTER_BAR2PTE]);

        if (pMemoryManager->fbsrReservedRanges[FBSR_RESERVED_INST_MEMORY_GSP_HEAP] != NULL)
            memdescDestroy(pMemoryManager->fbsrReservedRanges[FBSR_RESERVED_INST_MEMORY_GSP_HEAP]);

        if (pMemoryManager->fbsrReservedRanges[FBSR_RESERVED_INST_MEMORY_GSP_NON_WPR] != NULL)
            memdescDestroy(pMemoryManager->fbsrReservedRanges[FBSR_RESERVED_INST_MEMORY_GSP_NON_WPR]);

        if (pMemoryManager->fbsrReservedRanges[FBSR_RESERVED_INST_MEMORY_GSP_WPR] != NULL)
            memdescDestroy(pMemoryManager->fbsrReservedRanges[FBSR_RESERVED_INST_MEMORY_GSP_WPR]);

        if (pMemoryManager->fbsrReservedRanges[FBSR_RESERVED_INST_MEMORY_VGA_WORKSPACE] != NULL)
            memdescDestroy(pMemoryManager->fbsrReservedRanges[FBSR_RESERVED_INST_MEMORY_VGA_WORKSPACE]);

        pMemoryManager->fbsrReservedRanges[FBSR_RESERVED_INST_MEMORY_BEFORE_BAR2PTE] = NULL;
        pMemoryManager->fbsrReservedRanges[FBSR_RESERVED_INST_MEMORY_AFTER_BAR2PTE]  = NULL;
        pMemoryManager->fbsrReservedRanges[FBSR_RESERVED_INST_MEMORY_GSP_HEAP]       = NULL;
        pMemoryManager->fbsrReservedRanges[FBSR_RESERVED_INST_MEMORY_GSP_NON_WPR]    = NULL;
        pMemoryManager->fbsrReservedRanges[FBSR_RESERVED_INST_MEMORY_GSP_WPR]        = NULL;
        pMemoryManager->fbsrReservedRanges[FBSR_RESERVED_INST_MEMORY_VGA_WORKSPACE]  = NULL;

        for (i = 0; i < NUM_FBSR_TYPES; i++)
        {
            fbsrDestroy_HAL(pGpu, pMemoryManager->pFbsr[i]);
        }
    }

    memmgrScrubDestroy_HAL(pGpu, pMemoryManager);
}

static NV_STATUS
_memmgrCreateChildObjects
(
    MemoryManager *pMemoryManager
)
{
    NV_STATUS status = NV_OK;

    // RMCONFIG: only if FBSR engine is enabled
    if (RMCFG_MODULE_FBSR)
    {
        NvU32 i;

        // Create an FBSR object for every type RM supports.
        for (i = 0; i < NUM_FBSR_TYPES; i++)
        {
            status = _memmgrCreateFBSR(pMemoryManager, i);
            if (status != NV_OK)
            {
                return status;
            }
        }
    }

    return status;
}
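/*!
 * @brief Create and initialize the FB heap: account for reserved memory
 *        and size overrides, initialize PMA (when enabled) and register
 *        its regions, reserve vidmem for FSP, and blacklist bad pages.
 */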
NV_STATUS
memmgrCreateHeap_IMPL
(
    MemoryManager *pMemoryManager
)
{
    Heap               *newHeap;
    OBJGPU             *pGpu                = ENG_GET_GPU(pMemoryManager);
    KernelMemorySystem *pKernelMemorySystem = GPU_GET_KERNEL_MEMORY_SYSTEM(pGpu);
    NvU64               rsvdSize;
    NvU64               size;
    NV_STATUS           status              = NV_OK;
    const MEMORY_SYSTEM_STATIC_CONFIG *pMemorySystemConfig =
        kmemsysGetStaticConfig(pGpu, GPU_GET_KERNEL_MEMORY_SYSTEM(pGpu));

    // If we're using FB regions then rsvd memory is already marked as a reserved region
    if ((pMemoryManager->Ram.numFBRegions == 0) || (IS_VIRTUAL_WITH_SRIOV(pGpu)))
    {
        if (pMemorySystemConfig->bReservedMemAtBottom)
        {
            // rsvd memory is already accounted for in heapStart
            rsvdSize = 0;
        }
        else
        {
            rsvdSize = pMemoryManager->rsvdMemorySize;
        }
    }
    else
        rsvdSize = 0;

    // for vGPU, add extra FB tax incurred by host RM to reserved size
    rsvdSize += memmgrGetFbTaxSize_HAL(pGpu, pMemoryManager);

    //
    // Fix up region descriptions to match with any FB override size
    //
    memmgrHandleSizeOverrides_HAL(pGpu, pMemoryManager);

    //
    // Calculate the FB heap size as the address space size, then deduct any reserved memory
    //
    size = pMemoryManager->Ram.fbAddrSpaceSizeMb << 20;
    size -= NV_MIN(size, rsvdSize);

    if ((size != 0) || (pMemoryManager->bScanoutSysmem))
    {
        status = objCreate(&newHeap, pMemoryManager, Heap);
        if (status != NV_OK)
        {
            return status;
        }

        pMemoryManager->pHeap = newHeap;

        if (memmgrIsPmaEnabled(pMemoryManager) &&
            memmgrIsPmaSupportedOnPlatform(pMemoryManager))
        {
            portMemSet(&pMemoryManager->pHeap->pmaObject, 0, sizeof(pMemoryManager->pHeap->pmaObject));
            status = memmgrPmaInitialize(pGpu, pMemoryManager, &pMemoryManager->pHeap->pmaObject);
            NV_ASSERT_OR_RETURN(status == NV_OK, status);
        }

        status = heapInit(pGpu, newHeap,
                          pMemoryManager->heapStartOffset,
                          size - pMemoryManager->heapStartOffset, HEAP_TYPE_RM_GLOBAL, GPU_GFID_PF, NULL);
        NV_ASSERT_OR_RETURN(NV_OK == status, status);

        if ((memmgrIsPmaInitialized(pMemoryManager)) && (pMemoryManager->pHeap->bHasFbRegions))
        {
            status = memmgrPmaRegisterRegions(pGpu, pMemoryManager, pMemoryManager->pHeap,
                                              &pMemoryManager->pHeap->pmaObject);
            NV_ASSERT_OR_RETURN(status == NV_OK, status);
        }

        // Reserve vidmem for FSP usage, including FRTS, WPR2
        status = memmgrReserveMemoryForFsp(pGpu, pMemoryManager);
        if (status != NV_OK)
        {
            NV_PRINTF(LEVEL_ERROR, "Failed to reserve vidmem for WPR and FRTS.\n");
            return status;
        }

        if (!IsSLIEnabled(pGpu))
        {
            // Do the actual blacklisting of pages from the heap
            if (newHeap->blackListAddresses.count != 0)
            {
                status = heapBlackListPages(pGpu, newHeap);

                if (status != NV_OK)
                {
                    // Warn and continue
                    NV_PRINTF(LEVEL_WARNING, "Error 0x%x creating blacklist\n",
                              status);
                }
            }
        }

        kmemsysPostHeapCreate_HAL(pGpu, pKernelMemorySystem);
    }

    return status;
}
/*!
 * @brief Gets the per-device suballocator. If it is not available, gets the shared heap.
 *
 * @param[in] pMemoryManager MemoryManager pointer
 */
Heap *
memmgrGetDeviceSuballocator_IMPL
(
    MemoryManager *pMemoryManager,
    NvBool         bForceSubheap
)
{
    if (!bForceSubheap)
    {
        // If no suballocator is found, use the heap
        return MEMORY_MANAGER_GET_HEAP(pMemoryManager);
    }

    return NULL;
}

static NV_STATUS
_memmgrCreateFBSR
(
    MemoryManager *pMemoryManager,
    NvU32          type
)
{
    OBJFBSR   *pFbsr;
    NV_STATUS  status;

    status = objCreate(&pFbsr, pMemoryManager, OBJFBSR);
    if (status != NV_OK)
    {
        return status;
    }

    NV_ASSERT(pFbsr);
    pMemoryManager->pFbsr[type] = pFbsr;

    fbsrObjectInit(pFbsr, type);

    return NV_OK;
}

static void
_memmgrFreeInternalClientObjects
(
    MemoryManager *pMemoryManager
)
{
    RM_API *pRmApi = rmapiGetInterface(RMAPI_GPU_LOCK_INTERNAL);

    if (pMemoryManager->hThirdPartyP2P != 0)
    {
        pRmApi->Free(pRmApi, pMemoryManager->hClient,
                     pMemoryManager->hThirdPartyP2P);
        pMemoryManager->hThirdPartyP2P = 0;
    }

    if (pMemoryManager->hClient != 0)
    {
        rmapiutilFreeClientAndDeviceHandles(pRmApi,
                                            &pMemoryManager->hClient,
                                            &pMemoryManager->hDevice,
                                            &pMemoryManager->hSubdevice);
    }
}

static NV_STATUS
_memmgrAllocInternalClientObjects
(
    OBJGPU        *pGpu,
    MemoryManager *pMemoryManager
)
{
    NV_STATUS status;
    RM_API   *pRmApi = rmapiGetInterface(RMAPI_GPU_LOCK_INTERNAL);

    status = rmapiutilAllocClientAndDeviceHandles(pRmApi, pGpu,
                                                  &pMemoryManager->hClient,
                                                  &pMemoryManager->hDevice,
                                                  &pMemoryManager->hSubdevice);
    if (status != NV_OK)
    {
        goto failed;
    }

    {
        NV503C_ALLOC_PARAMETERS params;
        NvHandle hThirdPartyP2P = 0;

        NV_ASSERT_OK_OR_GOTO(status, serverutilGenResourceHandle(pMemoryManager->hClient,
                                                                 &hThirdPartyP2P),
                             failed);

        portMemSet(&params, 0, sizeof(params));
        params.flags = NV503C_ALLOC_PARAMETERS_FLAGS_TYPE_BAR1;
        status = pRmApi->AllocWithHandle(pRmApi,
                                         pMemoryManager->hClient,
                                         pMemoryManager->hSubdevice,
                                         hThirdPartyP2P,
                                         NV50_THIRD_PARTY_P2P, &params);
        if (status != NV_OK)
        {
            NV_PRINTF(LEVEL_WARNING, "Error creating internal ThirdPartyP2P object: %x\n",
                      status);
            pMemoryManager->hThirdPartyP2P = 0;
        }
        else
        {
            pMemoryManager->hThirdPartyP2P = hThirdPartyP2P;
        }
    }

    return NV_OK;

failed:
    _memmgrFreeInternalClientObjects(pMemoryManager);

    return status;
}

/*!
 * @brief Determine size of FB RAM which is used for RM internal allocations
 *        and PMA.
 *
 * @param[out] pFbUsedSize  FB used memory size
 *
 * @returns NV_OK
 */
NV_STATUS
memmgrGetUsedRamSize_IMPL
(
    OBJGPU        *pGpu,
    MemoryManager *pMemoryManager,
    NvU64         *pFbUsedSize
)
{
    Heap  *pHeap = GPU_GET_HEAP(pGpu);
    NvU64  heapFreeSpace, heapTotalSpace, pmaFreeSpace;

    //
    // Determine free memory in FB and subtract it from the total FB memory.
    // If PMA is initialized, use the free memory size of both PMA and the
    // heap; otherwise, use only the heap's free memory for the calculation.
    //
    heapGetFree(pHeap, &heapFreeSpace);
    heapGetSize(pHeap, &heapTotalSpace);
    if (memmgrIsPmaInitialized(pMemoryManager))
    {
        pmaGetFreeMemory(&pHeap->pmaObject, &pmaFreeSpace);
        *pFbUsedSize = heapTotalSpace - heapFreeSpace - pmaFreeSpace;
    }
    else
    {
        *pFbUsedSize = heapTotalSpace - heapFreeSpace;
    }

    return NV_OK;
}
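/*!
 * @brief Allocate HW memory resources for an allocation on every GPU in
 *        the (broadcast) SLI device, returning the first intermediate
 *        error encountered.
 */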
NV_STATUS
memmgrAllocHwResources_IMPL
(
    OBJGPU        *pGpu,
    MemoryManager *pMemoryManager,
    FB_ALLOC_INFO *pFbAllocInfo
)
{
    MemoryManager *pMemoryManagerLoop;
    FB_ALLOC_INFO *pTempInfo = NULL;
    NvU32          skipFlag  = (pFbAllocInfo->pageFormat->flags & NVOS32_ALLOC_FLAGS_SKIP_RESOURCE_ALLOC);
    NV_STATUS      rmStatus  = NV_OK;

    pTempInfo = portMemAllocNonPaged(sizeof(FB_ALLOC_INFO));
    if (pTempInfo == NULL)
    {
        NV_ASSERT(0);
        return NV_ERR_NO_MEMORY;
    }

    SLI_LOOP_START(SLI_LOOP_FLAGS_BC_ONLY | SLI_LOOP_FLAGS_IGNORE_REENTRANCY)
    {
        NV_STATUS tempStatus;
        *pTempInfo = *pFbAllocInfo;    // struct copy

        pMemoryManagerLoop = GPU_GET_MEMORY_MANAGER(pGpu);

        tempStatus = memmgrAllocHal_HAL(pGpu, pMemoryManagerLoop, pTempInfo);
        // be sure to return an intermediate error
        if (NV_OK == rmStatus)
            rmStatus = tempStatus;
    }
    SLI_LOOP_END

    *pFbAllocInfo = *pTempInfo;    // struct copy
    portMemFree(pTempInfo);

    pFbAllocInfo->pageFormat->flags &= ~NVOS32_ALLOC_FLAGS_SKIP_RESOURCE_ALLOC;
    pFbAllocInfo->pageFormat->flags |= skipFlag;

    return rmStatus;
}

NV_STATUS
memmgrFreeHwResources_IMPL
(
    OBJGPU        *pGpu,
    MemoryManager *pMemoryManager,
    FB_ALLOC_INFO *pFbAllocInfo
)
{
    MemoryManager *pMemoryManagerLoop;
    NV_STATUS      rmStatus  = NV_OK;
    RMTIMEOUT      timeout;
    FB_ALLOC_INFO *pTempInfo = NULL;

    pTempInfo = portMemAllocNonPaged(sizeof(FB_ALLOC_INFO));
    if (pTempInfo == NULL)
    {
        NV_ASSERT(0);
        return NV_ERR_NO_MEMORY;
    }

    gpuSetTimeout(pGpu, GPU_TIMEOUT_DEFAULT, &timeout, 0);

    SLI_LOOP_START(SLI_LOOP_FLAGS_BC_ONLY | SLI_LOOP_FLAGS_IGNORE_REENTRANCY)
    {
        NV_STATUS tempStatus;
        pMemoryManagerLoop = GPU_GET_MEMORY_MANAGER(pGpu);

        *pTempInfo = *pFbAllocInfo;

        tempStatus = memmgrFreeHal_HAL(pGpu, pMemoryManagerLoop, pTempInfo, &timeout);
        // be sure to return an intermediate error
        if (NV_OK == rmStatus)
            rmStatus = tempStatus;
    }
    SLI_LOOP_END

    *pFbAllocInfo = *pTempInfo;
    portMemFree(pTempInfo);

    return rmStatus;
}

NvBool
memmgrLargePageSupported_IMPL
(
    MemoryManager    *pMemoryManager,
    NV_ADDRESS_SPACE  addrSpace
)
{
    NvBool isSupported = NV_FALSE;

    if (addrSpace == ADDR_FBMEM || addrSpace == ADDR_VIRTUAL)
    {
        isSupported = NV_TRUE;
    }
    else if (addrSpace == ADDR_SYSMEM)
    {
        isSupported = (pMemoryManager->sysmemPageSize != RM_PAGE_SIZE);
    }
    else
    {
        NV_ASSERT(0);
    }

    return isSupported;
}
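/*!
 * @brief Report whether compression is supported for the given address
 *        space. Requires a GMMU and either large page support or small
 *        page compression.
 */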
NvBool
memmgrComprSupported_IMPL
(
    MemoryManager    *pMemoryManager,
    NV_ADDRESS_SPACE  addrSpace
)
{
    OBJGPU *pGpu        = ENG_GET_GPU(pMemoryManager);
    NvBool  isSupported = NV_FALSE;

    if (GPU_GET_KERNEL_GMMU(pGpu) != NULL)
    {
        if (memmgrLargePageSupported(pMemoryManager, addrSpace) ||
            pMemoryManager->bSmallPageCompression)
        {
            if (addrSpace == ADDR_FBMEM || addrSpace == ADDR_VIRTUAL)
            {
                isSupported = NV_TRUE;
            }
            else if (addrSpace == ADDR_SYSMEM)
            {
                // Compression is allowed on vidmem or unified aperture (vidmem/sysmem is the same w.r.t. HW)
                isSupported = (gpuIsUnifiedMemorySpaceEnabled(pGpu) &&
                               pMemoryManager->bSysmemCompressionSupportDef);
                NV_PRINTF(LEVEL_ERROR, "isSupported=%s\n",
                          isSupported ? "NV_TRUE" : "NV_FALSE");
            }
            else
            {
                NV_ASSERT(0);
            }
        }
    }

    return isSupported;
}

NV_ADDRESS_SPACE
memmgrAllocGetAddrSpace_IMPL
(
    MemoryManager *pMemoryManager,
    NvU32          flags,
    NvU32          attr
)
{
    NV_ADDRESS_SPACE addrSpace = ADDR_UNKNOWN;

    if (flags & NVOS32_ALLOC_FLAGS_VIRTUAL)
    {
        addrSpace = ADDR_VIRTUAL;
    }
    else if (FLD_TEST_DRF(OS32, _ATTR, _LOCATION, _VIDMEM, attr))
    {
        addrSpace = ADDR_FBMEM;
    }
    else
    {
        // In case the location is SYSMEM or ANY, allocate in vidmem if the protected flag is set.
        if (flags & NVOS32_ALLOC_FLAGS_PROTECTED)
        {
            addrSpace = ADDR_FBMEM;
        }
        else
        {
            addrSpace = ADDR_SYSMEM;
        }
    }

    return addrSpace;
}

NvU32
memmgrGetMappableRamSizeMb_IMPL(MemoryManager *pMemoryManager)
{
    return NvU64_LO32(pMemoryManager->Ram.mapRamSizeMb);
}

//
// ZBC clear create/destroy routines.
//

NV_STATUS
memmgrFillMemdescForPhysAttr_IMPL
(
    OBJGPU              *pGpu,
    MemoryManager       *pMemoryManager,
    PMEMORY_DESCRIPTOR   pMemDesc,
    ADDRESS_TRANSLATION  addressTranslation,
    NvU64               *pOffset,
    NvU32               *pMemAperture,
    NvU32               *pMemKind,
    NvU32               *pZCullId,
    NvU32               *pGpuCacheAttr,
    NvU32               *pGpuP2PCacheAttr,
    NvU64               *contigSegmentSize
)
{
    NvU64 surfOffset = *pOffset, surfBase, surfLimit;
    NvU32 zcbitmap;

    surfBase  = memdescGetPhysAddr(pMemDesc, addressTranslation, 0);
    surfLimit = surfBase + pMemDesc->Size - 1;
    *pMemKind = memdescGetPteKind(pMemDesc);

    *pOffset = memdescGetPhysAddr(pMemDesc, addressTranslation, surfOffset);

    if (memdescGetAddressSpace(pMemDesc) == ADDR_FBMEM)
        *pMemAperture = NV0041_CTRL_CMD_GET_SURFACE_PHYS_ATTR_APERTURE_VIDMEM;
    else if (memdescGetAddressSpace(pMemDesc) == ADDR_SYSMEM)
        *pMemAperture = NV0041_CTRL_CMD_GET_SURFACE_PHYS_ATTR_APERTURE_SYSMEM;
    else if (memdescGetAddressSpace(pMemDesc) == ADDR_VIRTUAL)
    {
        //
        // XXX we could theoretically find whatever phys mem object is plugged
        // in at surfOffset w/in the virt object... that'd mean scanning
        // pMemory->DmaMappingList
        //
        return NV_ERR_NOT_SUPPORTED;
    }
    else
        return NV_ERR_GENERIC;

    if (memdescGetGpuCacheAttrib(pMemDesc) == NV_MEMORY_CACHED)
    {
        *pGpuCacheAttr = NV0041_CTRL_GET_SURFACE_PHYS_ATTR_GPU_CACHED;
    }
    else if (memdescGetGpuCacheAttrib(pMemDesc) == NV_MEMORY_UNCACHED)
    {
        *pGpuCacheAttr = NV0041_CTRL_GET_SURFACE_PHYS_ATTR_GPU_UNCACHED;
    }
    else
    {
        *pGpuCacheAttr = NV0041_CTRL_GET_SURFACE_PHYS_ATTR_GPU_CACHED_UNKNOWN;
    }

    if (memdescGetGpuP2PCacheAttrib(pMemDesc) == NV_MEMORY_CACHED)
    {
        *pGpuP2PCacheAttr = NV0041_CTRL_GET_SURFACE_PHYS_ATTR_GPU_CACHED;
    }
    else if (memdescGetGpuP2PCacheAttrib(pMemDesc) == NV_MEMORY_UNCACHED)
    {
        *pGpuP2PCacheAttr = NV0041_CTRL_GET_SURFACE_PHYS_ATTR_GPU_UNCACHED;
    }
    else
    {
        *pGpuP2PCacheAttr = NV0041_CTRL_GET_SURFACE_PHYS_ATTR_GPU_CACHED_UNKNOWN;
    }

    zcbitmap = FB_HWRESID_ZCULL_VAL_FERMI(memdescGetHwResId(pMemDesc)); // bitmap form... need a scalar
    for ( *pZCullId = 0; zcbitmap; zcbitmap >>= 1, *pZCullId += 1) {;;;}
    *pZCullId -= 1; // side effect: if there is no zcull id, this sets ~0

    *contigSegmentSize = surfLimit - (surfBase + surfOffset) + 1;

    if (!memdescGetContiguity(pMemDesc, addressTranslation))
    {
        // XXX overly conservative. we could scan the PTEs to find out if more pages are contig.
        NvU64 surfOffsetLimitSame4KBPage = (4*1024)*((surfBase + surfOffset)/(4*1024)) + (4*1024) - 1;
        if (surfLimit >= surfOffsetLimitSame4KBPage)
            *contigSegmentSize = surfOffsetLimitSame4KBPage - (surfBase + surfOffset) + 1;
    }

    return NV_OK;
}
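/*!
 * @brief Select the physical page size for an allocation from the
 *        requested attributes and address space, and update the NVOS32
 *        attr fields to reflect the chosen size.
 */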
NvU64
memmgrDeterminePageSize_IMPL
(
    MemoryManager *pMemoryManager,
    NvHandle       hClient,
    NvU64          memSize,
    NvU32          memFormat,
    NvU32          pageFormatFlags,
    NvU32         *pRetAttr,
    NvU32         *pRetAttr2
)
{
    OBJGPU            *pGpu        = ENG_GET_GPU(pMemoryManager);
    KernelGmmu        *pKernelGmmu = GPU_GET_KERNEL_GMMU(pGpu);
    NV_ADDRESS_SPACE   addrSpace;
    NvBool             bIsBigPageSupported;
    RM_ATTR_PAGE_SIZE  pageSizeAttr;
    NvU64              pageSize = 0;

    if (pGpu->getProperty(pGpu, PDB_PROP_GPU_TEGRA_SOC_NVDISPLAY) || (pKernelGmmu == NULL))
    {
        pageSize = RM_PAGE_SIZE;
    }
    // Sanity check the arguments.
    else if (pRetAttr == NULL || pRetAttr2 == NULL)
    {
        NV_ASSERT_OR_RETURN(0, 0);
    }
    else
    {
        addrSpace = memmgrAllocGetAddrSpace(pMemoryManager, pageFormatFlags, *pRetAttr);

        bIsBigPageSupported = memmgrLargePageSupported(pMemoryManager, addrSpace);
        pageSizeAttr = dmaNvos32ToPageSizeAttr(*pRetAttr, *pRetAttr2);

        //
        // Precedence in page size selection
        //   1. CACHE_ONLY mode                                  -> SMALL
        //   2. !BigPageSupport (Sysmem && GpuSmmuOff)            -> SMALL
        //   3. Client page size override                         -> Use override
        //   4. HugePageSupported && size >= HugePageSize         -> HUGE
        //   5. Block-linear || size >= minSizeForBigPage ||
        //      hClient || GpuSmmuOn                              -> BIG
        //   6. none of the above                                 -> SMALL
        //
        // On Tegra, we don't have a carveout/FB in production. So, we're
        // not guaranteed to get BIG page sized or contiguous allocations
        // from the OS. But we need BIG page sized allocations for efficient
        // Big GPU operation. We use the SMMU unit within the Tegra Memory
        // Controller (MC) to construct BIG pages from the 4KB small page
        // allocations from the OS. SMMU will linearize the discontiguous 4KB
        // allocations into what will appear to the GPU as a large contiguous
        // physical allocation.
        //
        // RM will eventually decide whether a SYSMEM allocation needs a BIG
        // page via GPU SMMU mapping. Right now, we give an option for RM
        // clients to force it, via the SMMU_ON_GPU attribute.
        //
        if (gpuIsCacheOnlyModeEnabled(pGpu))
        {
            pageSize = RM_PAGE_SIZE;
        }
        else if (!bIsBigPageSupported)
        {
            if (RM_ATTR_PAGE_SIZE_BIG == pageSizeAttr ||
                RM_ATTR_PAGE_SIZE_HUGE == pageSizeAttr ||
                RM_ATTR_PAGE_SIZE_512MB == pageSizeAttr)
            {
                NV_PRINTF(LEVEL_ERROR,
                          "Big/Huge/512MB page size not supported in sysmem.\n");
                NV_ASSERT_OR_RETURN(0, 0);
            }
            else
            {
                pageSize = RM_PAGE_SIZE;
            }
        }
        else
        {
            switch (pageSizeAttr)
            {
                case RM_ATTR_PAGE_SIZE_INVALID:
                    NV_PRINTF(LEVEL_ERROR, "invalid page size attr\n");
                    NV_ASSERT_OR_RETURN(0, 0);

                case RM_ATTR_PAGE_SIZE_DEFAULT:
                {
                    NvBool bUseDefaultHugePagesize = NV_TRUE;
                    // On WDDMv2 Windows the default page size is expected to be 4KB/64KB/128KB
                    if (bUseDefaultHugePagesize &&
                        kgmmuIsHugePageSupported(pKernelGmmu) &&
                        (memSize >= RM_PAGE_SIZE_HUGE) && (addrSpace != ADDR_SYSMEM ||
                        pMemoryManager->sysmemPageSize == RM_PAGE_SIZE_HUGE))
                    {
                        pageSize = RM_PAGE_SIZE_HUGE;
                        break;
                    }
                    else if ((memFormat != NVOS32_ATTR_FORMAT_PITCH) ||
                             (memSize >= kgmmuGetMinBigPageSize(pKernelGmmu)) || hClient ||
                             FLD_TEST_DRF(OS32, _ATTR2, _SMMU_ON_GPU, _ENABLE, *pRetAttr2))
                    {
                        pageSize = kgmmuGetMaxBigPageSize_HAL(pKernelGmmu);
                        break;
                    }

                    pageSize = RM_PAGE_SIZE;
                    break;
                }

                case RM_ATTR_PAGE_SIZE_4KB:
                    pageSize = RM_PAGE_SIZE;
                    break;

                case RM_ATTR_PAGE_SIZE_BIG:
                    pageSize = kgmmuGetMaxBigPageSize_HAL(pKernelGmmu);
                    break;

                case RM_ATTR_PAGE_SIZE_HUGE:
                    if (kgmmuIsHugePageSupported(pKernelGmmu))
                    {
                        pageSize = RM_PAGE_SIZE_HUGE;
                    }
                    else
                    {
                        NV_ASSERT_OR_RETURN(0, 0);
                    }
                    break;

                case RM_ATTR_PAGE_SIZE_512MB:
                    if (kgmmuIsPageSize512mbSupported(pKernelGmmu))
                    {
                        pageSize = RM_PAGE_SIZE_512M;
                    }
                    else
                    {
                        NV_ASSERT_OR_RETURN(0, 0);
                    }
                    break;

                default:
                    NV_ASSERT(0);
            }
        }
    }

    switch (pageSize)
    {
        case RM_PAGE_SIZE:
            *pRetAttr = FLD_SET_DRF(OS32, _ATTR, _PAGE_SIZE, _4KB, *pRetAttr);
            break;

        case RM_PAGE_SIZE_64K:
        case RM_PAGE_SIZE_128K:
            *pRetAttr = FLD_SET_DRF(OS32, _ATTR, _PAGE_SIZE, _BIG, *pRetAttr);
            break;

        case RM_PAGE_SIZE_HUGE:
            *pRetAttr  = FLD_SET_DRF(OS32, _ATTR, _PAGE_SIZE, _HUGE, *pRetAttr);
            *pRetAttr2 = FLD_SET_DRF(OS32, _ATTR2, _PAGE_SIZE_HUGE, _2MB, *pRetAttr2);
            break;

        case RM_PAGE_SIZE_512M:
            *pRetAttr  = FLD_SET_DRF(OS32, _ATTR, _PAGE_SIZE, _HUGE, *pRetAttr);
            *pRetAttr2 = FLD_SET_DRF(OS32, _ATTR2, _PAGE_SIZE_HUGE, _512MB, *pRetAttr2);
            break;

        default:
            NV_ASSERT(0);
    }

    return pageSize;
}
/*!
 * Identify if the platform's current configuration supports PMA
 */
NV_STATUS
memmgrSetPlatformPmaSupport_IMPL
(
    OBJGPU        *pGpu,
    MemoryManager *pMemoryManager
)
{
    //
    // KMD in WDDM mode does not support PMA-managed client page tables, as
    // in both cases the client / OS manages them.
    //
    if (RMCFG_FEATURE_PLATFORM_WINDOWS_LDDM && !pGpu->getProperty(pGpu, PDB_PROP_GPU_IN_TCC_MODE))
    {
        memmgrSetClientPageTablesPmaManaged(pMemoryManager, NV_FALSE);
    }

    //
    // FB management should use PMA on Unix/Linux/Mods/Windows
    //
    if (RMCFG_FEATURE_PLATFORM_UNIX
        || RMCFG_FEATURE_PLATFORM_MODS
        || RMCFG_FEATURE_PLATFORM_WINDOWS_LDDM)
    {
        pMemoryManager->bPmaSupportedOnPlatform = NV_TRUE;
    }

    //
    // PMA memory management is not currently supported in non-SRIOV vGPU
    // environments. The RPC mechanism needs to be expanded to distinguish
    // allocation types. Bug #1735412
    //
    // TODO: Remove these constraints.
    if (IS_VIRTUAL_WITHOUT_SRIOV(pGpu))
    {
        pMemoryManager->bPmaSupportedOnPlatform = NV_FALSE;
    }

    if (pGpu->getProperty(pGpu, PDB_PROP_GPU_IS_VIRTUALIZATION_MODE_HOST_VGPU))
    {
        if (pMemoryManager->bVgpuPmaSupport)
        {
            memmgrSetClientPageTablesPmaManaged(pMemoryManager, NV_FALSE);
        }
        else
        {
            pMemoryManager->bPmaSupportedOnPlatform = NV_FALSE;
        }
    }
    return (NV_OK);
}

/*!
 * Allocate console region in CPU-RM based on the region table passed from Physical RM
 */
NV_STATUS
memmgrAllocateConsoleRegion_IMPL
(
    OBJGPU               *pGpu,
    MemoryManager        *pMemoryManager,
    FB_REGION_DESCRIPTOR *pConsoleFbRegion
)
{
    NV_STATUS status          = NV_OK;
    NvU32     consoleRegionId = 0x0;
    NvU64     regionSize;

    if (pMemoryManager->Ram.ReservedConsoleDispMemSize > 0)
    {
        pConsoleFbRegion->base  = pMemoryManager->Ram.fbRegion[consoleRegionId].base;
        pConsoleFbRegion->limit = pMemoryManager->Ram.fbRegion[consoleRegionId].limit;

        regionSize = pConsoleFbRegion->limit - pConsoleFbRegion->base + 1;

        // Once the console is reserved, we don't expect to reserve it again
        NV_ASSERT_OR_RETURN(pMemoryManager->Ram.pReservedConsoleMemDesc == NULL,
                            NV_ERR_STATE_IN_USE);

        status = memdescCreate(&pMemoryManager->Ram.pReservedConsoleMemDesc, pGpu,
                               regionSize, RM_PAGE_SIZE_64K, NV_TRUE, ADDR_FBMEM,
                               NV_MEMORY_UNCACHED,
                               MEMDESC_FLAGS_SKIP_RESOURCE_COMPUTE);
        if (status != NV_OK)
        {
            pConsoleFbRegion->base = pConsoleFbRegion->limit = 0;
            return status;
        }

        memdescDescribe(pMemoryManager->Ram.pReservedConsoleMemDesc, ADDR_FBMEM,
                        pConsoleFbRegion->base, regionSize);
        memdescSetPageSize(pMemoryManager->Ram.pReservedConsoleMemDesc,
                           AT_GPU, RM_PAGE_SIZE);

        NV_PRINTF(LEVEL_INFO, "Allocating console region of size: %llx, at base : %llx \n ",
                  regionSize, pConsoleFbRegion->base);
    }

    return status;
}

void
memmgrReleaseConsoleRegion_IMPL
(
    OBJGPU        *pGpu,
    MemoryManager *pMemoryManager
)
{
    memdescDestroy(pMemoryManager->Ram.pReservedConsoleMemDesc);
    pMemoryManager->Ram.pReservedConsoleMemDesc = NULL;
}

PMEMORY_DESCRIPTOR
memmgrGetReservedConsoleMemDesc_IMPL
(
    OBJGPU        *pGpu,
    MemoryManager *pMemoryManager
)
{
    return pMemoryManager->Ram.pReservedConsoleMemDesc;
}
/*!
 * Reserve FB for allocating BAR2 Page Dirs and Page Tables
 */
void
memmgrReserveBar2BackingStore_IMPL
(
    OBJGPU        *pGpu,
    MemoryManager *pMemoryManager,
    NvU64         *pAddr
)
{
    NvU64      tmpAddr    = *pAddr;
    KernelBus *pKernelBus = GPU_GET_KERNEL_BUS(pGpu);

    NvU32 pageDirsSize = kbusGetSizeOfBar2PageDirs_HAL(pGpu, pKernelBus);
    NvU32 pageTblsSize = kbusGetSizeOfBar2PageTables_HAL(pGpu, pKernelBus);

    // Reserve space for BAR2 Page Dirs
    if (pKernelBus->PDEBAR2Aperture == ADDR_FBMEM)
    {
        tmpAddr = NV_ROUNDUP(tmpAddr, RM_PAGE_SIZE);
        pKernelBus->bar2[GPU_GFID_PF].pdeBase = tmpAddr;
        tmpAddr += pageDirsSize;
    }

    // Reserve space for BAR2 Page Tables
    if (pKernelBus->PTEBAR2Aperture == ADDR_FBMEM)
    {
        tmpAddr = NV_ROUNDUP(tmpAddr, RM_PAGE_SIZE);
        pKernelBus->bar2[GPU_GFID_PF].pteBase = tmpAddr;
        tmpAddr += pageTblsSize;
    }

    NV_PRINTF(LEVEL_INFO, "Reserve space for bar2 Page dirs offset = 0x%llx size = 0x%x\n",
              pKernelBus->bar2[GPU_GFID_PF].pdeBase, pageDirsSize);

    NV_PRINTF(LEVEL_INFO, "Reserve space for bar2 Page tables offset = 0x%llx size = 0x%x\n",
              pKernelBus->bar2[GPU_GFID_PF].pteBase, pageTblsSize);

    *pAddr = NV_ROUNDUP(tmpAddr, RM_PAGE_SIZE);
}

/*!
 * Calculate the Vista reserved memory requirement per FB region for mixed type/density
 */
void
memmgrCalcReservedFbSpace_IMPL
(
    OBJGPU        *pGpu,
    MemoryManager *pMemoryManager
)
{
    NvU64  rsvdFastSize    = 0;
    NvU64  rsvdSlowSize    = 0;
    NvU64  rsvdISOSize     = 0;
    NvU32  i;
    NvU32  idxISORegion    = 0;
    NvU32  idxFastRegion   = 0;
    NvU32  idxSlowRegion   = 0;
    NvBool bAllocProtected = NV_FALSE;

    //
    // This is a hack solely for Vista (on Vista the OS controls the majority of the heap).
    // Linux and Mac don't have reserved memory and don't use this function.
    //
    // On Vista, Fermi's instance memory is no longer reserved by RM.
    // KMD has to reserve enough instance memory for driver private data.
    // This function does the calculation of needed space. See bug 642233.
    // While it returns the result in MB, the calculation is done in bytes.
    //

    // If we have no usable memory then we can't reserve any.
    if (!pMemoryManager->Ram.fbUsableMemSize)
        return;

    // If reserved memory requirements have already been calculated, don't do it again.
    if (pMemoryManager->bLddmReservedMemoryCalculated)
        return;

    memmgrCalcReservedFbSpaceHal_HAL(pGpu, pMemoryManager, &rsvdFastSize, &rsvdSlowSize, &rsvdISOSize);

    // If we have regions defined, fill in the per-segment reserved memory requirement
    if (pMemoryManager->Ram.numFBRegions > 0)
    {
        FB_REGION_DESCRIPTOR *pFbRegion = NULL;
        NvU64 regionSize = 0;
        //
        // Find the fastest and ISO regions. This search makes a soft assumption that
        // region #0 is not reserved, fastest, and supports ISO -- that would be stupid
        //
        for (i = 0; i < pMemoryManager->Ram.numFBRegions; i++)
        {
            pFbRegion = &pMemoryManager->Ram.fbRegion[i];
            regionSize = (pFbRegion->limit - pFbRegion->base + 1);

            // Check only non-reserved regions (which are typically unpopulated blackholes in address space)
            if ((!pFbRegion->bRsvdRegion) &&
                (bAllocProtected || !pFbRegion->bProtected) &&
                (regionSize >= (rsvdFastSize + rsvdSlowSize + rsvdISOSize)))
            {
                // Find the fastest region
                if ((pFbRegion->performance > pMemoryManager->Ram.fbRegion[idxFastRegion].performance)
                        || pMemoryManager->Ram.fbRegion[idxFastRegion].bRsvdRegion
                        || (!bAllocProtected && pMemoryManager->Ram.fbRegion[idxFastRegion].bProtected))
                {
                    idxFastRegion = i;
                }
                // Find the slowest region
                if ((pFbRegion->performance < pMemoryManager->Ram.fbRegion[idxSlowRegion].performance)
                        || pMemoryManager->Ram.fbRegion[idxSlowRegion].bRsvdRegion
                        || (!bAllocProtected && pMemoryManager->Ram.fbRegion[idxSlowRegion].bProtected))
                {
                    idxSlowRegion = i;
                }
                // Find the fastest ISO region
                if (pFbRegion->bSupportISO)
                {
                    if ((!pMemoryManager->Ram.fbRegion[idxISORegion].bSupportISO) ||
                        (pFbRegion->performance > pMemoryManager->Ram.fbRegion[idxISORegion].performance)
                        || (!bAllocProtected && pMemoryManager->Ram.fbRegion[idxISORegion].bProtected))
                    {
                        idxISORegion = i;
                    }
                }
            }
        }

        // There should *ALWAYS* be a region that supports ISO, even if we have no display
        NV_ASSERT(pMemoryManager->Ram.fbRegion[idxISORegion].bSupportISO);

        // There should *ALWAYS* be a non-reserved region that is faster than reserved and supports ISO
        NV_ASSERT(!pMemoryManager->Ram.fbRegion[idxISORegion].bRsvdRegion);
        NV_ASSERT(!pMemoryManager->Ram.fbRegion[idxFastRegion].bRsvdRegion);
        NV_ASSERT(!pMemoryManager->Ram.fbRegion[idxSlowRegion].bRsvdRegion);

        if (!bAllocProtected)
        {
            NV_ASSERT(!pMemoryManager->Ram.fbRegion[idxISORegion].bProtected);
            NV_ASSERT(!pMemoryManager->Ram.fbRegion[idxFastRegion].bProtected);
            NV_ASSERT(!pMemoryManager->Ram.fbRegion[idxSlowRegion].bProtected);
        }

        //
        // Vista expects to be able to VidHeapControl allocate a cursor in ISO
        //
        // For mixed density, reserved memory should be split between "fast" and
        // "slow" memory. Fast memory should also support ISO. The policy to
        // prefer "slow" vs "fast" memory is platform dependent.
        //
        pMemoryManager->Ram.fbRegion[idxISORegion].rsvdSize  += rsvdISOSize;
        pMemoryManager->Ram.fbRegion[idxSlowRegion].rsvdSize += rsvdSlowSize;
        pMemoryManager->Ram.fbRegion[idxFastRegion].rsvdSize += rsvdFastSize;

        pMemoryManager->bLddmReservedMemoryCalculated = NV_TRUE;
    }
}
/*!
 * Init channel buffer sizes
 *
 * @param[in] pChannel       OBJCHANNEL pointer
 * @param[in] numCopyBlocks  Number of copies that should fit in the push buffer
 */
void
memmgrMemUtilsSetupChannelBufferSizes_IMPL
(
    MemoryManager *pMemoryManager,
    OBJCHANNEL    *pChannel,
    NvU32          numCopyBlocks
)
{
    // set channel specific sizes
    pChannel->channelPbSize            = numCopyBlocks * MEMUTILS_SIZE_PER_BLOCK_INBYTES;
    pChannel->channelNotifierSize      = MEMUTILS_CHANNEL_NOTIFIER_SIZE;
    pChannel->channelNumGpFifioEntries = MEMUTILS_NUM_GPFIFIO_ENTRIES;
    pChannel->methodSizePerBlock       = MEMUTILS_SIZE_PER_BLOCK_INBYTES;
    pChannel->channelSize              = pChannel->channelPbSize + MEMUTILS_CHANNEL_GPFIFO_SIZE + MEMUTILS_CHANNEL_SEMAPHORE_SIZE;
    pChannel->semaOffset               = pChannel->channelPbSize + MEMUTILS_CHANNEL_GPFIFO_SIZE;
    pChannel->finishPayloadOffset      = pChannel->semaOffset + 4;
}
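/*!
 * @brief Free an allocation described by pMemDesc: virtual allocations
 *        via their VA space, PMA-backed allocations via PMA (releasing
 *        any HW resources first), and everything else via the heap.
 */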
NV_STATUS memmgrFree_IMPL
(
    OBJGPU            *pGpu,
    MemoryManager     *pMemoryManager,
    Heap              *pHeap,
    NvHandle           hClient,
    NvHandle           hDevice,
    NvHandle           hVASpace,
    NvU32              owner,
    MEMORY_DESCRIPTOR *pMemDesc
)
{
    NvU64     offsetAlign;
    NV_STATUS status;
    NvU32     pmaFreeFlag = 0;

    // IRQL TEST: must be running at equivalent of passive-level
    IRQL_ASSERT_AND_RETURN(!osIsRaisedIRQL());

    if (pMemDesc == NULL)
        return NV_ERR_INVALID_ARGUMENT;

    offsetAlign = memdescGetPhysAddr(pMemDesc, AT_GPU, 0);

    if (owner == NVOS32_BLOCK_TYPE_FREE)
        return NV_ERR_INVALID_ARGUMENT;

    // Virtual heap allocs are tagged virtual and always own the memdesc
    if (memdescGetAddressSpace(pMemDesc) == ADDR_VIRTUAL)
    {
        OBJVASPACE *pVAS = NULL;
        RsClient   *pClient;

        status = serverGetClientUnderLock(&g_resServ, hClient, &pClient);
        if (status != NV_OK)
            return status;

        status = vaspaceGetByHandleOrDeviceDefault(pClient, hDevice, hVASpace, &pVAS);
        if (status != NV_OK)
            return status;

        status = vaspaceFree(pVAS, offsetAlign);
        memdescDestroy(pMemDesc);
        return status;
    }

    // Free up the memory allocated by PMA.
    if (pMemDesc->pPmaAllocInfo)
    {
        FB_ALLOC_INFO        *pFbAllocInfo       = NULL;
        FB_ALLOC_PAGE_FORMAT *pFbAllocPageFormat = NULL;
        OBJGPU               *pMemdescOwnerGpu   = NULL;

        //
        // A memdesc can be duped under a peer device. In that case, before
        // freeing FB make sure the GPU which owns the memdesc is available.
        // Otherwise, just assert, destroy the memdesc and return NV_OK to
        // make sure the rest of the cleanup happens correctly as we are on
        // the destroy path.
        // Note this is just a WAR till resserv brings in cleanup of dup objects
        // on GPU tear down.
        // RS-TODO: Nuke this check once the cleanup is implemented.
        //
        if (pGpu != pMemDesc->pGpu)
        {
            if (!gpumgrIsGpuPointerValid(pMemDesc->pGpu))
            {
                //
                // This should never happen. GPU tear down should always clear
                // the duped memory list after resource server implements it.
                // For now just assert!
                //
                NV_ASSERT(0);
                memdescDestroy(pMemDesc);
                goto pma_free_exit;
            }
        }

        pMemdescOwnerGpu = pMemDesc->pGpu;

        //
        // Similar to the above WAR, if portMem allocations fail for any reason,
        // just assert and return NV_OK to ensure that the rest of the cleanup
        // happens correctly.
        //
        pFbAllocInfo = portMemAllocNonPaged(sizeof(FB_ALLOC_INFO));
        if (pFbAllocInfo == NULL)
        {
            NV_ASSERT(0);
            goto pma_free_exit;
        }

        pFbAllocPageFormat = portMemAllocNonPaged(sizeof(FB_ALLOC_PAGE_FORMAT));
        if (pFbAllocPageFormat == NULL)
        {
            NV_ASSERT(0);
            goto pma_free_exit;
        }

        portMemSet(pFbAllocInfo, 0, sizeof(FB_ALLOC_INFO));
        portMemSet(pFbAllocPageFormat, 0, sizeof(FB_ALLOC_PAGE_FORMAT));
        pFbAllocInfo->hClient    = hClient;
        pFbAllocInfo->pageFormat = pFbAllocPageFormat;

        //
        // Do not release any HW resources associated with this allocation
        // until the last reference to the allocation is freed. Passing
        // hwresid = 0 and format = pitch to memmgrFreeHwResources will ensure
        // that no comptags/zcull/zbc resources are freed.
        //
        if (pMemDesc->RefCount == 1)
        {
            pFbAllocInfo->hwResId = memdescGetHwResId(pMemDesc);
            pFbAllocInfo->format  = memdescGetPteKind(pMemDesc);
        }
        else
        {
            pFbAllocInfo->hwResId = 0;
            pFbAllocInfo->format  = 0;
        }
        pFbAllocInfo->offset = offsetAlign;
        pFbAllocInfo->size   = pMemDesc->Size;

        // Free any HW resources allocated.
        memmgrFreeHwResources(pMemdescOwnerGpu,
                              GPU_GET_MEMORY_MANAGER(pMemdescOwnerGpu), pFbAllocInfo);

        if (pMemDesc->pPmaAllocInfo != NULL)
        {
            // Disabling scrub on free for non compressible surfaces
            if (RMCFG_FEATURE_PLATFORM_MODS &&
                !memmgrIsKind_HAL(GPU_GET_MEMORY_MANAGER(pMemdescOwnerGpu),
                                  FB_IS_KIND_COMPRESSIBLE,
                                  memdescGetPteKind(pMemDesc)))
            {
                pmaFreeFlag = PMA_FREE_SKIP_SCRUB;
            }

            vidmemPmaFree(pMemdescOwnerGpu, pHeap, pMemDesc->pPmaAllocInfo, pmaFreeFlag);
            NV_PRINTF(LEVEL_INFO, "Freeing PMA allocation\n");
        }

pma_free_exit:
        portMemFree(pFbAllocInfo);
        portMemFree(pFbAllocPageFormat);
        memdescDestroy(pMemDesc);

        return NV_OK;
    }

    return heapFree(pGpu, pHeap, owner, pMemDesc);
}
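/*!
 * @brief Compute the partitionable FB range (excluding RM reserved
 *        regions at the bottom and top of FB) and program it in the
 *        physical MemorySystem for MIG memory partitioning.
 */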
NV_STATUS
memmgrSetPartitionableMem_IMPL
(
    OBJGPU        *pGpu,
    MemoryManager *pMemoryManager
)
{
    RM_API *pRmApi = GPU_GET_PHYSICAL_RMAPI(pGpu);
    NV2080_CTRL_INTERNAL_MEMSYS_SET_PARTITIONABLE_MEM_PARAMS params = {0};
    Heap  *pHeap           = GPU_GET_HEAP(pGpu);
    NvU64  bottomRsvdSize  = 0;
    NvU64  topRsvdSize     = 0;
    NvU32  bottomRegionIdx = 0xFFFF;
    NvU32  topRegionIdx    = 0xFFFF;
    NvU32  i;
    NvU64  size;
    NvU64  base;
    NvU64  offset;
    NvU64  freeMem;

    //
    // Find the first and the last regions for which internal heap or
    // bRsvdRegion is true. On Ampere we should never have more than two
    // discontiguous RM reserved regions.
    // To-Do - Bug 2301972 - Make sure that reserved memory is aligned to VMMU
    // segments.
    //
    for (i = 0; i < pMemoryManager->Ram.numFBRegions; i++)
    {
        if (pMemoryManager->Ram.fbRegion[i].bInternalHeap ||
            pMemoryManager->Ram.fbRegion[i].bRsvdRegion)
        {
            NvU64 rsvdSize = (pMemoryManager->Ram.fbRegion[i].limit -
                              pMemoryManager->Ram.fbRegion[i].base + 1);

            // Check if this is the bottom reserved region
            if (pMemoryManager->Ram.fbRegion[i].base == 0)
            {
                bottomRegionIdx = i;
                bottomRsvdSize += rsvdSize;
            }
            else if (i > 0 && (pMemoryManager->Ram.fbRegion[i-1].bInternalHeap ||
                     pMemoryManager->Ram.fbRegion[i-1].bRsvdRegion) &&
                     (pMemoryManager->Ram.fbRegion[i].base == pMemoryManager->Ram.fbRegion[i - 1].limit + 1))
            {
                // See if this region is contiguous with the previously discovered one
                if (bottomRegionIdx == (i - 1))
                {
                    // Contiguous bottom region
                    bottomRsvdSize += rsvdSize;
                }
                else
                {
                    // Contiguous top region
                    topRsvdSize += rsvdSize;
                }
            }
            else
            {
                //
                // Make sure we don't have discontiguous reserved regions, as
                // they are not supported by HW either, and we would need to
                // support them via the blacklisting mechanism.
                //
                if (topRegionIdx != 0xFFFF)
                {
                    NV_PRINTF(LEVEL_ERROR,
                              "More than two discontiguous rsvd regions found. "
                              "Rsvd region base - 0x%llx, Rsvd region Size - 0x%llx\n",
                              pMemoryManager->Ram.fbRegion[i].base, rsvdSize);
                    NV_ASSERT(0);
                    return NV_ERR_INVALID_STATE;
                }

                topRegionIdx = i;
                topRsvdSize += rsvdSize;
            }
        }
    }
    //
    // Sanity check against the biggest available memory chunk. Pick the smaller
    // of the biggest available memory chunk or the calculated total minus
    // reserved memory, as in vGPU we are still using OBJHEAP and there are some
    // allocations which happen at the top of the heap before we program this
    // register.
    //
    if (!memmgrIsPmaInitialized(pMemoryManager))
    {
        NvU64 bytesTotal;
        const NvU64 vgpuHeapWarSize = 256 * 1024 * 1024;
        NV_ASSERT_OK_OR_RETURN(heapInfo(pHeap, &freeMem, &bytesTotal, &base,
                                        &offset, &size));

        //
        // offset is the starting address of the biggest empty block whose size
        // is returned, and we care about the base of the largest empty block.
        //
        base = offset;

        //
        // WAR - Bug 2383259 - Till PMA is enabled in vGPU-Host
        // we need to delay-reserve some memory at the top to fulfill lazy
        // allocations like the FECS and GPCCS uCode. Leave 256MB at the top
        // for such lazy allocations.
        //
        if (size > vgpuHeapWarSize)
        {
            size -= vgpuHeapWarSize;
        }
    }
    else
    {
        PMA_REGION_DESCRIPTOR *pFirstPmaRegionDesc = NULL;
        NvU32 numPmaRegions;

        NV_ASSERT_OK_OR_RETURN(pmaGetRegionInfo(&pHeap->pmaObject,
                                                &numPmaRegions, &pFirstPmaRegionDesc));

        base = pFirstPmaRegionDesc->base;
        pmaGetFreeMemory(&pHeap->pmaObject, &freeMem);
        pmaGetTotalMemory(&pHeap->pmaObject, &size);

        //
        // MIG won't be used alongside APM, so the check below is of no use.
        // Even if we enabled the check for APM, it would fail given that after
        // enabling "scrub on free" using virtual CE writes, memory gets consumed
        // by page tables backing the scrubber channel virtual mappings, and hence
        // the calculation below no longer holds good.
        //
        if (!gpuIsApmFeatureEnabled(pGpu) ||
            !memmgrUseVasForCeMemoryOps(pMemoryManager) ||
            IS_MIG_ENABLED(pGpu))
        {
            //
            // PMA should be completely free at this point, otherwise we risk
            // not setting the right partitionable range (pmaGetLargestFree's
            // offset argument is not implemented as of this writing, so we
            // only get the base address of the region that contains it). There
            // is a known allocation from the top-level scrubber channel that
            // is expected to be no larger than 64K. Issue a warning for any
            // other uses.
            //
            if ((size > RM_PAGE_SIZE_64K) &&
                (freeMem < (size - RM_PAGE_SIZE_64K)))
            {
                NV_PRINTF(LEVEL_ERROR,
                    "Assumption that PMA is empty (after accounting for the top-level scrubber) is not met!\n");
                NV_PRINTF(LEVEL_ERROR,
                    "    free space = 0x%llx bytes, total space = 0x%llx bytes\n",
                    freeMem, size);
                NV_ASSERT_OR_RETURN(freeMem >= (size - RM_PAGE_SIZE_64K),
                                    NV_ERR_INVALID_STATE);
            }
        }
    }

    if (size == 0)
    {
        NV_PRINTF(LEVEL_ERROR,
                  "No partitionable memory. MIG memory partitioning can't be enabled.\n");
        return NV_OK;
    }

    if (base != bottomRsvdSize)
    {
        NV_PRINTF(LEVEL_ERROR,
                  "Partitionable memory start - 0x%llx not aligned with RM reserved "
                  "region base-end - 0x%llx\n", base, bottomRsvdSize);
        return NV_ERR_INVALID_STATE;
    }

    params.partitionableMemSize = size;
    params.bottomRsvdSize       = bottomRsvdSize;
    params.topRsvdSize          = topRsvdSize;

    // Call physical MemorySystem to align and program the partitionable range
    NV_CHECK_OK_OR_RETURN(LEVEL_ERROR,
        pRmApi->Control(pRmApi,
                        pGpu->hInternalClient,
                        pGpu->hInternalSubdevice,
                        NV2080_CTRL_CMD_INTERNAL_MEMSYS_SET_PARTITIONABLE_MEM,
                        &params,
                        sizeof(params)));

    pMemoryManager->MIGMemoryPartitioningInfo.partitionableMemoryRange =
        rangeMake(params.partitionableStartAddr, params.partitionableEndAddr);
NV_STATUS
memmgrFillComprInfo_IMPL
(
    OBJGPU        *pGpu,
    MemoryManager *pMemoryManager,
    NvU64          pageSize,
    NvU32          pageCount,
    NvU32          kind,
    NvU64          surfOffset,
    NvU32          compTagStartOffset,
    COMPR_INFO    *pComprInfo
)
{
    const MEMORY_SYSTEM_STATIC_CONFIG *pMemorySystemConfig =
        kmemsysGetStaticConfig(pGpu, GPU_GET_KERNEL_MEMORY_SYSTEM(pGpu));

    portMemSet(pComprInfo, 0, sizeof(*pComprInfo));

    pComprInfo->kind = kind;

    if (!memmgrIsKind_HAL(pMemoryManager, FB_IS_KIND_COMPRESSIBLE, kind))
        return NV_OK;

    NV_ASSERT(compTagStartOffset != ~(NvU32)0);

    pComprInfo->compPageShift = pMemorySystemConfig->comprPageShift;
    pComprInfo->compTagLineMin = compTagStartOffset;
    pComprInfo->compPageIndexLo = (NvU32)(surfOffset >> pComprInfo->compPageShift);
    pComprInfo->compPageIndexHi = (NvU32)((surfOffset + pageSize * pageCount - 1) >> pComprInfo->compPageShift);
    pComprInfo->compTagLineMultiplier = 1;

    return NV_OK;
}

NV_STATUS
memmgrGetKindComprForGpu_KERNEL
(
    MemoryManager     *pMemoryManager,
    MEMORY_DESCRIPTOR *pMemDesc,
    OBJGPU            *pMappingGpu,
    NvU64              offset,
    NvU32             *pKind,
    COMPR_INFO        *pComprInfo
)
{
    NvU32 ctagId = FB_HWRESID_CTAGID_VAL_FERMI(memdescGetHwResId(pMemDesc));
    NvU32 kind = memdescGetPteKindForGpu(pMemDesc, pMappingGpu);
    const MEMORY_SYSTEM_STATIC_CONFIG *pMappingMemSysConfig =
        kmemsysGetStaticConfig(pMappingGpu, GPU_GET_KERNEL_MEMORY_SYSTEM(pMappingGpu));

    // Compression is not supported on memory not backed by a GPU
    if (pMemDesc->pGpu != NULL && memmgrIsKind_HAL(pMemoryManager, FB_IS_KIND_COMPRESSIBLE, kind) &&
        (ctagId == 0 || ctagId == FB_HWRESID_CTAGID_VAL_FERMI(-1)))
    {
        portMemSet(pComprInfo, 0, sizeof(*pComprInfo));

        pComprInfo->kind = kind;
        pComprInfo->compPageShift = pMappingMemSysConfig->comprPageShift;
        pComprInfo->bPhysBasedComptags = NV_TRUE;
        pComprInfo->compTagLineMin = 1;
    }
    else
    {
        if (ctagId == FB_HWRESID_CTAGID_VAL_FERMI(0xcdcdcdcd))
        {
            portMemSet(pComprInfo, 0, sizeof(*pComprInfo));

            pComprInfo->kind = memmgrGetUncompressedKind_HAL(pMappingGpu, pMemoryManager, kind, NV_TRUE);
        }
        else
        {
            memmgrFillComprInfoUncompressed(pMemoryManager, kind, pComprInfo);
        }
    }

    *pKind = pComprInfo->kind;

    return NV_OK;
}

NV_STATUS
memmgrGetKindComprFromMemDesc_IMPL
(
    MemoryManager     *pMemoryManager,
    MEMORY_DESCRIPTOR *pMemDesc,
    NvU64              offset,
    NvU32             *kind,
    COMPR_INFO        *pComprInfo
)
{
    return memmgrGetKindComprForGpu_HAL(pMemoryManager, pMemDesc, pMemDesc->pGpu,
                                        offset, kind, pComprInfo);
}
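//
// Worked example (illustrative): with comprPageShift = 16 (64KB compression
// pages), surfOffset = 0x30000, pageSize = 0x10000 and pageCount = 4,
// memmgrFillComprInfo() above computes
//     compPageIndexLo = 0x30000 >> 16                     = 3
//     compPageIndexHi = (0x30000 + 4 * 0x10000 - 1) >> 16 = 6
// i.e. the surface spans compression pages 3..6 inclusive.
//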
void
memmgrSetMIGPartitionableMemoryRange_IMPL
(
    OBJGPU *pGpu,
    MemoryManager *pMemoryManager,
    NV_RANGE range
)
{
    pMemoryManager->MIGMemoryPartitioningInfo.partitionableMemoryRange = range;
}

NV_RANGE
memmgrGetMIGPartitionableMemoryRange_IMPL
(
    OBJGPU *pGpu,
    MemoryManager *pMemoryManager
)
{
    return pMemoryManager->MIGMemoryPartitioningInfo.partitionableMemoryRange;
}

/*
 * @brief Sets the total partitionable BAR1 range
 */
NV_STATUS
memmgrSetMIGPartitionableBAR1Range_IMPL
(
    OBJGPU *pGpu,
    MemoryManager *pMemoryManager
)
{
    KernelBus  *pKernelBus = GPU_GET_KERNEL_BUS(pGpu);
    OBJVASPACE *pBar1VAS   = kbusGetBar1VASpace_HAL(pGpu, pKernelBus);
    OBJEHEAP   *pVASHeap;
    NvU64 largestFreeOffset = 0;
    NvU64 largestFreeSize = 0;
    NvU64 partitionableBar1Start;
    NvU64 partitionableBar1End;

    if (pGpu->getProperty(pGpu, PDB_PROP_GPU_ZERO_FB))
        return NV_OK;

    pVASHeap = vaspaceGetHeap(pBar1VAS);

    // Get the partitionable BAR1 range
    pVASHeap->eheapInfo(pVASHeap, NULL, NULL, &largestFreeOffset, &largestFreeSize, NULL, NULL);

    //
    // We are not considering alignment here because VA space is
    // reserved/allocated in chunks of pages, so largestFreeOffset should
    // already be aligned.
    //
    partitionableBar1Start = largestFreeOffset;
    partitionableBar1End = largestFreeOffset + largestFreeSize - 1;
    NV_ASSERT_OR_RETURN(partitionableBar1Start >= vaspaceGetVaStart(pBar1VAS), NV_ERR_INVALID_STATE);
    NV_ASSERT_OR_RETURN(partitionableBar1End <= vaspaceGetVaLimit(pBar1VAS), NV_ERR_INVALID_STATE);

    pMemoryManager->MIGMemoryPartitioningInfo.partitionableBar1Range = rangeMake(partitionableBar1Start, partitionableBar1End);
    return NV_OK;
}

NV_RANGE
memmgrGetMIGPartitionableBAR1Range_IMPL
(
    OBJGPU *pGpu,
    MemoryManager *pMemoryManager
)
{
    return pMemoryManager->MIGMemoryPartitioningInfo.partitionableBar1Range;
}

NV_STATUS
memmgrAllocMIGGPUInstanceMemory_VF
(
    OBJGPU        *pGpu,
    MemoryManager *pMemoryManager,
    NvU32          swizzId,
    NvHandle      *phMemory,
    NV_RANGE      *pAddrRange,
    Heap         **ppMemoryPartitionHeap
)
{
    // For vGPU we have a static memory allocation
    *phMemory = NV01_NULL_OBJECT;
    *pAddrRange = pMemoryManager->MIGMemoryPartitioningInfo.partitionableMemoryRange;
    *ppMemoryPartitionHeap = GPU_GET_HEAP(pGpu);

    return NV_OK;
}
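//
// Illustrative usage (hypothetical caller, not compiled): both variants
// share one signature, so MIG instance creation can dispatch through the
// NVOC-generated _HAL entry point and treat hMemory == NV01_NULL_OBJECT
// (the _VF case) as "nothing to free later".
//
#if 0 // sketch only
    NvHandle  hMemory;
    NV_RANGE  addrRange;
    Heap     *pPartitionHeap;

    NV_ASSERT_OK(memmgrAllocMIGGPUInstanceMemory_HAL(pGpu, pMemoryManager,
                                                     swizzId, &hMemory,
                                                     &addrRange,
                                                     &pPartitionHeap));
#endif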
// Function to allocate memory for a GPU instance
NV_STATUS
memmgrAllocMIGGPUInstanceMemory_PF
(
    OBJGPU        *pGpu,
    MemoryManager *pMemoryManager,
    NvU32          swizzId,
    NvHandle      *phMemory,
    NV_RANGE      *pAddrRange,
    Heap         **ppMemoryPartitionHeap
)
{
    KernelMemorySystem *pKernelMemorySystem = GPU_GET_KERNEL_MEMORY_SYSTEM(pGpu);
    KernelMIGManager *pKernelMIGManager = GPU_GET_KERNEL_MIG_MANAGER(pGpu);
    NV_STATUS rmStatus = NV_OK;
    NvHandle hMemory = 0;
    RM_API *pRmApi = rmapiGetInterface(RMAPI_GPU_LOCK_INTERNAL);

    NV_ASSERT_OR_RETURN(pKernelMIGManager != NULL, NV_ERR_INVALID_STATE);
    NV_ASSERT_OK_OR_RETURN(kmemsysGetMIGGPUInstanceMemInfo(pGpu, pKernelMemorySystem, swizzId, pAddrRange));

    //
    // Only allocate memory for non-swizzID-0 GPU instances, as swizzID-0 owns
    // the full GPU and there is no need to pre-reserve memory for it. This
    // allocation is also limited to non-coherent systems: in coherent NUMA
    // systems, NVOS32_ALLOC_FLAGS_FIXED_ADDRESS_ALLOCATE is not supported and
    // the memory comes from the MIG partition memory NUMA node.
    //
    if (kmigmgrIsMemoryPartitioningNeeded_HAL(pGpu, pKernelMIGManager, swizzId))
    {
        {
            //
            // Allocate memory using vidHeapControl
            //
            // vidHeapControl calls should happen outside GPU locks.
            // This is a PMA requirement, as memory allocation calls may
            // invoke eviction, behind which UVM could get stuck waiting on
            // the GPU lock.
            // See Bug 1735851-#24
            //
            rmGpuLocksRelease(GPUS_LOCK_FLAGS_NONE, NULL);

            // Allocate physical FB memory spanning the GPU instance's address range
            NV_MEMORY_ALLOCATION_PARAMS memAllocParams;
            portMemSet(&memAllocParams, 0, sizeof(NV_MEMORY_ALLOCATION_PARAMS));
            memAllocParams.owner     = HEAP_OWNER_RM_CLIENT_GENERIC;
            memAllocParams.type      = NVOS32_TYPE_IMAGE;
            memAllocParams.size      = rangeLength(*pAddrRange);
            memAllocParams.attr      = DRF_DEF(OS32, _ATTR, _LOCATION, _VIDMEM);
            memAllocParams.attr     |= DRF_DEF(OS32, _ATTR, _PHYSICALITY, _CONTIGUOUS);
            memAllocParams.attr     |= DRF_DEF(OS32, _ATTR, _PAGE_SIZE, _DEFAULT);
            memAllocParams.attr2     = DRF_DEF(OS32, _ATTR2, _PAGE_OFFLINING, _OFF); // free the offlined pages
            memAllocParams.flags    |= NVOS32_ALLOC_FLAGS_FIXED_ADDRESS_ALLOCATE;
            memAllocParams.rangeLo   = 0;
            memAllocParams.rangeHi   = 0;
            memAllocParams.offset    = pAddrRange->lo; // Offset needed if fixed address allocation
            memAllocParams.hVASpace  = 0; // Physical allocation
            memAllocParams.internalflags = NVOS32_ALLOC_INTERNAL_FLAGS_SKIP_SCRUB;

            rmStatus = pRmApi->Alloc(pRmApi,
                                     pMemoryManager->MIGMemoryPartitioningInfo.hClient,
                                     pMemoryManager->MIGMemoryPartitioningInfo.hSubdevice,
                                     &hMemory,
                                     NV01_MEMORY_LOCAL_USER,
                                     &memAllocParams);

            // Reacquire the GPU locks
            if (rmGpuLocksAcquire(GPUS_LOCK_FLAGS_NONE, RM_LOCK_MODULES_MEM) != NV_OK)
            {
                NV_PRINTF(LEVEL_ERROR, "failed to grab RM-Lock\n");
                DBG_BREAKPOINT();
                rmStatus = NV_ERR_GENERIC;
                goto cleanup;
            }

            if (rmStatus != NV_OK)
            {
                NV_PRINTF(LEVEL_ERROR,
                          "Unable to allocate physical memory for GPU instance.\n");
                return rmStatus;
            }
        }
    }
    rmStatus = _memmgrInitMIGMemoryPartitionHeap(pGpu, pMemoryManager, swizzId, pAddrRange, ppMemoryPartitionHeap);
    if (rmStatus != NV_OK)
    {
        NV_PRINTF(LEVEL_ERROR, "Unable to initialize memory partition heap\n");
        goto cleanup;
    }

    NV_PRINTF(LEVEL_INFO,
              "Allocated memory partition heap for swizzId - %d with StartAddr - 0x%llx, endAddr - 0x%llx.\n",
              swizzId, pAddrRange->lo, pAddrRange->hi);

    *phMemory = hMemory;
    return rmStatus;

cleanup:
    pRmApi->Free(pRmApi, pMemoryManager->MIGMemoryPartitioningInfo.hClient, hMemory);

    return rmStatus;
}
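//
// The drop/reacquire pattern above is the general idiom for PMA-backed
// allocations. A minimal sketch (hypothetical, error handling elided):
//
#if 0 // sketch only
    // PMA may trigger eviction; UVM must not be blocked behind the GPU lock
    rmGpuLocksRelease(GPUS_LOCK_FLAGS_NONE, NULL);

    status = pRmApi->Alloc(pRmApi, hClient, hParent, &hMemory,
                           NV01_MEMORY_LOCAL_USER, &memAllocParams);

    // Always retake the GPU locks before touching GPU state again
    NV_ASSERT_OK(rmGpuLocksAcquire(GPUS_LOCK_FLAGS_NONE, RM_LOCK_MODULES_MEM));
#endif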
// Function to initialize the heap for managing MIG partition memory
static NV_STATUS
_memmgrInitMIGMemoryPartitionHeap
(
    OBJGPU        *pGpu,
    MemoryManager *pMemoryManager,
    NvU32          swizzId,
    NV_RANGE      *pAddrRange,
    Heap         **ppMemoryPartitionHeap
)
{
    NV_STATUS status = NV_OK;
    KernelMIGManager *pKernelMIGManager = GPU_GET_KERNEL_MIG_MANAGER(pGpu);
    Heap *pMemoryPartitionHeap = NULL;
    NvU64 partitionBaseAddr = pAddrRange->lo;
    NvU64 partitionSize = rangeLength(*pAddrRange);

    // Use the default heap for swizzID-0, as we don't pre-reserve memory for swizzID-0
    NV_ASSERT_OR_RETURN(pKernelMIGManager != NULL, NV_ERR_INVALID_STATE);
    if (!kmigmgrIsMemoryPartitioningNeeded_HAL(pGpu, pKernelMIGManager, swizzId))
    {
        *ppMemoryPartitionHeap = pMemoryManager->pHeap;
        return NV_OK;
    }
    else
    {
        *ppMemoryPartitionHeap = NULL;
    }

    NV_ASSERT_OK_OR_GOTO(
        status,
        objCreate(ppMemoryPartitionHeap, pMemoryManager, Heap),
        fail);

    pMemoryPartitionHeap = *ppMemoryPartitionHeap;

    if (memmgrIsPmaEnabled(pMemoryManager) &&
        memmgrIsPmaSupportedOnPlatform(pMemoryManager))
    {
        portMemSet(&pMemoryPartitionHeap->pmaObject, 0, sizeof(pMemoryPartitionHeap->pmaObject));
        NV_ASSERT_OK_OR_GOTO(
            status,
            memmgrPmaInitialize(pGpu, pMemoryManager, &pMemoryPartitionHeap->pmaObject),
            fail);
    }

    NV_ASSERT_OK_OR_GOTO(
        status,
        heapInit(pGpu, pMemoryPartitionHeap, partitionBaseAddr,
                 partitionSize,
                 HEAP_TYPE_PARTITION_LOCAL,
                 GPU_GFID_PF,
                 NULL),
        fail);

    if (memmgrIsPmaInitialized(pMemoryManager) &&
        (pMemoryPartitionHeap->bHasFbRegions))
    {
        NV_ASSERT_OK_OR_GOTO(
            status,
            memmgrPmaRegisterRegions(pGpu, pMemoryManager, pMemoryPartitionHeap,
                                     &pMemoryPartitionHeap->pmaObject),
            fail);
    }

    if (!IsSLIEnabled(pGpu))
    {
        // Do the actual blacklisting of pages from the heap
        if (pMemoryPartitionHeap->blackListAddresses.count != 0)
        {
            status = heapBlackListPages(pGpu, pMemoryPartitionHeap);

            if (status != NV_OK)
            {
                // Warn and continue
                NV_PRINTF(LEVEL_WARNING, "Error 0x%x creating blacklist\n",
                          status);
            }
        }
    }

    return NV_OK;

fail:

    if (pMemoryPartitionHeap != NULL)
    {
        objDelete(pMemoryPartitionHeap);
        *ppMemoryPartitionHeap = NULL;
    }

    return status;
}

// Function to free GPU instance memory
NV_STATUS
memmgrFreeMIGGPUInstanceMemory_IMPL
(
    OBJGPU *pGpu,
    MemoryManager *pMemoryManager,
    NvU32 swizzId,
    NvHandle hMemory,
    Heap **ppMemoryPartitionHeap
)
{
    RM_API *pRmApi = rmapiGetInterface(RMAPI_GPU_LOCK_INTERNAL);
    KernelMIGManager *pKernelMIGManager = GPU_GET_KERNEL_MIG_MANAGER(pGpu);

    NV_ASSERT_OR_RETURN(pKernelMIGManager != NULL, NV_ERR_INVALID_STATE);

    // Nothing to do for swizzId 0, as we neither allocate memory nor create a new heap object
    if (!kmigmgrIsMemoryPartitioningNeeded_HAL(pGpu, pKernelMIGManager, swizzId))
        return NV_OK;

    objDelete(*ppMemoryPartitionHeap);
    *ppMemoryPartitionHeap = NULL;

    // Free the allocated memory
    if (hMemory != NV01_NULL_OBJECT)
    {
        pRmApi->Free(pRmApi, pMemoryManager->MIGMemoryPartitioningInfo.hClient, hMemory);
    }
    return NV_OK;
}

void memmgrComprInfoDisableCompression_IMPL
(
    MemoryManager *pMemoryManager,
    COMPR_INFO *pComprInfo
)
{
    memmgrFillComprInfoUncompressed(pMemoryManager, pComprInfo->kind, pComprInfo);
}

void memmgrFillComprInfoUncompressed_IMPL
(
    MemoryManager *pMemoryManager,
    NvU32 kind,
    COMPR_INFO *pComprInfo
)
{
    if (memmgrIsKind_HAL(pMemoryManager, FB_IS_KIND_COMPRESSIBLE, kind))
        kind = memmgrGetUncompressedKind_HAL(ENG_GET_GPU(pMemoryManager), pMemoryManager, kind, NV_FALSE);

    portMemSet(pComprInfo, 0, sizeof(*pComprInfo));
    pComprInfo->kind = kind;
}
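//
// Illustrative usage (hypothetical caller, not compiled): a mapping path
// that must disable compression can fill a COMPR_INFO normally and then
// downgrade it in place; the kind is swapped for its uncompressed
// counterpart and all other fields are cleared.
//
#if 0 // sketch only
    COMPR_INFO comprInfo;

    NV_ASSERT_OK(memmgrFillComprInfo(pGpu, pMemoryManager, pageSize, pageCount,
                                     kind, surfOffset, compTagStartOffset,
                                     &comprInfo));
    if (bDisableCompression)
        memmgrComprInfoDisableCompression(pMemoryManager, &comprInfo);
#endif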
/*!
 * @brief Creates the SW state of the page level pools.
 *
 * @param pGpu
 * @param pMemoryManager
 *
 * @returns On success, returns NV_OK.
 *          On failure, returns error code.
 */
NV_STATUS
memmgrPageLevelPoolsCreate_IMPL
(
    OBJGPU        *pGpu,
    MemoryManager *pMemoryManager
)
{
    NV_STATUS status = NV_OK;

    if (RMCFG_FEATURE_PMA &&
        memmgrIsPmaInitialized(pMemoryManager) &&
        memmgrAreClientPageTablesPmaManaged(pMemoryManager))
    {
        Heap           *pHeap       = GPU_GET_HEAP(pGpu);
        KernelGmmu     *pKernelGmmu = GPU_GET_KERNEL_GMMU(pGpu);
        const GMMU_FMT *pFmt        = NULL;

        pFmt = kgmmuFmtGet(pKernelGmmu, GMMU_FMT_VERSION_DEFAULT, 0);
        NV_ASSERT_OR_RETURN(NULL != pFmt, NV_ERR_INVALID_ARGUMENT);

        status = rmMemPoolSetup((void *)&pHeap->pmaObject, &pMemoryManager->pPageLevelReserve,
                                (pFmt->version == GMMU_FMT_VERSION_1) ? POOL_CONFIG_GMMU_FMT_1 : POOL_CONFIG_GMMU_FMT_2);

        NV_ASSERT(NV_OK == status);
    }
    return status;
}

/*!
 * @brief Destroys the SW state of the page level pools.
 *
 * @param pGpu
 * @param pMemoryManager
 */
void
memmgrPageLevelPoolsDestroy_IMPL
(
    OBJGPU        *pGpu,
    MemoryManager *pMemoryManager
)
{
    if (RMCFG_FEATURE_PMA &&
        memmgrIsPmaInitialized(pMemoryManager) &&
        memmgrAreClientPageTablesPmaManaged(pMemoryManager))
    {
        rmMemPoolDestroy(pMemoryManager->pPageLevelReserve);
        pMemoryManager->pPageLevelReserve = NULL;
    }
}

/*!
 * @brief Gets the page level pool to use
 *
 * @param pGpu
 * @param pMemoryManager
 * @param[in]  hClient        client handle
 * @param[out] ppMemPoolInfo  page level pool
 *
 * @returns On success, returns NV_OK.
 *          On failure, returns error code.
 */
NV_STATUS
memmgrPageLevelPoolsGetInfo_IMPL
(
    OBJGPU        *pGpu,
    MemoryManager *pMemoryManager,
    NvHandle       hClient,
    RM_POOL_ALLOC_MEM_RESERVE_INFO **ppMemPoolInfo
)
{
    KernelMIGManager *pKernelMIGManager = GPU_GET_KERNEL_MIG_MANAGER(pGpu);
    NvBool bMemPartitioningEnabled = (pKernelMIGManager != NULL) && kmigmgrIsMIGMemPartitioningEnabled(pGpu, pKernelMIGManager);
    RM_POOL_ALLOC_MEM_RESERVE_INFO *pMemPool = NULL;
    NV_ASSERT_OR_RETURN(ppMemPoolInfo != NULL, NV_ERR_INVALID_ARGUMENT);

    if (!memmgrIsPmaInitialized(pMemoryManager) ||
        !memmgrAreClientPageTablesPmaManaged(pMemoryManager))
    {
        return NV_ERR_INVALID_STATE;
    }

    // If memory partitioning is enabled, use the per-partition pool allocator
    if (bMemPartitioningEnabled)
    {
        MIG_INSTANCE_REF ref;
        NV_ASSERT_OK_OR_RETURN(
            kmigmgrGetInstanceRefFromClient(pGpu, pKernelMIGManager, hClient, &ref));
        pMemPool = ref.pKernelMIGGpuInstance->pPageTableMemPool;
    }
    else
    {
        pMemPool = pMemoryManager->pPageLevelReserve;
    }
    NV_ASSERT_OR_RETURN(pMemPool != NULL, NV_ERR_INVALID_STATE);

    *ppMemPoolInfo = pMemPool;
    return NV_OK;
}
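//
// Illustrative usage (hypothetical caller, not compiled): page-table
// allocation paths first resolve the pool for the requesting client; the
// helper above transparently hands back either the per-partition pool (MIG)
// or the global page-level reserve.
//
#if 0 // sketch only
    RM_POOL_ALLOC_MEM_RESERVE_INFO *pMemPool;

    NV_ASSERT_OK(memmgrPageLevelPoolsGetInfo(pGpu, pMemoryManager, hClient,
                                             &pMemPool));
    // ... reserve page-table backing store against pMemPool ...
#endif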
/*!
 * @brief Initialize the PMA object
 *
 * @param      pGpu
 * @param      pMemoryManager
 * @param[in]  pPma    Pointer to the PMA object to init
 *
 * @returns On success, returns NV_OK.
 *          On failure, returns error code.
 */
NV_STATUS
memmgrPmaInitialize_IMPL
(
    OBJGPU        *pGpu,
    MemoryManager *pMemoryManager,
    PMA           *pPma
)
{
    NvU32 pmaInitFlags = PMA_INIT_NONE;
    NV_STATUS status = NV_OK;
    NvBool bNumaEnabled = osNumaOnliningEnabled(pGpu->pOsGpuInfo);

    NV_ASSERT(memmgrIsPmaEnabled(pMemoryManager) &&
              memmgrIsPmaSupportedOnPlatform(pMemoryManager));

    if (memmgrIsPmaForcePersistence(pMemoryManager))
    {
        pmaInitFlags |= PMA_INIT_FORCE_PERSISTENCE;
    }

    if (memmgrIsScrubOnFreeEnabled(pMemoryManager))
    {
        pmaInitFlags |= PMA_INIT_SCRUB_ON_FREE;
    }

    // Disable client page table management on SLI.
    if (IsSLIEnabled(pGpu))
    {
        memmgrSetClientPageTablesPmaManaged(pMemoryManager, NV_FALSE);
    }

    if (bNumaEnabled)
    {
        NV_PRINTF(LEVEL_INFO, "Initializing PMA with NUMA flag.\n");
        pmaInitFlags |= PMA_INIT_NUMA;
    }

    if (memmgrIsPmaAddrTree(pMemoryManager))
    {
        pmaInitFlags |= PMA_INIT_ADDRTREE;
    }

    status = pmaInitialize(pPma, pmaInitFlags);
    if (status != NV_OK)
    {
        NV_PRINTF(LEVEL_ERROR, "Failed to initialize PMA!\n");
        return status;
    }

    if (bNumaEnabled)
    {
        NvU32 numaSkipReclaimVal = NV_REG_STR_RM_NUMA_ALLOC_SKIP_RECLAIM_PERCENTAGE_DEFAULT;

        if (osReadRegistryDword(pGpu, NV_REG_STR_RM_NUMA_ALLOC_SKIP_RECLAIM_PERCENTAGE, &numaSkipReclaimVal) == NV_OK)
        {
            if (numaSkipReclaimVal > NV_REG_STR_RM_NUMA_ALLOC_SKIP_RECLAIM_PERCENTAGE_MAX)
            {
                numaSkipReclaimVal = NV_REG_STR_RM_NUMA_ALLOC_SKIP_RECLAIM_PERCENTAGE_MAX;
            }
        }
        pmaNumaSetReclaimSkipThreshold(pPma, numaSkipReclaimVal);
    }

    return NV_OK;
}

NV_STATUS
memmgrInitFbRegions_IMPL
(
    OBJGPU        *pGpu,
    MemoryManager *pMemoryManager
)
{
    NV_ASSERT_OR_RETURN(pMemoryManager->Ram.numFBRegions == 0, NV_ERR_INVALID_STATE);

    // Don't set up regions if FB is broken and we aren't using L2 cache as "FB".
    if ((pGpu->getProperty(pGpu, PDB_PROP_GPU_BROKEN_FB) &&
         !gpuIsCacheOnlyModeEnabled(pGpu)))
        return NV_OK;

    NV_ASSERT_OK_OR_RETURN(memmgrInitBaseFbRegions_HAL(pGpu, pMemoryManager));

    NV_ASSERT_OK_OR_RETURN(memmgrInitFbRegionsHal_HAL(pGpu, pMemoryManager));

    //
    // Build a list of regions sorted by allocation priority
    // (highest to lowest). Used for allocations using ObjHeap.
    //
    memmgrRegenerateFbRegionPriority(pGpu, pMemoryManager);

    if (RMCFG_FEATURE_PLATFORM_WINDOWS_LDDM)
    {
        if (pGpu->getProperty(pGpu, PDB_PROP_GPU_EXTERNAL_HEAP_CONTROL))
        {
            // KMD in WDDM mode
            if (pMemoryManager->bMixedDensityFbp)
            {
                //
                // For mixed memory on LDDM platforms, when we are using the
                // kernel-managed heap (not TCC mode), we want to prefer
                // allocating in slow memory to conserve fast memory for
                // applications.
                //
                pMemoryManager->bPreferSlowRegion = NV_TRUE;
            }
        }
    }

    NV_ASSERT_OK_OR_RETURN(memmgrSetPlatformPmaSupport(pGpu, pMemoryManager));

    return NV_OK;
}
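//
// Worked example (illustrative): on a NUMA-onlined system with scrub-on-free
// enabled and the address-tree allocator selected, memmgrPmaInitialize()
// above passes
//     PMA_INIT_SCRUB_ON_FREE | PMA_INIT_NUMA | PMA_INIT_ADDRTREE
// to pmaInitialize().
//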
/*!
 * @brief Register regions with the PMA object
 *
 * @param      pGpu
 * @param      pMemoryManager
 * @param[in]  pPma    Pointer to the PMA object to register with
 *
 * @returns On success, returns NV_OK.
 *          On failure, returns error code.
 */
NV_STATUS
memmgrPmaRegisterRegions_IMPL
(
    OBJGPU        *pGpu,
    MemoryManager *pMemoryManager,
    Heap          *pHeap,
    PMA           *pPma
)
{
    HEAP_TYPE_INTERNAL heapType = pHeap->heapType;
    PMA_REGION_DESCRIPTOR pmaRegion;
    NvU32 pmaRegionIdx = 0;
    NvU32 i;
    PMA_BLACKLIST_ADDRESS *pBlacklistPages = NULL;
    NvU32 blRegionCount = 0;
    NvU32 blPageIndex;
    NvU32 blackListCount;
    NvU64 base, size;
    NV_STATUS status = NV_OK;

    blackListCount = pHeap->blackListAddresses.count;
    base = pHeap->base;
    size = pHeap->total;

    //
    // If there are blacklisted pages, prepare a staging buffer to pass the
    // per-region blacklisted pages to PMA
    //
    if (blackListCount > 0)
    {
        pBlacklistPages = portMemAllocNonPaged(
            sizeof(PMA_BLACKLIST_ADDRESS) * blackListCount);
        if (pBlacklistPages == NULL)
        {
            NV_PRINTF(LEVEL_ERROR,
                      "Could not allocate memory for blackList!\n");
            status = NV_ERR_NO_MEMORY;
            goto _pmaInitFailed;
        }
    }

    for (i = 0; i < pMemoryManager->Ram.numFBRegions; i++)
    {
        //
        // Skip all regions that are completely outside the heap boundary,
        // OR marked as internal (used for internal RM allocations),
        // OR marked as reserved (used for console, display, link training buffer etc.)
        //
        if ((pMemoryManager->Ram.fbRegion[i].limit < base ||
             pMemoryManager->Ram.fbRegion[i].base >= (base + size)) ||
            (pMemoryManager->Ram.fbRegion[i].bInternalHeap) ||
            (pMemoryManager->Ram.fbRegion[i].bRsvdRegion))
        {
            continue;
        }

        NV_PRINTF(LEVEL_INFO,
                  "PMA: Register FB region[%d] %llx..%llx EXTERNAL\n", i,
                  pMemoryManager->Ram.fbRegion[i].base, pMemoryManager->Ram.fbRegion[i].limit);

        pmaRegion.base = pMemoryManager->Ram.fbRegion[i].base;
        pmaRegion.limit = pMemoryManager->Ram.fbRegion[i].limit;

        // Clamp the base if the start of managed memory lies above the FB region base
        if (pmaRegion.base < base)
        {
            pmaRegion.base = base;
        }

        // Clamp the limit if the end of managed memory lies below the FB region limit
        if (pmaRegion.limit >= (base + size))
        {
            pmaRegion.limit = base + size - 1;
        }

        pmaRegion.performance = pMemoryManager->Ram.fbRegion[i].performance;
        pmaRegion.bSupportCompressed = pMemoryManager->Ram.fbRegion[i].bSupportCompressed;
        pmaRegion.bSupportISO = pMemoryManager->Ram.fbRegion[i].bSupportISO;
        pmaRegion.bProtected = pMemoryManager->Ram.fbRegion[i].bProtected;

        //
        // Now that we know the region, find out if it has any blacklisted pages.
        // TODO: Try to coalesce to unique 64K pages
        //
        blRegionCount = 0;
        if (pBlacklistPages != NULL)
        {
            for (blPageIndex = 0; blPageIndex < blackListCount; blPageIndex++)
            {
                if ((pHeap->blackListAddresses.data[blPageIndex].address
                        != NV2080_CTRL_FB_OFFLINED_PAGES_INVALID_ADDRESS) &&
                    (pHeap->blackListAddresses.data[blPageIndex].address >= pmaRegion.base) &&
                    (pHeap->blackListAddresses.data[blPageIndex].address <= pmaRegion.limit))
                {
                    // Collect the region's blacklisted pages
                    pBlacklistPages[blRegionCount].physOffset = pHeap->blackListAddresses.data[blPageIndex].address;

                    pBlacklistPages[blRegionCount].bIsDynamic =
                        ((pHeap->blackListAddresses.data[blPageIndex].type ==
                            NV2080_CTRL_FB_OFFLINED_PAGES_SOURCE_DPR_MULTIPLE_SBE) ||
                         (pHeap->blackListAddresses.data[blPageIndex].type ==
                            NV2080_CTRL_FB_OFFLINED_PAGES_SOURCE_DPR_DBE));

                    blRegionCount++;
                }
            }
        }

        NV_PRINTF(LEVEL_INFO,
                  "Register FB region %llx..%llx of size %llx with PMA\n",
                  pmaRegion.base, pmaRegion.limit,
                  pmaRegion.limit - pmaRegion.base + 1);
        //
        // Register the region for PMA management, and note if asynchronous
        // scrubbing is enabled. Synchronous scrubbing is done before
        // heap/PMA is initialized, but asynchronously scrubbed pages will
        // need to be unmarked once they are scrubbed.
        //
        status = pmaRegisterRegion(pPma, pmaRegionIdx,
                    memmgrEccScrubInProgress_HAL(pGpu, pMemoryManager),
                    &pmaRegion, blRegionCount,
                    ((blRegionCount == 0) ? NULL : pBlacklistPages));
        if (status != NV_OK)
        {
            NV_PRINTF(LEVEL_ERROR,
                      "failed to register FB region %llx..%llx with PMA\n",
                      pmaRegion.base, pmaRegion.limit);
            DBG_BREAKPOINT();
            goto _pmaInitFailed;
        }
        pmaRegionIdx++;
    }

    //
    // Bug #200354346: make sure the RM reserved region(s) are scrubbed during
    // region creation itself. The top-down scrubber skips the RM reserved
    // region(s) on the assumption that they are pre-scrubbed.
    //
    if (heapType != HEAP_TYPE_PARTITION_LOCAL)
        memmgrScrubInternalRegions_HAL(pGpu, pMemoryManager);

_pmaInitFailed:
    portMemFree(pBlacklistPages);

    if ((status == NV_OK) && (pMemoryManager->fbOverrideStartKb != 0))
    {
        NvU64 allocSize = NV_ALIGN_UP(((NvU64)pMemoryManager->fbOverrideStartKb << 10), PMA_GRANULARITY);
        NvU32 numPages  = (NvU32)(allocSize >> PMA_PAGE_SHIFT);
        PMA_ALLOCATION_OPTIONS allocOptions = {0};

        allocOptions.flags     = PMA_ALLOCATE_CONTIGUOUS;
        allocOptions.flags    |= PMA_ALLOCATE_SPECIFY_ADDRESS_RANGE;
        allocOptions.physBegin = 0;
        allocOptions.physEnd   = allocSize - 1;

        // The page list is intentionally thrown away; the allocation itself stays to pin the range
        NvU64 *pPages = NULL;
        pPages = portMemAllocNonPaged(numPages * sizeof(NvU64));
        if (pPages != NULL)
        {
            // Accommodate the regkey override for the FB start address
            status = pmaAllocatePages(pPma, numPages, _PMA_64KB, &allocOptions, pPages);
            portMemFree(pPages);
        }
    }

    if (status != NV_OK)
    {
        if (memmgrIsPmaInitialized(pMemoryManager))
        {
            if (heapType != HEAP_TYPE_PARTITION_LOCAL)
            {
                memmgrSetPmaInitialized(pMemoryManager, NV_FALSE);
            }
            pmaDestroy(pPma);
        }
    }

    return status;
}
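//
// Illustrative sketch (hypothetical helper, not compiled): the clamping in
// memmgrPmaRegisterRegions() intersects an FB region with the heap span
// [base, base + size - 1]. For example, a region [0x0, 0x4FFFFFFF] against
// a heap at base 0x10000000 with size 0x30000000 registers as
// [0x10000000, 0x3FFFFFFF].
//
#if 0 // sketch only
static void
_exampleClampRegionToHeap
(
    PMA_REGION_DESCRIPTOR *pRegion,
    NvU64                  heapBase,
    NvU64                  heapSize
)
{
    if (pRegion->base < heapBase)
        pRegion->base = heapBase;

    if (pRegion->limit >= (heapBase + heapSize))
        pRegion->limit = heapBase + heapSize - 1;
}
#endif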
/*!
 * @brief Allocate internal handles for MIG partition memory allocation
 */
NV_STATUS
memmgrAllocMIGMemoryAllocationInternalHandles_IMPL
(
    OBJGPU *pGpu,
    MemoryManager *pMemoryManager
)
{
    RM_API *pRmApi = rmapiGetInterface(RMAPI_GPU_LOCK_INTERNAL);

    NV_ASSERT_OR_RETURN(pMemoryManager->MIGMemoryPartitioningInfo.hClient == NV01_NULL_OBJECT, NV_ERR_INVALID_STATE);
    NV_ASSERT_OK_OR_RETURN(
        rmapiutilAllocClientAndDeviceHandles(pRmApi, pGpu,
                                             &pMemoryManager->MIGMemoryPartitioningInfo.hClient,
                                             &pMemoryManager->MIGMemoryPartitioningInfo.hDevice,
                                             &pMemoryManager->MIGMemoryPartitioningInfo.hSubdevice));

    return NV_OK;
}

/*!
 * @brief Free the internal handles used to support MIG memory partitioning
 */
void
memmgrFreeMIGMemoryAllocationInternalHandles_IMPL
(
    OBJGPU *pGpu,
    MemoryManager *pMemoryManager
)
{
    RM_API *pRmApi = rmapiGetInterface(RMAPI_GPU_LOCK_INTERNAL);

    rmapiutilFreeClientAndDeviceHandles(pRmApi,
                                        &pMemoryManager->MIGMemoryPartitioningInfo.hClient,
                                        &pMemoryManager->MIGMemoryPartitioningInfo.hDevice,
                                        &pMemoryManager->MIGMemoryPartitioningInfo.hSubdevice);
}
/*!
 * @brief Gets free memory (client visible) for all valid GPU instances
 */
void
memmgrGetFreeMemoryForAllMIGGPUInstances_IMPL
(
    OBJGPU *pGpu,
    MemoryManager *pMemoryManager,
    NvU64 *pBytes
)
{
    NvU64 val = 0;
    Heap *pHeap = NULL;
    KernelMIGManager *pKernelMIGManager = GPU_GET_KERNEL_MIG_MANAGER(pGpu);
    KERNEL_MIG_GPU_INSTANCE *pKernelMIGGPUInstance;

    *pBytes = 0;

    FOR_EACH_VALID_GPU_INSTANCE(pGpu, pKernelMIGManager, pKernelMIGGPUInstance)
    {
        NV_ASSERT(pKernelMIGGPUInstance->pMemoryPartitionHeap != NULL);
        pHeap = pKernelMIGGPUInstance->pMemoryPartitionHeap;

        if (memmgrIsPmaInitialized(pMemoryManager))
            pmaGetFreeMemory(&pHeap->pmaObject, &val);
        else
            heapGetFree(pHeap, &val);

        *pBytes += val;
    }
    FOR_EACH_VALID_GPU_INSTANCE_END();
}

void
memmgrGetTopLevelScrubberStatus_IMPL
(
    OBJGPU *pGpu,
    MemoryManager *pMemoryManager,
    NvBool *pbTopLevelScrubberEnabled,
    NvBool *pbTopLevelScrubberConstructed
)
{
    NvBool bTopLevelScrubberEnabled = NV_FALSE;
    NvBool bTopLevelScrubberConstructed = NV_FALSE;
    NvU32 pmaConfigs = PMA_QUERY_SCRUB_ENABLED | PMA_QUERY_SCRUB_VALID;

    if (memmgrIsPmaInitialized(pMemoryManager))
    {
        Heap *pHeap = GPU_GET_HEAP(pGpu);
        NV_ASSERT_OK(pmaQueryConfigs(&pHeap->pmaObject, &pmaConfigs));
        bTopLevelScrubberEnabled = (pmaConfigs & PMA_QUERY_SCRUB_ENABLED) != 0x0;
        bTopLevelScrubberConstructed = (pmaConfigs & PMA_QUERY_SCRUB_VALID) != 0x0;
    }

    if (pbTopLevelScrubberEnabled != NULL)
        *pbTopLevelScrubberEnabled = bTopLevelScrubberEnabled;
    if (pbTopLevelScrubberConstructed != NULL)
        *pbTopLevelScrubberConstructed = bTopLevelScrubberConstructed;
}

/**
 * @brief Save the pre-MIG top-level scrubber constructed status and tear it down if constructed
 */
NV_STATUS
memmgrSaveAndDestroyTopLevelScrubber_IMPL
(
    OBJGPU *pGpu,
    MemoryManager *pMemoryManager
)
{
    // Save the pre-MIG top-level scrubber status for later
    memmgrGetTopLevelScrubberStatus(pGpu, pMemoryManager, NULL, &pMemoryManager->MIGMemoryPartitioningInfo.bNonMIGTopLevelScrubber);

    // Destroy the top-level scrubber if it exists
    if (pMemoryManager->MIGMemoryPartitioningInfo.bNonMIGTopLevelScrubber)
    {
        // Delete the top-level scrubber
        NV_ASSERT_OK_OR_RETURN(memmgrScrubHandlePreSchedulingDisable_HAL(pGpu, pMemoryManager));
    }

    return NV_OK;
}

/**
 * @brief Reinitialize the top-level scrubber if its saved status was constructed
 */
NV_STATUS
memmgrInitSavedTopLevelScrubber_IMPL
(
    OBJGPU *pGpu,
    MemoryManager *pMemoryManager
)
{
    if (!pMemoryManager->MIGMemoryPartitioningInfo.bNonMIGTopLevelScrubber)
        return NV_OK;

    NV_ASSERT_OK_OR_RETURN(memmgrScrubHandlePostSchedulingEnable_HAL(pGpu, pMemoryManager));

    return NV_OK;
}
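//
// Illustrative pairing (hypothetical MIG enable/disable flow, not compiled):
// the two helpers above are meant to bracket memory partitioning so the
// top-level scrubber is absent while partitions exist.
//
#if 0 // sketch only
    NV_ASSERT_OK(memmgrSaveAndDestroyTopLevelScrubber(pGpu, pMemoryManager));
    // ... create and later destroy MIG partitions ...
    NV_ASSERT_OK(memmgrInitSavedTopLevelScrubber(pGpu, pMemoryManager));
#endif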
/*!
 * @brief Return the full address range of the partition assigned to the vGPU.
 *
 * @param[in]  pGpu
 * @param[in]  pMemoryManager
 * @param[out] base            reference to the base address of the partition
 * @param[out] size            reference to the overall size of the partition
 */
static void
_memmgrGetFullMIGAddrRange
(
    OBJGPU *pGpu,
    MemoryManager *pMemoryManager,
    NvU64 *base,
    NvU64 *size
)
{
    NvU32 i;
    NvU64 lo, hi;

    *base = 0;
    *size = 0;
    if (pMemoryManager->Ram.numFBRegions == 0)
    {
        return;
    }

    lo = pMemoryManager->Ram.fbRegion[0].base;
    hi = pMemoryManager->Ram.fbRegion[0].limit;

    for (i = 1; i < pMemoryManager->Ram.numFBRegions; i++)
    {
        if (pMemoryManager->Ram.fbRegion[i].base < lo)
        {
            lo = pMemoryManager->Ram.fbRegion[i].base;
        }

        if (pMemoryManager->Ram.fbRegion[i].limit > hi)
        {
            hi = pMemoryManager->Ram.fbRegion[i].limit;
        }
    }

    *base = lo;
    *size = hi - lo + 1;
}
/*!
 * @brief Discover the MIG partitionable memory range based on PMA status
 */
NV_STATUS
memmgrDiscoverMIGPartitionableMemoryRange_VF
(
    OBJGPU *pGpu,
    MemoryManager *pMemoryManager,
    NV_RANGE *pMemoryRange
)
{
    NvU64 size;
    NvU64 base;

    // Set memory information
    if (!memmgrIsPmaInitialized(pMemoryManager))
    {
        Heap *pHeap = GPU_GET_HEAP(pGpu);
        NvU64 freeMem;
        NvU64 bytesTotal;
        NvU64 offset;

        NV_ASSERT_OK_OR_RETURN(heapInfo(pHeap, &freeMem, &bytesTotal, &base,
                                        &offset, &size));

        //
        // offset is the starting address of the biggest empty block, whose
        // size is returned in size; we care about the base of the largest
        // empty block.
        //
        base = offset;
    }
    else
    {
        //
        // In the case of vGPU, pmaGetLargestFree only returns the user-visible
        // PMA region and not the reserved/internal regions that constitute the
        // overall partition size assigned to the vGPU.
        // This is misleading, as pMemoryManager->partitionableMemoryRange is
        // expected to represent the actual partition size.
        //
        _memmgrGetFullMIGAddrRange(pGpu, pMemoryManager, &base, &size);
    }

    *pMemoryRange = rangeMake(base, base + size - 1);

    return NV_OK;
}

NV_STATUS
memmgrReserveMemoryForFsp_IMPL
(
    OBJGPU *pGpu,
    MemoryManager *pMemoryManager
)
{
    KernelFsp *pKernelFsp = GPU_GET_KERNEL_FSP(pGpu);

    //
    // If we sent FSP commands to boot ACR, we need to allocate the surfaces
    // used by FSP and ACR as WPR/FRTS here from the reserved heap
    //
    if (pKernelFsp && (!pKernelFsp->getProperty(pKernelFsp, PDB_PROP_KFSP_DISABLE_FRTS_VIDMEM) &&
                       (pKernelFsp->getProperty(pKernelFsp, PDB_PROP_KFSP_BOOT_COMMAND_OK))))
    {
        // For the GSP-RM flow, we don't need to allocate WPR since it is handled by the CPU
        if (pKernelFsp->getProperty(pKernelFsp, PDB_PROP_KFSP_GSP_MODE_GSPRM))
        {
            return NV_OK;
        }
    }
    return NV_OK;
}

NvU64
memmgrGetVgpuHostRmReservedFb_KERNEL
(
    OBJGPU *pGpu,
    MemoryManager *pMemoryManager,
    NvU32 vgpuTypeId
)
{
    RM_API *pRmApi = GPU_GET_PHYSICAL_RMAPI(pGpu);
    NV2080_CTRL_INTERNAL_MEMMGR_GET_VGPU_CONFIG_HOST_RESERVED_FB_PARAMS params = {0};

    params.vgpuTypeId = vgpuTypeId;
    // Send to GSP to get the amount of FB reserved for the host
    NV_ASSERT_OK_OR_RETURN(pRmApi->Control(pRmApi,
                                           pGpu->hInternalClient,
                                           pGpu->hInternalSubdevice,
                                           NV2080_CTRL_CMD_INTERNAL_MEMMGR_GET_VGPU_CONFIG_HOST_RESERVED_FB,
                                           &params,
                                           sizeof(params)));
    return params.hostReservedFb;
}