1 /* 2 * SPDX-FileCopyrightText: Copyright (c) 2020-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 3 * SPDX-License-Identifier: MIT 4 * 5 * Permission is hereby granted, free of charge, to any person obtaining a 6 * copy of this software and associated documentation files (the "Software"), 7 * to deal in the Software without restriction, including without limitation 8 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 9 * and/or sell copies of the Software, and to permit persons to whom the 10 * Software is furnished to do so, subject to the following conditions: 11 * 12 * The above copyright notice and this permission notice shall be included in 13 * all copies or substantial portions of the Software. 14 * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 21 * DEALINGS IN THE SOFTWARE. 22 */ 23 24 #include "mem_mgr/video_mem.h" 25 #include "gpu/mem_mgr/mem_desc.h" 26 #include "gpu/mem_mgr/heap.h" 27 #include "gpu/mem_mgr/mem_mgr.h" 28 #include "gpu/mem_mgr/mem_utils.h" 29 #include "gpu/mem_sys/kern_mem_sys.h" 30 #include "kernel/gpu/mig_mgr/kernel_mig_manager.h" 31 #include "vgpu/rpc.h" 32 #include "core/locks.h" 33 #include "kernel/gpu/rc/kernel_rc.h" 34 #include "diagnostics/gpu_acct.h" 35 #include "Nvcm.h" 36 #include "gpu/bus/third_party_p2p.h" 37 #include "gpu/bus/kern_bus.h" 38 39 #include "class/cl0040.h" // NV01_MEMORY_LOCAL_USER 40 41 /*! 42 * _vidmemQueryAlignment 43 * 44 * @brief 45 * Returns the size and alignment for this allocation. 46 * 47 * @param[in] pRVHCP Pointer to RmVidHeapControlParams data 48 * @param[in] pAllocData Pointer to VIDHEAP_ALLOC_DATA 49 * @param[out] pSize The size aligned to the HW/requested alignment 50 * @param[out] pAlign The alignment required for this allocation. 51 52 * @returns 53 * NV_OK Operation is successful. 54 */ 55 static NV_STATUS 56 _vidmemQueryAlignment 57 ( 58 MEMORY_ALLOCATION_REQUEST *pAllocRequest, 59 NvU64 *pSize, 60 NvU64 *pAlign 61 ) 62 { 63 NV_MEMORY_ALLOCATION_PARAMS *pAllocData = pAllocRequest->pUserParams; 64 OBJGPU *pGpu = pAllocRequest->pGpu; 65 MemoryManager *pMemoryManager = GPU_GET_MEMORY_MANAGER(pGpu); 66 NV_STATUS rmStatus = NV_OK; 67 NvU64 size = pAllocData->size; 68 NvU64 pageSize = 0; 69 NvU64 align = 0; 70 NvU32 retAttr = pAllocData->attr; 71 NvU32 retAttr2 = pAllocData->attr2; 72 73 NV_ASSERT_OR_RETURN((NULL != pSize) && (NULL != pAlign), 74 NV_ERR_INVALID_ARGUMENT); 75 76 if ((pAllocData->flags & NVOS32_ALLOC_FLAGS_ALIGNMENT_HINT) || 77 (pAllocData->flags & NVOS32_ALLOC_FLAGS_ALIGNMENT_FORCE)) 78 align = pAllocData->alignment; 79 else 80 align = RM_PAGE_SIZE; 81 82 // Fetch RM page size 83 pageSize = memmgrDeterminePageSize(pMemoryManager, pAllocRequest->hClient, size, pAllocData->format, 84 pAllocData->flags, &retAttr, &retAttr2); 85 86 if (pageSize == 0) 87 { 88 rmStatus = NV_ERR_INVALID_STATE; 89 NV_ASSERT_OK_FAILED("memmgrDeterminePageSize", rmStatus); 90 return rmStatus; 91 } 92 93 // Fetch memory alignment 94 NV_ASSERT_OK_OR_RETURN(memmgrAllocDetermineAlignment_HAL(pGpu, pMemoryManager, 95 &size, &align, 0, pAllocData->flags, retAttr, retAttr2, 0)); 96 97 *pSize = size; 98 *pAlign = align + 1; 99 100 return rmStatus; 101 } 102 103 /*! 104 * _vidmemPmaAllocate 105 * 106 * @brief 107 * Allocates memory on vidmem through PMA. 108 * 109 * @param[in] pHeap Pointer to Heap object 110 * @param[in] pAllocRequest Pointer to the MEMORY_ALLOCATION_REQUEST. 111 * 112 * @returns 113 * NV_OK Operation is successful 114 * NV_ERR_* Error code in case of errors. 115 */ 116 static NV_STATUS 117 _vidmemPmaAllocate 118 ( 119 Heap *pHeap, 120 MEMORY_ALLOCATION_REQUEST *pAllocRequest 121 ) 122 { 123 NV_MEMORY_ALLOCATION_PARAMS *pAllocData = pAllocRequest->pUserParams; 124 OBJGPU *pGpu = pAllocRequest->pGpu; 125 MemoryManager *pMemoryManager = GPU_GET_MEMORY_MANAGER(pGpu); 126 PMA *pPma = &pHeap->pmaObject; 127 NvU64 size = 0; 128 NvU32 pageCount; 129 NvU32 pmaInfoSize; 130 NvU64 pageSize; 131 NV_STATUS status; 132 NvU64 sizeAlign = 0; 133 PMA_ALLOCATION_OPTIONS allocOptions = {0}; 134 NvBool bContig; 135 NvU32 subdevInst = gpumgrGetSubDeviceInstanceFromGpu(pGpu); 136 NvBool bCompressed = !FLD_TEST_DRF(OS32, _ATTR, _COMPR, 137 _NONE, pAllocData->attr); 138 KernelBus *pKernelBus = GPU_GET_KERNEL_BUS(pGpu); 139 NvU32 gfid; 140 NvU32 pmaConfig = PMA_QUERY_NUMA_ENABLED; 141 142 status = pmaQueryConfigs(pPma, &pmaConfig); 143 NV_ASSERT(status == NV_OK); 144 145 // 146 // In NUMA platforms, contig memory is allocated using page order from 147 // kernel and that could lead to memory wastage when the size is not 148 // naturally aligned to page order. Prefer non-contig when clients 149 // are okay with NON_CONTIG. 150 // 151 if ((status == NV_OK) && (pmaConfig & PMA_QUERY_NUMA_ENABLED)) 152 { 153 bContig = 154 !FLD_TEST_DRF(OS32, _ATTR, _PHYSICALITY, 155 _ALLOW_NONCONTIGUOUS, pAllocData->attr) && 156 !FLD_TEST_DRF(OS32, _ATTR, _PHYSICALITY, 157 _NONCONTIGUOUS, pAllocData->attr); 158 } 159 else 160 { 161 bContig = !FLD_TEST_DRF(OS32, _ATTR, 162 _PHYSICALITY, _NONCONTIGUOUS, 163 pAllocData->attr); 164 } 165 166 // LOCK: acquire device lock 167 status = rmDeviceGpuLocksAcquire(pGpu, GPUS_LOCK_FLAGS_NONE, 168 RM_LOCK_MODULES_MEM_PMA); 169 NV_ASSERT_OR_RETURN(status == NV_OK, status); 170 171 if (bCompressed && 172 (vgpuGetCallingContextGfid(pGpu, &gfid) == NV_OK) && 173 pKernelBus->bar1[gfid].bStaticBar1Enabled) 174 { 175 // Override the attr to use 2MB page size 176 pAllocData->attr = FLD_SET_DRF(OS32, _ATTR, _PAGE_SIZE, _HUGE, pAllocData->attr); 177 178 NV_PRINTF(LEVEL_INFO, 179 "Overrode the page size to 2MB on this compressed vidmem for the static bar1\n"); 180 } 181 182 NV_PRINTF(LEVEL_INFO, "PMA input\n"); 183 NV_PRINTF(LEVEL_INFO, " Owner: 0x%x\n", pAllocData->owner); 184 NV_PRINTF(LEVEL_INFO, " hMemory: 0x%x\n", pAllocRequest->hMemory); 185 NV_PRINTF(LEVEL_INFO, " Type: 0x%x\n", pAllocData->type); 186 NV_PRINTF(LEVEL_INFO, " Flags: 0x%x\n", pAllocData->flags); 187 NV_PRINTF(LEVEL_INFO, " Begin: 0x%08llx\n", pAllocData->rangeLo); 188 NV_PRINTF(LEVEL_INFO, " End: 0x%08llx\n", pAllocData->rangeHi); 189 NV_PRINTF(LEVEL_INFO, " Height: 0x%x\n", pAllocData->height); 190 NV_PRINTF(LEVEL_INFO, " Width: 0x%x\n", pAllocData->width); 191 NV_PRINTF(LEVEL_INFO, " Pitch: 0x%x\n", pAllocData->pitch); 192 NV_PRINTF(LEVEL_INFO, " Size: 0x%08llx\n", pAllocData->size); 193 NV_PRINTF(LEVEL_INFO, " Alignment: 0x%08llx\n", 194 pAllocData->alignment); 195 NV_PRINTF(LEVEL_INFO, " Offset: 0x%08llx\n", pAllocData->offset); 196 NV_PRINTF(LEVEL_INFO, " Attr: 0x%x\n", pAllocData->attr); 197 NV_PRINTF(LEVEL_INFO, " Attr2: 0x%x\n", pAllocData->attr2); 198 NV_PRINTF(LEVEL_INFO, " Format: 0x%x\n", pAllocData->format); 199 NV_PRINTF(LEVEL_INFO, " ComprCovg: 0x%x\n", pAllocData->comprCovg); 200 NV_PRINTF(LEVEL_INFO, " ZCullCovg: 0x%x\n", pAllocData->zcullCovg); 201 NV_PRINTF(LEVEL_INFO, " CtagOffset: 0x%x\n", pAllocData->ctagOffset); 202 NV_PRINTF(LEVEL_INFO, " hVASpace: 0x%x\n", pAllocData->hVASpace); 203 204 // Get the page size returned by RM. 205 pageSize = stdmemQueryPageSize(pMemoryManager, pAllocRequest->hClient, pAllocData); 206 NV_ASSERT_OR_RETURN(pageSize != 0, NV_ERR_INVALID_STATE); 207 208 if (pageSize == RM_PAGE_SIZE) 209 { 210 // 211 // TODO Remove this after the suballocator is in place 212 // Minimum granularity of PMA is 64K. 213 // 214 pageSize = RM_PAGE_SIZE_64K; 215 } 216 217 // Get the alignment returned by RM without actually allocating. 218 status = _vidmemQueryAlignment(pAllocRequest, &size, &sizeAlign); 219 220 // 221 // Pass the turn blacklist off flag to PMA Allocation API 222 // No need for FB check since PMA only manages FB 223 // Bug:2451834, gpuCheckPageRetirementSupport should not be called outside 224 // RM lock. 225 // 226 if (pGpu->getProperty(pGpu, PDB_PROP_GPU_ALLOW_PAGE_RETIREMENT) && 227 gpuCheckPageRetirementSupport_HAL(pGpu) && 228 FLD_TEST_DRF(OS32, _ATTR2, _BLACKLIST, _OFF, pAllocData->attr2)) 229 { 230 allocOptions.flags |= PMA_ALLOCATE_TURN_BLACKLIST_OFF; 231 } 232 233 // UNLOCK: release device lock 234 rmDeviceGpuLocksRelease(pGpu, GPUS_LOCK_FLAGS_NONE, NULL); 235 236 NV_ASSERT_OR_RETURN(NV_OK == status, status); 237 238 // RM allocations are always pinned. 239 allocOptions.flags |= PMA_ALLOCATE_PINNED; 240 241 if (pAllocData->flags & NVOS32_ALLOC_FLAGS_PERSISTENT_VIDMEM) 242 { 243 allocOptions.flags |= PMA_ALLOCATE_PERSISTENT; 244 } 245 246 // Check for VPR region. 247 if (pAllocData->flags & NVOS32_ALLOC_FLAGS_PROTECTED) 248 { 249 allocOptions.flags |= PMA_ALLOCATE_PROTECTED_REGION; 250 } 251 252 // Check memory alloc direction. 253 if (pAllocData->flags & NVOS32_ALLOC_FLAGS_FORCE_REVERSE_ALLOC) 254 { 255 allocOptions.flags |= PMA_ALLOCATE_REVERSE_ALLOC; 256 } 257 258 // Fixed address allocations. 259 if (pAllocData->flags & NVOS32_ALLOC_FLAGS_FIXED_ADDRESS_ALLOCATE) 260 { 261 NvU64 offsetAlign = NV_MAX(sizeAlign, pageSize); 262 263 allocOptions.flags |= PMA_ALLOCATE_SPECIFY_ADDRESS_RANGE; 264 allocOptions.physBegin = NV_ALIGN_DOWN(pAllocData->offset, offsetAlign); 265 allocOptions.physEnd = NV_ALIGN_UP(allocOptions.physBegin + size, offsetAlign) - 1; 266 size = allocOptions.physEnd - allocOptions.physBegin + 1; 267 } 268 269 // Range based allocations. 270 if (pAllocData->flags & NVOS32_ALLOC_FLAGS_USE_BEGIN_END) 271 { 272 allocOptions.flags |= PMA_ALLOCATE_SPECIFY_ADDRESS_RANGE; 273 allocOptions.physBegin = NV_ALIGN_UP(pAllocData->rangeLo, ((NvU64)pageSize)); 274 allocOptions.physEnd = NV_ALIGN_DOWN(pAllocData->rangeHi, ((NvU64)pageSize)); 275 allocOptions.physEnd = (allocOptions.physEnd > 0) ? 276 allocOptions.physEnd - 1 : 0; 277 NV_ASSERT_OR_RETURN(allocOptions.physBegin <= allocOptions.physEnd, 278 NV_ERR_INVALID_ARGUMENT); 279 } 280 281 // Skip scrubber, used only by scrubber construction path 282 if (pAllocData->internalflags & NVOS32_ALLOC_INTERNAL_FLAGS_SKIP_SCRUB) 283 { 284 allocOptions.flags |= PMA_ALLOCATE_NO_ZERO; 285 } 286 287 // Pass along client requested alignment 288 allocOptions.flags |= PMA_ALLOCATE_FORCE_ALIGNMENT; 289 allocOptions.alignment = NV_MAX(sizeAlign, pageSize); 290 291 292 // Get the number of pages to be allocated by PMA 293 pageCount = (NvU32) NV_DIV_AND_CEIL(size, pageSize); 294 295 retry_alloc: 296 // Evaluate the size of the PMA_ALLOC_INFO struct. 297 if (bContig) 298 { 299 allocOptions.flags |= PMA_ALLOCATE_CONTIGUOUS; 300 pmaInfoSize = sizeof(PMA_ALLOC_INFO); 301 } 302 else 303 { 304 pmaInfoSize = sizeof(PMA_ALLOC_INFO) + ((pageCount - 1) * sizeof(NvU64)); 305 } 306 307 // Alloc the tracking structure and store the values in it. 308 pAllocRequest->pPmaAllocInfo[subdevInst] = portMemAllocNonPaged(pmaInfoSize); 309 NV_ASSERT_OR_RETURN(NULL != pAllocRequest->pPmaAllocInfo[subdevInst], NV_ERR_NO_MEMORY); 310 portMemSet(pAllocRequest->pPmaAllocInfo[subdevInst], 0, pmaInfoSize); 311 312 pAllocRequest->pPmaAllocInfo[subdevInst]->pageSize = pageSize; 313 pAllocRequest->pPmaAllocInfo[subdevInst]->pageCount = pageCount; 314 pAllocRequest->pPmaAllocInfo[subdevInst]->allocSize = ((NvU64) pageCount) * pageSize; 315 pAllocRequest->pPmaAllocInfo[subdevInst]->bContig = bContig; 316 pAllocRequest->pPmaAllocInfo[subdevInst]->refCount = 1; 317 pAllocRequest->pPmaAllocInfo[subdevInst]->flags = allocOptions.flags; 318 319 NV_PRINTF(LEVEL_INFO, "\nNVRM: Size requested: 0x%llx bytes\n", size); 320 NV_PRINTF(LEVEL_INFO, " PageSize: 0x%llx bytes\n", pageSize); 321 NV_PRINTF(LEVEL_INFO, " PageCount: 0x%x\n", pageCount); 322 NV_PRINTF(LEVEL_INFO, " Actual Size: 0x%llx\n", 323 pAllocRequest->pPmaAllocInfo[subdevInst]->allocSize); 324 NV_PRINTF(LEVEL_INFO, " Contiguous: %s\n", bContig ? "YES" : "NO"); 325 326 // Get the allocation from PMA. 327 status = pmaAllocatePages(pPma, pageCount, pageSize, &allocOptions, 328 pAllocRequest->pPmaAllocInfo[subdevInst]->pageArray); 329 if (NV_OK != status) 330 { 331 portMemFree(pAllocRequest->pPmaAllocInfo[subdevInst]); 332 pAllocRequest->pPmaAllocInfo[subdevInst] = NULL; 333 334 if (bContig) 335 { 336 if (FLD_TEST_DRF(OS32, _ATTR, _PHYSICALITY, _ALLOW_NONCONTIGUOUS, pAllocData->attr) || 337 (FLD_TEST_DRF(OS32, _ATTR, _PHYSICALITY, _DEFAULT, pAllocData->attr) && 338 pHeap->getProperty(pHeap, PDB_PROP_HEAP_NONCONTIG_ALLOC_BY_DEFAULT))) 339 { 340 bContig = NV_FALSE; 341 allocOptions.flags &= ~PMA_ALLOCATE_CONTIGUOUS; 342 NV_PRINTF(LEVEL_INFO, 343 "pmaAllocatePages failed -- retrying as noncontiguous\n"); 344 goto retry_alloc; 345 } 346 } 347 348 NV_PRINTF(LEVEL_WARNING, "pmaAllocatePages failed (%x)\n", status); 349 } 350 else 351 { 352 pAllocData->attr = (bContig ? 353 FLD_SET_DRF(OS32, _ATTR, _PHYSICALITY, _CONTIGUOUS, pAllocData->attr) : 354 FLD_SET_DRF(OS32, _ATTR, _PHYSICALITY, _NONCONTIGUOUS, pAllocData->attr)); 355 } 356 357 return status; 358 } 359 360 /*! 361 * vidmemPmaFree 362 * 363 * @brief 364 * Frees the memory allocated by PMA 365 * 366 * @param[in] pGpu Pointer to OBJGPU 367 * @param[in] pHeap Pointer to Heap object 368 * @param[in] pPmaAllocInfo Pointer to the PMA allocation tracking structure 369 * @param[in] flag Flags to modify PMA behavior 370 * 371 * @returns NONE 372 */ 373 void 374 vidmemPmaFree 375 ( 376 OBJGPU *pGpu, 377 Heap *pHeap, 378 PMA_ALLOC_INFO *pPmaAllocInfo, 379 NvU32 flags 380 ) 381 { 382 PMA *pPma = &pHeap->pmaObject; 383 NvU32 pmaFreeFlags = flags; 384 385 NV_ASSERT_OR_RETURN_VOID(NULL != pPmaAllocInfo); 386 387 // Decrement the refcount and free only in case of zero references. 388 pPmaAllocInfo->refCount--; 389 if (pPmaAllocInfo->refCount != 0) 390 { 391 return; 392 } 393 394 // 395 // Skip the scrubber if the memory is allocated with scrubber skipped. 396 // The only use case is scrubber internal allocations. 397 // 398 if (pPmaAllocInfo->flags & PMA_ALLOCATE_NO_ZERO) 399 { 400 pmaFreeFlags |= PMA_FREE_SKIP_SCRUB; 401 } 402 403 if (pPmaAllocInfo->bContig) 404 { 405 pmaFreePages(pPma, pPmaAllocInfo->pageArray, 1, 406 pPmaAllocInfo->allocSize, pmaFreeFlags); 407 } 408 else 409 { 410 pmaFreePages(pPma, pPmaAllocInfo->pageArray, 411 pPmaAllocInfo->pageCount, 412 pPmaAllocInfo->pageSize, pmaFreeFlags); 413 } 414 portMemFree(pPmaAllocInfo); 415 pPmaAllocInfo = NULL; 416 } 417 418 Heap* 419 vidmemGetHeap 420 ( 421 OBJGPU *pGpu, 422 Device *pDevice, 423 NvBool bSubheap 424 ) 425 { 426 MemoryManager *pMemoryManager = GPU_GET_MEMORY_MANAGER(pGpu); 427 NV_STATUS status = NV_OK; 428 429 if (bSubheap) 430 { 431 Heap *pHeap = memmgrGetDeviceSuballocator(pMemoryManager, bSubheap); 432 433 NV_CHECK_OR_RETURN(LEVEL_ERROR, pHeap != NULL && pHeap->heapType == HEAP_TYPE_PHYS_MEM_SUBALLOCATOR, NULL); 434 return pHeap; 435 } 436 437 if (IS_MIG_IN_USE(pGpu)) 438 { 439 KernelMIGManager *pKernelMIGManager = GPU_GET_KERNEL_MIG_MANAGER(pGpu); 440 Heap *pMemoryPartitionHeap = NULL; 441 442 status = kmigmgrGetMemoryPartitionHeapFromDevice(pGpu, pKernelMIGManager, pDevice, 443 &pMemoryPartitionHeap); 444 if (status == NV_OK) 445 { 446 if (pMemoryPartitionHeap != NULL) 447 return pMemoryPartitionHeap; 448 } 449 else 450 { 451 NV_PRINTF(LEVEL_ERROR, 452 "failed to get memory partition heap for hClient = 0x%x, hDevice = 0x%x\n", 453 RES_GET_CLIENT_HANDLE(pDevice), RES_GET_HANDLE(pDevice)); 454 return NULL; 455 } 456 } 457 458 return MEMORY_MANAGER_GET_HEAP(pMemoryManager); 459 } 460 461 static NV_STATUS 462 vidmemCopyConstruct 463 ( 464 VideoMemory *pVideoMemory, 465 CALL_CONTEXT *pCallContext, 466 RS_RES_ALLOC_PARAMS_INTERNAL *pParams 467 ) 468 { 469 Memory *pMemorySrc = dynamicCast(pParams->pSrcRef->pResource, Memory); 470 OBJGPU *pGpu = pMemorySrc->pGpu; 471 NV_STATUS status; 472 473 NV_ASSERT_OR_RETURN(!memdescGetCustomHeap(pMemorySrc->pMemDesc), NV_ERR_INVALID_ARGUMENT); 474 475 SLI_LOOP_START(SLI_LOOP_FLAGS_BC_ONLY) 476 MEMORY_DESCRIPTOR *pSrcMemDesc = memdescGetMemDescFromGpu(pMemorySrc->pMemDesc, pGpu); 477 status = heapReference(pGpu, pSrcMemDesc->pHeap, pMemorySrc->HeapOwner, 478 pSrcMemDesc); 479 NV_ASSERT(status == NV_OK); 480 SLI_LOOP_END 481 482 return NV_OK; 483 } 484 485 /*! 486 * vidmemConstruct 487 * 488 * @brief 489 * This routine provides common allocation services used by the 490 * following heap allocation functions: 491 * NVOS32_FUNCTION_ALLOC_SIZE 492 * NVOS32_FUNCTION_ALLOC_SIZE_RANGE 493 * NVOS32_FUNCTION_ALLOC_TILED_PITCH_HEIGHT 494 * 495 * @param[in] pVideoMemory Pointer to VideoMemory object 496 * @param[in] pCallContext Pointer to the current CALL_CONTEXT. 497 * @param[in] pParams Pointer to the alloc params 498 * 499 * @return 'NV_OK' 500 * Operation completed successfully. 501 * @return 'NV_ERR_NO_MEMORY' 502 * There is not enough available memory to satisfy allocation request. 503 * @return 'NV_ERR_NOT_SUPPORTED' 504 * Operation not supported on broken FB. 505 * @return 'NV_ERR_INSUFFICIENT_RESOURCES' 506 * Not enough available resources to satisfy allocation request. 507 */ 508 NV_STATUS 509 vidmemConstruct_IMPL 510 ( 511 VideoMemory *pVideoMemory, 512 CALL_CONTEXT *pCallContext, 513 RS_RES_ALLOC_PARAMS_INTERNAL *pParams 514 ) 515 { 516 Memory *pMemory = staticCast(pVideoMemory, Memory); 517 NV_MEMORY_ALLOCATION_PARAMS *pAllocData = pParams->pAllocParams; 518 NvHandle hClient = pCallContext->pClient->hClient; 519 NvHandle hParent = pCallContext->pResourceRef->pParentRef->hResource; 520 MEMORY_ALLOCATION_REQUEST allocRequest = {0}; 521 MEMORY_ALLOCATION_REQUEST *pAllocRequest = &allocRequest; 522 OBJGPU *pGpu = pMemory->pGpu; 523 MemoryManager *pMemoryManager = GPU_GET_MEMORY_MANAGER(pGpu); 524 Heap *pHeap; 525 NvBool bSubheap = NV_FALSE; 526 MEMORY_DESCRIPTOR *pTopLevelMemDesc = NULL; 527 MEMORY_DESCRIPTOR *pTempMemDesc = NULL; 528 HWRESOURCE_INFO hwResource; 529 RsClient *pRsClient = pCallContext->pClient; 530 RsResourceRef *pResourceRef = pCallContext->pResourceRef; 531 RsResourceRef *pDeviceRef; 532 Device *pDevice; 533 NvU32 gpuCacheAttrib; 534 NvBool bIsPmaAlloc = NV_FALSE; 535 NvU64 sizeOut; 536 NvU64 offsetOut; 537 NvU64 offsetOutTemp; 538 NvBool bLockAcquired = NV_FALSE; 539 NvU32 attr = 0; 540 NvU32 attr2 = 0; 541 FB_ALLOC_INFO *pFbAllocInfo = NULL; 542 FB_ALLOC_PAGE_FORMAT *pFbAllocPageFormat = NULL; 543 NV_STATUS rmStatus = NV_OK; 544 545 NV_ASSERT_OK_OR_RETURN( 546 refFindAncestorOfType(pResourceRef, classId(Device), &pDeviceRef)); 547 548 pDevice = dynamicCast(pDeviceRef->pResource, Device); 549 550 if (RS_IS_COPY_CTOR(pParams)) 551 { 552 if (!rmDeviceGpuLockIsOwner(pGpu->gpuInstance) && !rmGpuLockIsOwner()) 553 { 554 NV_ASSERT_OK_OR_GOTO(rmStatus, 555 rmDeviceGpuLocksAcquire(pGpu, GPUS_LOCK_FLAGS_NONE, RM_LOCK_MODULES_MEM), 556 done); 557 558 bLockAcquired = NV_TRUE; 559 } 560 561 rmStatus = vidmemCopyConstruct(pVideoMemory, pCallContext, pParams); 562 goto done; 563 } 564 565 NV_CHECK_OK_OR_RETURN(LEVEL_WARNING, stdmemValidateParams(pGpu, hClient, pAllocData)); 566 NV_CHECK_OR_RETURN(LEVEL_WARNING, 567 DRF_VAL(OS32, _ATTR, _LOCATION, pAllocData->attr) == NVOS32_ATTR_LOCATION_VIDMEM && 568 !(pAllocData->flags & NVOS32_ALLOC_FLAGS_VIRTUAL), 569 NV_ERR_INVALID_ARGUMENT); 570 571 stdmemDumpInputAllocParams(pAllocData, pCallContext); 572 573 bSubheap = FLD_TEST_DRF(OS32, _ATTR2, _ALLOCATE_FROM_SUBHEAP, _YES, pAllocData->attr2); 574 pHeap = vidmemGetHeap(pGpu, pDevice, bSubheap); 575 NV_CHECK_OR_RETURN(LEVEL_INFO, pHeap != NULL, NV_ERR_INVALID_STATE); 576 577 if (gpuIsCCorApmFeatureEnabled(pGpu) && 578 !FLD_TEST_DRF(OS32, _ATTR2, _MEMORY_PROTECTION, _UNPROTECTED, pAllocData->attr2)) 579 { 580 pAllocData->flags |= NVOS32_ALLOC_FLAGS_PROTECTED; 581 } 582 else if (gpuIsCCFeatureEnabled(pGpu) && 583 FLD_TEST_DRF(OS32, _ATTR2, _MEMORY_PROTECTION, _UNPROTECTED, pAllocData->attr2)) 584 { 585 // CC-TODO: Remove this once non-CPR regions are created 586 NV_PRINTF(LEVEL_ERROR, "Non-CPR region not yet created\n"); 587 NV_ASSERT_OR_RETURN(0, NV_ERR_INVALID_ARGUMENT); 588 } 589 else if (!gpuIsCCorApmFeatureEnabled(pGpu) && 590 FLD_TEST_DRF(OS32, _ATTR2, _MEMORY_PROTECTION, _PROTECTED, pAllocData->attr2)) 591 { 592 NV_PRINTF(LEVEL_ERROR, "Protected memory not enabled but PROTECTED flag is set by client"); 593 return NV_ERR_INVALID_ARGUMENT; 594 } 595 596 pAllocRequest->classNum = NV01_MEMORY_LOCAL_USER; 597 pAllocRequest->pUserParams = pAllocData; 598 pAllocRequest->hMemory = pResourceRef->hResource; 599 pAllocRequest->hClient = hClient; 600 pAllocRequest->hParent = hParent; 601 pAllocRequest->pGpu = pGpu; 602 pAllocRequest->internalflags = NVOS32_ALLOC_INTERNAL_FLAGS_CLIENTALLOC; 603 pAllocRequest->pHwResource = &hwResource; 604 605 // Unsure if we need to keep separate copies, but keeping old behavior for now. 606 sizeOut = pAllocData->size; 607 offsetOut = pAllocData->offset; 608 609 bIsPmaAlloc = memmgrIsPmaInitialized(pMemoryManager) && 610 !bSubheap && 611 !(pAllocData->flags & NVOS32_ALLOC_FLAGS_WPR1) && 612 !(pAllocData->flags & NVOS32_ALLOC_FLAGS_WPR2) && 613 (!(pAllocData->flags & NVOS32_ALLOC_FLAGS_FIXED_ADDRESS_ALLOCATE) || 614 heapIsPmaManaged(pGpu, pHeap, pAllocData->offset, pAllocData->offset+pAllocData->size-1)); 615 616 // Scrub-on-free is not supported by heap. Make sure clients don't get unscrubbed allocations 617 NV_CHECK_OR_RETURN(LEVEL_WARNING, 618 !memmgrIsScrubOnFreeEnabled(pMemoryManager) || bIsPmaAlloc || bSubheap, 619 NV_ERR_INVALID_STATE); 620 621 // Get the allocation from PMA if enabled. 622 if (bIsPmaAlloc) 623 { 624 SLI_LOOP_START(SLI_LOOP_FLAGS_BC_ONLY | SLI_LOOP_FLAGS_IGNORE_REENTRANCY) 625 pAllocRequest->pGpu = pGpu; 626 rmStatus = _vidmemPmaAllocate(vidmemGetHeap(pGpu, pDevice, NV_FALSE), pAllocRequest); 627 if (NV_OK != rmStatus) 628 SLI_LOOP_GOTO(done); 629 SLI_LOOP_END; 630 } 631 632 if (RMCFG_FEATURE_RM_BASIC_LOCK_MODEL) 633 { 634 // 635 // Can't move locking up as PMA locks need to be taken first. 636 // Acquire the lock *only after* PMA is done allocating. 637 // 638 if (!rmDeviceGpuLockIsOwner(pGpu->gpuInstance) && !rmGpuLockIsOwner()) 639 { 640 rmStatus = rmDeviceGpuLocksAcquire(pGpu, GPUS_LOCK_FLAGS_NONE, 641 RM_LOCK_MODULES_MEM); 642 NV_ASSERT_OR_GOTO(NV_OK == rmStatus, done); 643 644 bLockAcquired = NV_TRUE; 645 } 646 else 647 { 648 NV_ASSERT(0); 649 } 650 } 651 652 { 653 // 654 // If using thwap to generate an allocation failure here, fail the 655 // alloc right away. 656 // 657 KernelRc *pKernelRc = GPU_GET_KERNEL_RC(pGpu); 658 if (pKernelRc != NULL && 659 !krcTestAllowAlloc(pGpu, pKernelRc, 660 NV_ROBUST_CHANNEL_ALLOCFAIL_HEAP)) 661 { 662 rmStatus = NV_ERR_INSUFFICIENT_RESOURCES; 663 goto done; 664 } 665 } 666 667 // Don't allow FB allocations if FB is broken unless it is a virtual allocation or running in L2 cache only mode 668 if (pGpu->getProperty(pGpu, PDB_PROP_GPU_BROKEN_FB) && 669 !gpuIsCacheOnlyModeEnabled(pGpu)) 670 { 671 NV_ASSERT_FAILED("Video memory requested despite BROKEN FB"); 672 rmStatus = NV_ERR_NOT_SUPPORTED; 673 goto done; 674 } 675 676 if (gpuIsDebuggerActive_HAL(pGpu)) 677 { 678 // Bug 643431 - WAR for GR WFI timeouts when debugger is active 679 rmStatus = NV_ERR_BUSY_RETRY; 680 goto done; 681 } 682 683 pFbAllocInfo = portMemAllocNonPaged(sizeof(FB_ALLOC_INFO)); 684 NV_ASSERT_TRUE_OR_GOTO(rmStatus, pFbAllocInfo != NULL, NV_ERR_NO_MEMORY, done); 685 686 pFbAllocPageFormat = portMemAllocNonPaged(sizeof(FB_ALLOC_PAGE_FORMAT)); 687 NV_ASSERT_TRUE_OR_GOTO(rmStatus, pFbAllocPageFormat != NULL, NV_ERR_NO_MEMORY, done); 688 689 // Call heapAlloc to get memory. 690 if (gpumgrGetBcEnabledStatus(pGpu)) 691 { 692 MEMORY_DESCRIPTOR *pPrev = NULL; 693 694 // VGPU won't run in SLI. So no need to set subheap flags in memdesc. 695 NV_ASSERT(!bSubheap); 696 697 // Create dummy top level memdesc 698 rmStatus = memdescCreate(&pTopLevelMemDesc, pGpu, RM_PAGE_SIZE, 0, 699 NV_TRUE, 700 ADDR_FBMEM, 701 NV_MEMORY_UNCACHED, 702 MEMDESC_FLAGS_DUMMY_TOPLEVEL); 703 if (rmStatus != NV_OK) 704 goto done; 705 pPrev = pTopLevelMemDesc; 706 pTopLevelMemDesc->_subDeviceAllocCount = gpumgrGetSubDeviceCountFromGpu(pGpu); // very important to have this here 707 pTopLevelMemDesc->_flags |= MEMDESC_FLAGS_ALLOC_PER_SUBDEVICE; 708 709 offsetOutTemp = ~((NvU64)0); 710 offsetOut = 0; 711 sizeOut = 0; 712 SLI_LOOP_START(SLI_LOOP_FLAGS_BC_ONLY | SLI_LOOP_FLAGS_IGNORE_REENTRANCY) 713 { 714 // Call heapAlloc to get memory. 715 pAllocRequest->pMemDesc = NULL; // heapAlloc_IMPL needs a NULL pMemdesc in order for it to be allocated, 716 717 portMemSet(pFbAllocInfo, 0, sizeof(FB_ALLOC_INFO)); 718 portMemSet(pFbAllocPageFormat, 0, sizeof(FB_ALLOC_PAGE_FORMAT)); 719 pFbAllocInfo->pageFormat = pFbAllocPageFormat; 720 721 memUtilsInitFBAllocInfo(pAllocRequest->pUserParams, pFbAllocInfo, hClient, hParent); 722 723 rmStatus = memmgrAllocResources(pGpu, pMemoryManager, pAllocRequest, pFbAllocInfo); 724 if (rmStatus != NV_OK) 725 SLI_LOOP_GOTO(done); 726 727 rmStatus = vidmemAllocResources(pGpu, pMemoryManager, pAllocRequest, pFbAllocInfo, 728 vidmemGetHeap(pGpu, pDevice, NV_FALSE)); 729 if (rmStatus != NV_OK) 730 SLI_LOOP_GOTO(done); 731 732 NV_ASSERT(pAllocRequest->pMemDesc); 733 734 // 735 // Spoof the flags contiguity, size and alignment of heapAlloc'ed subdev memdesc 736 // to dummy top level memdesc we created 737 // 738 pTopLevelMemDesc->Alignment = pAllocRequest->pMemDesc->Alignment; 739 pTopLevelMemDesc->_flags = pAllocRequest->pMemDesc->_flags | MEMDESC_FLAGS_ALLOC_PER_SUBDEVICE | MEMDESC_FLAGS_DUMMY_TOPLEVEL; 740 pTopLevelMemDesc->Size = pAllocRequest->pMemDesc->Size; 741 pTopLevelMemDesc->ActualSize = pAllocRequest->pMemDesc->ActualSize; 742 pTopLevelMemDesc->_pageSize = pAllocRequest->pMemDesc->_pageSize; 743 pTopLevelMemDesc->pHeap = pAllocRequest->pMemDesc->pHeap; 744 745 // add pAllocData->pMemDesc for subdev to linked list 746 pPrev->_pNext = pAllocRequest->pMemDesc; 747 pPrev = pAllocRequest->pMemDesc; 748 749 // 750 // After Bugs 1967134, 1511574, 1448340, 1761278, 1993033 are implemented, remove the code below and 751 // always set offsetOut = ~0 for the broadcast case. 752 // Then remove the interface to remove the physical offset. 753 // 754 if (offsetOutTemp == ~((NvU64)0)) // 1st 755 offsetOut = offsetOutTemp = memdescGetPhysAddr(pAllocRequest->pMemDesc, AT_GPU, 0); 756 else if (offsetOut != ~((NvU64)0)) 757 { 758 offsetOutTemp = memdescGetPhysAddr(pAllocRequest->pMemDesc, AT_GPU, 0); 759 if (offsetOut != offsetOutTemp) 760 { 761 offsetOut = ~((NvU64)0); 762 } 763 } 764 765 NV_ASSERT(!sizeOut || pAllocRequest->pMemDesc->Size == sizeOut); 766 sizeOut = pAllocRequest->pMemDesc->Size; 767 } 768 SLI_LOOP_END; 769 770 pTempMemDesc = memdescGetMemDescFromGpu(pTopLevelMemDesc, pGpu); 771 } 772 else 773 { 774 775 portMemSet(pFbAllocInfo, 0, sizeof(FB_ALLOC_INFO)); 776 portMemSet(pFbAllocPageFormat, 0, sizeof(FB_ALLOC_PAGE_FORMAT)); 777 pFbAllocInfo->pageFormat = pFbAllocPageFormat; 778 779 memUtilsInitFBAllocInfo(pAllocRequest->pUserParams, pFbAllocInfo, hClient, hParent); 780 781 rmStatus = memmgrAllocResources(pGpu, pMemoryManager, pAllocRequest, pFbAllocInfo); 782 if (rmStatus != NV_OK) 783 goto done; 784 785 rmStatus = vidmemAllocResources(pGpu, pMemoryManager, pAllocRequest, pFbAllocInfo, pHeap); 786 if (rmStatus != NV_OK) 787 goto done; 788 789 NV_ASSERT(pAllocRequest->pMemDesc); 790 791 pTempMemDesc = pTopLevelMemDesc = pAllocRequest->pMemDesc; 792 offsetOut = memdescGetPhysAddr(pTempMemDesc, AT_GPU, 0); 793 sizeOut = pTempMemDesc->Size; 794 795 if (bSubheap) 796 memdescSetFlag(pTempMemDesc, MEMDESC_FLAGS_OWNED_BY_CURRENT_DEVICE, NV_TRUE); 797 } 798 799 pAllocData->limit = sizeOut - 1; 800 801 if (bIsPmaAlloc) 802 { 803 // Cache the PMA_ALLOC_INFO structure. 804 SLI_LOOP_START(SLI_LOOP_FLAGS_BC_ONLY | SLI_LOOP_FLAGS_IGNORE_REENTRANCY) 805 memdescGetMemDescFromGpu(pTopLevelMemDesc, pGpu)->pPmaAllocInfo = pAllocRequest->pPmaAllocInfo[gpumgrGetSubDeviceInstanceFromGpu(pGpu)]; 806 SLI_LOOP_END; 807 } 808 809 // 810 // Set the unprotected flag in memdesc. Some control calls will use 811 // this flag to determine if this memory lies in the protected or 812 // unprotected region and use that to gather statistics like total 813 // protected and unprotected memory usage by different clients, etc 814 // 815 if (gpuIsCCorApmFeatureEnabled(pGpu) && 816 FLD_TEST_DRF(OS32, _ATTR2, _MEMORY_PROTECTION, _UNPROTECTED, pAllocData->attr2)) 817 { 818 SLI_LOOP_START(SLI_LOOP_FLAGS_BC_ONLY | SLI_LOOP_FLAGS_IGNORE_REENTRANCY) 819 memdescSetFlag(memdescGetMemDescFromGpu(pTopLevelMemDesc, pGpu), 820 MEMDESC_FLAGS_ALLOC_IN_UNPROTECTED_MEMORY, NV_TRUE); 821 SLI_LOOP_END; 822 } 823 824 // 825 // Video memory is always locally transparently cached. It does not require 826 // any cache managment. Marked cached unconditionally. Non-coherent peer 827 // caching is handled with an override at mapping time. 828 // 829 if (DRF_VAL(OS32, _ATTR2, _GPU_CACHEABLE, pAllocData->attr2) == 830 NVOS32_ATTR2_GPU_CACHEABLE_DEFAULT) 831 { 832 pAllocData->attr2 = FLD_SET_DRF(OS32, _ATTR2, _GPU_CACHEABLE, _YES, 833 pAllocData->attr2); 834 } 835 gpuCacheAttrib = NV_MEMORY_CACHED; 836 837 // ClientDB can set the pagesize for memdesc. 838 // With GPU SMMU mapping, this needs to be set on the SMMU memdesc. 839 // So SMMU allocation should happen before memConstructCommon() 840 // Eventaully SMMU allocation will be part of memdescAlloc(). 841 842 // 843 // There are a few cases where the heap will return an existing 844 // memdesc. Only update attributes if it is new. 845 // 846 // @todo attr tracking should move into heapAlloc 847 // 848 if (pTempMemDesc->RefCount == 1) 849 { 850 SLI_LOOP_START(SLI_LOOP_FLAGS_BC_ONLY | SLI_LOOP_FLAGS_IGNORE_REENTRANCY); 851 memdescSetGpuCacheAttrib(memdescGetMemDescFromGpu(pTopLevelMemDesc, pGpu), gpuCacheAttrib); 852 SLI_LOOP_END; 853 854 855 // An SMMU mapping will be added to FB allocations in the following cases: 856 // 1. RM clients forcing SMMU mapping via flags 857 // GPU Arch verification with VPR is one such usecase. 858 859 if (FLD_TEST_DRF(OS32, _ATTR2, _SMMU_ON_GPU, _ENABLE, pAllocData->attr2)) 860 { 861 NV_ASSERT_FAILED("SMMU mapping allocation is not supported for ARMv7"); 862 rmStatus = NV_ERR_NOT_SUPPORTED; 863 864 memdescFree(pTopLevelMemDesc); 865 memdescDestroy(pTopLevelMemDesc); 866 goto done; 867 } 868 } 869 870 rmStatus = memConstructCommon(pMemory, pAllocRequest->classNum, pAllocData->flags, 871 pTopLevelMemDesc, pAllocData->owner, pHeap, pAllocData->attr, 872 pAllocData->attr2, 0, pAllocData->type, 873 pAllocData->tag, &hwResource); 874 if (rmStatus != NV_OK) 875 { 876 memdescFree(pTopLevelMemDesc); 877 memdescDestroy(pTopLevelMemDesc); 878 879 goto done; 880 } 881 NV_ASSERT(pMemory->pMemDesc); 882 NV_ASSERT(pMemory->pHeap); 883 884 // 885 // vGPU: 886 // 887 // Since vGPU does all real hardware management in the 888 // host, if we are in guest OS (where IS_VIRTUAL(pGpu) is true), 889 // do an RPC to the host to do the hardware update. 890 // 891 // XXX: This is a hack for now. No Hw resources are assumed to be used in the call. 892 // The host is only requested to make an alias to the allocated heap. 893 894 if (!IS_GSP_CLIENT(pGpu)) 895 { 896 NV_RM_RPC_ALLOC_VIDMEM(pGpu, 897 hClient, 898 hParent, 899 pAllocRequest->hMemory, 900 pTopLevelMemDesc, 901 sizeOut, 902 attr, 903 attr2, 904 pAllocData->type, 905 pAllocData->flags, 906 pAllocData->height, 907 pAllocData->width, 908 pAllocData->format, 909 pAllocData->comprCovg, 910 pAllocData->zcullCovg, 911 pAllocData->alignment, 912 pAllocData->pitch, 913 pAllocData->ctagOffset, 914 rmStatus); 915 916 if (rmStatus != NV_OK) 917 { 918 memDestructCommon(pMemory); 919 memdescFree(pTopLevelMemDesc); 920 memdescDestroy(pTopLevelMemDesc); 921 pTopLevelMemDesc = NULL; 922 goto done; 923 } 924 925 pMemory->bRpcAlloc = NV_TRUE; 926 } 927 928 if (RMCFG_MODULE_GPUACCT) 929 { 930 OBJGPU *pGpu = pMemory->pGpu; 931 OBJSYS *pSys = SYS_GET_INSTANCE(); 932 GpuAccounting *pGpuAcct = SYS_GET_GPUACCT(pSys); 933 RmClient *pClient = dynamicCast(pRsClient, RmClient); 934 NvU64 fbUsage; 935 NV2080_CTRL_GPU_PID_INFO_DATA pidInfoData; 936 NV2080_CTRL_SMC_SUBSCRIPTION_INFO smcInfo; 937 938 if (pGpu->getProperty(pGpu, PDB_PROP_GPU_ACCOUNTING_ON) && 939 (pMemory->pSubDevice == NULL) && // Skipping for subdevice memory allocations. Was this intentional? 940 (pCallContext->secInfo.privLevel < RS_PRIV_LEVEL_KERNEL)) 941 { 942 KernelMIGManager *pKernelMIGManager = GPU_GET_KERNEL_MIG_MANAGER(pGpu); 943 NvBool bSmcGpuPartitioningEnabled = IS_MIG_IN_USE(pGpu); 944 MIG_INSTANCE_REF partitionRef = kmigmgrMakeNoMIGReference(); 945 NvBool bGlobalInfo = NV_TRUE; 946 smcInfo.computeInstanceId = PARTITIONID_INVALID; 947 smcInfo.gpuInstanceId = PARTITIONID_INVALID; 948 // 949 // With SMC GPU partitioning enabled, get associated partition ref and 950 // only account for partitionLocal usages 951 // 952 if (bSmcGpuPartitioningEnabled) 953 { 954 NV_CHECK_OK_OR_GOTO(rmStatus, LEVEL_ERROR, 955 kmigmgrGetInstanceRefFromDevice(pGpu, pKernelMIGManager, 956 pDevice, &partitionRef), 957 done); 958 bGlobalInfo = NV_FALSE; 959 } 960 portMemSet(&pidInfoData, 0, sizeof(NV2080_CTRL_GPU_PID_INFO_DATA)); 961 962 gpuFindClientInfoWithPidIterator(pGpu, pClient->ProcID, 963 pClient->SubProcessID, 964 classId(Memory), 965 &pidInfoData, 966 &smcInfo, 967 &partitionRef, 968 bGlobalInfo); 969 970 // Only account for memory owned by the process. 971 fbUsage = pidInfoData.vidMemUsage.memPrivate + 972 pidInfoData.vidMemUsage.memSharedOwned; 973 974 gpuacctUpdateProcPeakFbUsage(pGpuAcct, pGpu->gpuInstance, 975 pClient->ProcID, pClient->SubProcessID,fbUsage); 976 } 977 } 978 979 pAllocData->size = sizeOut; 980 pAllocData->offset = offsetOut; 981 982 stdmemDumpOutputAllocParams(pAllocData); 983 984 done: 985 if (bSubheap && pTempMemDesc != NULL && rmStatus != NV_OK) 986 heapRemoveRef(pHeap); 987 988 portMemFree(pFbAllocPageFormat); 989 portMemFree(pFbAllocInfo); 990 991 if (bLockAcquired) 992 { 993 // UNLOCK: release GPUs lock 994 rmDeviceGpuLocksRelease(pGpu, GPUS_LOCK_FLAGS_NONE, NULL); 995 } 996 997 if (bIsPmaAlloc && NV_OK != rmStatus) 998 { 999 SLI_LOOP_START(SLI_LOOP_FLAGS_BC_ONLY | SLI_LOOP_FLAGS_IGNORE_REENTRANCY) 1000 1001 if (pAllocRequest->pPmaAllocInfo[gpumgrGetSubDeviceInstanceFromGpu(pGpu)]) 1002 vidmemPmaFree(pGpu, vidmemGetHeap(pGpu, pDevice, NV_FALSE), 1003 pAllocRequest->pPmaAllocInfo[gpumgrGetSubDeviceInstanceFromGpu(pGpu)], 0); 1004 SLI_LOOP_END; 1005 } 1006 1007 return rmStatus; 1008 } 1009 1010 void 1011 vidmemDestruct_IMPL 1012 ( 1013 VideoMemory *pVideoMemory 1014 ) 1015 { 1016 Memory *pMemory = staticCast(pVideoMemory, Memory); 1017 OBJGPU *pGpu = pMemory->pGpu; 1018 MEMORY_DESCRIPTOR *pMemDesc = pMemory->pMemDesc; 1019 1020 // Free any association of the memory with existing third-party p2p object 1021 CliUnregisterMemoryFromThirdPartyP2P(pMemory); 1022 1023 memDestructCommon(pMemory); 1024 1025 // free the video memory based on how it was alloced ... a non-zero 1026 // heapOwner indicates it was heapAlloc-ed. 1027 if (!memdescGetCustomHeap(pMemDesc)) 1028 { 1029 MemoryManager *pMemoryManager = GPU_GET_MEMORY_MANAGER(pGpu); 1030 NvHandle hClient = RES_GET_CLIENT_HANDLE(pVideoMemory); 1031 NvHandle hParent = RES_GET_PARENT_HANDLE(pVideoMemory); 1032 NvU32 heapOwner = pMemory->HeapOwner; 1033 NV_STATUS status; 1034 1035 NV_PRINTF(LEVEL_INFO, "Function: FREE\n"); 1036 NV_PRINTF(LEVEL_INFO, " Owner: 0x%x\n", heapOwner); 1037 NV_PRINTF(LEVEL_INFO, " hMemory: 0x%x\n", RES_GET_HANDLE(pVideoMemory)); 1038 1039 // 1040 // memHandle (and the block's size/type) is returned, but not 1041 // needed ... the caller already has the correct handle to pass 1042 // to memDestructCommon 1043 // 1044 if (gpumgrGetBcEnabledStatus(pGpu) && 1045 (memdescGetAddressSpace(memdescGetMemDescFromGpu(pMemDesc, pGpu)) == ADDR_FBMEM)) 1046 { 1047 MEMORY_DESCRIPTOR *pNextMemDesc = NULL, *pSubdevMemDesc = NULL; 1048 pSubdevMemDesc = pMemDesc->_pNext; 1049 1050 NV_ASSERT(pMemDesc->_subDeviceAllocCount > 1); 1051 NV_ASSERT(!IS_MIG_IN_USE(pGpu)); 1052 1053 SLI_LOOP_START(SLI_LOOP_FLAGS_BC_ONLY); 1054 if (pSubdevMemDesc == NULL) 1055 { 1056 NV_ASSERT(0); 1057 SLI_LOOP_BREAK; 1058 } 1059 // Unlink per-gpu memdesc for SLI client allocations before freeing heap 1060 pNextMemDesc = pSubdevMemDesc->_pNext; 1061 1062 status = memmgrFree(pGpu, 1063 pMemoryManager, 1064 pSubdevMemDesc->pHeap, 1065 hClient, 1066 hParent, // device or subdevice 1067 0, 1068 heapOwner, 1069 pSubdevMemDesc); 1070 NV_ASSERT(status == NV_OK); 1071 1072 pSubdevMemDesc = pNextMemDesc; 1073 SLI_LOOP_END; 1074 1075 memdescFree(pMemDesc); 1076 memdescDestroy(pMemDesc); 1077 } 1078 else 1079 { 1080 Heap *pHeap = pMemDesc->pHeap; 1081 1082 NV_ASSERT(pMemDesc->_subDeviceAllocCount == 1); 1083 status = memmgrFree(pGpu, 1084 pMemoryManager, 1085 pHeap, 1086 hClient, 1087 hParent, // device or subdevice 1088 0, 1089 heapOwner, 1090 pMemDesc); 1091 NV_ASSERT(status == NV_OK); 1092 1093 } 1094 } 1095 } 1096 1097 NV_STATUS 1098 vidmemAllocResources 1099 ( 1100 OBJGPU *pGpu, 1101 MemoryManager *pMemoryManager, 1102 MEMORY_ALLOCATION_REQUEST *pAllocRequest, 1103 FB_ALLOC_INFO *pFbAllocInfo, 1104 Heap *pHeap 1105 ) 1106 { 1107 NV_STATUS status = NV_OK; 1108 KernelMemorySystem *pKernelMemorySystem = GPU_GET_KERNEL_MEMORY_SYSTEM(pGpu); 1109 NV_MEMORY_ALLOCATION_PARAMS *pVidHeapAlloc = pAllocRequest->pUserParams; 1110 NvU64 requestedSize = pVidHeapAlloc->size; 1111 HWRESOURCE_INFO *pHwResource = NULL; 1112 MEMORY_DESCRIPTOR *pMemDesc = NULL; 1113 NvBool bAllocedMemDesc = NV_FALSE; 1114 NvBool bAllocedMemory = NV_FALSE; 1115 NvBool bNoncontigAllowed = NV_FALSE; 1116 NvBool bNoncontigAllocation = NV_FALSE; 1117 NvHandle hVASpace = pVidHeapAlloc->hVASpace; 1118 NvBool bIsPmaOwned = NV_FALSE; 1119 NvU32 subdeviceInst = gpumgrGetSubDeviceInstanceFromGpu(pGpu); 1120 NvBool bContig = NV_TRUE; 1121 1122 const MEMORY_SYSTEM_STATIC_CONFIG *pMemorySystemConfig = 1123 kmemsysGetStaticConfig(pGpu, pKernelMemorySystem); 1124 1125 // Most vidmem allocations external to RM get routed to PMA. 1126 bIsPmaOwned = (pAllocRequest->pPmaAllocInfo[subdeviceInst] != NULL); 1127 1128 // check if pMemorySystemConfig is not yet initialized on Offload client. 1129 if (pMemorySystemConfig == NULL) 1130 { 1131 status = NV_ERR_INVALID_STATE; 1132 NV_ASSERT(0); 1133 goto failed; 1134 } 1135 1136 if (pVidHeapAlloc->flags & NVOS32_ALLOC_FLAGS_VIRTUAL_ONLY) 1137 { 1138 NV_PRINTF(LEVEL_WARNING, 1139 "Virtual-only flag used with physical allocation\n"); 1140 status = NV_ERR_INVALID_ARGUMENT; 1141 goto failed; 1142 } 1143 // 1144 // In NUMA systems, the memory allocation comes from kernel 1145 // and kernel doesn't support fixed address allocation. 1146 // 1147 if ((pVidHeapAlloc->flags & NVOS32_ALLOC_FLAGS_FIXED_ADDRESS_ALLOCATE) && 1148 bIsPmaOwned && 1149 osNumaOnliningEnabled(pGpu->pOsGpuInfo)) 1150 { 1151 NV_PRINTF(LEVEL_WARNING, 1152 "NVOS32_ALLOC_FLAGS_FIXED_ADDRESS_ALLOCATE for PMA cannot be " 1153 "accommodated for NUMA systems\n"); 1154 status = NV_ERR_INVALID_ARGUMENT; 1155 goto failed; 1156 } 1157 if (FLD_TEST_DRF(OS32, _ATTR2, _32BIT_POINTER, _ENABLE, pVidHeapAlloc->attr2)) 1158 { 1159 NV_PRINTF(LEVEL_WARNING, 1160 "Virtual-only 32-bit pointer attr used with physical allocation\n"); 1161 status = NV_ERR_INVALID_ARGUMENT; 1162 goto failed; 1163 } 1164 if (hVASpace != 0) 1165 { 1166 NV_PRINTF(LEVEL_WARNING, 1167 "VA space handle used with physical allocation\n"); 1168 status = NV_ERR_INVALID_ARGUMENT; 1169 goto failed; 1170 } 1171 1172 // Prior to this change, heap was silently ignoring non-contig Vidmem allocation requests. 1173 // With this change to allow non-contig vidmem allocation, I was getting a DVS Extended Sanity failures & regression on Windows. 1174 // It seems Windows is making some allocations with non-contig flag, but was expecting contig allocation. 1175 // So enable the non-contig path only for verif platforms. 1176 // 1177 bContig = FLD_TEST_DRF(OS32, _ATTR, _PHYSICALITY, _DEFAULT, pVidHeapAlloc->attr) ? 1178 !pHeap->getProperty(pHeap, PDB_PROP_HEAP_NONCONTIG_ALLOC_BY_DEFAULT) : 1179 !FLD_TEST_DRF(OS32, _ATTR, _PHYSICALITY, _NONCONTIGUOUS, pVidHeapAlloc->attr); 1180 1181 NV_CHECK_OK_OR_GOTO(status, LEVEL_WARNING, memUtilsAllocMemDesc(pGpu, pAllocRequest, pFbAllocInfo, &pMemDesc, pHeap, 1182 ADDR_FBMEM, bContig, &bAllocedMemDesc), failed); 1183 1184 #ifndef NV_DISABLE_NONCONTIG_ALLOC 1185 bNoncontigAllowed = 1186 (!bContig || FLD_TEST_DRF(OS32, _ATTR, _PHYSICALITY, _ALLOW_NONCONTIGUOUS, pVidHeapAlloc->attr)) && 1187 pMemoryManager->bAllowNoncontiguousAllocation && 1188 !FLD_TEST_DRF(OS32, _ATTR, _FORMAT, _SWIZZLED, pVidHeapAlloc->attr); 1189 #endif 1190 1191 if (bIsPmaOwned) 1192 { 1193 pFbAllocInfo->offset = pMemDesc->_pteArray[0]; 1194 1195 if (pMemoryManager->bEnableDynamicGranularityPageArrays == NV_TRUE) 1196 { 1197 // 1198 // set pagearray granularity if dynamic memdesc pagesize is enabled 1199 // this ensures consistency in calculation of page count 1200 // 1201 pMemDesc->pageArrayGranularity = pAllocRequest->pPmaAllocInfo[subdeviceInst]->pageSize; 1202 } 1203 1204 if (bContig) 1205 { 1206 NV_PRINTF(LEVEL_INFO, "---> PMA Path taken contiguous\n"); 1207 pVidHeapAlloc->attr = FLD_SET_DRF(OS32, _ATTR, _PHYSICALITY, 1208 _CONTIGUOUS, 1209 pVidHeapAlloc->attr); 1210 1211 memdescDescribe(pAllocRequest->pMemDesc, ADDR_FBMEM, 1212 pAllocRequest->pPmaAllocInfo[subdeviceInst]->pageArray[0], 1213 pFbAllocInfo->adjustedSize); 1214 } 1215 else 1216 { 1217 NV_PRINTF(LEVEL_INFO, "---> PMA Path taken discontiguous\n"); 1218 NV_ASSERT(!bContig && bNoncontigAllowed); 1219 pVidHeapAlloc->attr = FLD_SET_DRF(OS32, _ATTR, _PHYSICALITY, 1220 _NONCONTIGUOUS, 1221 pVidHeapAlloc->attr); 1222 memdescFillPages(pAllocRequest->pMemDesc, 0, 1223 pAllocRequest->pPmaAllocInfo[subdeviceInst]->pageArray, 1224 pAllocRequest->pPmaAllocInfo[subdeviceInst]->pageCount, 1225 pAllocRequest->pPmaAllocInfo[subdeviceInst]->pageSize); 1226 } 1227 } 1228 else 1229 { 1230 OBJHEAP_ALLOC_DATA allocData = { 0 }; 1231 1232 bNoncontigAllocation = !bContig; 1233 1234 allocData.alignment = pVidHeapAlloc->alignment; 1235 allocData.allocSize = pFbAllocInfo->size + pFbAllocInfo->pad; 1236 1237 status = heapAlloc(pGpu, 1238 pFbAllocInfo->hClient, 1239 pHeap, 1240 pAllocRequest, 1241 pAllocRequest->hMemory, 1242 &allocData, 1243 pFbAllocInfo, 1244 &pHwResource, 1245 &bNoncontigAllocation, 1246 bNoncontigAllowed, 1247 bAllocedMemDesc); 1248 1249 // heapAlloc might create a new memdesc for compbit/discontig case 1250 pMemDesc = pAllocRequest->pMemDesc; 1251 1252 if (status != NV_OK) 1253 { 1254 goto failed; 1255 } 1256 1257 bAllocedMemory = NV_TRUE; 1258 } 1259 1260 if (!bIsPmaOwned && (pVidHeapAlloc->type != NVOS32_TYPE_PMA)) 1261 { 1262 NvU64 size, numBlocks; 1263 NvU32 i; 1264 1265 // 1266 // Pre-fill cache to prevent FB read accesses if in cache only mode and not doing one time pre-fill 1267 // Also, only need to fill the *requested* size and not the actual allocation size 1268 // These might not work with noncontig allocation since they assume 1269 // physically contig memory 1270 // 1271 if (!bNoncontigAllocation && 1272 gpuIsCacheOnlyModeEnabled(pGpu) && 1273 !pMemorySystemConfig->bL2PreFill) 1274 { 1275 NV_STATUS preFillStatus; 1276 1277 requestedSize = (requestedSize == 0) ? pVidHeapAlloc->size : requestedSize; 1278 preFillStatus = kmemsysPreFillCacheOnlyMemory_HAL(pGpu, pKernelMemorySystem, 1279 pFbAllocInfo->offset, requestedSize); 1280 NV_ASSERT(preFillStatus == NV_OK); 1281 } 1282 1283 if (memdescGetContiguity(pMemDesc, AT_GPU)) 1284 { 1285 size = pFbAllocInfo->adjustedSize; 1286 numBlocks = 1; // One contiguous page 1287 } 1288 else 1289 { 1290 // Only 4k-sized noncontig pages supported currently 1291 size = RM_PAGE_SIZE; 1292 numBlocks = pMemDesc->PageCount; 1293 } 1294 1295 for (i = 0; i < numBlocks; i++) 1296 { 1297 // Ensures memory is fully initialized 1298 memmgrScrubMemory_HAL(pGpu, pMemoryManager, memdescGetPte(pMemDesc, AT_GPU, i), size); 1299 } 1300 } 1301 1302 // get possibly updated surface attributes 1303 pVidHeapAlloc->attr = pFbAllocInfo->retAttr; 1304 pVidHeapAlloc->attr2 = pFbAllocInfo->retAttr2; 1305 1306 // update contiguity attribute to reflect memdesc 1307 if (memdescGetContiguity(pAllocRequest->pMemDesc, AT_GPU)) 1308 { 1309 pVidHeapAlloc->attr = FLD_SET_DRF(OS32, _ATTR, _PHYSICALITY, 1310 _CONTIGUOUS, 1311 pVidHeapAlloc->attr); 1312 } 1313 else 1314 { 1315 pVidHeapAlloc->attr = FLD_SET_DRF(OS32, _ATTR, _PHYSICALITY, 1316 _NONCONTIGUOUS, 1317 pVidHeapAlloc->attr); 1318 } 1319 1320 // TODO remove once we don't have hwResource in MEM_BLOCK 1321 if (pHwResource != NULL) 1322 { 1323 pHwResource->attr = pFbAllocInfo->retAttr; 1324 pHwResource->attr2 = pFbAllocInfo->retAttr2; 1325 pHwResource->hwResId = pFbAllocInfo->hwResId; 1326 pHwResource->comprCovg = pFbAllocInfo->comprCovg; 1327 pHwResource->ctagOffset = pFbAllocInfo->ctagOffset; 1328 } 1329 1330 pVidHeapAlloc->offset = pFbAllocInfo->offset; 1331 1332 if (pAllocRequest->pHwResource != NULL) 1333 { 1334 pAllocRequest->pHwResource->attr = pFbAllocInfo->retAttr; 1335 pAllocRequest->pHwResource->attr2 = pFbAllocInfo->retAttr2; 1336 pAllocRequest->pHwResource->hwResId = pFbAllocInfo->hwResId; 1337 pAllocRequest->pHwResource->comprCovg = pFbAllocInfo->comprCovg; 1338 pAllocRequest->pHwResource->ctagOffset = pFbAllocInfo->ctagOffset; 1339 pAllocRequest->pHwResource->hwResId = pFbAllocInfo->hwResId; 1340 } 1341 1342 return NV_OK; 1343 1344 failed: 1345 if (bAllocedMemory) 1346 { 1347 memmgrFree(pGpu, pMemoryManager, pHeap, 1348 pFbAllocInfo->hClient, pFbAllocInfo->hDevice, 0, 1349 pVidHeapAlloc->owner, 1350 pMemDesc); 1351 } 1352 else 1353 { 1354 memmgrFreeHwResources(pGpu, pMemoryManager, pFbAllocInfo); 1355 } 1356 1357 if (bAllocedMemDesc) 1358 { 1359 memdescDestroy(pAllocRequest->pMemDesc); 1360 pAllocRequest->pMemDesc = NULL; 1361 } 1362 1363 return status; 1364 } 1365 1366 NV_STATUS 1367 vidmemCheckCopyPermissions_IMPL 1368 ( 1369 VideoMemory *pVideoMemory, 1370 OBJGPU *pDstGpu, 1371 Device *pDstDevice 1372 ) 1373 { 1374 Memory *pMemory = staticCast(pVideoMemory, Memory); 1375 OBJGPU *pSrcGpu = pMemory->pGpu; 1376 NvHandle hSrcClient = RES_GET_CLIENT_HANDLE(pVideoMemory); 1377 NvHandle hDstClient = RES_GET_CLIENT_HANDLE(pDstDevice); 1378 KernelMIGManager *pSrcKernelMIGManager = GPU_GET_KERNEL_MIG_MANAGER(pSrcGpu); 1379 KernelMIGManager *pDstKernelMIGManager = GPU_GET_KERNEL_MIG_MANAGER(pDstGpu); 1380 NvBool bSrcClientKernel = (rmclientGetCachedPrivilegeByHandle(hSrcClient) >= RS_PRIV_LEVEL_KERNEL); 1381 NvBool bDstClientKernel = (rmclientGetCachedPrivilegeByHandle(hDstClient) >= RS_PRIV_LEVEL_KERNEL); 1382 1383 // 1384 // XXX: In case of MIG memory, duping across GPU instances is not allowed 1385 // Bug 2815350 - Due to this bug, allow kernel clients to bypass this check 1386 // 1387 if (!bDstClientKernel && (IS_MIG_IN_USE(pSrcGpu) || IS_MIG_IN_USE(pDstGpu))) 1388 { 1389 // 1390 // Due to Bug 2815350 we have to take an exception for kernel clients, 1391 // hence we can't use a direct instanceRef check. 1392 // Rather than defaulting to heap based checks, keeping the 1393 // instanceRef check in else as that's what we should only have 1394 // when Bug 2815350 is fixed. 1395 // Both clients are kernel - Force subscription check. No exception needed. 1396 // Only SrcClientKernel - Enforce subscription check in dstClient 1397 // DstClientKernel - Ignore any enforcement as kernel clients are 1398 // allowed to dup without any enforcement 1399 // 1400 1401 if (bSrcClientKernel) 1402 { 1403 // Get memory partition heap from both clients and compare 1404 Heap *pDstClientHeap = NULL; 1405 NV_CHECK_OK_OR_RETURN(LEVEL_WARNING, 1406 kmigmgrGetMemoryPartitionHeapFromDevice(pDstGpu, pDstKernelMIGManager, 1407 pDstDevice, &pDstClientHeap)); 1408 1409 // Make sure memory is coming from same heaps 1410 if (pDstClientHeap != pMemory->pHeap) 1411 { 1412 NV_PRINTF(LEVEL_WARNING, 1413 "Duping outside of GPU instance not allowed with MIG\n"); 1414 return NV_ERR_NOT_SUPPORTED; 1415 } 1416 } 1417 else 1418 { 1419 // 1420 // Due to Bug 2815350 we have to take an exception for kernel clients, 1421 // hence we can't use a direct instanceRef check 1422 // 1423 MIG_INSTANCE_REF srcInstRef; 1424 MIG_INSTANCE_REF dstInstRef; 1425 RsResourceRef *pSrcDeviceRef; 1426 Device *pSrcDevice; 1427 1428 NV_ASSERT_OK_OR_RETURN( 1429 refFindAncestorOfType(RES_GET_REF(pMemory), classId(Device), &pSrcDeviceRef)); 1430 1431 pSrcDevice = dynamicCast(pSrcDeviceRef->pResource, Device); 1432 1433 // Check instance subscription of source and destination clients 1434 NV_CHECK_OK_OR_RETURN(LEVEL_WARNING, 1435 kmigmgrGetInstanceRefFromDevice(pSrcGpu, pSrcKernelMIGManager, 1436 pSrcDevice, &srcInstRef)); 1437 NV_CHECK_OK_OR_RETURN(LEVEL_WARNING, 1438 kmigmgrGetInstanceRefFromDevice(pDstGpu, pDstKernelMIGManager, 1439 pDstDevice, &dstInstRef)); 1440 1441 // 1442 // Memory duping is allowed accross compute instances. so ignore 1443 // compute instance differences 1444 // 1445 srcInstRef = kmigmgrMakeGIReference(srcInstRef.pKernelMIGGpuInstance); 1446 dstInstRef = kmigmgrMakeGIReference(dstInstRef.pKernelMIGGpuInstance); 1447 if (!kmigmgrAreMIGReferencesSame(&srcInstRef, &dstInstRef)) 1448 { 1449 NV_PRINTF(LEVEL_WARNING, 1450 "GPU instance subscription differ between Source and Destination clients\n"); 1451 return NV_ERR_NOT_SUPPORTED; 1452 } 1453 } 1454 } 1455 1456 return NV_OK; 1457 } 1458