/*
 * SPDX-FileCopyrightText: Copyright (c) 2020-2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 * SPDX-License-Identifier: MIT
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 */

#include "mem_mgr/virtual_mem.h"
#include "mem_mgr/vaspace.h"
#include "gpu/mem_mgr/virt_mem_allocator.h"
#include "virtualization/hypervisor/hypervisor.h"
#include "vgpu/rpc.h"
#include "gpu/mem_mgr/mem_desc.h"
#include "mem_mgr/mem.h"
#include "gpu/mem_mgr/mem_mgr.h"
#include "core/locks.h"
#include "kernel/gpu/rc/kernel_rc.h"
#include "Nvcm.h"
#include "gpu/mem_mgr/vaspace_api.h"
#include "gpu/mem_mgr/mem_utils.h"
#include "gpu/bus/kern_bus.h"
#include "gpu/bus/p2p_api.h"
#include "mem_mgr/gpu_vaspace.h"

#include "class/cl0070.h" // NV01_MEMORY_VIRTUAL
#include "class/cl50a0.h" // NV50_MEMORY_VIRTUAL

static void _virtmemFreeKernelMapping(OBJGPU *, CLI_DMA_MAPPING_INFO *);

/*!
 * _virtmemQueryVirtAllocParams
 *
 * @brief
 *     Queries for the actual size of the VA allocation, the alignment,
 *     and the mask of page sizes (needed for page table allocation).
 *
 * @param[in]  pGpu               OBJGPU pointer
 * @param[in]  hClient            Client handle
 * @param[in]  hDevice            Device handle
 * @param[in]  pAllocData         Pointer to VIDHEAP_ALLOC_DATA
 * @param[out] pAlign             Alignment
 * @param[out] pSize              Size of allocation
 * @param[out] ppVAS              Virtual address space for request
 * @param[out] pPageSizeLockMask  Mask of page sizes locked during VA reservation
 *
 * @returns
 *      NV_OK
 */
static NV_STATUS
_virtmemQueryVirtAllocParams
(
    OBJGPU                      *pGpu,
    NvHandle                     hClient,
    NvHandle                     hDevice,
    NV_MEMORY_ALLOCATION_PARAMS *pAllocData,
    NvU64                       *pAlign,
    NvU64                       *pSize,
    OBJVASPACE                 **ppVAS,
    NvU64                       *pPageSizeLockMask
)
{
    MemoryManager        *pMemoryManager     = GPU_GET_MEMORY_MANAGER(pGpu);
    RsClient             *pClient;
    FB_ALLOC_INFO        *pFbAllocInfo       = NULL;
    FB_ALLOC_PAGE_FORMAT *pFbAllocPageFormat = NULL;
    NV_STATUS             status             = NV_OK;
    NvBool                bReleaseGpuLock    = NV_FALSE;

    pFbAllocInfo = portMemAllocNonPaged(sizeof(FB_ALLOC_INFO));
    if (pFbAllocInfo == NULL)
    {
        NV_ASSERT(0);
        status = NV_ERR_NO_MEMORY;
        goto done;
    }

    pFbAllocPageFormat = portMemAllocNonPaged(sizeof(FB_ALLOC_PAGE_FORMAT));
    if (pFbAllocPageFormat == NULL)
    {
        NV_ASSERT(0);
        status = NV_ERR_NO_MEMORY;
        goto done;
    }

    portMemSet(pFbAllocInfo, 0, sizeof(FB_ALLOC_INFO));
    portMemSet(pFbAllocPageFormat, 0, sizeof(FB_ALLOC_PAGE_FORMAT));
    pFbAllocInfo->pageFormat = pFbAllocPageFormat;

    pFbAllocInfo->pageFormat->attr  = pAllocData->attr;
    pFbAllocInfo->pageFormat->attr2 = pAllocData->attr2;
    pFbAllocInfo->pageFormat->flags = pAllocData->flags;
    *pSize  = pAllocData->size;
    *pAlign = pAllocData->alignment;

    // LOCK: acquire device lock
    if (!rmDeviceGpuLockIsOwner(gpuGetInstance(pGpu)))
    {
        NV_ASSERT_OK_OR_GOTO(status, rmDeviceGpuLocksAcquire(pGpu, GPUS_LOCK_FLAGS_NONE,
                                                             RM_LOCK_MODULES_MEM_PMA), done);
        bReleaseGpuLock = NV_TRUE;
    }

    *pPageSizeLockMask = stdmemQueryPageSize(pMemoryManager, hClient,
                                             pAllocData);
    if (*pPageSizeLockMask == 0)
    {
        status = NV_ERR_INVALID_STATE;
        goto done;
    }

    NV_ASSERT_OK_OR_GOTO(status,
        serverGetClientUnderLock(&g_resServ, hClient, &pClient),
        done);

    NV_ASSERT_OK_OR_GOTO(status,
        vaspaceGetByHandleOrDeviceDefault(pClient, hDevice, pAllocData->hVASpace, ppVAS),
        done);

    NV_ASSERT_OK_OR_GOTO(status,
        vaspaceApplyDefaultAlignment(*ppVAS, pFbAllocInfo, pAlign, pSize, pPageSizeLockMask),
        done);

done:
    if (bReleaseGpuLock)
    {
        // UNLOCK: release device lock
        rmDeviceGpuLocksRelease(pGpu, GPUS_LOCK_FLAGS_NONE, NULL);
    }

    portMemFree(pFbAllocPageFormat);
    portMemFree(pFbAllocInfo);

    return status;
}
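
//
// Usage sketch (illustrative only): the expected call pattern for
// _virtmemQueryVirtAllocParams, mirroring its one call site in
// virtmemConstruct_IMPL below, where the results feed the page table
// mempool reservation. All names here are local placeholders.
//
//     NvU64       size             = 0;
//     NvU64       align            = 0;
//     NvU64       pageSizeLockMask = 0;
//     OBJVASPACE *pVAS             = NULL;
//
//     NV_STATUS status = _virtmemQueryVirtAllocParams(pGpu, hClient, hDevice,
//                                                     pAllocData, &align, &size,
//                                                     &pVAS, &pageSizeLockMask);
//     if (status == NV_OK)
//         status = vaspaceReserveMempool(pVAS, pGpu, hClient, size,
//                                        pageSizeLockMask,
//                                        VASPACE_RESERVE_FLAGS_NONE);
//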

/*!
 * @brief Handle copy construction for VirtualMemory object
 */
static NV_STATUS
_virtmemCopyConstruct
(
    VirtualMemory *pDstVirtualMemory,
    CALL_CONTEXT *pCallContext,
    RS_RES_ALLOC_PARAMS_INTERNAL *pParams
)
{
    RsClient      *pDstClient        = pCallContext->pClient;
    RsClient      *pSrcClient        = pParams->pSrcClient;
    RsResourceRef *pSrcRef           = pParams->pSrcRef;
    VirtualMemory *pSrcVirtualMemory = dynamicCast(pSrcRef->pResource, VirtualMemory);
    Memory        *pDstMemory        = staticCast(pDstVirtualMemory, Memory);
    Memory        *pSrcMemory        = staticCast(pSrcVirtualMemory, Memory);
    OBJGPU        *pSrcGpu           = pSrcMemory->pGpu;
    OBJVASPACE    *pVASSrc           = NULL;
    NvBool         bIncAllocRefCnt   = NV_FALSE;

    // Special handling for Dup of the FLA VASpace
    if (pSrcVirtualMemory->bFlaVAS)
    {
        Device        *pDstDevice;
        RsClient      *pFlaClient;
        RM_API        *pRmApi = rmapiGetInterface(RMAPI_GPU_LOCK_INTERNAL);
        NvHandle       hImportedVASpace = NV01_NULL_OBJECT;
        RsResourceRef *pDupedVasRef;

        NV_CHECK_OK_OR_RETURN(LEVEL_ERROR,
            serverGetClientUnderLock(&g_resServ, GPU_GET_KERNEL_BUS(pSrcGpu)->flaInfo.hClient, &pFlaClient));

        NV_CHECK_OK_OR_RETURN(LEVEL_ERROR,
            vaspaceGetByHandleOrDeviceDefault(pFlaClient,
                                              RES_GET_HANDLE(pSrcMemory->pDevice),
                                              GPU_GET_KERNEL_BUS(pSrcGpu)->flaInfo.hFlaVASpace,
                                              &pVASSrc));

        //
        // FLA memory can be duped during the import stage, and the importing
        // client might not be the same as the exporting client. The importing
        // client might also not bind to the exporting FLA VASpace on the
        // exporting device. In that case, we might see leaks in the exporting
        // FLA VASpace. To avoid those scenarios, we dupe the FLA VAS to the
        // importing client under the exporting device.
        // RS-TODO: Bug 3059751 to track the duped VAS as dependant in ResServer
        //
        NV_ASSERT_OK_OR_RETURN(deviceGetByGpu(pDstClient, pSrcGpu, NV_TRUE, &pDstDevice));

        NV_CHECK_OK_OR_RETURN(LEVEL_ERROR,
            pRmApi->DupObject(pRmApi,
                              pDstClient->hClient,
                              RES_GET_HANDLE(pDstDevice),
                              &hImportedVASpace,
                              GPU_GET_KERNEL_BUS(pSrcGpu)->flaInfo.hClient,
                              GPU_GET_KERNEL_BUS(pSrcGpu)->flaInfo.hFlaVASpace,
                              0));

        if (clientGetResourceRef(pDstClient, hImportedVASpace, &pDupedVasRef) == NV_OK)
            refAddDependant(pDupedVasRef, RES_GET_REF(pDstVirtualMemory));

        pDstVirtualMemory->hVASpace = hImportedVASpace;

        // Increase refcount if locally managed
        bIncAllocRefCnt = !pSrcMemory->bRpcAlloc;
    }
    else if (pSrcVirtualMemory->hVASpace == NV_MEMORY_VIRTUAL_SYSMEM_DYNAMIC_HVASPACE)
    {
        // A legacy sysmem dynamic object does not have a valid hVASpace
        pDstVirtualMemory->hVASpace = NV_MEMORY_VIRTUAL_SYSMEM_DYNAMIC_HVASPACE;

        // No VA space to update
        bIncAllocRefCnt = NV_FALSE;
    }
    else
    {
        OBJVASPACE *pVASDst = NULL;

        NV_CHECK_OK_OR_RETURN(LEVEL_SILENT,
            vaspaceGetByHandleOrDeviceDefault(pSrcClient,
                                              RES_GET_HANDLE(pSrcMemory->pDevice),
                                              pSrcVirtualMemory->hVASpace, &pVASSrc));
        NV_CHECK_OK_OR_RETURN(LEVEL_SILENT,
            vaspaceGetByHandleOrDeviceDefault(pDstClient,
                                              RES_GET_HANDLE(pDstMemory->pDevice),
                                              NV01_NULL_OBJECT, &pVASDst));
        if (pVASSrc != pVASDst)
        {
            return NV_ERR_INVALID_DEVICE;
        }

        pDstVirtualMemory->hVASpace = NV01_NULL_OBJECT;

        // Increase refcount for locally managed NV50_MEMORY_VIRTUAL
        bIncAllocRefCnt = pSrcVirtualMemory->bReserveVaOnAlloc && !pSrcMemory->bRpcAlloc;
    }

    pDstVirtualMemory->bAllowUnicastMapping = pSrcVirtualMemory->bAllowUnicastMapping;
    pDstVirtualMemory->bReserveVaOnAlloc    = pSrcVirtualMemory->bReserveVaOnAlloc;
    pDstVirtualMemory->bFlaVAS              = pSrcVirtualMemory->bFlaVAS;

    // Mappings do not follow the virtual memory object
    pDstVirtualMemory->pDmaMappingList = NULL;

    if (bIncAllocRefCnt)
    {
        NvU64 vaddr;
        NvU64 size;

        virtmemGetAddressAndSize(pSrcVirtualMemory, &vaddr, &size);
        NV_CHECK_OK_OR_RETURN(LEVEL_ERROR,
            vaspaceIncAllocRefCnt(pVASSrc, vaddr));
    }

    return NV_OK;
}

/*!
 * virtmemConstruct
 *
 * @brief
 *     This routine provides common allocation services used by the
 *     following heap allocation functions:
 *         NVOS32_FUNCTION_ALLOC_SIZE
 *         NVOS32_FUNCTION_ALLOC_SIZE_RANGE
 *         NVOS32_FUNCTION_ALLOC_TILED_PITCH_HEIGHT
 *
 * @param[in] pVirtualMemory  Pointer to VirtualMemory object
 * @param[in] pCallContext    Pointer to the current CALL_CONTEXT.
 * @param[in] pParams         Pointer to the alloc params
 *
 * @return 'NV_OK'
 *     Operation completed successfully.
 * @return 'NV_ERR_NO_MEMORY'
 *     There is not enough available memory to satisfy allocation request.
 * @return 'NV_ERR_INSUFFICIENT_RESOURCES'
 *     Not enough available resources to satisfy allocation request.
 */
NV_STATUS
virtmemConstruct_IMPL
(
    VirtualMemory                *pVirtualMemory,
    CALL_CONTEXT                 *pCallContext,
    RS_RES_ALLOC_PARAMS_INTERNAL *pParams
)
{
    Memory                      *pMemory        = staticCast(pVirtualMemory, Memory);
    NV_MEMORY_ALLOCATION_PARAMS *pAllocData     = pParams->pAllocParams;
    MEMORY_ALLOCATION_REQUEST    allocRequest   = {0};
    MEMORY_ALLOCATION_REQUEST   *pAllocRequest  = &allocRequest;
    OBJGPU                      *pGpu           = pMemory->pGpu;
    MemoryManager               *pMemoryManager = GPU_GET_MEMORY_MANAGER(pGpu);
    OBJVASPACE                  *pVAS           = NULL;
    HWRESOURCE_INFO              hwResource;
    RsClient                    *pRsClient      = pCallContext->pClient;
    RsResourceRef               *pResourceRef   = pCallContext->pResourceRef;
    RsResourceRef               *pVASpaceRef    = NULL;
    NvU32                        gpuCacheAttrib;
    NV_STATUS                    status         = NV_OK;
    NvHandle                     hClient        = pCallContext->pClient->hClient;
    NvHandle                     hParent        = pCallContext->pResourceRef->pParentRef->hResource;
    NvU64                        sizeOut;
    NvU64                        offsetOut;
    NvBool                       bLockAcquired  = NV_FALSE;
    NvU32                        attr           = 0;
    NvU32                        attr2          = 0;
    NvBool                       bRpcAlloc      = NV_FALSE;
    NvBool                       bResAllocated  = NV_FALSE;
    NvU32                        gpuMask        = 0;
    NvU32                        gpuMaskInitial = 0;
    FB_ALLOC_INFO               *pFbAllocInfo   = NULL;
    FB_ALLOC_PAGE_FORMAT        *pFbAllocPageFormat = NULL;

    // The bulk of copy construction is done by the Memory class; handle our members here.
    if (RS_IS_COPY_CTOR(pParams))
    {
        if (!rmGpuGroupLockIsOwner(pGpu->gpuInstance, GPU_LOCK_GRP_ALL, &gpuMask))
        {
            //
            // If we hold some GPU locks already, then acquiring more GPU locks
            // may violate lock ordering and cause deadlock. To avoid deadlock
            // in this case, attempt to take the locks with a conditional acquire.
            //
            gpuMaskInitial = rmGpuLocksGetOwnedMask();
            NvU32 lockFlag = (gpuMaskInitial == 0)
                ? GPUS_LOCK_FLAGS_NONE
                : GPU_LOCK_FLAGS_COND_ACQUIRE;

            NV_ASSERT_OK_OR_RETURN(rmGpuGroupLockAcquire(pGpu->gpuInstance,
                                                         GPU_LOCK_GRP_ALL,
                                                         lockFlag,
                                                         RM_LOCK_MODULES_MEM,
                                                         &gpuMask));

            bLockAcquired = NV_TRUE;
        }

        status = _virtmemCopyConstruct(pVirtualMemory, pCallContext, pParams);

        if (bLockAcquired)
        {
            bLockAcquired = NV_FALSE;
            rmGpuGroupLockRelease(gpuMask & (~gpuMaskInitial), GPUS_LOCK_FLAGS_NONE);
        }

        goto done;
    }

    pVirtualMemory->hVASpace = RM_INVALID_VASPACE_HANDLE;
    pVirtualMemory->bAllowUnicastMapping = NV_FALSE;
    pVirtualMemory->bReserveVaOnAlloc = NV_FALSE;
    pVirtualMemory->bFlaVAS = NV_FALSE;
    pVirtualMemory->pDmaMappingList = NULL;

    // NV01_MEMORY_VIRTUAL does not allocate typed memory from the heap
    if (pParams->externalClassId == NV01_MEMORY_VIRTUAL)
        return NV_OK;

    NV_CHECK_OK_OR_RETURN(LEVEL_ERROR, stdmemValidateParams(pGpu, hClient, pAllocData));
    NV_CHECK_OR_RETURN(LEVEL_ERROR, pAllocData->flags & NVOS32_ALLOC_FLAGS_VIRTUAL, NV_ERR_INVALID_ARGUMENT);

    stdmemDumpInputAllocParams(pAllocData, pCallContext);

    pAllocRequest->classNum = NV50_MEMORY_VIRTUAL;
    pAllocRequest->pUserParams = pAllocData;
    pAllocRequest->hMemory = pResourceRef->hResource;
    pAllocRequest->hClient = hClient;
    pAllocRequest->hParent = hParent;
    pAllocRequest->pGpu = pGpu;
    pAllocRequest->internalflags = NVOS32_ALLOC_INTERNAL_FLAGS_CLIENTALLOC;
    pAllocRequest->pHwResource = &hwResource;

    // Unsure if we need to keep separate copies, but keeping old behavior for now.
    sizeOut = pAllocData->size;
    offsetOut = pAllocData->offset;

    //
    // Reserve memory for page tables in case of non-lazy page table
    // allocations.
    //
    // PageLevelMemReserve will reserve only if the PDB property for
    // client managed page tables is set.
    //
    if (memmgrIsPmaInitialized(pMemoryManager) &&
        !(pAllocData->flags & NVOS32_ALLOC_FLAGS_LAZY) &&
        !(pAllocData->flags & NVOS32_ALLOC_FLAGS_EXTERNALLY_MANAGED))
    {
        NvU64 size;
        NvU64 align;
        NvU64 pageSizeLockMask;

        SLI_LOOP_START(SLI_LOOP_FLAGS_BC_ONLY | SLI_LOOP_FLAGS_IGNORE_REENTRANCY)

        pAllocRequest->pGpu = pGpu;
        size             = 0;
        align            = 0;
        pageSizeLockMask = 0;

        status = _virtmemQueryVirtAllocParams(pGpu, hClient, hParent,
                                              pAllocData, &align, &size,
                                              &pVAS, &pageSizeLockMask);
        if (NV_OK != status)
            SLI_LOOP_GOTO(done);

        status = vaspaceReserveMempool(pVAS, pGpu, hClient,
                                       size, pageSizeLockMask,
                                       VASPACE_RESERVE_FLAGS_NONE);
        if (NV_OK != status)
            SLI_LOOP_GOTO(done);

        SLI_LOOP_END;
    }

    if (RMCFG_FEATURE_RM_BASIC_LOCK_MODEL)
    {
        //
        // Can't move locking up as PMA locks need to be taken first.
        // Acquire the lock *only after* PMA is done allocating.
        //
        if (!rmDeviceGpuLockIsOwner(pGpu->gpuInstance) && !rmGpuLockIsOwner())
        {
            NV_ASSERT_OK_OR_GOTO(status,
                                 rmDeviceGpuLocksAcquire(pGpu, GPUS_LOCK_FLAGS_NONE,
                                                         RM_LOCK_MODULES_MEM),
                                 done);

            bLockAcquired = NV_TRUE;
        }
    }

    {
        //
        // If using thwap to generate an allocation failure here, fail the
        // alloc right away.
        //
        KernelRc *pKernelRc = GPU_GET_KERNEL_RC(pGpu);
        if (pKernelRc != NULL &&
            !krcTestAllowAlloc(pGpu, pKernelRc,
                               NV_ROBUST_CHANNEL_ALLOCFAIL_HEAP))
        {
            status = NV_ERR_INSUFFICIENT_RESOURCES;
            goto done;
        }
    }

    // Validate the virtual address space
    NV_CHECK_OK_OR_GOTO(status, LEVEL_SILENT,
        vaspaceGetByHandleOrDeviceDefault(pRsClient, hParent, pAllocData->hVASpace, &pVAS),
        done);

    pVirtualMemory->bFlaVAS = !!(vaspaceGetFlags(pVAS) & VASPACE_FLAGS_FLA);
    pVirtualMemory->bOptimizePageTableMempoolUsage =
        !!(vaspaceGetFlags(pVAS) & VASPACE_FLAGS_OPTIMIZE_PTETABLE_MEMPOOL_USAGE);

    if (IS_VIRTUAL(pGpu) || IS_GSP_CLIENT(pGpu))
    {
        NvBool bSriovFull = IS_VIRTUAL_WITH_SRIOV(pGpu) &&
                            !gpuIsWarBug200577889SriovHeavyEnabled(pGpu);
        NvBool bBar1VAS = !!(vaspaceGetFlags(pVAS) & VASPACE_FLAGS_BAR_BAR1);

        //
        // Skip the RPC to the Host RM when the local RM is managing page tables.
        // Special case for early SR-IOV that only manages BAR1 and FLA page
        // tables in the guest.
        //
        bRpcAlloc = !(gpuIsSplitVasManagementServerClientRmEnabled(pGpu) ||
                      (bSriovFull && (bBar1VAS || pVirtualMemory->bFlaVAS)));
    }

    if (bRpcAlloc)
    {
        NV_CHECK_OK_OR_GOTO(status, LEVEL_SILENT,
            memdescCreate(&pAllocRequest->pMemDesc, pGpu,
                          pAllocRequest->pUserParams->size, 0, NV_TRUE,
                          ADDR_VIRTUAL,
                          NV_MEMORY_UNCACHED,
                          MEMDESC_FLAGS_SKIP_RESOURCE_COMPUTE),
            done);
    }
    else
    {
        pFbAllocInfo = portMemAllocNonPaged(sizeof(FB_ALLOC_INFO));
        NV_ASSERT_TRUE_OR_GOTO(status, pFbAllocInfo != NULL, NV_ERR_NO_MEMORY, done);

        pFbAllocPageFormat = portMemAllocNonPaged(sizeof(FB_ALLOC_PAGE_FORMAT));
        NV_ASSERT_TRUE_OR_GOTO(status, pFbAllocPageFormat != NULL, NV_ERR_NO_MEMORY, done);

        portMemSet(pFbAllocInfo, 0, sizeof(FB_ALLOC_INFO));
        portMemSet(pFbAllocPageFormat, 0, sizeof(FB_ALLOC_PAGE_FORMAT));
        pFbAllocInfo->pageFormat = pFbAllocPageFormat;

        memUtilsInitFBAllocInfo(pAllocRequest->pUserParams, pFbAllocInfo, hClient, hParent);

        // Call memmgr to get memory.
        NV_CHECK_OK_OR_GOTO(status, LEVEL_SILENT,
            memmgrAllocResources(pGpu, pMemoryManager, pAllocRequest, pFbAllocInfo),
            done);

        NV_CHECK_OK_OR_GOTO(status, LEVEL_SILENT,
            virtmemAllocResources(pGpu, pMemoryManager, pAllocRequest, pFbAllocInfo),
            done);

        bResAllocated = NV_TRUE;
    }

    NV_ASSERT(pAllocRequest->pMemDesc != NULL);

    //
    // Copy the final heap size/offset back to the client struct.
    //
    // What should we return: the system or the device physical address?
    // Return the device physical address for now; this may change with the
    // heap refactoring. Both can be obtained with the
    // nv0041CtrlCmdGetSurfacePhysAttr ctrl call.
    //
    offsetOut = memdescGetPhysAddr(pAllocRequest->pMemDesc, AT_GPU, 0);
    sizeOut = pAllocRequest->pMemDesc->Size;
    pAllocData->limit = sizeOut - 1;

    // To handle < nv50
    if (DRF_VAL(OS32, _ATTR2, _GPU_CACHEABLE, pAllocData->attr2) ==
        NVOS32_ATTR2_GPU_CACHEABLE_DEFAULT)
    {
        pAllocData->attr2 = FLD_SET_DRF(OS32, _ATTR2, _GPU_CACHEABLE, _NO,
                                        pAllocData->attr2);
    }

    if (DRF_VAL(OS32, _ATTR2, _GPU_CACHEABLE, pAllocData->attr2) ==
        NVOS32_ATTR2_GPU_CACHEABLE_YES)
    {
        gpuCacheAttrib = NV_MEMORY_CACHED;
    }
    else
    {
        gpuCacheAttrib = NV_MEMORY_UNCACHED;
    }

    //
    // Issue the RPC if page tables are managed in the Host/GSP RM. This depends
    // on the type of object we have and the VGPU/GSP mode. We issue this prior
    // to memConstructCommon, as the RPC fills in pAllocData->offset.
    //
    if (bRpcAlloc)
    {
        NV_RM_RPC_ALLOC_VIRTMEM(pGpu,
                                hClient,
                                hParent,
                                pAllocData->hVASpace,
                                pAllocRequest->hMemory,
                                &pAllocData->offset,
                                pAllocRequest->pMemDesc->Size,
                                attr,
                                attr2,
                                pAllocData->type,
                                pAllocData->flags,
                                pAllocData->height,
                                pAllocData->width,
                                pAllocData->format,
                                pAllocData->comprCovg,
                                pAllocData->zcullCovg,
                                pAllocData->rangeLo,
                                pAllocData->rangeHi,
                                pAllocData->alignment,
                                status);
        NV_CHECK_OK_OR_GOTO(status, LEVEL_ERROR, status, done);

        // Update the memory descriptor with the results of the RPC
        memdescDescribe(pAllocRequest->pMemDesc,
                        memdescGetAddressSpace(pAllocRequest->pMemDesc),
                        pAllocData->offset,
                        pAllocRequest->pMemDesc->Size);

        // Assign the offset back to the caller
        offsetOut = memdescGetPhysAddr(pAllocRequest->pMemDesc, AT_GPU, 0);
    }

    //
    // The idea is to allocate virtual address space and record it (lo, limit)
    // in this mem object. Later, call MapMemoryDma(hThisMem, hSomePhysMem) to
    // back it.
    //
    NV_CHECK_OK_OR_GOTO(status, LEVEL_SILENT,
        memConstructCommon(pMemory, pAllocRequest->classNum, pAllocData->flags,
                           pAllocRequest->pMemDesc, pAllocData->owner, NULL, pAllocData->attr,
                           pAllocData->attr2, 0, pAllocData->type, NVOS32_MEM_TAG_NONE, NULL),
        done);
    pMemory->bRpcAlloc = bRpcAlloc;

    pVirtualMemory->hVASpace = pAllocData->hVASpace;
    pVirtualMemory->bReserveVaOnAlloc = NV_TRUE;

    if (pAllocData->hVASpace != NV01_NULL_OBJECT)
    {
        NV_CHECK_OK_OR_GOTO(status, LEVEL_SILENT,
            clientGetResourceRef(pRsClient, pAllocData->hVASpace, &pVASpaceRef),
            done);
        if (pVASpaceRef != NULL)
            refAddDependant(pVASpaceRef, pResourceRef);
    }

    NV_ASSERT(pMemory->pMemDesc);
    NV_ASSERT(memdescGetAddressSpace(pMemory->pMemDesc) == ADDR_VIRTUAL);
    memdescSetGpuCacheAttrib(pMemory->pMemDesc, gpuCacheAttrib);

    pAllocData->size = sizeOut;
    pAllocData->offset = offsetOut;

    stdmemDumpOutputAllocParams(pAllocData);

done:
    if (status != NV_OK)
    {
        if (pAllocRequest->pMemDesc != NULL)
        {
            if (pMemory->pMemDesc != NULL)
            {
                memDestructCommon(pMemory);
                pMemory->pMemDesc = NULL;
            }

            if (bResAllocated)
            {
                memmgrFree(pGpu, pMemoryManager, NULL,
                           hClient, hParent, pAllocData->hVASpace,
                           pAllocData->owner,
                           pAllocRequest->pMemDesc);
            }

            if (bRpcAlloc)
            {
                memdescDestroy(pAllocRequest->pMemDesc);
            }
        }
        // Cleanup of vaspaceReserveMempool allocations is managed independently
    }

    portMemFree(pFbAllocPageFormat);
    portMemFree(pFbAllocInfo);

    if (bLockAcquired)
    {
        // UNLOCK: release GPUs lock
        rmDeviceGpuLocksRelease(pGpu, GPUS_LOCK_FLAGS_NONE, NULL);
    }

    return status;
}

/*!
 * @brief Handle destruction of VirtualMemory specific fields
 */
void
virtmemDestruct_IMPL
(
    VirtualMemory *pVirtualMemory
)
{
    Memory            *pMemory        = staticCast(pVirtualMemory, Memory);
    OBJGPU            *pGpu           = pMemory->pGpu;
    MemoryManager     *pMemoryManager = GPU_GET_MEMORY_MANAGER(pGpu);
    NvHandle           hClient;
    NvHandle           hParent;
    NvHandle           hVASpace;
    MEMORY_DESCRIPTOR *pMemDesc;
    NvU32              heapOwner;
    NV_STATUS          status = NV_OK;

    // Save the needed state from the memory object before common destruction
    hClient   = RES_GET_CLIENT_HANDLE(pVirtualMemory);
    hParent   = RES_GET_PARENT_HANDLE(pVirtualMemory);
    hVASpace  = pVirtualMemory->hVASpace;
    pMemDesc  = pMemory->pMemDesc;
    heapOwner = pMemory->HeapOwner;

    NV_ASSERT(pMemDesc);

    memDestructCommon(pMemory);

    //
    // NV50_MEMORY_VIRTUAL may have an underlying heap allocation associated with
    // the object to free, depending on which RM/VGPU context we are in. This is
    // tracked at object creation time.
    //
    // If we RPCed a NV50_MEMORY_VIRTUAL, or we have a NV01_MEMORY_VIRTUAL, then
    // just destroy the memdesc and RPC the free if required.
    //
    if (pMemory->bRpcAlloc || pMemory->categoryClassId == NV01_MEMORY_VIRTUAL)
    {
        NV_ASSERT(pMemDesc->Allocated == 0);
        memdescDestroy(pMemDesc);
    }
    else
    {
        NV_ASSERT(heapOwner != 0);

        // Get the relevant information from the client memory info and free it
        status = memmgrFree(pGpu,
                            pMemoryManager,
                            NULL,
                            hClient,
                            hParent,
                            hVASpace,
                            heapOwner,
                            pMemDesc);
        if (status != NV_OK)
        {
            NV_PRINTF(LEVEL_ERROR,
                      "VirtualMemory memmgrFree failed, client: %x, hVASpace: %x, gpu: %x\n",
                      RES_GET_CLIENT_HANDLE(pVirtualMemory),
                      hVASpace,
                      pGpu->gpuInstance);
        }
    }
}

NV_STATUS
virtmemAllocResources
(
    OBJGPU                    *pGpu,
    MemoryManager             *pMemoryManager,
    MEMORY_ALLOCATION_REQUEST *pAllocRequest,
    FB_ALLOC_INFO             *pFbAllocInfo
)
{
    NV_STATUS                    status          = NV_OK;
    MEMORY_DESCRIPTOR           *pMemDesc        = NULL;
    RsClient                    *pRsClient       = NULL;
    NV_MEMORY_ALLOCATION_PARAMS *pVidHeapAlloc   = pAllocRequest->pUserParams;
    NvHandle                     hVASpace        = pVidHeapAlloc->hVASpace;
    NvBool                       bAllocedMemDesc = NV_FALSE;
    NvBool                       bBar1VA         = NV_FALSE;
    NvBool                       bFlaVA          = NV_FALSE;

    NV_ASSERT(!(pVidHeapAlloc->flags & NVOS32_ALLOC_FLAGS_WPR1) && !(pVidHeapAlloc->flags & NVOS32_ALLOC_FLAGS_WPR2));

    NV_CHECK_OK_OR_GOTO(status, LEVEL_ERROR, memUtilsAllocMemDesc(pGpu, pAllocRequest, pFbAllocInfo, &pMemDesc, NULL,
                                                                  ADDR_VIRTUAL, NV_TRUE, &bAllocedMemDesc), failed);

    // Only a kernel client can request a protected allocation
    if (pFbAllocInfo->pageFormat->flags & NVOS32_ALLOC_FLAGS_ALLOCATE_KERNEL_PRIVILEGED)
    {
        CALL_CONTEXT *pCallContext = resservGetTlsCallContext();
        RS_PRIV_LEVEL privLevel;

        //
        // This function has use cases where the call context is unavailable.
        // In those cases, fall back to cached privileges.
        //
        if (pCallContext == NULL)
        {
            privLevel = rmclientGetCachedPrivilegeByHandle(pFbAllocInfo->hClient);
        }
        else
        {
            privLevel = pCallContext->secInfo.privLevel;
        }

        if (privLevel >= RS_PRIV_LEVEL_KERNEL)
        {
            pFbAllocInfo->bIsKernelAlloc = NV_TRUE;
        }
        else
        {
            NV_PRINTF(LEVEL_ERROR, "NV_ERR_INSUFFICIENT_PERMISSIONS\n");
            status = NV_ERR_INSUFFICIENT_PERMISSIONS;
            goto failed;
        }
    }

    // Allocate a virtual surface
    if (pVidHeapAlloc->flags & NVOS32_ALLOC_FLAGS_FIXED_ADDRESS_ALLOCATE)
        pFbAllocInfo->offset = pVidHeapAlloc->offset - pFbAllocInfo->alignPad;

    //
    // pFbAllocInfo->hClient=0 is sometimes passed and not always needed, so
    // do not immediately fail this call; fail only if the client needs to be used.
    //
    status = serverGetClientUnderLock(&g_resServ, pFbAllocInfo->hClient, &pRsClient);

    //
    // vGPU:
    //
    // Since vGPU does all real hardware management in the
    // host, if we are in the guest OS (where IS_VIRTUAL(pGpu) is true),
    // do an RPC to the host to do the hardware update.
    // In case of SR-IOV, the VAS is managed by the guest, so there is no need
    // to communicate with the host for VA allocation.
    //
    if (IS_VIRTUAL_WITH_SRIOV(pGpu))
    {
        OBJVASPACE *pVAS = NULL;

        // Only try this if GetClient succeeded; otherwise pass through the status from its failure.
        if (pRsClient != NULL)
            status = vaspaceGetByHandleOrDeviceDefault(pRsClient, pFbAllocInfo->hDevice, hVASpace, &pVAS);
        if (NV_OK != status)
            goto failed;

        bBar1VA = !!(vaspaceGetFlags(pVAS) & VASPACE_FLAGS_BAR_BAR1);
        bFlaVA  = !!(vaspaceGetFlags(pVAS) & VASPACE_FLAGS_FLA);
    }

    // For virtual FLA allocations, we don't have to RPC
    if ((!IS_VIRTUAL(pGpu) && !IS_GSP_CLIENT(pGpu)) ||
        bBar1VA || bFlaVA ||
        gpuIsSplitVasManagementServerClientRmEnabled(pGpu))
    {
        OBJVASPACE     *pVAS  = NULL;
        OBJGVASPACE    *pGVAS = NULL;
        NvU64           align = pFbAllocInfo->align + 1;
        VAS_ALLOC_FLAGS flags = {0};
        NvU64           pageSizeLockMask = 0;
        pFbAllocInfo->internalflags = pAllocRequest->internalflags;

        // Only try this if GetClient succeeded; otherwise pass through the status from its failure.
        if (pRsClient != NULL)
            status = vaspaceGetByHandleOrDeviceDefault(pRsClient, pFbAllocInfo->hDevice, hVASpace, &pVAS);
        if (NV_OK != status)
            goto failed;

        //
        // Feature requested for RM unlinked SLI:
        // Clients can pass an allocation flag to the device or VA space constructor
        // so that mappings and allocations will fail without an explicit address.
        //
        pGVAS = dynamicCast(pVAS, OBJGVASPACE);
        if (pGVAS != NULL)
        {
            if ((pGVAS->flags & VASPACE_FLAGS_REQUIRE_FIXED_OFFSET) &&
                !(pVidHeapAlloc->flags & NVOS32_ALLOC_FLAGS_FIXED_ADDRESS_ALLOCATE))
            {
                status = NV_ERR_INVALID_ARGUMENT;
                NV_PRINTF(LEVEL_ERROR, "The VA space requires all allocations to specify a fixed address\n");
                goto failed;
            }
        }

        status = vaspaceFillAllocParams(pVAS, pFbAllocInfo,
                                        &pFbAllocInfo->size, &align,
                                        &pVidHeapAlloc->rangeLo, &pVidHeapAlloc->rangeHi,
                                        &pageSizeLockMask, &flags);
        if (NV_OK != status)
        {
            NV_PRINTF(LEVEL_ERROR, "FillAllocParams failed.\n");
            DBG_BREAKPOINT();
        }
        else
        {
            status = vaspaceAlloc(pVAS, pFbAllocInfo->size, align,
                                  pVidHeapAlloc->rangeLo, pVidHeapAlloc->rangeHi,
                                  pageSizeLockMask, flags, &pFbAllocInfo->offset);
            if (NV_OK != status)
            {
                NV_PRINTF(LEVEL_ERROR,
                          "VA Space alloc failed! Status Code: 0x%x Size: 0x%llx RangeLo: 0x%llx,"
                          " RangeHi: 0x%llx, pageSzLockMask: 0x%llx\n",
                          status, pFbAllocInfo->size,
                          pVidHeapAlloc->rangeLo, pVidHeapAlloc->rangeHi,
                          pageSizeLockMask);
                status = NV_ERR_INSUFFICIENT_RESOURCES;
                goto failed;
            }

            memdescDescribe(pMemDesc, ADDR_VIRTUAL,
                            pFbAllocInfo->offset,
                            pFbAllocInfo->size);

            // Return alignment info.
            pFbAllocInfo->align = align - 1;
            pVidHeapAlloc->alignment = align;
        }
    }
    else
    {
        // Possibly dead code: IS_VIRTUAL and bSplitVAs are only enabled on legacy vGPU.
        memdescDescribe(pMemDesc, ADDR_VIRTUAL, memdescGetPte(pMemDesc, AT_GPU, 0),
                        pMemDesc->Size);
    }

    //
    // Report the default (any) page size for virtual allocations with no page
    // size restriction. The actual page size will be determined at map time.
    //
    if (FLD_TEST_DRF(OS32, _ATTR, _PAGE_SIZE, _DEFAULT, pFbAllocInfo->pageFormat->attr))
    {
        pFbAllocInfo->retAttr = FLD_SET_DRF(OS32, _ATTR, _PAGE_SIZE, _DEFAULT, pFbAllocInfo->retAttr);
    }

    // Get possibly updated surface attributes
    pVidHeapAlloc->attr = pFbAllocInfo->retAttr;
    pVidHeapAlloc->attr2 = pFbAllocInfo->retAttr2;

    // Update the contiguity attribute to reflect the memdesc
    if (memdescGetContiguity(pAllocRequest->pMemDesc, AT_GPU))
    {
        pVidHeapAlloc->attr = FLD_SET_DRF(OS32, _ATTR, _PHYSICALITY,
                                          _CONTIGUOUS,
                                          pVidHeapAlloc->attr);
    }
    else
    {
        pVidHeapAlloc->attr = FLD_SET_DRF(OS32, _ATTR, _PHYSICALITY,
                                          _NONCONTIGUOUS,
                                          pVidHeapAlloc->attr);
    }

    pVidHeapAlloc->offset = pFbAllocInfo->offset;

    if (pAllocRequest->pHwResource != NULL)
    {
        pAllocRequest->pHwResource->attr       = pFbAllocInfo->retAttr;
        pAllocRequest->pHwResource->attr2      = pFbAllocInfo->retAttr2;
        pAllocRequest->pHwResource->hwResId    = pFbAllocInfo->hwResId;
        pAllocRequest->pHwResource->comprCovg  = pFbAllocInfo->comprCovg;
        pAllocRequest->pHwResource->ctagOffset = pFbAllocInfo->ctagOffset;
        pAllocRequest->pHwResource->hwResId    = pFbAllocInfo->hwResId;
    }

    return NV_OK;

failed:
    memmgrFreeHwResources(pGpu, pMemoryManager, pFbAllocInfo);

    if (bAllocedMemDesc)
    {
        memdescDestroy(pAllocRequest->pMemDesc);
        pAllocRequest->pMemDesc = NULL;
    }

    return status;
}

/*
 * @brief Interface to vaspaceReserveMempool to reserve PMA memory for page tables
 */
NV_STATUS virtmemReserveMempool_IMPL
(
    VirtualMemory *pVirtualMemory,
    OBJGPU        *pGpu,
    NvHandle       hDevice,
    NvU64          size,
    NvU64          pageSizeMask
)
{
    RsClient   *pClient      = RES_GET_CLIENT(pVirtualMemory);
    OBJVASPACE *pVAS         = NULL;
    NvU32       mempoolFlags = VASPACE_RESERVE_FLAGS_NONE;

    //
    // Reject mappings for a legacy NV01_MEMORY_SYSTEM_DYNAMIC
    // object silently.
    //
    if (pVirtualMemory->hVASpace == NV_MEMORY_VIRTUAL_SYSMEM_DYNAMIC_HVASPACE)
    {
        return NV_ERR_INVALID_OBJECT;
    }

    if (pVirtualMemory->bOptimizePageTableMempoolUsage)
    {
        mempoolFlags = VASPACE_RESERVE_FLAGS_ALLOC_UPTO_TARGET_LEVEL_ONLY;
    }

    NV_ASSERT_OK_OR_RETURN(
        vaspaceGetByHandleOrDeviceDefault(pClient, hDevice,
                                          pVirtualMemory->hVASpace, &pVAS));

    return vaspaceReserveMempool(pVAS, pGpu, RES_GET_CLIENT_HANDLE(pVirtualMemory),
                                 size, pageSizeMask, mempoolFlags);
}

/*!
 * @brief Does this VirtualMemory object use the specified hVASpace?
 */
NvBool
virtmemMatchesVASpace_IMPL
(
    VirtualMemory *pVirtualMemory,
    NvHandle       hClient,
    NvHandle       hVASpace
)
{
    return (RES_GET_CLIENT_HANDLE(pVirtualMemory) == hClient) && (pVirtualMemory->hVASpace == hVASpace);
}

/*!
 * @brief Helper to look up a VirtualMemory object
 */
NV_STATUS
virtmemGetByHandleAndDevice_IMPL
(
    RsClient       *pClient,
    NvHandle        hMemory,
    NvHandle        hDevice,
    VirtualMemory **ppVirtualMemory
)
{
    Memory *pMemory;

    NV_CHECK_OK_OR_RETURN(LEVEL_SILENT,
        memGetByHandleAndDevice(pClient, hMemory, hDevice, &pMemory));

    *ppVirtualMemory = dynamicCast(pMemory, VirtualMemory);

    return (*ppVirtualMemory != NULL) ? NV_OK : NV_ERR_INVALID_OBJECT_HANDLE;
}
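
//
// Lookup sketch (illustrative only): resolving a client memory handle to a
// VirtualMemory object and checking which VA space it reserves from, using
// the two helpers above. Handle names are placeholders.
//
//     VirtualMemory *pVirtMem = NULL;
//
//     if (virtmemGetByHandleAndDevice(pClient, hMemory, hDevice,
//                                     &pVirtMem) == NV_OK &&
//         virtmemMatchesVASpace(pVirtMem, pClient->hClient, hVASpace))
//     {
//         // pVirtMem was allocated against hVASpace
//     }
//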

/*!
 * @brief Create a CPU mapping in addition to the DMA mapping
 */
static NV_STATUS
_virtmemAllocKernelMapping
(
    OBJGPU               *pGpu,
    OBJVASPACE           *pVAS,
    CLI_DMA_MAPPING_INFO *pDmaMappingInfo,
    NvU64                 offset,
    NvU64                 size,
    Memory               *pMemoryInfo
)
{
    NV_STATUS  status              = NV_OK;
    NvBool     bCoherentCpuMapping = pGpu->getProperty(pGpu, PDB_PROP_GPU_COHERENT_CPU_MAPPING);
    NvU32      gpuSubDevInst;
    RmPhysAddr bar1PhysAddr;

    SLI_LOOP_START(SLI_LOOP_FLAGS_NONE)

    gpuSubDevInst = gpumgrGetSubDeviceInstanceFromGpu(pGpu);
    if (bCoherentCpuMapping)
    {
        // Use a temp pointer to prevent overwriting the previous pointer by accident
        NvP64              tempCpuPtr = NvP64_NULL;
        MEMORY_DESCRIPTOR *pMemDesc   = memdescGetMemDescFromGpu(pDmaMappingInfo->pMemDesc, pGpu);
        KernelBus         *pKernelBus = GPU_GET_KERNEL_BUS(pGpu);

        NV_PRINTF(LEVEL_INFO,
                  "Allocating coherent link mapping. length=%lld, memDesc->size=%lld\n",
                  size, pDmaMappingInfo->pMemDesc->Size);

        NV_ASSERT(pGpu->getProperty(pGpu, PDB_PROP_GPU_ATS_SUPPORTED));
        NV_ASSERT(pDmaMappingInfo->pMemDesc->_flags & MEMDESC_FLAGS_PHYSICALLY_CONTIGUOUS);

        tempCpuPtr = kbusMapCoherentCpuMapping_HAL(pGpu, pKernelBus, pMemDesc);
        if (tempCpuPtr == NULL)
        {
            status = NV_ERR_GENERIC;
        }
        else
        {
            status = NV_OK;
            tempCpuPtr = NvP64_PLUS_OFFSET(tempCpuPtr, offset);
        }

        pDmaMappingInfo->KernelVAddr[gpuSubDevInst] = NvP64_VALUE(tempCpuPtr);
    }
    else
    {
        //
        // Allocate GPU virtual address space for the video memory region
        // for those GPUs that support it.
        //
        pDmaMappingInfo->FbApertureLen[gpuSubDevInst] = pDmaMappingInfo->pMemDesc->Size;
        if (RMCFG_FEATURE_PLATFORM_GSP)
        {
            status = osMapSystemMemory(pMemoryInfo->pMemDesc,
                                       offset,
                                       pDmaMappingInfo->pMemDesc->Size,
                                       NV_TRUE /*Kernel*/,
                                       NV_PROTECT_READ_WRITE,
                                       (NvP64 *)&pDmaMappingInfo->KernelVAddr[gpuSubDevInst],
                                       (NvP64 *)&pDmaMappingInfo->KernelPriv);

            if (status != NV_OK)
            {
                pDmaMappingInfo->FbApertureLen[gpuSubDevInst] = 0;
                pDmaMappingInfo->FbAperture[gpuSubDevInst] = 0;
                pDmaMappingInfo->KernelPriv = 0;
                SLI_LOOP_BREAK;
            }
        }
        else
        {
            KernelBus    *pKernelBus   = GPU_GET_KERNEL_BUS(pGpu);
            NvHandle      hClient      = NV01_NULL_OBJECT;
            CALL_CONTEXT *pCallContext = resservGetTlsCallContext();
            if ((pCallContext != NULL) && (pCallContext->pClient != NULL))
            {
                hClient = pCallContext->pClient->hClient;
            }

            status = kbusMapFbAperture_HAL(pGpu, pKernelBus,
                                           pMemoryInfo->pMemDesc, offset,
                                           &pDmaMappingInfo->FbAperture[gpuSubDevInst],
                                           &pDmaMappingInfo->FbApertureLen[gpuSubDevInst],
                                           BUS_MAP_FB_FLAGS_MAP_UNICAST, hClient);

            if (status != NV_OK)
            {
                pDmaMappingInfo->FbApertureLen[gpuSubDevInst] = 0;
                pDmaMappingInfo->FbAperture[gpuSubDevInst] = 0;
                SLI_LOOP_BREAK;
            }

            bar1PhysAddr = gpumgrGetGpuPhysFbAddr(pGpu) + pDmaMappingInfo->FbAperture[gpuSubDevInst];
            status = osMapPciMemoryKernelOld(pGpu, bar1PhysAddr,
                                             pDmaMappingInfo->pMemDesc->Size,
                                             NV_PROTECT_READ_WRITE,
                                             &pDmaMappingInfo->KernelVAddr[gpuSubDevInst],
                                             NV_MEMORY_WRITECOMBINED);
        }
    }

    if (status != NV_OK)
    {
        SLI_LOOP_BREAK;
    }

    SLI_LOOP_END

    if (status != NV_OK)
    {
        _virtmemFreeKernelMapping(pGpu, pDmaMappingInfo);
    }

    return status;
}

/*!
 * @brief Free CPU mapping
 */
static void
_virtmemFreeKernelMapping
(
    OBJGPU               *pGpu,
    CLI_DMA_MAPPING_INFO *pDmaMappingInfo
)
{
    NvU32  gpuSubDevInst;
    NvBool bCoherentCpuMapping = pGpu->getProperty(pGpu, PDB_PROP_GPU_COHERENT_CPU_MAPPING);

    SLI_LOOP_START(SLI_LOOP_FLAGS_BC_ONLY | SLI_LOOP_FLAGS_IGNORE_REENTRANCY)

    gpuSubDevInst = gpumgrGetSubDeviceInstanceFromGpu(pGpu);

    // Unmap a kernel CPU mapping if one exists
    if (pDmaMappingInfo->KernelVAddr[gpuSubDevInst] != NULL)
    {
        if (bCoherentCpuMapping)
        {
            KernelBus         *pKernelBus = GPU_GET_KERNEL_BUS(pGpu);
            MEMORY_DESCRIPTOR *pMemDesc   = memdescGetMemDescFromGpu(pDmaMappingInfo->pMemDesc, pGpu);
            kbusUnmapCoherentCpuMapping_HAL(pGpu, pKernelBus, pMemDesc);
        }
        else
        {
            osUnmapPciMemoryKernelOld(pGpu, pDmaMappingInfo->KernelVAddr[gpuSubDevInst]);
        }

        pDmaMappingInfo->KernelVAddr[gpuSubDevInst] = NULL;
    }

    // Unmap the FB aperture mapping if one exists
    if ((pDmaMappingInfo->FbApertureLen[gpuSubDevInst]) && (!bCoherentCpuMapping))
    {
        if (RMCFG_FEATURE_PLATFORM_GSP)
        {
            // This is a no-op in GSP, but document it here as code in case it changes.
            osUnmapSystemMemory(pDmaMappingInfo->pMemDesc,
                                NV_TRUE /*Kernel*/,
                                0 /*ProcessId*/,
                                (NvP64)pDmaMappingInfo->FbAperture[gpuSubDevInst],
                                NV_PTR_TO_NvP64(pDmaMappingInfo->KernelPriv));
        }
        else
        {
            KernelBus *pKernelBus = GPU_GET_KERNEL_BUS(pGpu);
            kbusUnmapFbAperture_HAL(pGpu,
                                    pKernelBus,
                                    pDmaMappingInfo->pMemDesc,
                                    pDmaMappingInfo->FbAperture[gpuSubDevInst],
                                    pDmaMappingInfo->FbApertureLen[gpuSubDevInst],
                                    BUS_MAP_FB_FLAGS_MAP_UNICAST);
        }
        pDmaMappingInfo->FbAperture[gpuSubDevInst] = 0;
        pDmaMappingInfo->FbApertureLen[gpuSubDevInst] = 0;
        pDmaMappingInfo->KernelPriv = 0;
    }

    SLI_LOOP_END
}
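
//
// Pairing sketch (illustrative only): _virtmemAllocKernelMapping and
// _virtmemFreeKernelMapping are symmetric per subdevice, and the alloc path
// cleans up after itself on failure, so a caller such as virtmemMapTo_IMPL
// below only frees on the success path when tearing the mapping down.
//
//     if (_virtmemAllocKernelMapping(pGpu, pVas, pDmaMappingInfo,
//                                    offset, length, pSrcMemory) == NV_OK)
//     {
//         // ... use pDmaMappingInfo->KernelVAddr[subDeviceInst] ...
//         _virtmemFreeKernelMapping(pGpu, pDmaMappingInfo);
//     }
//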

/*!
 * @brief Map an object into a VirtualMemory object
 */
NV_STATUS
virtmemMapTo_IMPL
(
    VirtualMemory        *pVirtualMemory,
    RS_RES_MAP_TO_PARAMS *pParams
)
{
    NV_STATUS      status         = NV_ERR_NOT_SUPPORTED;
    Memory        *pMemory        = staticCast(pVirtualMemory, Memory);
    OBJGPU        *pGpu           = pParams->pGpu;
    OBJGPU        *pSrcGpu        = pParams->pSrcGpu;
    RsClient      *pClient        = RES_GET_CLIENT(pVirtualMemory);
    MemoryManager *pMemoryManager = GPU_GET_MEMORY_MANAGER(pGpu);
    RsResourceRef *pMemoryRef     = pParams->pMemoryRef;

    NvHandle hClient          = pClient->hClient;
    NvHandle hBroadcastDevice = pParams->hBroadcastDevice;
    NvHandle hVirtualMem      = RES_GET_HANDLE(pVirtualMemory);
    NvHandle hMemoryDevice    = pParams->hMemoryDevice;

    NvU32 gpuMask = pParams->gpuMask;
    NvU64 offset  = pParams->offset;    // offset into pMemoryRef to map
    NvU64 length  = pParams->length;
    NvU32 flags   = pParams->flags;
    NvU32 p2p     = DRF_VAL(OS46, _FLAGS, _P2P_ENABLE, pParams->flags);

    VirtMemAllocator     *pDma            = GPU_GET_DMA(pGpu);
    MEMORY_DESCRIPTOR    *pSrcMemDesc     = pParams->pSrcMemDesc;
    NvU64                *pDmaOffset      = pParams->pDmaOffset;  // return VirtualMemory offset
    CLI_DMA_MAPPING_INFO *pDmaMappingInfo = NULL;
    OBJVASPACE           *pVas            = NULL;
    Memory               *pSrcMemory      = dynamicCast(pMemoryRef->pResource, Memory);

    NvU32 tgtAddressSpace = ADDR_UNKNOWN;

    NvBool bDmaMapNeeded         = pParams->bDmaMapNeeded;
    NvBool bDmaMapped            = NV_FALSE;
    NvBool bDmaMappingRegistered = NV_FALSE;
    NvBool bFlaMapping           = pParams->bFlaMapping;
    NvBool bIsIndirectPeer       = NV_FALSE;
    NvBool bEncrypted;
    NvBool bIsSysmem             = NV_FALSE;
    NvBool bBar1P2P              = (p2p && kbusHasPcieBar1P2PMapping_HAL(pGpu,
                                                                         GPU_GET_KERNEL_BUS(pGpu),
                                                                         pSrcGpu,
                                                                         GPU_GET_KERNEL_BUS(pSrcGpu)));

    //
    // Allow unicast on the NV01_MEMORY_VIRTUAL object, but maintain the broadcast
    // requirement for NV50_MEMORY_VIRTUAL.
    //
    if (pParams->bSubdeviceHandleProvided && !pVirtualMemory->bAllowUnicastMapping)
    {
        NV_PRINTF(LEVEL_ERROR, "Unicast mappings into virtual memory object not supported.\n");
        return NV_ERR_NOT_SUPPORTED;
    }

    status = vaspaceGetByHandleOrDeviceDefault(pClient, hBroadcastDevice, pVirtualMemory->hVASpace, &pVas);
    if (status != NV_OK)
        return status;

    //
    // Use the encryption setting of the virtual allocation.
    // This makes sense, since the same physical mem descriptor could have
    // more than one mapping, each with different encryption settings.
    //
    bEncrypted = memdescGetFlag(pMemory->pMemDesc, MEMDESC_FLAGS_ENCRYPTED);

    // Validate the offset and limit passed in.
    if (offset + length > pSrcMemDesc->Size)
        return NV_ERR_INVALID_BASE;

    status = intermapCreateDmaMapping(pClient, pVirtualMemory, &pDmaMappingInfo, flags);
    if (status != NV_OK)
        return status;

    if (bBar1P2P)
    {
        DMA_BAR1P2P_MAPPING_PRARAMS params = {0};

        params.pVas = pVas;
        params.pPeerGpu = pSrcGpu;
        params.pPeerMemDesc = pSrcMemDesc;
        params.flags = flags;
        params.offset = offset;
        params.length = length;
        params.pDmaMappingInfo = pDmaMappingInfo;

        status = dmaAllocBar1P2PMapping_HAL(pGpu, pDma, &params);
        if (status != NV_OK)
            goto done;

        // Adjust the local variables for the BAR1 P2P mappings
        pSrcMemDesc = params.pMemDescOut;
        flags = params.flagsOut;
        offset = params.offsetOut;
    }

    //
    // Determine the target address space. If we're mapping fbmem from
    // one gpu for use by another, then we need to treat that memory as
    // ADDR_SYSMEM.
    //
    tgtAddressSpace = memdescGetAddressSpace(memdescGetMemDescFromGpu(pSrcMemDesc, pGpu));
    if ((pSrcGpu != pGpu) && (tgtAddressSpace == ADDR_FBMEM))
    {
        tgtAddressSpace = ADDR_SYSMEM;

        if (gpumgrCheckIndirectPeer(pGpu, pSrcGpu))
            bIsIndirectPeer = NV_TRUE;

        // IOMMU mapping not needed for GPU P2P accesses on FB pages.
        bDmaMapNeeded = NV_FALSE;
    }

    if ((tgtAddressSpace == ADDR_FABRIC_MC) ||
        (tgtAddressSpace == ADDR_FABRIC_V2))
    {
        // IOMMU mapping not needed for GPU P2P accesses on FB pages.
        bDmaMapNeeded = NV_FALSE;
    }

    // Different cases for vidmem & system memory/fabric memory.
    bIsSysmem = (tgtAddressSpace == ADDR_SYSMEM);

    if (bIsSysmem ||
        (tgtAddressSpace == ADDR_FABRIC_MC) ||
        (tgtAddressSpace == ADDR_FABRIC_V2))
    {
        //
        // Create a MEMORY_DESCRIPTOR describing this region of the memory
        // alloc in question
        //
        status = memdescCreateSubMem(&pDmaMappingInfo->pMemDesc, pSrcMemDesc, pGpu, offset, length);
        if (status != NV_OK)
            goto done;
        *pParams->ppMemDesc = pDmaMappingInfo->pMemDesc;

        //
        // If system memory does not support compression, the virtual kind is
        // compressible, and we are mapping into system memory, fall back to
        // the uncompressed kind.
        //
        if (FLD_TEST_DRF(OS46, _FLAGS, _PAGE_KIND, _VIRTUAL, flags) &&
            (tgtAddressSpace == ADDR_SYSMEM) &&
            (!memmgrComprSupported(pMemoryManager, ADDR_SYSMEM)))
        {
            NvU32 kind        = memdescGetPteKind(pMemory->pMemDesc);
            NvU32 updatedKind = memmgrGetUncompressedKind_HAL(pGpu, pMemoryManager, kind, 0);
            NvU32 dmaKind     = memdescGetPteKind(pDmaMappingInfo->pMemDesc);

            if (dmaKind != updatedKind)
            {
                SLI_LOOP_START(SLI_LOOP_FLAGS_BC_ONLY | SLI_LOOP_FLAGS_IGNORE_REENTRANCY);
                NV_ASSERT(memdescGetFlag(memdescGetMemDescFromGpu(pMemory->pMemDesc, pGpu), MEMDESC_FLAGS_SET_KIND));
                memdescSetPteKind(memdescGetMemDescFromGpu(pDmaMappingInfo->pMemDesc, pGpu), updatedKind);
                SLI_LOOP_END;
            }
        }

        // If the GPUs are indirect peers, create TCE mappings
        if (bIsIndirectPeer)
        {
            //
            // TODO: Ideally memdescMapIommu should be called on the FB memdesc
            // with pSrcGpu. That would clearly convey that the memory is owned
            // by pSrcGpu and that we are trying to create IOMMU mappings for
            // pGpu. This effort is being tracked in bug 2043603.
            //
            status = memdescMapIommu(pDmaMappingInfo->pMemDesc, pGpu->busInfo.iovaspaceId);
            if (status != NV_OK)
            {
                NV_PRINTF(LEVEL_ERROR, "DMA map pages failed for requested GPU!\n");
                goto done;
            }
        }
        else if (bDmaMapNeeded)
        {
            status = osDmaMapPages(pGpu->pOsGpuInfo, pDmaMappingInfo->pMemDesc);
            if ((status != NV_OK) && (status != NV_ERR_NOT_SUPPORTED))
            {
                NV_PRINTF(LEVEL_ERROR, "DMA map pages failed for requested GPU!\n");
                goto done;
            }
            //
            // Some operating systems return NV_ERR_NOT_SUPPORTED. Assign NV_OK to
            // status since we return status from this function and NV_ERR_NOT_SUPPORTED
            // may be considered a failure by the calling function.
            //
            status = NV_OK;
            bDmaMapped = NV_TRUE;
        }

        SLI_LOOP_START(SLI_LOOP_FLAGS_BC_ONLY | SLI_LOOP_FLAGS_IGNORE_REENTRANCY)
        memdescSetFlag(memdescGetMemDescFromGpu(pDmaMappingInfo->pMemDesc, pGpu),
                       MEMDESC_FLAGS_ENCRYPTED,
                       bEncrypted);
        SLI_LOOP_END

        // Monolithic CPU RM or SPLIT_VAS_MGMT
        if (!pMemory->bRpcAlloc || gpuIsSplitVasManagementServerClientRmEnabled(pGpu))
        {
            pDmaMappingInfo->DmaOffset = *pDmaOffset;  // in case this is 'in'

            // Allocate the mapping in the VirtualMemory object
            status = dmaAllocMap(pGpu, pDma, pVas, pVirtualMemory, pSrcMemory, pDmaMappingInfo);
            if (status != NV_OK)
                goto done;

            status = intermapRegisterDmaMapping(pClient, pVirtualMemory, pDmaMappingInfo, pDmaMappingInfo->DmaOffset, gpuMask);
            if (status != NV_OK)
            {
                dmaFreeMap(pGpu, pDma, pVas,
                           pVirtualMemory, pDmaMappingInfo,
                           DRF_DEF(OS47, _FLAGS, _DEFER_TLB_INVALIDATION, _FALSE));
                goto done;
            }

            bDmaMappingRegistered = NV_TRUE;

            // If a kernel mapping has been requested, create one
            if (DRF_VAL(OS46, _FLAGS, _KERNEL_MAPPING, flags) == NVOS46_FLAGS_KERNEL_MAPPING_ENABLE)
            {
                status = memdescMapOld(pDmaMappingInfo->pMemDesc,
                                       0,
                                       pDmaMappingInfo->pMemDesc->Size,
                                       NV_TRUE, NV_PROTECT_READ_WRITE,
                                       &pDmaMappingInfo->KernelVAddr[gpumgrGetSubDeviceInstanceFromGpu(gpumgrGetParentGPU(pGpu))],
                                       &pDmaMappingInfo->KernelPriv);

                if (status != NV_OK)
                    goto done;
            }

            *pDmaOffset = pDmaMappingInfo->DmaOffset;
        } // !IS_VIRTUAL(pGpu) && !IS_GSP_CLIENT(pGpu)
    }
    else if (tgtAddressSpace == ADDR_FBMEM)
    {
        //
        // Create a MEMORY_DESCRIPTOR describing this region of the memory alloc
        // in question
        //
        status = memdescCreateSubMem(&pDmaMappingInfo->pMemDesc, pSrcMemDesc, pGpu, offset, length);
        if (status != NV_OK)
            goto done;
        *pParams->ppMemDesc = pDmaMappingInfo->pMemDesc;

        SLI_LOOP_START(SLI_LOOP_FLAGS_BC_ONLY | SLI_LOOP_FLAGS_IGNORE_REENTRANCY);
        memdescSetFlag(memdescGetMemDescFromGpu(pDmaMappingInfo->pMemDesc, pGpu),
                       MEMDESC_FLAGS_ENCRYPTED,
                       bEncrypted);
        SLI_LOOP_END;

        if (FLD_TEST_DRF(OS46, _FLAGS, _PAGE_KIND, _VIRTUAL, flags))
        {
            // Make sure that the virtual kind was set beforehand
            SLI_LOOP_START(SLI_LOOP_FLAGS_BC_ONLY | SLI_LOOP_FLAGS_IGNORE_REENTRANCY);
            NV_ASSERT(memdescGetFlag(memdescGetMemDescFromGpu(pMemory->pMemDesc, pGpu), MEMDESC_FLAGS_SET_KIND));
            memdescSetPteKind(memdescGetMemDescFromGpu(pDmaMappingInfo->pMemDesc, pGpu),
                              memdescGetPteKind(pMemory->pMemDesc));
            SLI_LOOP_END;
        }

        pDmaMappingInfo->DmaOffset = *pDmaOffset;  // in case this is 'in'

        // Monolithic CPU RM or SPLIT_VAS_MGMT
        if (!pMemory->bRpcAlloc || gpuIsSplitVasManagementServerClientRmEnabled(pGpu))
        {
            // Allocate the mapping in the VirtualMemory object
            status = dmaAllocMap(pGpu, pDma, pVas, pVirtualMemory, pSrcMemory, pDmaMappingInfo);
            if (status != NV_OK)
                goto done;

            *pDmaOffset = pDmaMappingInfo->DmaOffset;

            status = intermapRegisterDmaMapping(pClient, pVirtualMemory, pDmaMappingInfo, pDmaMappingInfo->DmaOffset, gpuMask);
            if (status != NV_OK)
            {
                dmaFreeMap(pGpu, pDma, pVas,
                           pVirtualMemory, pDmaMappingInfo,
                           DRF_DEF(OS47, _FLAGS, _DEFER_TLB_INVALIDATION, _FALSE));
                goto done;
            }

            bDmaMappingRegistered = NV_TRUE;

            if (DRF_VAL(OS46, _FLAGS, _KERNEL_MAPPING, flags) == NVOS46_FLAGS_KERNEL_MAPPING_ENABLE)
            {
                status = _virtmemAllocKernelMapping(pGpu, pVas, pDmaMappingInfo, offset, length, pSrcMemory);
                if (status != NV_OK)
                    goto done;
            }

            *pDmaOffset = pDmaMappingInfo->DmaOffset;
        } // if (!IS_VIRTUAL(pGpu) && !IS_GSP_CLIENT(pGpu))
    }
    else
    {
        // Unknown (or mixed vidmem+sysmem?) memory case
        status = NV_ERR_INVALID_OBJECT_HANDLE;
        goto done;
    }

    if (RMCFG_CLASS_NV50_P2P &&
        !bFlaMapping &&
        (bBar1P2P || DRF_VAL(OS46, _FLAGS, _P2P_ENABLE, pDmaMappingInfo->Flags) == NVOS46_FLAGS_P2P_ENABLE_NOSLI))
    {
        //
        // If we are on SLI and trying to map peer memory between two GPUs
        // on the same device, we don't rely on dynamic p2p mailbox setup.
        // SLI uses the static p2p mailbox and hence will not have any
        // P2P object associated with it.
        //
        if ((hBroadcastDevice == hMemoryDevice) && IsSLIEnabled(pGpu))
        {
            goto vgpu_send_rpc;
        }

        pDmaMappingInfo->bP2P = NV_TRUE;
    }

vgpu_send_rpc:

    if (pMemory->bRpcAlloc)
    {
        NV_RM_RPC_MAP_MEMORY_DMA(pGpu, hClient, hBroadcastDevice, hVirtualMem, pMemoryRef->hResource,
                                 offset, length, flags, pDmaOffset, status);
        if (status != NV_OK)
            goto done;

        if ((IS_VIRTUAL(pGpu) || IS_GSP_CLIENT(pGpu)) &&
            !gpuIsSplitVasManagementServerClientRmEnabled(pGpu))
        {
            //
            // vGPU doesn't understand subdevice handles. But clients map memory
            // with subdevice handles and we don't want that to fail on vGPU.
            // Currently, we just pass down the broadcast device handle to the host
            // (which should be equivalent if SLI is disabled). This will need to
            // be revisited if vGPU ever supports SLI.
            //
            NV_ASSERT(!IsSLIEnabled(pGpu));

            pDmaMappingInfo->DmaOffset = *pDmaOffset;

            status = intermapRegisterDmaMapping(pClient, pVirtualMemory, pDmaMappingInfo,
                                                pDmaMappingInfo->DmaOffset, gpuMask);
            if (status != NV_OK)
                goto done;

            bDmaMappingRegistered = NV_TRUE;

            if (tgtAddressSpace == ADDR_SYSMEM)
            {
                // If a kernel mapping has been requested, create one
                if (DRF_VAL(OS46, _FLAGS, _KERNEL_MAPPING, flags) == NVOS46_FLAGS_KERNEL_MAPPING_ENABLE)
                {
                    status = memdescMapOld(pDmaMappingInfo->pMemDesc,
                                           0,
                                           pDmaMappingInfo->pMemDesc->Size,
                                           NV_TRUE, NV_PROTECT_READ_WRITE,
                                           &pDmaMappingInfo->KernelVAddr[gpumgrGetSubDeviceInstanceFromGpu(gpumgrGetParentGPU(pGpu))],
                                           &pDmaMappingInfo->KernelPriv);
                    if (status != NV_OK)
                        goto done;
                }
            }
        }
    }

done:
    if (status != NV_OK)
    {
        if (pDmaMappingInfo != NULL)
        {
            if ((pDmaMappingInfo->pMemDesc != NULL) &&
                FLD_TEST_DRF(OS46, _FLAGS, _KERNEL_MAPPING, _ENABLE, flags))
            {
                //
                // If a kernel cookie exists and the mapping is in sysmem, free the
                // sysmem mapping; for ADDR_FBMEM the helper itself determines
                // whether it created the mapping.
                //
                if ((pDmaMappingInfo->KernelPriv != NULL) &&
                    (memdescGetAddressSpace(pDmaMappingInfo->pMemDesc) == ADDR_SYSMEM))
                {
                    memdescUnmapOld(pDmaMappingInfo->pMemDesc, NV_TRUE, 0,
                                    pDmaMappingInfo->KernelVAddr[gpumgrGetSubDeviceInstanceFromGpu(gpumgrGetParentGPU(pGpu))],
                                    pDmaMappingInfo->KernelPriv);
                    pDmaMappingInfo->KernelPriv = NULL;
                }
                else if (memdescGetAddressSpace(pDmaMappingInfo->pMemDesc) == ADDR_FBMEM)
                {
                    _virtmemFreeKernelMapping(pGpu, pDmaMappingInfo);
                }
            }

            if (pDmaMappingInfo->pMemDesc != NULL)
            {
                NV_STATUS status;

                if (bIsIndirectPeer)
                {
                    memdescUnmapIommu(pDmaMappingInfo->pMemDesc, pGpu->busInfo.iovaspaceId);
                }
                else if (bDmaMapped)
                {
                    // Unmap the DMA mapped pages in the failure case, if any.
                    status = osDmaUnmapPages(pGpu->pOsGpuInfo, pDmaMappingInfo->pMemDesc);
                    if (!(status == NV_OK || status == NV_ERR_NOT_SUPPORTED))
                    {
                        NV_PRINTF(LEVEL_ERROR, "DMA unmap pages failed for requested GPU!\n");
                    }
                }
            }

            dmaFreeBar1P2PMapping_HAL(pDma, pDmaMappingInfo);

            memdescDestroy(pDmaMappingInfo->pMemDesc);
            pDmaMappingInfo->pMemDesc = NULL;

            if (bDmaMappingRegistered)
            {
                NV_ASSERT_OK(intermapDelDmaMapping(pClient, pVirtualMemory, *pDmaOffset, gpuMask));
            }
            else
            {
                // Explicitly free the DMA mapping if the mapping was not yet registered
                intermapFreeDmaMapping(pDmaMappingInfo);
            }
        }
    }

    return status;
}
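
//
// Mapping sketch (illustrative only): how the OS46 flags consumed above are
// typically composed by a caller that wants a kernel CPU mapping alongside
// the GPU mapping. Handle names are placeholders; the call reaches this
// function through the RmMapMemoryDma path.
//
//     NvU64 dmaOffset = 0;
//     NvU32 flags     = DRF_DEF(OS46, _FLAGS, _KERNEL_MAPPING, _ENABLE);
//
//     // RmMapMemoryDma(hClient, hDevice, hVirtualMem, hPhysMem,
//     //                offset, length, flags, &dmaOffset);
//     // On success, dmaOffset holds the GPU VA within hVirtualMem.
//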

/*!
 * @brief Unmap object from VirtualMemory object
 */
NV_STATUS
virtmemUnmapFrom_IMPL
(
    VirtualMemory            *pVirtualMemory,
    RS_RES_UNMAP_FROM_PARAMS *pParams
)
{
    OBJGPU     *pGpu             = pParams->pGpu;
    Memory     *pMemory          = staticCast(pVirtualMemory, Memory);
    RsClient   *pClient          = RES_GET_CLIENT(pVirtualMemory);
    NvHandle    hClient          = pClient->hClient;
    NvHandle    hMemory          = pParams->hMemory;
    NvHandle    hVirtualMem      = RES_GET_HANDLE(pVirtualMemory);
    NvHandle    hBroadcastDevice = pParams->hBroadcastDevice;
    NvU32       gpuMask          = pParams->gpuMask;
    NvU64       dmaOffset        = pParams->dmaOffset;
    OBJVASPACE *pVas             = NULL;
    NV_STATUS   status           = NV_OK;
    NvBool      bIsIndirectPeer  = NV_FALSE;

    CLI_DMA_MAPPING_INFO *pDmaMappingInfo = NULL;

    if (hMemory != NV01_NULL_OBJECT)
    {
        RsResourceRef *pSrcMemoryRef;
        Memory        *pMemorySrc;

        if (clientGetResourceRef(pClient, hMemory, &pSrcMemoryRef) != NV_OK)
            return NV_ERR_OBJECT_NOT_FOUND;

        status = rmresCheckMemInterUnmap(dynamicCast(pSrcMemoryRef->pResource, RmResource), pParams->bSubdeviceHandleProvided);

        // Exit if failed or invalid class; otherwise continue on to the next part
        if (status != NV_OK)
            return status;

        pMemorySrc = dynamicCast(pSrcMemoryRef->pResource, Memory);
        if (pMemorySrc != NULL)
        {
            if (gpumgrCheckIndirectPeer(pMemorySrc->pGpu, pGpu))
                bIsIndirectPeer = NV_TRUE;
        }
    }

    if (pParams->bSubdeviceHandleProvided && !pVirtualMemory->bAllowUnicastMapping)
    {
        NV_PRINTF(LEVEL_ERROR, "Unicast DMA mappings into virtual memory object not supported.\n");
        return NV_ERR_NOT_SUPPORTED;
    }

    status = vaspaceGetByHandleOrDeviceDefault(pClient, hBroadcastDevice, pVirtualMemory->hVASpace, &pVas);
    if (status != NV_OK)
        return status;

    // Get the DMA mapping info.
    pDmaMappingInfo = intermapGetDmaMapping(pVirtualMemory, dmaOffset, gpuMask);
    NV_ASSERT_OR_RETURN(pDmaMappingInfo != NULL, NV_ERR_INVALID_OBJECT_HANDLE);

    //
    // If a kernel cookie exists and the mapping is in sysmem, free the sysmem
    // mapping; for ADDR_FBMEM the helper itself determines whether it created
    // the mapping.
    //
    if ((pDmaMappingInfo->KernelPriv != NULL) &&
        (memdescGetAddressSpace(pDmaMappingInfo->pMemDesc) == ADDR_SYSMEM))
    {
        memdescUnmapOld(pDmaMappingInfo->pMemDesc, NV_TRUE, 0,
                        pDmaMappingInfo->KernelVAddr[gpumgrGetSubDeviceInstanceFromGpu(gpumgrGetParentGPU(pGpu))],
                        pDmaMappingInfo->KernelPriv);
        pDmaMappingInfo->KernelPriv = NULL;
    }
    else if (memdescGetAddressSpace(memdescGetMemDescFromGpu(pDmaMappingInfo->pMemDesc, pGpu)) == ADDR_FBMEM)
    {
        _virtmemFreeKernelMapping(pGpu, pDmaMappingInfo);
    }

    // If this was a peer mapped context dma, remove it from the P2P object
    if (RMCFG_CLASS_NV50_P2P && pDmaMappingInfo->bP2P)
    {
        dmaFreeBar1P2PMapping_HAL(GPU_GET_DMA(pGpu), pDmaMappingInfo);
    }

    if (!pMemory->bRpcAlloc || gpuIsSplitVasManagementServerClientRmEnabled(pGpu))
    {
        // Free the mapping in the context dma
        dmaFreeMap(pGpu, GPU_GET_DMA(pGpu), pVas, pVirtualMemory, pDmaMappingInfo, pParams->flags);

        if ((memdescGetAddressSpace(memdescGetMemDescFromGpu(pDmaMappingInfo->pMemDesc, pGpu)) == ADDR_FBMEM) &&
            bIsIndirectPeer)
        {
            memdescUnmapIommu(pDmaMappingInfo->pMemDesc, pGpu->busInfo.iovaspaceId);
        }
        else if ((memdescGetAddressSpace(memdescGetMemDescFromGpu(pDmaMappingInfo->pMemDesc, pGpu)) == ADDR_SYSMEM) &&
                 (pDmaMappingInfo->pMemDesc->pGpu != pGpu))
        {
            status = osDmaUnmapPages(pGpu->pOsGpuInfo, pDmaMappingInfo->pMemDesc);
            if (!(status == NV_OK || status == NV_ERR_NOT_SUPPORTED))
            {
                NV_PRINTF(LEVEL_ERROR, "DMA unmap pages failed for requested GPU!\n");
            }
            //
            // Some operating systems return NV_ERR_NOT_SUPPORTED. Assign NV_OK to
            // status since we return status from this function and NV_ERR_NOT_SUPPORTED
            // may be considered a failure by the calling function.
            //
            status = NV_OK;
        }
    }

    // Free the memory descriptor
    memdescFree(pDmaMappingInfo->pMemDesc);
    memdescDestroy(pDmaMappingInfo->pMemDesc);
    pDmaMappingInfo->pMemDesc = NULL;

    // Delete the client DMA mapping
    intermapDelDmaMapping(pClient, pVirtualMemory, dmaOffset, gpuMask);

    //
    // vGPU:
    //
    // Since vGPU does all real hardware management in the
    // host, if we are in the guest OS (where IS_VIRTUAL(pGpu) is true),
    // do an RPC to the host to do the hardware update.
    //
    // vGPU doesn't understand subdevice handles. But clients map memory
    // with subdevice handles and we don't want that to fail on vGPU.
    // Currently, we just pass down the broadcast device handle to the host
    // (which should be equivalent if SLI is disabled). This will need to
    // be revisited if vGPU ever supports SLI.
    //
    NV_ASSERT((!IS_VIRTUAL(pGpu) && !IS_GSP_CLIENT(pGpu)) || !IsSLIEnabled(pGpu));

    if (pMemory->bRpcAlloc &&
        (NV01_NULL_OBJECT != hMemory) &&
        (resGetRefCount(staticCast(pVirtualMemory, RsResource)) || (hVirtualMem == hMemory)))
    {
        //
        // resGetRefCount(pMemCtx->pResource) is zero when we get here from
        // RmFree -> clientFreeResourceTree_IMPL -> clientFreeResource_IMPL -> __nvoc_objDelete.
        //
        // memDestruct_IMPL -> CliDelDeviceMemory (i.e. hVirtualMem == hMemory) -> RmUnmapMemoryDma
        // are valid calls, since we call RPC_FREE later in memDestruct_IMPL.
        //
        // ifbDestruct_IMPL -> RmUnmapMemoryDma should not RPC_UNMAP_MEMORY_DMA, since RPC_FREE
        // was invoked earlier in the call stack.
        //
        NV_RM_RPC_UNMAP_MEMORY_DMA(pGpu, hClient, hBroadcastDevice, hVirtualMem, hMemory, 0, dmaOffset, status);
    }

    return status;
}

/*!
 * @brief Return the address and size of a VirtualMemory object
 */
void virtmemGetAddressAndSize_IMPL
(
    VirtualMemory *pVirtualMemory,
    NvU64         *pVAddr,
    NvU64         *pSize
)
{
    MEMORY_DESCRIPTOR *pMemDesc = staticCast(pVirtualMemory, Memory)->pMemDesc;

    *pVAddr = memdescGetPhysAddr(pMemDesc, AT_GPU_VA, 0);
    *pSize  = memdescGetSize(pMemDesc);
}