/*
 * SPDX-FileCopyrightText: Copyright (c) 2020-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 * SPDX-License-Identifier: MIT
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 */

#include "mem_mgr/virtual_mem.h"
#include "mem_mgr/vaspace.h"
#include "gpu/mem_mgr/virt_mem_allocator.h"
#include "virtualization/hypervisor/hypervisor.h"
#include "vgpu/rpc.h"
#include "gpu/mem_mgr/mem_desc.h"
#include "mem_mgr/mem.h"
#include "gpu/mem_mgr/mem_mgr.h"
#include "core/locks.h"
#include "kernel/gpu/rc/kernel_rc.h"
#include "gpu/device/device.h"
#include "Nvcm.h"
#include "gpu/mem_mgr/vaspace_api.h"
#include "gpu/mem_mgr/mem_utils.h"
#include "gpu/bus/kern_bus.h"
#include "gpu/bus/p2p_api.h"
#include "mem_mgr/gpu_vaspace.h"
#include "platform/sli/sli.h"

#include "class/cl0070.h" // NV01_MEMORY_VIRTUAL
#include "class/cl50a0.h" // NV50_MEMORY_VIRTUAL
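
//
// NV01_MEMORY_VIRTUAL describes an entire VA space and reserves no VA range at
// allocation time, while NV50_MEMORY_VIRTUAL reserves a VA range up front that
// is later backed via MapMemoryDma (summary of the construct/map paths below).
//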

static void _virtmemFreeKernelMapping(OBJGPU *, CLI_DMA_MAPPING_INFO *);

/*!
 * _virtmemQueryVirtAllocParams
 *
 * @brief
 *     Queries for the actual size of VA allocation, alignment
 *     and mask of page sizes (needed for page table allocation)
 *
 * @param[in]  pGpu                 OBJGPU pointer
 * @param[in]  hClient              Client handle
 * @param[in]  hDevice              Device handle
 * @param[in]  pAllocData           Pointer to VIDHEAP_ALLOC_DATA
 * @param[out] pAlign               Alignment
 * @param[out] pSize                Size of allocation
 * @param[out] ppVAS                Virtual address space for request
 * @param[out] pPageSizeLockMask    Mask of page sizes locked during VA reservation
 *
 * @returns
 *      NV_OK
 */
static NV_STATUS
_virtmemQueryVirtAllocParams
(
    OBJGPU                      *pGpu,
    NvHandle                     hClient,
    NvHandle                     hDevice,
    NV_MEMORY_ALLOCATION_PARAMS *pAllocData,
    NvU64                       *pAlign,
    NvU64                       *pSize,
    OBJVASPACE                 **ppVAS,
    NvU64                       *pPageSizeLockMask
)
{
    MemoryManager        *pMemoryManager     = GPU_GET_MEMORY_MANAGER(pGpu);
    RsClient             *pClient;
    FB_ALLOC_INFO        *pFbAllocInfo       = NULL;
    FB_ALLOC_PAGE_FORMAT *pFbAllocPageFormat = NULL;
    NV_STATUS             status             = NV_OK;
    NvBool                bReleaseGpuLock    = NV_FALSE;

    pFbAllocInfo = portMemAllocNonPaged(sizeof(FB_ALLOC_INFO));
    if (pFbAllocInfo == NULL)
    {
        NV_ASSERT(0);
        status = NV_ERR_NO_MEMORY;
        goto done;
    }

    pFbAllocPageFormat = portMemAllocNonPaged(sizeof(FB_ALLOC_PAGE_FORMAT));
    if (pFbAllocPageFormat == NULL)
    {
        NV_ASSERT(0);
        status = NV_ERR_NO_MEMORY;
        goto done;
    }

    portMemSet(pFbAllocInfo, 0, sizeof(FB_ALLOC_INFO));
    portMemSet(pFbAllocPageFormat, 0, sizeof(FB_ALLOC_PAGE_FORMAT));
    pFbAllocInfo->pageFormat = pFbAllocPageFormat;

    pFbAllocInfo->pageFormat->attr  = pAllocData->attr;
    pFbAllocInfo->pageFormat->attr2 = pAllocData->attr2;
    pFbAllocInfo->pageFormat->flags = pAllocData->flags;
    *pSize  = pAllocData->size;
    *pAlign = pAllocData->alignment;

    // LOCK: acquire device lock
    if (!rmDeviceGpuLockIsOwner(gpuGetInstance(pGpu)))
    {
        NV_ASSERT_OK_OR_GOTO(status, rmDeviceGpuLocksAcquire(pGpu, GPUS_LOCK_FLAGS_NONE,
                                                             RM_LOCK_MODULES_MEM_PMA), done);
        bReleaseGpuLock = NV_TRUE;
    }

    *pPageSizeLockMask = stdmemQueryPageSize(pMemoryManager, hClient,
                                             pAllocData);
    if (*pPageSizeLockMask == 0)
    {
        status = NV_ERR_INVALID_STATE;
        goto done;
    }

    NV_ASSERT_OK_OR_GOTO(status,
        serverGetClientUnderLock(&g_resServ, hClient, &pClient),
        done);

    NV_ASSERT_OK_OR_GOTO(status,
        vaspaceGetByHandleOrDeviceDefault(pClient, hDevice, pAllocData->hVASpace, ppVAS),
        done);

    NV_ASSERT_OK_OR_GOTO(status,
        vaspaceApplyDefaultAlignment(*ppVAS, pFbAllocInfo, pAlign, pSize, pPageSizeLockMask),
        done);

done:
    if (bReleaseGpuLock)
    {
        // UNLOCK: release device lock
        rmDeviceGpuLocksRelease(pGpu, GPUS_LOCK_FLAGS_NONE, NULL);
    }

    portMemFree(pFbAllocPageFormat);
    portMemFree(pFbAllocInfo);

    return status;
}

/*!
 * @brief Handle copy construction for VirtualMemory object
 */
static NV_STATUS
_virtmemCopyConstruct
(
    VirtualMemory                *pDstVirtualMemory,
    CALL_CONTEXT                 *pCallContext,
    RS_RES_ALLOC_PARAMS_INTERNAL *pParams
)
{
    RsClient      *pDstClient        = pCallContext->pClient;
    RsClient      *pSrcClient        = pParams->pSrcClient;
    RsResourceRef *pSrcRef           = pParams->pSrcRef;
    VirtualMemory *pSrcVirtualMemory = dynamicCast(pSrcRef->pResource, VirtualMemory);
    Memory        *pDstMemory        = staticCast(pDstVirtualMemory, Memory);
    Memory        *pSrcMemory        = staticCast(pSrcVirtualMemory, Memory);
    OBJGPU        *pSrcGpu           = pSrcMemory->pGpu;
    OBJVASPACE    *pVASSrc           = NULL;
    NvBool         bIncAllocRefCnt   = NV_FALSE;

    // Special handling for Dup of the FLA VASpace
    if (pSrcVirtualMemory->bFlaVAS)
    {
        Device        *pDstDevice;
        RsClient      *pFlaClient;
        RM_API        *pRmApi = rmapiGetInterface(RMAPI_GPU_LOCK_INTERNAL);
        NvHandle       hImportedVASpace = NV01_NULL_OBJECT;
        RsResourceRef *pDupedVasRef;

        NV_CHECK_OK_OR_RETURN(LEVEL_ERROR,
            serverGetClientUnderLock(&g_resServ, GPU_GET_KERNEL_BUS(pSrcGpu)->flaInfo.hClient, &pFlaClient));

        NV_CHECK_OK_OR_RETURN(LEVEL_ERROR,
            vaspaceGetByHandleOrDeviceDefault(pFlaClient,
                                              RES_GET_HANDLE(pSrcMemory->pDevice),
                                              GPU_GET_KERNEL_BUS(pSrcGpu)->flaInfo.hFlaVASpace,
                                              &pVASSrc));

        //
        // FLA memory can be duped during the import stage, and the importing client might not
        // be the same as the exporting client. The importing client might also not be bound to
        // the exporting FLA VASpace on the exporting device. In that case, we might see leaks
        // in the exporting FLA VASpace. To avoid those scenarios, dup the FLA VAS to the
        // importing client under the exporting device.
        // RS-TODO: Bug 3059751 to track the duped VAS as dependant in ResServer
        //
        NV_ASSERT_OK_OR_RETURN(deviceGetByGpu(pDstClient, pSrcGpu, NV_TRUE, &pDstDevice));

        NV_CHECK_OK_OR_RETURN(LEVEL_ERROR,
            pRmApi->DupObject(pRmApi,
                              pDstClient->hClient,
                              RES_GET_HANDLE(pDstDevice),
                              &hImportedVASpace,
                              GPU_GET_KERNEL_BUS(pSrcGpu)->flaInfo.hClient,
                              GPU_GET_KERNEL_BUS(pSrcGpu)->flaInfo.hFlaVASpace,
                              0));

        if (clientGetResourceRef(pDstClient, hImportedVASpace, &pDupedVasRef) == NV_OK)
            refAddDependant(pDupedVasRef, RES_GET_REF(pDstVirtualMemory));

        pDstVirtualMemory->hVASpace = hImportedVASpace;

        // Increase refcount if locally managed
        bIncAllocRefCnt = !pSrcMemory->bRpcAlloc;
    }
    else if (pSrcVirtualMemory->hVASpace == NV_MEMORY_VIRTUAL_SYSMEM_DYNAMIC_HVASPACE)
    {
        // A legacy sysmem dynamic object does not have a valid hVASpace
        pDstVirtualMemory->hVASpace = NV_MEMORY_VIRTUAL_SYSMEM_DYNAMIC_HVASPACE;

        // No VA space refcount to update
        bIncAllocRefCnt = NV_FALSE;
    }
    else
    {
        OBJVASPACE *pVASDst = NULL;

        NV_CHECK_OK_OR_RETURN(LEVEL_SILENT,
            vaspaceGetByHandleOrDeviceDefault(pSrcClient,
                                              RES_GET_HANDLE(pSrcMemory->pDevice),
                                              pSrcVirtualMemory->hVASpace, &pVASSrc));
        NV_CHECK_OK_OR_RETURN(LEVEL_SILENT,
            vaspaceGetByHandleOrDeviceDefault(pDstClient,
                                              RES_GET_HANDLE(pDstMemory->pDevice),
                                              NV01_NULL_OBJECT, &pVASDst));
        if (pVASSrc != pVASDst)
        {
            return NV_ERR_INVALID_DEVICE;
        }

        pDstVirtualMemory->hVASpace = NV01_NULL_OBJECT;

        // Increase refcount for locally managed NV50_MEMORY_VIRTUAL
        bIncAllocRefCnt = pSrcVirtualMemory->bReserveVaOnAlloc && !pSrcMemory->bRpcAlloc;
    }

    pDstVirtualMemory->bAllowUnicastMapping = pSrcVirtualMemory->bAllowUnicastMapping;
    pDstVirtualMemory->bReserveVaOnAlloc    = pSrcVirtualMemory->bReserveVaOnAlloc;
    pDstVirtualMemory->bFlaVAS              = pSrcVirtualMemory->bFlaVAS;

    // Mappings do not follow virtual memory object
    pDstVirtualMemory->pDmaMappingList = NULL;

    if (bIncAllocRefCnt)
    {
        NvU64 vaddr;
        NvU64 size;

        virtmemGetAddressAndSize(pSrcVirtualMemory, &vaddr, &size);
        NV_CHECK_OK_OR_RETURN(LEVEL_ERROR,
            vaspaceIncAllocRefCnt(pVASSrc, vaddr));
    }

    return NV_OK;
}
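
//
// Illustrative client-side allocation of an NV50_MEMORY_VIRTUAL object.
// This is a sketch only: the handle variables and the owner/size values are
// assumptions for illustration, not values taken from this file.
//
//     NV_MEMORY_ALLOCATION_PARAMS params = {0};
//     params.owner    = 0x76697274;                  // arbitrary nonzero owner tag
//     params.type     = NVOS32_TYPE_IMAGE;
//     params.flags    = NVOS32_ALLOC_FLAGS_VIRTUAL;  // required; checked below
//     params.size     = 0x100000;                    // 1 MB of VA space
//     params.hVASpace = NV01_NULL_OBJECT;            // use the device-default VAS
//     status = pRmApi->Alloc(pRmApi, hClient, hDevice, &hVirtualMem,
//                            NV50_MEMORY_VIRTUAL, &params, sizeof(params));
//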

/*!
 * virtmemConstruct
 *
 * @brief
 *     This routine provides common allocation services used by the
 *     following heap allocation functions:
 *       NVOS32_FUNCTION_ALLOC_SIZE
 *       NVOS32_FUNCTION_ALLOC_SIZE_RANGE
 *       NVOS32_FUNCTION_ALLOC_TILED_PITCH_HEIGHT
 *
 * @param[in]  pVirtualMemory  Pointer to VirtualMemory object
 * @param[in]  pCallContext    Pointer to the current CALL_CONTEXT.
 * @param[in]  pParams         Pointer to the alloc params
 *
 * @return 'NV_OK'
 *     Operation completed successfully.
 * @return 'NV_ERR_NO_MEMORY'
 *     There is not enough available memory to satisfy allocation request.
 * @return 'NV_ERR_INSUFFICIENT_RESOURCES'
 *     Not enough available resources to satisfy allocation request.
 */
NV_STATUS
virtmemConstruct_IMPL
(
    VirtualMemory                *pVirtualMemory,
    CALL_CONTEXT                 *pCallContext,
    RS_RES_ALLOC_PARAMS_INTERNAL *pParams
)
{
    Memory                      *pMemory            = staticCast(pVirtualMemory, Memory);
    NV_MEMORY_ALLOCATION_PARAMS *pAllocData         = pParams->pAllocParams;
    MEMORY_ALLOCATION_REQUEST    allocRequest       = {0};
    MEMORY_ALLOCATION_REQUEST   *pAllocRequest      = &allocRequest;
    OBJGPU                      *pGpu               = pMemory->pGpu;
    MemoryManager               *pMemoryManager     = GPU_GET_MEMORY_MANAGER(pGpu);
    OBJVASPACE                  *pVAS               = NULL;
    HWRESOURCE_INFO              hwResource;
    RsClient                    *pRsClient          = pCallContext->pClient;
    RsResourceRef               *pResourceRef       = pCallContext->pResourceRef;
    RsResourceRef               *pVASpaceRef        = NULL;
    NvU32                        gpuCacheAttrib;
    NV_STATUS                    status             = NV_OK;
    NvHandle                     hClient            = pCallContext->pClient->hClient;
    NvHandle                     hParent            = pCallContext->pResourceRef->pParentRef->hResource;
    NvU64                        sizeOut;
    NvU64                        offsetOut;
    NvBool                       bLockAcquired      = NV_FALSE;
    NvU32                        attr               = 0;
    NvU32                        attr2              = 0;
    NvBool                       bRpcAlloc          = NV_FALSE;
    NvBool                       bResAllocated      = NV_FALSE;
    NvU32                        gpuMask            = 0;
    FB_ALLOC_INFO               *pFbAllocInfo       = NULL;
    FB_ALLOC_PAGE_FORMAT        *pFbAllocPageFormat = NULL;

    // Bulk of copy-construction is done by Memory class. Handle our members.
    if (RS_IS_COPY_CTOR(pParams))
    {
        NV_ASSERT_OK_OR_RETURN(rmGpuGroupLockAcquire(pGpu->gpuInstance,
                                                     GPU_LOCK_GRP_ALL,
                                                     GPU_LOCK_FLAGS_SAFE_LOCK_UPGRADE,
                                                     RM_LOCK_MODULES_MEM,
                                                     &gpuMask));

        status = _virtmemCopyConstruct(pVirtualMemory, pCallContext, pParams);

        rmGpuGroupLockRelease(gpuMask, GPUS_LOCK_FLAGS_NONE);

        goto done;
    }

    pVirtualMemory->hVASpace = RM_INVALID_VASPACE_HANDLE;
    pVirtualMemory->bAllowUnicastMapping = NV_FALSE;
    pVirtualMemory->bReserveVaOnAlloc = NV_FALSE;
    pVirtualMemory->bFlaVAS = NV_FALSE;
    pVirtualMemory->pDmaMappingList = NULL;

    // NV01_MEMORY_VIRTUAL does not allocate typed memory from the heap
    if (pParams->externalClassId == NV01_MEMORY_VIRTUAL)
        return NV_OK;

    NV_CHECK_OK_OR_RETURN(LEVEL_ERROR, stdmemValidateParams(pGpu, hClient, pAllocData));
    NV_CHECK_OR_RETURN(LEVEL_ERROR, pAllocData->flags & NVOS32_ALLOC_FLAGS_VIRTUAL, NV_ERR_INVALID_ARGUMENT);

    stdmemDumpInputAllocParams(pAllocData, pCallContext);

    attr  = pAllocData->attr;
    attr2 = pAllocData->attr2;

    pAllocRequest->classNum = NV50_MEMORY_VIRTUAL;
    pAllocRequest->pUserParams = pAllocData;
    pAllocRequest->hMemory = pResourceRef->hResource;
    pAllocRequest->hClient = hClient;
    pAllocRequest->hParent = hParent;
    pAllocRequest->pGpu = pGpu;
    pAllocRequest->internalflags = NVOS32_ALLOC_INTERNAL_FLAGS_CLIENTALLOC;
    pAllocRequest->pHwResource = &hwResource;

    // Unsure if we need to keep separate copies, but keeping old behavior for now.
    sizeOut = pAllocData->size;
    offsetOut = pAllocData->offset;

    //
    // Reserve memory for page tables in the case of non-lazy page table
    // allocations.
    //
    // PageLevelMemReserve will reserve only if the PDB property for
    // client-managed page tables is set.
    //
    if (memmgrIsPmaInitialized(pMemoryManager) &&
        !(pAllocData->flags & NVOS32_ALLOC_FLAGS_LAZY) &&
        !(pAllocData->flags & NVOS32_ALLOC_FLAGS_EXTERNALLY_MANAGED))
    {
        NvU64   size;
        NvU64   align;
        NvU64   pageSizeLockMask;
        Device *pDevice;

        NV_ASSERT_OK_OR_GOTO(status,
            deviceGetByHandle(pRsClient, hParent, &pDevice),
            done);

        SLI_LOOP_START(SLI_LOOP_FLAGS_BC_ONLY | SLI_LOOP_FLAGS_IGNORE_REENTRANCY)

        pAllocRequest->pGpu = pGpu;
        size             = 0;
        align            = 0;
        pageSizeLockMask = 0;

        status = _virtmemQueryVirtAllocParams(pGpu, hClient, hParent,
                                              pAllocData, &align, &size,
                                              &pVAS, &pageSizeLockMask);
        if (NV_OK != status)
            SLI_LOOP_GOTO(done);

        status = vaspaceReserveMempool(pVAS, pGpu, pDevice,
                                       size, pageSizeLockMask,
                                       VASPACE_RESERVE_FLAGS_NONE);
        if (NV_OK != status)
            SLI_LOOP_GOTO(done);

        SLI_LOOP_END;
    }

    if (RMCFG_FEATURE_RM_BASIC_LOCK_MODEL)
    {
        //
        // Can't move locking up as PMA locks need to be taken first.
        // Acquire the lock *only after* PMA is done allocating.
        //
        if (!rmDeviceGpuLockIsOwner(pGpu->gpuInstance) && !rmGpuLockIsOwner())
        {
            NV_ASSERT_OK_OR_GOTO(status,
                                 rmDeviceGpuLocksAcquire(pGpu, GPUS_LOCK_FLAGS_NONE,
                                                         RM_LOCK_MODULES_MEM),
                                 done);

            bLockAcquired = NV_TRUE;
        }
    }

    {
        //
        // If using thwap to generate an allocation failure here, fail the
        // alloc right away.
        //
        KernelRc *pKernelRc = GPU_GET_KERNEL_RC(pGpu);
        if (pKernelRc != NULL &&
            !krcTestAllowAlloc(pGpu, pKernelRc,
                               NV_ROBUST_CHANNEL_ALLOCFAIL_HEAP))
        {
            status = NV_ERR_INSUFFICIENT_RESOURCES;
            goto done;
        }
    }

    // Validate virtual address space
    NV_CHECK_OK_OR_GOTO(status, LEVEL_SILENT,
        vaspaceGetByHandleOrDeviceDefault(pRsClient, hParent, pAllocData->hVASpace, &pVAS),
        done);

    pVirtualMemory->bFlaVAS = !!(vaspaceGetFlags(pVAS) & VASPACE_FLAGS_FLA);
    pVirtualMemory->bOptimizePageTableMempoolUsage =
        !!(vaspaceGetFlags(pVAS) & VASPACE_FLAGS_OPTIMIZE_PTETABLE_MEMPOOL_USAGE);

    if (IS_VIRTUAL(pGpu) || IS_GSP_CLIENT(pGpu))
    {
        NvBool bSriovFull = IS_VIRTUAL_WITH_SRIOV(pGpu) &&
                            !gpuIsWarBug200577889SriovHeavyEnabled(pGpu);
        NvBool bBar1VAS   = !!(vaspaceGetFlags(pVAS) & VASPACE_FLAGS_BAR_BAR1);

        //
        // Skip RPC to the Host RM when local RM is managing page tables. Special case
        // for early SR-IOV that only manages BAR1 and FLA page tables in the guest.
        //
        bRpcAlloc = !(gpuIsSplitVasManagementServerClientRmEnabled(pGpu) ||
                      (bSriovFull && (bBar1VAS || pVirtualMemory->bFlaVAS)));
    }

    if (bRpcAlloc)
    {
        NV_CHECK_OK_OR_GOTO(status, LEVEL_SILENT,
            memdescCreate(&pAllocRequest->pMemDesc, pGpu,
                          pAllocRequest->pUserParams->size, 0, NV_TRUE,
                          ADDR_VIRTUAL,
                          NV_MEMORY_UNCACHED,
                          MEMDESC_FLAGS_SKIP_RESOURCE_COMPUTE),
            done);
    }
    else
    {
        pFbAllocInfo = portMemAllocNonPaged(sizeof(FB_ALLOC_INFO));
        NV_ASSERT_TRUE_OR_GOTO(status, pFbAllocInfo != NULL, NV_ERR_NO_MEMORY, done);

        pFbAllocPageFormat = portMemAllocNonPaged(sizeof(FB_ALLOC_PAGE_FORMAT));
        NV_ASSERT_TRUE_OR_GOTO(status, pFbAllocPageFormat != NULL, NV_ERR_NO_MEMORY, done);

        portMemSet(pFbAllocInfo, 0, sizeof(FB_ALLOC_INFO));
        portMemSet(pFbAllocPageFormat, 0, sizeof(FB_ALLOC_PAGE_FORMAT));
        pFbAllocInfo->pageFormat = pFbAllocPageFormat;

        memUtilsInitFBAllocInfo(pAllocRequest->pUserParams, pFbAllocInfo, hClient, hParent);

        // Call memmgr to get memory.
        NV_CHECK_OK_OR_GOTO(status, LEVEL_SILENT,
            memmgrAllocResources(pGpu, pMemoryManager, pAllocRequest, pFbAllocInfo),
            done);

        NV_CHECK_OK_OR_GOTO(status, LEVEL_SILENT,
            virtmemAllocResources(pGpu, pMemoryManager, pAllocRequest, pFbAllocInfo),
            done);

        bResAllocated = NV_TRUE;
    }

    NV_ASSERT(pAllocRequest->pMemDesc != NULL);

    //
    // Copy final heap size/offset back to the client struct.
    //
    // What should we return: the system or the device physical address?
    // Return the device physical address for now; this may change with the
    // heap refactoring.
    //
    // Both the system and device physical addresses can be obtained using the
    // nv0041CtrlCmdGetSurfacePhysAttr ctrl call.
    //
    offsetOut = memdescGetPhysAddr(pAllocRequest->pMemDesc, AT_GPU, 0);
    sizeOut = pAllocRequest->pMemDesc->Size;
    pAllocData->limit = sizeOut - 1;

    // To handle pre-NV50 classes, resolve the _DEFAULT GPU cacheability attribute
    if (DRF_VAL(OS32, _ATTR2, _GPU_CACHEABLE, pAllocData->attr2) ==
        NVOS32_ATTR2_GPU_CACHEABLE_DEFAULT)
    {
        pAllocData->attr2 = FLD_SET_DRF(OS32, _ATTR2, _GPU_CACHEABLE, _NO,
                                        pAllocData->attr2);
    }

    if (DRF_VAL(OS32, _ATTR2, _GPU_CACHEABLE, pAllocData->attr2) ==
        NVOS32_ATTR2_GPU_CACHEABLE_YES)
    {
        gpuCacheAttrib = NV_MEMORY_CACHED;
    }
    else
    {
        gpuCacheAttrib = NV_MEMORY_UNCACHED;
    }

    //
    // Issue the RPC if page tables are managed in the Host/GSP RM. This depends on
    // the type of object we have and the VGPU/GSP mode. We issue this prior to
    // memConstructCommon, as the RPC fills in pAllocData->offset.
    //
    if (bRpcAlloc)
    {
        NV_RM_RPC_ALLOC_VIRTMEM(pGpu,
                                hClient,
                                hParent,
                                pAllocData->hVASpace,
                                pAllocRequest->hMemory,
                                &pAllocData->offset,
                                pAllocRequest->pMemDesc->Size,
                                attr,
                                attr2,
                                pAllocData->type,
                                pAllocData->flags,
                                pAllocData->height,
                                pAllocData->width,
                                pAllocData->format,
                                pAllocData->comprCovg,
                                pAllocData->zcullCovg,
                                pAllocData->rangeLo,
                                pAllocData->rangeHi,
                                pAllocData->alignment,
                                status);
        NV_CHECK_OK_OR_GOTO(status, LEVEL_ERROR, status, done);

        // Update memory descriptor with results of the RPC
        memdescDescribe(pAllocRequest->pMemDesc,
                        memdescGetAddressSpace(pAllocRequest->pMemDesc),
                        pAllocData->offset,
                        pAllocRequest->pMemDesc->Size);

        // Assign offset back to caller
        offsetOut = memdescGetPhysAddr(pAllocRequest->pMemDesc, AT_GPU, 0);
    }

    //
    // The idea is to allocate virtual address space and record it (lo, limit) in this mem
    // object. Later call MapMemoryDma(hThisMem, hSomePhysMem) to back it.
    //
    NV_CHECK_OK_OR_GOTO(status, LEVEL_SILENT,
        memConstructCommon(pMemory, pAllocRequest->classNum, pAllocData->flags,
                           pAllocRequest->pMemDesc, pAllocData->owner, NULL, pAllocData->attr,
                           pAllocData->attr2, 0, pAllocData->type, NVOS32_MEM_TAG_NONE, NULL),
        done);
    pMemory->bRpcAlloc = bRpcAlloc;

    pVirtualMemory->hVASpace = pAllocData->hVASpace;
    pVirtualMemory->bReserveVaOnAlloc = NV_TRUE;

    if (pAllocData->hVASpace != NV01_NULL_OBJECT)
    {
        NV_CHECK_OK_OR_GOTO(status, LEVEL_SILENT,
            clientGetResourceRef(pRsClient, pAllocData->hVASpace, &pVASpaceRef),
            done);
        if (pVASpaceRef != NULL)
            refAddDependant(pVASpaceRef, pResourceRef);
    }

    NV_ASSERT(pMemory->pMemDesc);
    NV_ASSERT(memdescGetAddressSpace(pMemory->pMemDesc) == ADDR_VIRTUAL);
    memdescSetGpuCacheAttrib(pMemory->pMemDesc, gpuCacheAttrib);

    pAllocData->size = sizeOut;
    pAllocData->offset = offsetOut;

    stdmemDumpOutputAllocParams(pAllocData);

done:
    if (status != NV_OK)
    {
        if (pAllocRequest->pMemDesc != NULL)
        {
            if (pMemory->pMemDesc != NULL)
            {
                memDestructCommon(pMemory);
                pMemory->pMemDesc = NULL;
            }

            if (bResAllocated)
            {
                memmgrFree(pGpu, pMemoryManager, NULL,
                           hClient, hParent, pAllocData->hVASpace,
                           pAllocData->owner,
                           pAllocRequest->pMemDesc);
            }

            if (bRpcAlloc)
            {
                memdescDestroy(pAllocRequest->pMemDesc);
            }
        }
        // Cleanup of vaspaceReserveMempool allocations is managed independently
    }

    portMemFree(pFbAllocPageFormat);
    portMemFree(pFbAllocInfo);

    if (bLockAcquired)
    {
        // UNLOCK: release GPUs lock
        rmDeviceGpuLocksRelease(pGpu, GPUS_LOCK_FLAGS_NONE, NULL);
    }

    return status;
}

/*!
 * @brief Handle destruction of VirtualMemory specific fields
 */
void
virtmemDestruct_IMPL
(
    VirtualMemory *pVirtualMemory
)
{
    Memory            *pMemory        = staticCast(pVirtualMemory, Memory);
    OBJGPU            *pGpu           = pMemory->pGpu;
    MemoryManager     *pMemoryManager = GPU_GET_MEMORY_MANAGER(pGpu);
    NvHandle           hClient;
    NvHandle           hParent;
    NvHandle           hVASpace;
    MEMORY_DESCRIPTOR *pMemDesc;
    NvU32              heapOwner;
    NV_STATUS          status = NV_OK;

    // Save needed state from memory object before common destruction
    hClient   = RES_GET_CLIENT_HANDLE(pVirtualMemory);
    hParent   = RES_GET_PARENT_HANDLE(pVirtualMemory);
    hVASpace  = pVirtualMemory->hVASpace;
    pMemDesc  = pMemory->pMemDesc;
    heapOwner = pMemory->HeapOwner;

    NV_ASSERT(pMemDesc);

    memDestructCommon(pMemory);

    //
    // NV50_MEMORY_VIRTUAL may have an underlying heap allocation associated with the
    // object to free, depending on which RM/VGPU context we are in. This is tracked
    // at object creation time.
    //
    // If we RPCed a NV50_MEMORY_VIRTUAL or we have a NV01_MEMORY_VIRTUAL, then just
    // destroy the memdesc and RPC the free if required.
    //
    if (pMemory->bRpcAlloc || pMemory->categoryClassId == NV01_MEMORY_VIRTUAL)
    {
        NV_ASSERT(pMemDesc->Allocated == 0);
        memdescDestroy(pMemDesc);
    }
    else
    {
        NV_ASSERT(heapOwner != 0);

        // Get the relevant information from the client memory info and free it
        status = memmgrFree(pGpu,
                            pMemoryManager,
                            NULL,
                            hClient,
                            hParent,
                            hVASpace,
                            heapOwner,
                            pMemDesc);
        if (status != NV_OK)
        {
            NV_PRINTF(LEVEL_ERROR,
                      "VirtualMemory memmgrFree failed, client: %x, hVASpace: %x, gpu: %x\n",
                      RES_GET_CLIENT_HANDLE(pVirtualMemory),
                      hVASpace,
                      pGpu->gpuInstance);
        }
    }
}

NV_STATUS
virtmemAllocResources
(
    OBJGPU                    *pGpu,
    MemoryManager             *pMemoryManager,
    MEMORY_ALLOCATION_REQUEST *pAllocRequest,
    FB_ALLOC_INFO             *pFbAllocInfo
)
{
    NV_STATUS                    status          = NV_OK;
    MEMORY_DESCRIPTOR           *pMemDesc        = NULL;
    RsClient                    *pRsClient       = NULL;
    NV_MEMORY_ALLOCATION_PARAMS *pVidHeapAlloc   = pAllocRequest->pUserParams;
    NvHandle                     hVASpace        = pVidHeapAlloc->hVASpace;
    NvBool                       bAllocedMemDesc = NV_FALSE;
    NvBool                       bBar1VA         = NV_FALSE;
    NvBool                       bFlaVA          = NV_FALSE;

    NV_ASSERT(!(pVidHeapAlloc->flags & NVOS32_ALLOC_FLAGS_WPR1) && !(pVidHeapAlloc->flags & NVOS32_ALLOC_FLAGS_WPR2));

    NV_CHECK_OK_OR_GOTO(status, LEVEL_ERROR, memUtilsAllocMemDesc(pGpu, pAllocRequest, pFbAllocInfo, &pMemDesc, NULL,
                                                                  ADDR_VIRTUAL, NV_TRUE, &bAllocedMemDesc), failed);

    // Only a kernel client can request a protected allocation
    if (pFbAllocInfo->pageFormat->flags & NVOS32_ALLOC_FLAGS_ALLOCATE_KERNEL_PRIVILEGED)
    {
        CALL_CONTEXT *pCallContext = resservGetTlsCallContext();
        RS_PRIV_LEVEL privLevel;

        //
        // This function has use cases where the call context is unavailable.
        // In those cases, fall back to cached privileges.
        //
        if (pCallContext == NULL)
        {
            privLevel = rmclientGetCachedPrivilegeByHandle(pFbAllocInfo->hClient);
        }
        else
        {
            privLevel = pCallContext->secInfo.privLevel;
        }

        if (privLevel >= RS_PRIV_LEVEL_KERNEL)
        {
            pFbAllocInfo->bIsKernelAlloc = NV_TRUE;
        }
        else
        {
            NV_PRINTF(LEVEL_ERROR, "NV_ERR_INSUFFICIENT_PERMISSIONS\n");
            status = NV_ERR_INSUFFICIENT_PERMISSIONS;
            goto failed;
        }
    }

    // Allocate a virtual surface
    if (pVidHeapAlloc->flags & NVOS32_ALLOC_FLAGS_FIXED_ADDRESS_ALLOCATE)
        pFbAllocInfo->offset = pVidHeapAlloc->offset - pFbAllocInfo->alignPad;

    //
    // pFbAllocInfo->hClient = 0 is sometimes passed and the client is not always
    // needed, so do not fail this call immediately; fail only if the client
    // actually needs to be used.
    //
    status = serverGetClientUnderLock(&g_resServ, pFbAllocInfo->hClient, &pRsClient);

    //
    // vGPU:
    //
    // Since vGPU does all real hardware management in the
    // host, if we are in guest OS (where IS_VIRTUAL(pGpu) is true),
    // do an RPC to the host to do the hardware update.
    // In case of SR-IOV, the VAS is managed by the guest. So, no need
    // to communicate with the host for VA allocation.
    //
    if (IS_VIRTUAL_WITH_SRIOV(pGpu))
    {
        OBJVASPACE *pVAS = NULL;

        // Only try this if GetClient succeeded, else pass through the status from its fail.
        if (pRsClient != NULL)
            status = vaspaceGetByHandleOrDeviceDefault(pRsClient, pFbAllocInfo->hDevice, hVASpace, &pVAS);
        if (NV_OK != status)
            goto failed;

        bBar1VA = !!(vaspaceGetFlags(pVAS) & VASPACE_FLAGS_BAR_BAR1);
        bFlaVA  = !!(vaspaceGetFlags(pVAS) & VASPACE_FLAGS_FLA);
    }

    // For Virtual FLA allocations, we don't have to RPC
    if ((!IS_VIRTUAL(pGpu) && !IS_GSP_CLIENT(pGpu)) ||
        bBar1VA || bFlaVA ||
        gpuIsSplitVasManagementServerClientRmEnabled(pGpu))
    {
        OBJVASPACE  *pVAS  = NULL;
        OBJGVASPACE *pGVAS = NULL;
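        //
        // pFbAllocInfo->align carries (alignment - 1); convert it to the actual
        // alignment here, and back again below once vaspaceAlloc has chosen the
        // final alignment.
        //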
        NvU64           align = pFbAllocInfo->align + 1;
        VAS_ALLOC_FLAGS flags = {0};
        NvU64           pageSizeLockMask = 0;
        pFbAllocInfo->internalflags = pAllocRequest->internalflags;

        // Only try this if GetClient succeeded, else pass through the status from its fail.
        if (pRsClient != NULL)
            status = vaspaceGetByHandleOrDeviceDefault(pRsClient, pFbAllocInfo->hDevice, hVASpace, &pVAS);
        if (NV_OK != status)
            goto failed;

        //
        // Feature requested for RM unlinked SLI:
        // Clients can pass an allocation flag to the device or VA space constructor
        // so that mappings and allocations will fail without an explicit address.
        //
        pGVAS = dynamicCast(pVAS, OBJGVASPACE);
        if (pGVAS != NULL)
        {
            if ((pGVAS->flags & VASPACE_FLAGS_REQUIRE_FIXED_OFFSET) &&
                !(pVidHeapAlloc->flags & NVOS32_ALLOC_FLAGS_FIXED_ADDRESS_ALLOCATE))
            {
                status = NV_ERR_INVALID_ARGUMENT;
                NV_PRINTF(LEVEL_ERROR, "The VA space requires all allocations to specify a fixed address\n");
                goto failed;
            }
        }

        status = vaspaceFillAllocParams(pVAS, pFbAllocInfo,
                                        &pFbAllocInfo->size, &align,
                                        &pVidHeapAlloc->rangeLo, &pVidHeapAlloc->rangeHi,
                                        &pageSizeLockMask, &flags);
        if (NV_OK != status)
        {
            NV_PRINTF(LEVEL_ERROR, "FillAllocParams failed.\n");
            DBG_BREAKPOINT();
        }
        else
        {
            status = vaspaceAlloc(pVAS, pFbAllocInfo->size, align,
                                  pVidHeapAlloc->rangeLo, pVidHeapAlloc->rangeHi,
                                  pageSizeLockMask, flags, &pFbAllocInfo->offset);
            if (NV_OK != status)
            {
                NV_PRINTF(LEVEL_ERROR,
                          "VA Space alloc failed! Status Code: 0x%x Size: 0x%llx RangeLo: 0x%llx,"
                          " RangeHi: 0x%llx, pageSzLockMask: 0x%llx\n",
                          status, pFbAllocInfo->size,
                          pVidHeapAlloc->rangeLo, pVidHeapAlloc->rangeHi,
                          pageSizeLockMask);
                status = NV_ERR_INSUFFICIENT_RESOURCES;
                goto failed;
            }

            memdescDescribe(pMemDesc, ADDR_VIRTUAL,
                            pFbAllocInfo->offset,
                            pFbAllocInfo->size);

            // Return alignment info.
            pFbAllocInfo->align = align - 1;
            pVidHeapAlloc->alignment = align;
        }
    }
    else
    {
        // Possibly dead code: IS_VIRTUAL and bSplitVAs are only enabled on legacy vGPU.
        memdescDescribe(pMemDesc, ADDR_VIRTUAL, memdescGetPte(pMemDesc, AT_GPU, 0),
                        pMemDesc->Size);
    }

    //
    // Report default (any) page size for virtual allocations with no page size restriction.
    // Actual page size will be determined at map time.
    //
    if (FLD_TEST_DRF(OS32, _ATTR, _PAGE_SIZE, _DEFAULT, pFbAllocInfo->pageFormat->attr))
    {
        pFbAllocInfo->retAttr = FLD_SET_DRF(OS32, _ATTR, _PAGE_SIZE, _DEFAULT, pFbAllocInfo->retAttr);
    }

    // get possibly updated surface attributes
    pVidHeapAlloc->attr = pFbAllocInfo->retAttr;
    pVidHeapAlloc->attr2 = pFbAllocInfo->retAttr2;

    // update contiguity attribute to reflect memdesc
    if (memdescGetContiguity(pAllocRequest->pMemDesc, AT_GPU))
    {
        pVidHeapAlloc->attr = FLD_SET_DRF(OS32, _ATTR, _PHYSICALITY,
                                          _CONTIGUOUS,
                                          pVidHeapAlloc->attr);
    }
    else
    {
        pVidHeapAlloc->attr = FLD_SET_DRF(OS32, _ATTR, _PHYSICALITY,
                                          _NONCONTIGUOUS,
                                          pVidHeapAlloc->attr);
    }

    pVidHeapAlloc->offset = pFbAllocInfo->offset;

    if (pAllocRequest->pHwResource != NULL)
    {
        pAllocRequest->pHwResource->attr       = pFbAllocInfo->retAttr;
        pAllocRequest->pHwResource->attr2      = pFbAllocInfo->retAttr2;
        pAllocRequest->pHwResource->hwResId    = pFbAllocInfo->hwResId;
        pAllocRequest->pHwResource->comprCovg  = pFbAllocInfo->comprCovg;
        pAllocRequest->pHwResource->ctagOffset = pFbAllocInfo->ctagOffset;
    }

    return NV_OK;

failed:
    memmgrFreeHwResources(pGpu, pMemoryManager, pFbAllocInfo);

    if (bAllocedMemDesc)
    {
        memdescDestroy(pAllocRequest->pMemDesc);
        pAllocRequest->pMemDesc = NULL;
    }

    return status;
}

/*!
 * @brief Interface to vaspaceReserveMempool to reserve PMA memory for page tables
 */
NV_STATUS virtmemReserveMempool_IMPL
(
    VirtualMemory *pVirtualMemory,
    OBJGPU        *pGpu,
    Device        *pDevice,
    NvU64          size,
    NvU64          pageSizeMask
)
{
    RsClient   *pClient = RES_GET_CLIENT(pVirtualMemory);
    OBJVASPACE *pVAS    = NULL;
    NvU32       mempoolFlags = VASPACE_RESERVE_FLAGS_NONE;

    //
    // Reject mappings for a legacy NV01_MEMORY_SYSTEM_DYNAMIC
    // object silently.
    //
    if (pVirtualMemory->hVASpace == NV_MEMORY_VIRTUAL_SYSMEM_DYNAMIC_HVASPACE)
    {
        return NV_ERR_INVALID_OBJECT;
    }

    if (pVirtualMemory->bOptimizePageTableMempoolUsage)
    {
        mempoolFlags = VASPACE_RESERVE_FLAGS_ALLOC_UPTO_TARGET_LEVEL_ONLY;
    }

    NV_ASSERT_OK_OR_RETURN(
        vaspaceGetByHandleOrDeviceDefault(pClient, RES_GET_HANDLE(pDevice),
                                          pVirtualMemory->hVASpace, &pVAS));

    return vaspaceReserveMempool(pVAS, pGpu, pDevice,
                                 size, pageSizeMask, mempoolFlags);
}
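
//
// Illustrative call pattern (a sketch; pVirtualMemory, pGpu, pDevice, mapLength
// and pageSizeMask are assumed to come from the caller): reserve page table
// backing before mapping into this virtual allocation, so the later map does
// not fail on mempool exhaustion.
//
//     status = virtmemReserveMempool(pVirtualMemory, pGpu, pDevice,
//                                    mapLength, pageSizeMask);
//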

/*!
 * @brief Does this VirtualMemory object use the specified hVASpace?
 */
NvBool
virtmemMatchesVASpace_IMPL
(
    VirtualMemory *pVirtualMemory,
    NvHandle       hClient,
    NvHandle       hVASpace
)
{
    return (RES_GET_CLIENT_HANDLE(pVirtualMemory) == hClient) && (pVirtualMemory->hVASpace == hVASpace);
}

/*!
 * @brief Helper to look up a VirtualMemory object
 */
NV_STATUS
virtmemGetByHandleAndDevice_IMPL
(
    RsClient       *pClient,
    NvHandle        hMemory,
    NvHandle        hDevice,
    VirtualMemory **ppVirtualMemory
)
{
    Memory *pMemory;

    NV_CHECK_OK_OR_RETURN(LEVEL_SILENT,
        memGetByHandleAndDevice(pClient, hMemory, hDevice, &pMemory));

    *ppVirtualMemory = dynamicCast(pMemory, VirtualMemory);

    return (*ppVirtualMemory != NULL) ? NV_OK : NV_ERR_INVALID_OBJECT_HANDLE;
}

/*!
 * @brief Create a CPU mapping in addition to the DMA mapping
 */
static NV_STATUS
_virtmemAllocKernelMapping
(
    OBJGPU               *pGpu,
    OBJVASPACE           *pVAS,
    CLI_DMA_MAPPING_INFO *pDmaMappingInfo,
    NvU64                 offset,
    NvU64                 size,
    Memory               *pMemoryInfo
)
{
    NV_STATUS  status              = NV_OK;
    NvBool     bCoherentCpuMapping = pGpu->getProperty(pGpu, PDB_PROP_GPU_COHERENT_CPU_MAPPING);
    NvU32      gpuSubDevInst;
    RmPhysAddr bar1PhysAddr;

    SLI_LOOP_START(SLI_LOOP_FLAGS_NONE)

    gpuSubDevInst = gpumgrGetSubDeviceInstanceFromGpu(pGpu);
    if (bCoherentCpuMapping)
    {
        // Use a temp pointer to prevent overwriting the previous pointer by accident
        NvP64 tempCpuPtr = NvP64_NULL;
        MEMORY_DESCRIPTOR *pMemDesc = memdescGetMemDescFromGpu(pDmaMappingInfo->pMemDesc, pGpu);
        KernelBus *pKernelBus = GPU_GET_KERNEL_BUS(pGpu);

        NV_PRINTF(LEVEL_INFO,
                  "Allocating coherent link mapping. length=%lld, memDesc->size=%lld\n",
                  size, pDmaMappingInfo->pMemDesc->Size);

        NV_ASSERT(pGpu->getProperty(pGpu, PDB_PROP_GPU_ATS_SUPPORTED));
        NV_ASSERT(pDmaMappingInfo->pMemDesc->_flags & MEMDESC_FLAGS_PHYSICALLY_CONTIGUOUS);

        tempCpuPtr = kbusMapCoherentCpuMapping_HAL(pGpu, pKernelBus, pMemDesc);
        if (tempCpuPtr == NULL)
        {
            status = NV_ERR_GENERIC;
        }
        else
        {
            status = NV_OK;
            tempCpuPtr = NvP64_PLUS_OFFSET(tempCpuPtr, offset);
        }

        pDmaMappingInfo->KernelVAddr[gpuSubDevInst] = NvP64_VALUE(tempCpuPtr);
    }
    else
    {
        //
        // Allocate GPU virtual address space for the video memory region
        // for those GPUs that support it.
        //
        pDmaMappingInfo->FbApertureLen[gpuSubDevInst] = pDmaMappingInfo->pMemDesc->Size;
        if (RMCFG_FEATURE_PLATFORM_GSP)
        {
            status = osMapSystemMemory(pMemoryInfo->pMemDesc,
                                       offset,
                                       pDmaMappingInfo->pMemDesc->Size,
                                       NV_TRUE /*Kernel*/,
                                       NV_PROTECT_READ_WRITE,
                                       (NvP64 *)&pDmaMappingInfo->KernelVAddr[gpuSubDevInst],
                                       (NvP64 *)&pDmaMappingInfo->KernelPriv);

            if (status != NV_OK)
            {
                pDmaMappingInfo->FbApertureLen[gpuSubDevInst] = 0;
                pDmaMappingInfo->FbAperture[gpuSubDevInst] = 0;
                pDmaMappingInfo->KernelPriv = 0;
                SLI_LOOP_BREAK;
            }
        }
        else
        {
            KernelBus    *pKernelBus   = GPU_GET_KERNEL_BUS(pGpu);
            Device       *pDevice      = NULL;
            CALL_CONTEXT *pCallContext = resservGetTlsCallContext();
            if ((pCallContext != NULL) && (pCallContext->pClient != NULL))
            {
                RsResourceRef *pDeviceRef = NULL;

                status = refFindAncestorOfType(pCallContext->pResourceRef,
                                               classId(Device), &pDeviceRef);
                if (status == NV_OK)
                {
                    pDevice = dynamicCast(pDeviceRef->pResource, Device);
                }
            }

            status = kbusMapFbAperture_HAL(pGpu, pKernelBus,
                                           pMemoryInfo->pMemDesc, offset,
                                           &pDmaMappingInfo->FbAperture[gpuSubDevInst],
                                           &pDmaMappingInfo->FbApertureLen[gpuSubDevInst],
                                           BUS_MAP_FB_FLAGS_MAP_UNICAST, pDevice);

            if (status != NV_OK)
            {
                pDmaMappingInfo->FbApertureLen[gpuSubDevInst] = 0;
                pDmaMappingInfo->FbAperture[gpuSubDevInst] = 0;
                SLI_LOOP_BREAK;
            }

            bar1PhysAddr = gpumgrGetGpuPhysFbAddr(pGpu) + pDmaMappingInfo->FbAperture[gpuSubDevInst];
            status = osMapPciMemoryKernelOld(pGpu, bar1PhysAddr,
                                             pDmaMappingInfo->pMemDesc->Size,
                                             NV_PROTECT_READ_WRITE,
                                             &pDmaMappingInfo->KernelVAddr[gpuSubDevInst],
                                             NV_MEMORY_WRITECOMBINED);
        }
    }

    if (status != NV_OK)
    {
        SLI_LOOP_BREAK;
    }

    SLI_LOOP_END

    if (status != NV_OK)
    {
        _virtmemFreeKernelMapping(pGpu, pDmaMappingInfo);
    }

    return status;
}

/*!
 * @brief Free CPU mapping
 */
static void
_virtmemFreeKernelMapping
(
    OBJGPU               *pGpu,
    CLI_DMA_MAPPING_INFO *pDmaMappingInfo
)
{
    NvU32  gpuSubDevInst;
    NvBool bCoherentCpuMapping = pGpu->getProperty(pGpu, PDB_PROP_GPU_COHERENT_CPU_MAPPING);

    SLI_LOOP_START(SLI_LOOP_FLAGS_BC_ONLY | SLI_LOOP_FLAGS_IGNORE_REENTRANCY)

    gpuSubDevInst = gpumgrGetSubDeviceInstanceFromGpu(pGpu);

    // Unmap a kernel CPU mapping if one exists
    if (pDmaMappingInfo->KernelVAddr[gpuSubDevInst] != NULL)
    {
        if (bCoherentCpuMapping)
        {
            KernelBus *pKernelBus = GPU_GET_KERNEL_BUS(pGpu);
            MEMORY_DESCRIPTOR *pMemDesc = memdescGetMemDescFromGpu(pDmaMappingInfo->pMemDesc, pGpu);
            kbusUnmapCoherentCpuMapping_HAL(pGpu, pKernelBus, pMemDesc);
        }
        else
        {
            osUnmapPciMemoryKernelOld(pGpu, pDmaMappingInfo->KernelVAddr[gpuSubDevInst]);
        }

        pDmaMappingInfo->KernelVAddr[gpuSubDevInst] = NULL;
    }

    // Unmap the FB aperture mapping if one exists
    if ((pDmaMappingInfo->FbApertureLen[gpuSubDevInst]) && (!bCoherentCpuMapping))
    {
        if (RMCFG_FEATURE_PLATFORM_GSP)
        {
            // This is a no-op in GSP, but document it here as code in case it changes.
            osUnmapSystemMemory(pDmaMappingInfo->pMemDesc,
                                NV_TRUE /*Kernel*/,
                                0 /*ProcessId*/,
                                (NvP64)pDmaMappingInfo->FbAperture[gpuSubDevInst],
                                NV_PTR_TO_NvP64(pDmaMappingInfo->KernelPriv));
        }
        else
        {
            KernelBus *pKernelBus = GPU_GET_KERNEL_BUS(pGpu);
            kbusUnmapFbAperture_HAL(pGpu,
                                    pKernelBus,
                                    pDmaMappingInfo->pMemDesc,
                                    pDmaMappingInfo->FbAperture[gpuSubDevInst],
                                    pDmaMappingInfo->FbApertureLen[gpuSubDevInst],
                                    BUS_MAP_FB_FLAGS_MAP_UNICAST);
        }
        pDmaMappingInfo->FbAperture[gpuSubDevInst] = 0;
        pDmaMappingInfo->FbApertureLen[gpuSubDevInst] = 0;
        pDmaMappingInfo->KernelPriv = 0;
    }

    SLI_LOOP_END
}

/*!
 * @brief Map an object into a VirtualMemory object
 */
NV_STATUS
virtmemMapTo_IMPL
(
    VirtualMemory        *pVirtualMemory,
    RS_RES_MAP_TO_PARAMS *pParams
)
{
    NV_STATUS      status         = NV_ERR_NOT_SUPPORTED;
    Memory        *pMemory        = staticCast(pVirtualMemory, Memory);
    OBJGPU        *pGpu           = pParams->pGpu;
    OBJGPU        *pSrcGpu        = pParams->pSrcGpu;
    RsClient      *pClient        = RES_GET_CLIENT(pVirtualMemory);
    MemoryManager *pMemoryManager = GPU_GET_MEMORY_MANAGER(pGpu);
    RsResourceRef *pMemoryRef     = pParams->pMemoryRef;
    NvHandle       hClient          = pClient->hClient;
    NvHandle       hBroadcastDevice = pParams->hBroadcastDevice;
    NvHandle       hVirtualMem      = RES_GET_HANDLE(pVirtualMemory);
    NvHandle       hMemoryDevice    = pParams->hMemoryDevice;
    NvU32          gpuMask          = pParams->gpuMask;
    NvU64          offset           = pParams->offset;    // offset into pMemoryRef to map
    NvU64          length           = pParams->length;
    NvU32          flags            = pParams->flags;
    NvU32          p2p              = DRF_VAL(OS46, _FLAGS, _P2P_ENABLE, pParams->flags);

    VirtMemAllocator     *pDma            = GPU_GET_DMA(pGpu);
    MEMORY_DESCRIPTOR    *pSrcMemDesc     = pParams->pSrcMemDesc;
    NvU64                *pDmaOffset      = pParams->pDmaOffset;    // return VirtualMemory offset
    CLI_DMA_MAPPING_INFO *pDmaMappingInfo = NULL;
    OBJVASPACE           *pVas            = NULL;
    Memory               *pSrcMemory      = dynamicCast(pMemoryRef->pResource, Memory);

    NvU32  tgtAddressSpace       = ADDR_UNKNOWN;
    NvBool bDmaMappingRegistered = NV_FALSE;
    NvBool bFlaMapping           = pParams->bFlaMapping;
    NvBool bIsIndirectPeer       = NV_FALSE;
    NvBool bEncrypted;
    NvBool bIsSysmem             = NV_FALSE;
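    //
    // PCIe BAR1 P2P routes peer traffic through the source GPU's BAR1 aperture
    // instead of the mailbox P2P path when both GPUs support it; see the BAR1
    // P2P mapping setup below.
    //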
    NvBool bBar1P2P = (p2p && kbusHasPcieBar1P2PMapping_HAL(pGpu,
                                                            GPU_GET_KERNEL_BUS(pGpu),
                                                            pSrcGpu,
                                                            GPU_GET_KERNEL_BUS(pSrcGpu)));
    NvBool bKernelMappingRequired = FLD_TEST_DRF(OS46, _FLAGS, _KERNEL_MAPPING, _ENABLE, flags);

    //
    // Allow unicast on NV01_MEMORY_VIRTUAL object, but maintain the broadcast
    // requirement for NV50_MEMORY_VIRTUAL.
    //
    if (pParams->bSubdeviceHandleProvided && !pVirtualMemory->bAllowUnicastMapping)
    {
        NV_PRINTF(LEVEL_ERROR, "Unicast mappings into virtual memory object not supported.\n");
        return NV_ERR_NOT_SUPPORTED;
    }

    status = vaspaceGetByHandleOrDeviceDefault(pClient, hBroadcastDevice, pVirtualMemory->hVASpace, &pVas);
    if (status != NV_OK)
        return status;

    //
    // Use the encryption setting of the virtual allocation.
    // This makes sense, since the same physical mem descriptor could have
    // more than one mapping, each with different encryption settings.
    //
    bEncrypted = memdescGetFlag(pMemory->pMemDesc, MEMDESC_FLAGS_ENCRYPTED);

    // Validate the offset and limit passed in.
    if (offset + length > pSrcMemDesc->Size)
        return NV_ERR_INVALID_BASE;

    status = intermapCreateDmaMapping(pClient, pVirtualMemory, &pDmaMappingInfo, flags);
    if (status != NV_OK)
        return status;

    if (bBar1P2P)
    {
        DMA_BAR1P2P_MAPPING_PRARAMS params = {0};

        params.pVas            = pVas;
        params.pPeerGpu        = pSrcGpu;
        params.pPeerMemDesc    = pSrcMemDesc;
        params.flags           = flags;
        params.offset          = offset;
        params.length          = length;
        params.pDmaMappingInfo = pDmaMappingInfo;

        status = dmaAllocBar1P2PMapping_HAL(pGpu, pDma, &params);
        if (status != NV_OK)
            goto done;

        // Adjust local variables for the BAR1 P2P mappings
        pSrcMemDesc = params.pMemDescOut;
        flags       = params.flagsOut;
        offset      = params.offsetOut;
    }

    //
    // Determine target address space. If we're mapping fbmem from
    // one gpu for use by another, then we need to treat that memory as
    // ADDR_SYSMEM.
    //
    tgtAddressSpace = memdescGetAddressSpace(memdescGetMemDescFromGpu(pSrcMemDesc, pGpu));
    if ((pSrcGpu != pGpu) && (tgtAddressSpace == ADDR_FBMEM))
    {
        tgtAddressSpace = ADDR_SYSMEM;

        if (gpumgrCheckIndirectPeer(pGpu, pSrcGpu))
            bIsIndirectPeer = NV_TRUE;
    }

    // Different cases for vidmem & system memory/fabric memory.
    bIsSysmem = (tgtAddressSpace == ADDR_SYSMEM);
    bIsSysmem = bIsSysmem || (tgtAddressSpace == ADDR_EGM);

    //
    // Create a MEMORY_DESCRIPTOR describing this region of the memory
    // alloc in question
    //
    status = memdescCreateSubMem(&pDmaMappingInfo->pMemDesc, pSrcMemDesc, pGpu, offset, length);
    if (status != NV_OK)
        goto done;

    SLI_LOOP_START(SLI_LOOP_FLAGS_BC_ONLY | SLI_LOOP_FLAGS_IGNORE_REENTRANCY)
    memdescSetFlag(memdescGetMemDescFromGpu(pDmaMappingInfo->pMemDesc, pGpu),
                   MEMDESC_FLAGS_ENCRYPTED,
                   bEncrypted);
    SLI_LOOP_END

    if (FLD_TEST_DRF(OS46, _FLAGS, _PAGE_KIND, _VIRTUAL, flags))
    {
        NvU32 kind = memdescGetPteKind(pMemory->pMemDesc);

        NV_ASSERT(memdescGetFlag(pMemory->pMemDesc, MEMDESC_FLAGS_SET_KIND));

        SLI_LOOP_START(SLI_LOOP_FLAGS_BC_ONLY | SLI_LOOP_FLAGS_IGNORE_REENTRANCY);
        if (tgtAddressSpace == ADDR_SYSMEM && !memmgrComprSupported(pMemoryManager, ADDR_SYSMEM))
        {
            //
            // If the virtual kind is compressible but is being mapped into
            // system memory that does not support compression, fall back to
            // the uncompressed kind.
            //
            kind = memmgrGetUncompressedKind_HAL(pGpu, pMemoryManager, kind, 0);
        }
        memdescSetPteKind(memdescGetMemDescFromGpu(pDmaMappingInfo->pMemDesc, pGpu), kind);
        SLI_LOOP_END;
    }

    if (bIsSysmem ||
        (tgtAddressSpace == ADDR_FABRIC_MC) ||
        (tgtAddressSpace == ADDR_FABRIC_V2))
    {
        // if GPUs are indirect peers, create TCE mappings
        if (bIsIndirectPeer)
        {
            //
            // TODO: Ideally memdescMapIommu should be called on the FB memdesc with
            // pSrcGpu. That would clearly convey that the memory is owned by pSrcGpu
            // and we are trying to create IOMMU mappings for pGpu. This effort is
            // being tracked in bug 2043603.
            //
            status = memdescMapIommu(pDmaMappingInfo->pMemDesc, pGpu->busInfo.iovaspaceId);
            if (status != NV_OK)
            {
                NV_PRINTF(LEVEL_ERROR, "DMA map pages failed for requested GPU!\n");
                goto done;
            }
        }

        // Monolithic CPU RM or SPLIT_VAS_MGMT
        if (!pMemory->bRpcAlloc || gpuIsSplitVasManagementServerClientRmEnabled(pGpu))
        {
            pDmaMappingInfo->DmaOffset = *pDmaOffset;    // in case this is 'in'

            // allocate mapping in VirtualMemory object
            status = dmaAllocMap(pGpu, pDma, pVas, pVirtualMemory, pSrcMemory, pDmaMappingInfo);
            if (status != NV_OK)
                goto done;

            status = intermapRegisterDmaMapping(pClient, pVirtualMemory, pDmaMappingInfo, pDmaMappingInfo->DmaOffset, gpuMask);
            if (status != NV_OK)
            {
                dmaFreeMap(pGpu, pDma, pVas,
                           pVirtualMemory, pDmaMappingInfo,
                           DRF_DEF(OS47, _FLAGS, _DEFER_TLB_INVALIDATION, _FALSE));
                goto done;
            }

            bDmaMappingRegistered = NV_TRUE;

            // If a kernel mapping has been requested, create one
            if (bKernelMappingRequired)
            {
                status = memdescMapOld(pDmaMappingInfo->pMemDesc,
                                       0,
                                       pDmaMappingInfo->pMemDesc->Size,
                                       NV_TRUE, NV_PROTECT_READ_WRITE,
                                       &pDmaMappingInfo->KernelVAddr[gpumgrGetSubDeviceInstanceFromGpu(gpumgrGetParentGPU(pGpu))],
                                       &pDmaMappingInfo->KernelPriv);

                if (status != NV_OK)
                    goto done;
            }

            *pDmaOffset = pDmaMappingInfo->DmaOffset;
        } // Monolithic CPU RM or SPLIT_VAS_MGMT
    }
    else if (tgtAddressSpace == ADDR_FBMEM)
    {
        pDmaMappingInfo->DmaOffset = *pDmaOffset;    // in case this is 'in'

        // Monolithic CPU RM or SPLIT_VAS_MGMT
        if (!pMemory->bRpcAlloc || gpuIsSplitVasManagementServerClientRmEnabled(pGpu))
        {
            // allocate mapping in VirtualMemory object
            status = dmaAllocMap(pGpu, pDma, pVas, pVirtualMemory, pSrcMemory, pDmaMappingInfo);
            if (status != NV_OK)
                goto done;

            *pDmaOffset = pDmaMappingInfo->DmaOffset;

            status = intermapRegisterDmaMapping(pClient, pVirtualMemory, pDmaMappingInfo, pDmaMappingInfo->DmaOffset, gpuMask);
            if (status != NV_OK)
            {
                dmaFreeMap(pGpu, pDma, pVas,
                           pVirtualMemory, pDmaMappingInfo,
                           DRF_DEF(OS47, _FLAGS, _DEFER_TLB_INVALIDATION, _FALSE));
                goto done;
            }

            bDmaMappingRegistered = NV_TRUE;

            if (bKernelMappingRequired)
            {
                status = _virtmemAllocKernelMapping(pGpu, pVas, pDmaMappingInfo, offset, length, pSrcMemory);
                if (status != NV_OK)
                    goto done;
            }

            *pDmaOffset = pDmaMappingInfo->DmaOffset;
        } // Monolithic CPU RM or SPLIT_VAS_MGMT
    }
    else
    {
        // unknown (or mixed vidmem+sysmem?) mem case
        status = NV_ERR_INVALID_OBJECT_HANDLE;
        goto done;
    }

    if (RMCFG_CLASS_NV50_P2P &&
        !bFlaMapping &&
        (bBar1P2P || DRF_VAL(OS46, _FLAGS, _P2P_ENABLE, pDmaMappingInfo->Flags) == NVOS46_FLAGS_P2P_ENABLE_NOSLI))
    {
        //
        // if we are on SLI and trying to map peer memory between two GPUs
        // on the same device, we don't rely on dynamic p2p mailbox setup.
        // SLI uses static p2p mailbox and hence will not have any
        // P2P object associated with it
        //
        if ((hBroadcastDevice == hMemoryDevice) && IsSLIEnabled(pGpu))
        {
            goto vgpu_send_rpc;
        }

        pDmaMappingInfo->bP2P = NV_TRUE;
    }

vgpu_send_rpc:

    if (pMemory->bRpcAlloc)
    {
        NV_RM_RPC_MAP_MEMORY_DMA(pGpu, hClient, hBroadcastDevice, hVirtualMem, pMemoryRef->hResource,
                                 offset, length, flags, pDmaOffset, status);
        if (status != NV_OK)
            goto done;

        if ((IS_VIRTUAL(pGpu) || IS_GSP_CLIENT(pGpu)) &&
            !gpuIsSplitVasManagementServerClientRmEnabled(pGpu))
        {
            //
            // vGPU doesn't understand subdevice handles. But clients map memory
            // with subdevice handles and we don't want that to fail on vGPU.
            // Currently, we just pass down the broadcast device handle to the host
            // (which should be equivalent if SLI is disabled). This will need to
            // be revisited if vGPU ever supports SLI.
            //
            NV_ASSERT(!IsSLIEnabled(pGpu));

            pDmaMappingInfo->DmaOffset = *pDmaOffset;

            status = intermapRegisterDmaMapping(pClient, pVirtualMemory, pDmaMappingInfo,
                                                pDmaMappingInfo->DmaOffset, gpuMask);
            if (status != NV_OK)
                goto done;

            bDmaMappingRegistered = NV_TRUE;

            if (tgtAddressSpace == ADDR_SYSMEM)
            {
                // If a kernel mapping has been requested, create one
                if (bKernelMappingRequired)
                {
                    status = memdescMapOld(pDmaMappingInfo->pMemDesc,
                                           0,
                                           pDmaMappingInfo->pMemDesc->Size,
                                           NV_TRUE, NV_PROTECT_READ_WRITE,
                                           &pDmaMappingInfo->KernelVAddr[gpumgrGetSubDeviceInstanceFromGpu(gpumgrGetParentGPU(pGpu))],
                                           &pDmaMappingInfo->KernelPriv);
                    if (status != NV_OK)
                        goto done;
                }
            }
        }
    }

done:
    if (status != NV_OK)
    {
        if (pDmaMappingInfo != NULL)
        {
            if ((pDmaMappingInfo->pMemDesc != NULL) && bKernelMappingRequired)
            {
                //
                // If a kernel cookie exists and the mapping is in sysmem, free the
                // sysmem mapping; for ADDR_FBMEM the helper determines itself
                // whether it created the mapping.
                //
1520 //
1521 if ((pDmaMappingInfo->KernelPriv != NULL) &&
1522 (memdescGetAddressSpace(pDmaMappingInfo->pMemDesc) == ADDR_SYSMEM))
1523 {
1524 memdescUnmapOld(pDmaMappingInfo->pMemDesc, NV_TRUE, 0,
1525 pDmaMappingInfo->KernelVAddr[gpumgrGetSubDeviceInstanceFromGpu(gpumgrGetParentGPU(pGpu))],
1526 pDmaMappingInfo->KernelPriv);
1527 pDmaMappingInfo->KernelPriv = NULL;
1528 }
1529 else if (memdescGetAddressSpace(pDmaMappingInfo->pMemDesc) == ADDR_FBMEM)
1530 {
1531 _virtmemFreeKernelMapping(pGpu, pDmaMappingInfo);
1532 }
1533 }
1534
1535 if (pDmaMappingInfo->pMemDesc != NULL && bIsIndirectPeer)
1536 {
1537 memdescUnmapIommu(pDmaMappingInfo->pMemDesc, pGpu->busInfo.iovaspaceId);
1538 }
1539
1540 dmaFreeBar1P2PMapping_HAL(pDma, pDmaMappingInfo);
1541
1542 memdescDestroy(pDmaMappingInfo->pMemDesc);
1543 pDmaMappingInfo->pMemDesc = NULL;
1544
1545 if (bDmaMappingRegistered)
1546 {
1547 NV_ASSERT_OK(intermapDelDmaMapping(pClient, pVirtualMemory, *pDmaOffset, gpuMask));
1548 }
1549 else
1550 {
1551 // Explicitly free the DMA mapping if mapping was not yet registered
1552 intermapFreeDmaMapping(pDmaMappingInfo);
1553 }
1554 }
1555 }
1556
1557 return status;
1558 }
1559
1560 /*!
1561 * @brief Unmap object from VirtualMemory object
1562 */
1563 NV_STATUS
virtmemUnmapFrom_IMPL
(
    VirtualMemory            *pVirtualMemory,
    RS_RES_UNMAP_FROM_PARAMS *pParams
)
{
    OBJGPU     *pGpu             = pParams->pGpu;
    Memory     *pMemory          = staticCast(pVirtualMemory, Memory);
    RsClient   *pClient          = RES_GET_CLIENT(pVirtualMemory);
    NvHandle    hClient          = pClient->hClient;
    NvHandle    hMemory          = pParams->hMemory;
    NvHandle    hVirtualMem      = RES_GET_HANDLE(pVirtualMemory);
    NvHandle    hBroadcastDevice = pParams->hBroadcastDevice;
    NvU32       gpuMask          = pParams->gpuMask;
    NvU64       dmaOffset        = pParams->dmaOffset;
    OBJVASPACE *pVas             = NULL;
    NV_STATUS   status           = NV_OK;
    NvBool      bIsIndirectPeer  = NV_FALSE;
    CLI_DMA_MAPPING_INFO *pDmaMappingInfoLeft  = NULL;
    NvBool      bDmaMappingInfoLeftRegistered  = NV_FALSE;
    CLI_DMA_MAPPING_INFO *pDmaMappingInfoRight = NULL;
    NvBool      bDmaMappingInfoRightRegistered = NV_FALSE;
    CLI_DMA_MAPPING_INFO *pDmaMappingInfoUnmap = NULL;

    CLI_DMA_MAPPING_INFO *pDmaMappingInfo = NULL;

    if (hMemory != NV01_NULL_OBJECT)
    {
        RsResourceRef *pSrcMemoryRef;
        Memory        *pMemorySrc;

        if (clientGetResourceRef(pClient, hMemory, &pSrcMemoryRef) != NV_OK)
            return NV_ERR_OBJECT_NOT_FOUND;

        status = rmresCheckMemInterUnmap(dynamicCast(pSrcMemoryRef->pResource, RmResource), pParams->bSubdeviceHandleProvided);

        // Exit if failed or invalid class, otherwise continue on to next part
        if (status != NV_OK)
            return status;

        pMemorySrc = dynamicCast(pSrcMemoryRef->pResource, Memory);
        if (pMemorySrc != NULL)
        {
            if (gpumgrCheckIndirectPeer(pMemorySrc->pGpu, pGpu))
                bIsIndirectPeer = NV_TRUE;
        }
    }

    if (pParams->bSubdeviceHandleProvided && !pVirtualMemory->bAllowUnicastMapping)
    {
        NV_PRINTF(LEVEL_ERROR, "Unicast DMA mappings into virtual memory object not supported.\n");
        return NV_ERR_NOT_SUPPORTED;
    }

    status = vaspaceGetByHandleOrDeviceDefault(pClient, hBroadcastDevice, pVirtualMemory->hVASpace, &pVas);
    if (status != NV_OK)
        return status;

    // Get DMA mapping info.
    pDmaMappingInfo = intermapGetDmaMapping(pVirtualMemory, dmaOffset, gpuMask);
    NV_ASSERT_OR_RETURN(pDmaMappingInfo != NULL, NV_ERR_INVALID_OBJECT_HANDLE);
    NvBool bPartialUnmap = dmaOffset != pDmaMappingInfo->DmaOffset || pParams->size != pDmaMappingInfo->pMemDesc->Size;
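    //
    // Partial unmaps are supported only for unicast mappings and never for
    // indirect peers: (gpuMask & (gpuMask - 1)) == 0 checks that at most one
    // GPU bit is set in the mask.
    //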
    NV_ASSERT_OR_RETURN(!bPartialUnmap || (gpuMask & (gpuMask - 1)) == 0, NV_ERR_INVALID_ARGUMENT);
    NV_ASSERT_OR_RETURN(!bPartialUnmap || !bIsIndirectPeer, NV_ERR_INVALID_ARGUMENT);

    if (FLD_TEST_DRF(OS46, _FLAGS, _KERNEL_MAPPING, _ENABLE, pDmaMappingInfo->Flags))
    {
        NV_ASSERT_OR_RETURN(!bPartialUnmap, NV_ERR_INVALID_ARGUMENT);

        //
        // If a kernel cookie exists and the mapping is in sysmem, free the
        // sysmem mapping; for ADDR_FBMEM the helper determines itself whether
        // it created the mapping.
        //
        if ((pDmaMappingInfo->KernelPriv != NULL) &&
            (memdescGetAddressSpace(pDmaMappingInfo->pMemDesc) == ADDR_SYSMEM))
        {
            memdescUnmapOld(pDmaMappingInfo->pMemDesc, NV_TRUE, 0,
                            pDmaMappingInfo->KernelVAddr[gpumgrGetSubDeviceInstanceFromGpu(gpumgrGetParentGPU(pGpu))],
                            pDmaMappingInfo->KernelPriv);
            pDmaMappingInfo->KernelPriv = NULL;
        }
        else if (memdescGetAddressSpace(memdescGetMemDescFromGpu(pDmaMappingInfo->pMemDesc, pGpu)) == ADDR_FBMEM)
        {
            _virtmemFreeKernelMapping(pGpu, pDmaMappingInfo);
        }
    }

    // if this was peer mapped context dma, remove it from P2P object
    if (RMCFG_CLASS_NV50_P2P && pDmaMappingInfo->bP2P)
    {
        NV_ASSERT_OR_RETURN(!bPartialUnmap, NV_ERR_INVALID_ARGUMENT);
        dmaFreeBar1P2PMapping_HAL(GPU_GET_DMA(pGpu), pDmaMappingInfo);
    }

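    //
    // A partial unmap can leave live pieces of the original mapping on either
    // side of the unmapped range. Carve the original mapping into up to three
    // sub-mappings: a left remainder, the middle region actually being
    // unmapped, and a right remainder. The remainders are re-registered below.
    //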
    if (dmaOffset > pDmaMappingInfo->DmaOffset)
    {
        NV_ASSERT_OK_OR_GOTO(status,
            intermapCreateDmaMapping(pClient, pVirtualMemory, &pDmaMappingInfoLeft, pDmaMappingInfo->Flags),
            failed);

        pDmaMappingInfoLeft->DmaOffset          = pDmaMappingInfo->DmaOffset;
        pDmaMappingInfoLeft->bP2P               = pDmaMappingInfo->bP2P;
        pDmaMappingInfoLeft->addressTranslation = pDmaMappingInfo->addressTranslation;
        pDmaMappingInfoLeft->mapPageSize        = pDmaMappingInfo->mapPageSize;

        NV_ASSERT_OK_OR_GOTO(status,
            memdescCreateSubMem(&pDmaMappingInfoLeft->pMemDesc, pDmaMappingInfo->pMemDesc, pGpu,
                                pDmaMappingInfoLeft->DmaOffset - pDmaMappingInfo->DmaOffset,
                                dmaOffset - pDmaMappingInfoLeft->DmaOffset),
            failed);
    }

    if (dmaOffset + pParams->size < pDmaMappingInfo->DmaOffset + pDmaMappingInfo->pMemDesc->Size)
    {
        NV_ASSERT_OK_OR_GOTO(status,
            intermapCreateDmaMapping(pClient, pVirtualMemory, &pDmaMappingInfoRight, pDmaMappingInfo->Flags),
            failed);

        pDmaMappingInfoRight->DmaOffset          = dmaOffset + pParams->size;
        pDmaMappingInfoRight->bP2P               = pDmaMappingInfo->bP2P;
        pDmaMappingInfoRight->addressTranslation = pDmaMappingInfo->addressTranslation;
        pDmaMappingInfoRight->mapPageSize        = pDmaMappingInfo->mapPageSize;

        NV_ASSERT_OK_OR_GOTO(status,
            memdescCreateSubMem(&pDmaMappingInfoRight->pMemDesc, pDmaMappingInfo->pMemDesc, pGpu,
                                pDmaMappingInfoRight->DmaOffset - pDmaMappingInfo->DmaOffset,
                                pDmaMappingInfo->DmaOffset + pDmaMappingInfo->pMemDesc->Size - pDmaMappingInfoRight->DmaOffset),
            failed);
    }

    pDmaMappingInfoUnmap = pDmaMappingInfo;
    if (pDmaMappingInfoLeft != NULL || pDmaMappingInfoRight != NULL)
    {
        NV_ASSERT_OK_OR_GOTO(status,
            intermapCreateDmaMapping(pClient, pVirtualMemory, &pDmaMappingInfoUnmap, pDmaMappingInfo->Flags),
            failed);

        pDmaMappingInfoUnmap->DmaOffset          = dmaOffset;
        pDmaMappingInfoUnmap->bP2P               = pDmaMappingInfo->bP2P;
        pDmaMappingInfoUnmap->addressTranslation = pDmaMappingInfo->addressTranslation;
        pDmaMappingInfoUnmap->mapPageSize        = pDmaMappingInfo->mapPageSize;
        pDmaMappingInfoUnmap->gpuMask            = pDmaMappingInfo->gpuMask;

        NV_ASSERT_OK_OR_GOTO(status,
            memdescCreateSubMem(&pDmaMappingInfoUnmap->pMemDesc, pDmaMappingInfo->pMemDesc, pGpu,
                                pDmaMappingInfoUnmap->DmaOffset - pDmaMappingInfo->DmaOffset,
                                pParams->size),
            failed);
    }

    if (!pMemory->bRpcAlloc || gpuIsSplitVasManagementServerClientRmEnabled(pGpu))
    {
        // free mapping in context dma
        dmaFreeMap(pGpu, GPU_GET_DMA(pGpu), pVas, pVirtualMemory, pDmaMappingInfoUnmap, pParams->flags);

        if ((memdescGetAddressSpace(memdescGetMemDescFromGpu(pDmaMappingInfo->pMemDesc, pGpu)) == ADDR_FBMEM) &&
            bIsIndirectPeer)
        {
            memdescUnmapIommu(pDmaMappingInfo->pMemDesc, pGpu->busInfo.iovaspaceId);
        }
    }

    // free memory descriptor
    memdescFree(pDmaMappingInfo->pMemDesc);
    memdescDestroy(pDmaMappingInfo->pMemDesc);
    pDmaMappingInfo->pMemDesc = NULL;

    // delete client dma mapping
    intermapDelDmaMapping(pClient, pVirtualMemory, pDmaMappingInfo->DmaOffset, gpuMask);

    if (pDmaMappingInfoLeft != NULL)
    {
        NV_ASSERT_OK_OR_GOTO(status,
            intermapRegisterDmaMapping(pClient, pVirtualMemory, pDmaMappingInfoLeft,
                                       pDmaMappingInfoLeft->DmaOffset, gpuMask),
            failed);
        bDmaMappingInfoLeftRegistered = NV_TRUE;
    }

    if (pDmaMappingInfoRight != NULL)
    {
        NV_ASSERT_OK_OR_GOTO(status,
            intermapRegisterDmaMapping(pClient, pVirtualMemory, pDmaMappingInfoRight,
                                       pDmaMappingInfoRight->DmaOffset, gpuMask),
            failed);
        bDmaMappingInfoRightRegistered = NV_TRUE;
    }

failed:
    if (pDmaMappingInfoUnmap != NULL && pDmaMappingInfoUnmap != pDmaMappingInfo)
    {
        memdescFree(pDmaMappingInfoUnmap->pMemDesc);
        memdescDestroy(pDmaMappingInfoUnmap->pMemDesc);
        intermapFreeDmaMapping(pDmaMappingInfoUnmap);
    }

    if (status != NV_OK)
    {
        if (pDmaMappingInfoLeft != NULL)
        {
            if (bDmaMappingInfoLeftRegistered)
                intermapDelDmaMapping(pClient, pVirtualMemory, pDmaMappingInfoLeft->DmaOffset, gpuMask);
            else
                intermapFreeDmaMapping(pDmaMappingInfoLeft);
        }

        if (pDmaMappingInfoRight != NULL)
        {
            if (bDmaMappingInfoRightRegistered)
                intermapDelDmaMapping(pClient, pVirtualMemory, pDmaMappingInfoRight->DmaOffset, gpuMask);
            else
                intermapFreeDmaMapping(pDmaMappingInfoRight);
        }
    }

    //
    // vGPU:
    //
    // Since vGPU does all real hardware management in the
    // host, if we are in guest OS (where IS_VIRTUAL(pGpu) is true),
    // do an RPC to the host to do the hardware update.
    //
    // vGPU doesn't understand subdevice handles. But clients map memory
    // with subdevice handles and we don't want that to fail on vGPU.
    // Currently, we just pass down the broadcast device handle to the host
    // (which should be equivalent if SLI is disabled). This will need to
    // be revisited if vGPU ever supports SLI.
    //
    NV_ASSERT((!IS_VIRTUAL(pGpu) && !IS_GSP_CLIENT(pGpu)) || !IsSLIEnabled(pGpu));

    if (pMemory->bRpcAlloc &&
        (NV01_NULL_OBJECT != hMemory) &&
        (resGetRefCount(staticCast(pVirtualMemory, RsResource)) || (hVirtualMem == hMemory)))
    {
        //
        // resGetRefCount(pMemCtx->pResource) is zero when we get here from a call chain of
        // RmFree -> clientFreeResourceTree_IMPL -> clientFreeResource_IMPL -> __nvoc_objDelete.
        //
        // memDestruct_IMPL -> CliDelDeviceMemory (i.e. hVirtualMem == hMemory) -> RmUnmapMemoryDma
        // is a valid call chain, since we call RPC_FREE later in memDestruct_IMPL.
        //
        // ifbDestruct_IMPL -> RmUnmapMemoryDma should not RPC_UNMAP_MEMORY_DMA, since RPC_FREE
        // was already invoked earlier in the call stack.
        //
        NV_RM_RPC_UNMAP_MEMORY_DMA(pGpu, hClient, hBroadcastDevice, hVirtualMem, hMemory, 0, dmaOffset, status);
    }

    return status;
}

/*!
 * @brief return address and size of a VirtualMemory object
 */
void virtmemGetAddressAndSize_IMPL
(
    VirtualMemory *pVirtualMemory,
    NvU64         *pVAddr,
    NvU64         *pSize
)
{
    MEMORY_DESCRIPTOR *pMemDesc = staticCast(pVirtualMemory, Memory)->pMemDesc;

    *pVAddr = memdescGetPhysAddr(pMemDesc, AT_GPU_VA, 0);
    *pSize  = memdescGetSize(pMemDesc);
}