1 /*
2  * SPDX-FileCopyrightText: Copyright (c) 2020-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
3  * SPDX-License-Identifier: MIT
4  *
5  * Permission is hereby granted, free of charge, to any person obtaining a
6  * copy of this software and associated documentation files (the "Software"),
7  * to deal in the Software without restriction, including without limitation
8  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
9  * and/or sell copies of the Software, and to permit persons to whom the
10  * Software is furnished to do so, subject to the following conditions:
11  *
12  * The above copyright notice and this permission notice shall be included in
13  * all copies or substantial portions of the Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
21  * DEALINGS IN THE SOFTWARE.
22  */
23 
24 #include "mem_mgr/virtual_mem.h"
25 #include "mem_mgr/vaspace.h"
26 #include "gpu/mem_mgr/virt_mem_allocator.h"
27 #include "virtualization/hypervisor/hypervisor.h"
28 #include "vgpu/rpc.h"
29 #include "gpu/mem_mgr/mem_desc.h"
30 #include "mem_mgr/mem.h"
31 #include "gpu/mem_mgr/mem_mgr.h"
32 #include "core/locks.h"
33 #include "kernel/gpu/rc/kernel_rc.h"
34 #include "gpu/device/device.h"
35 #include "Nvcm.h"
36 #include "gpu/mem_mgr/vaspace_api.h"
37 #include "gpu/mem_mgr/mem_utils.h"
38 #include "gpu/bus/kern_bus.h"
39 #include "gpu/bus/p2p_api.h"
40 #include "mem_mgr/gpu_vaspace.h"
41 #include "platform/sli/sli.h"
42 
43 #include "class/cl0070.h" // NV01_MEMORY_VIRTUAL
44 #include "class/cl50a0.h" // NV50_MEMORY_VIRTUAL
45 
46 static void _virtmemFreeKernelMapping(OBJGPU *, CLI_DMA_MAPPING_INFO *);
47 
48 /*!
49  * _virtmemQueryVirtAllocParams
50  *
51  * @brief
52  *     Queries the actual size of the VA allocation, the alignment,
53  *     and the mask of page sizes (needed for page table allocation)
54  *
55  * @param[in]  pGpu                OBJGPU pointer
56  * @param[in]  hClient             Client handle
57  * @param[in]  hDevice             Device handle
58  * @param[in]  pAllocData          Pointer to NV_MEMORY_ALLOCATION_PARAMS
59  * @param[out] pAlign              Alignment
60  * @param[out] pSize               Size of allocation
61  * @param[out] ppVAS               Virtual address space for request
62  * @param[out] pPageSizeLockMask   Mask of page sizes locked during VA reservation
63  *
64  * @returns
65  *      NV_OK
66  */
67 static NV_STATUS
68 _virtmemQueryVirtAllocParams
69 (
70     OBJGPU                     *pGpu,
71     NvHandle                    hClient,
72     NvHandle                    hDevice,
73     NV_MEMORY_ALLOCATION_PARAMS *pAllocData,
74     NvU64                       *pAlign,
75     NvU64                       *pSize,
76     OBJVASPACE                 **ppVAS,
77     NvU64                       *pPageSizeLockMask
78 )
79 {
80     MemoryManager          *pMemoryManager     = GPU_GET_MEMORY_MANAGER(pGpu);
81     RsClient               *pClient;
82     FB_ALLOC_INFO          *pFbAllocInfo       = NULL;
83     FB_ALLOC_PAGE_FORMAT   *pFbAllocPageFormat = NULL;
84     NV_STATUS               status             = NV_OK;
85     NvBool                  bReleaseGpuLock    = NV_FALSE;
86 
87     pFbAllocInfo = portMemAllocNonPaged(sizeof(FB_ALLOC_INFO));
88     if (pFbAllocInfo == NULL)
89     {
90         NV_ASSERT(0);
91         status = NV_ERR_NO_MEMORY;
92         goto done;
93     }
94 
95     pFbAllocPageFormat = portMemAllocNonPaged(sizeof(FB_ALLOC_PAGE_FORMAT));
96     if (pFbAllocPageFormat == NULL) {
97         NV_ASSERT(0);
98         status = NV_ERR_NO_MEMORY;
99         goto done;
100     }
101 
102     portMemSet(pFbAllocInfo, 0, sizeof(FB_ALLOC_INFO));
103     portMemSet(pFbAllocPageFormat, 0, sizeof(FB_ALLOC_PAGE_FORMAT));
104     pFbAllocInfo->pageFormat = pFbAllocPageFormat;
105 
106     pFbAllocInfo->pageFormat->attr  = pAllocData->attr;
107     pFbAllocInfo->pageFormat->attr2 = pAllocData->attr2;
108     pFbAllocInfo->pageFormat->flags = pAllocData->flags;
109     *pSize                          = pAllocData->size;
110     *pAlign                         = pAllocData->alignment;
111 
112     // LOCK: acquire device lock
113     if (!rmDeviceGpuLockIsOwner(gpuGetInstance(pGpu)))
114     {
115         NV_ASSERT_OK_OR_GOTO(status, rmDeviceGpuLocksAcquire(pGpu, GPUS_LOCK_FLAGS_NONE,
116                                                              RM_LOCK_MODULES_MEM_PMA), done);
117         bReleaseGpuLock = NV_TRUE;
118     }
119 
120     *pPageSizeLockMask = stdmemQueryPageSize(pMemoryManager, hClient,
121                                              pAllocData);
122     if (*pPageSizeLockMask == 0)
123     {
124         status = NV_ERR_INVALID_STATE;
125         goto done;
126     }
127 
128     NV_ASSERT_OK_OR_GOTO(status,
129         serverGetClientUnderLock(&g_resServ, hClient, &pClient),
130         done);
131 
132     NV_ASSERT_OK_OR_GOTO(status,
133         vaspaceGetByHandleOrDeviceDefault(pClient, hDevice, pAllocData->hVASpace, ppVAS),
134         done);
135 
136     NV_ASSERT_OK_OR_GOTO(status,
137         vaspaceApplyDefaultAlignment(*ppVAS, pFbAllocInfo, pAlign, pSize, pPageSizeLockMask),
138         done);
139 
140 done:
141     if (bReleaseGpuLock)
142     {
143         // UNLOCK: release device lock
144         rmDeviceGpuLocksRelease(pGpu, GPUS_LOCK_FLAGS_NONE, NULL);
145     }
146 
147     portMemFree(pFbAllocPageFormat);
148     portMemFree(pFbAllocInfo);
149 
150     return status;
151 }
152 
153 /*!
154  * @brief Handle copy construction for VirtualMemory object
155  */
156 static NV_STATUS
157 _virtmemCopyConstruct
158 (
159     VirtualMemory *pDstVirtualMemory,
160     CALL_CONTEXT *pCallContext,
161     RS_RES_ALLOC_PARAMS_INTERNAL *pParams
162 )
163 {
164     RsClient      *pDstClient = pCallContext->pClient;
165     RsClient      *pSrcClient = pParams->pSrcClient;
166     RsResourceRef *pSrcRef    = pParams->pSrcRef;
167     VirtualMemory *pSrcVirtualMemory = dynamicCast(pSrcRef->pResource, VirtualMemory);
168     Memory        *pDstMemory = staticCast(pDstVirtualMemory, Memory);
169     Memory        *pSrcMemory = staticCast(pSrcVirtualMemory, Memory);
170     OBJGPU        *pSrcGpu = pSrcMemory->pGpu;
171     OBJVASPACE    *pVASSrc = NULL;
172     NvBool         bIncAllocRefCnt = NV_FALSE;
173 
174     // Special handling for Dup of the FLA VASpace
175     if (pSrcVirtualMemory->bFlaVAS)
176     {
177         Device        *pDstDevice;
178         RsClient      *pFlaClient;
179         RM_API        *pRmApi = rmapiGetInterface(RMAPI_GPU_LOCK_INTERNAL);
180         NvHandle       hImportedVASpace = NV01_NULL_OBJECT;
181         RsResourceRef *pDupedVasRef;
182 
183         NV_CHECK_OK_OR_RETURN(LEVEL_ERROR,
184             serverGetClientUnderLock(&g_resServ, GPU_GET_KERNEL_BUS(pSrcGpu)->flaInfo.hClient, &pFlaClient));
185 
186         NV_CHECK_OK_OR_RETURN(LEVEL_ERROR,
187             vaspaceGetByHandleOrDeviceDefault(pFlaClient,
188                                               RES_GET_HANDLE(pSrcMemory->pDevice),
189                                               GPU_GET_KERNEL_BUS(pSrcGpu)->flaInfo.hFlaVASpace,
190                                               &pVASSrc));
191 
192         //
193         // FLA memory can be duped during the import stage by an importing client that might not be
194         // the same as the exporting client. The importing client might also never bind to the exporting
195         // FLA VASpace on the exporting device. In that case, we might see leaks in the exporting FLA VASpace.
196         // To avoid those scenarios, we dup the FLA VAS to the importing client under the exporting device.
197         // RS-TODO: Bug 3059751 to track the duped VAS as dependant in ResServer
198         //
199         NV_ASSERT_OK_OR_RETURN(deviceGetByGpu(pDstClient, pSrcGpu, NV_TRUE, &pDstDevice));
200 
201         NV_CHECK_OK_OR_RETURN(LEVEL_ERROR,
202             pRmApi->DupObject(pRmApi,
203                               pDstClient->hClient,
204                               RES_GET_HANDLE(pDstDevice),
205                               &hImportedVASpace,
206                               GPU_GET_KERNEL_BUS(pSrcGpu)->flaInfo.hClient,
207                               GPU_GET_KERNEL_BUS(pSrcGpu)->flaInfo.hFlaVASpace,
208                               0));
209 
210         if (clientGetResourceRef(pDstClient, hImportedVASpace, &pDupedVasRef) == NV_OK)
211             refAddDependant(pDupedVasRef, RES_GET_REF(pDstVirtualMemory));
212 
213         pDstVirtualMemory->hVASpace = hImportedVASpace;
214 
215         // Increase refcount if locally managed
216         bIncAllocRefCnt = !pSrcMemory->bRpcAlloc;
217     }
218     else if (pSrcVirtualMemory->hVASpace == NV_MEMORY_VIRTUAL_SYSMEM_DYNAMIC_HVASPACE)
219     {
220         // A legacy sysmem dynamic object does not have a valid hVASpace
221         pDstVirtualMemory->hVASpace = NV_MEMORY_VIRTUAL_SYSMEM_DYNAMIC_HVASPACE;
222 
223         // No VA space refcount to update
224         bIncAllocRefCnt = NV_FALSE;
225     }
226     else
227     {
228         OBJVASPACE *pVASDst = NULL;
229 
230         NV_CHECK_OK_OR_RETURN(LEVEL_SILENT,
231             vaspaceGetByHandleOrDeviceDefault(pSrcClient,
232                                               RES_GET_HANDLE(pSrcMemory->pDevice),
233                                               pSrcVirtualMemory->hVASpace, &pVASSrc));
234         NV_CHECK_OK_OR_RETURN(LEVEL_SILENT,
235             vaspaceGetByHandleOrDeviceDefault(pDstClient,
236                                               RES_GET_HANDLE(pDstMemory->pDevice),
237                                               NV01_NULL_OBJECT, &pVASDst));
238         if (pVASSrc != pVASDst)
239         {
240             return NV_ERR_INVALID_DEVICE;
241         }
242 
243         pDstVirtualMemory->hVASpace = NV01_NULL_OBJECT;
244 
245         // Increase refcount for locally managed NV50_MEMORY_VIRTUAL
246         bIncAllocRefCnt = pSrcVirtualMemory->bReserveVaOnAlloc && !pSrcMemory->bRpcAlloc;
247     }
248 
249     pDstVirtualMemory->bAllowUnicastMapping = pSrcVirtualMemory->bAllowUnicastMapping;
250     pDstVirtualMemory->bReserveVaOnAlloc = pSrcVirtualMemory->bReserveVaOnAlloc;
251     pDstVirtualMemory->bFlaVAS = pSrcVirtualMemory->bFlaVAS;
252 
253     // Mappings do not follow virtual memory object
254     pDstVirtualMemory->pDmaMappingList = NULL;
255 
256     if (bIncAllocRefCnt)
257     {
258         NvU64 vaddr;
259         NvU64 size;
260 
261         virtmemGetAddressAndSize(pSrcVirtualMemory, &vaddr, &size);
262         NV_CHECK_OK_OR_RETURN(LEVEL_ERROR,
263             vaspaceIncAllocRefCnt(pVASSrc, vaddr));
264     }
265 
266     return NV_OK;
267 }
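
/*
 * Illustrative sketch (not part of the driver): the copy constructor above runs
 * when a client dups an existing virtual memory object. The handle names below
 * are hypothetical; the call shape mirrors the FLA VAS dup performed in
 * _virtmemCopyConstruct itself.
 *
 *     RM_API   *pRmApi         = rmapiGetInterface(RMAPI_GPU_LOCK_INTERNAL);
 *     NvHandle  hDupedVirtMem  = NV01_NULL_OBJECT;
 *
 *     NV_STATUS status = pRmApi->DupObject(pRmApi,
 *                                          hDstClient,        // importing client
 *                                          hDstDevice,        // parent under the importing client
 *                                          &hDupedVirtMem,    // receives the duped handle
 *                                          hSrcClient,        // exporting client
 *                                          hSrcVirtualMemory, // source VirtualMemory handle
 *                                          0);
 */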
268 
269 /*!
270  * virtmemConstruct
271  *
272  * @brief
273  *     This routine provides common allocation services used by the
274  *     following heap allocation functions:
275  *       NVOS32_FUNCTION_ALLOC_SIZE
276  *       NVOS32_FUNCTION_ALLOC_SIZE_RANGE
277  *       NVOS32_FUNCTION_ALLOC_TILED_PITCH_HEIGHT
278  *
279  * @param[in]  pVirtualMemory    Pointer to VirtualMemory object
280  * @param[in]  pCallContext      Pointer to the current CALL_CONTEXT.
281  * @param[in]  pParams           Pointer to the alloc params
282  *
283  * @return 'NV_OK'
284  *     Operation completed successfully.
285  * @return 'NV_ERR_NO_MEMORY'
286  *     There is not enough available memory to satisfy allocation request.
287  * @return 'NV_ERR_INSUFFICIENT_RESOURCES'
288  *     Not enough available resources to satisfy allocation request.
289  */
290 NV_STATUS
291 virtmemConstruct_IMPL
292 (
293     VirtualMemory                *pVirtualMemory,
294     CALL_CONTEXT                 *pCallContext,
295     RS_RES_ALLOC_PARAMS_INTERNAL *pParams
296 )
297 {
298     Memory                      *pMemory               = staticCast(pVirtualMemory, Memory);
299     NV_MEMORY_ALLOCATION_PARAMS *pAllocData            = pParams->pAllocParams;
300     MEMORY_ALLOCATION_REQUEST    allocRequest          = {0};
301     MEMORY_ALLOCATION_REQUEST   *pAllocRequest         = &allocRequest;
302     OBJGPU                      *pGpu                  = pMemory->pGpu;
303     MemoryManager               *pMemoryManager        = GPU_GET_MEMORY_MANAGER(pGpu);
304     OBJVASPACE                  *pVAS                  = NULL;
305     HWRESOURCE_INFO              hwResource;
306     RsClient                    *pRsClient             = pCallContext->pClient;
307     RsResourceRef               *pResourceRef          = pCallContext->pResourceRef;
308     RsResourceRef               *pVASpaceRef           = NULL;
309     NvU32                        gpuCacheAttrib;
310     NV_STATUS                    status                = NV_OK;
311     NvHandle                     hClient               = pCallContext->pClient->hClient;
312     NvHandle                     hParent               = pCallContext->pResourceRef->pParentRef->hResource;
313     NvU64                        sizeOut;
314     NvU64                        offsetOut;
315     NvBool                       bLockAcquired         = NV_FALSE;
316     NvU32                        attr                  = 0;
317     NvU32                        attr2                 = 0;
318     NvBool                       bRpcAlloc             = NV_FALSE;
319     NvBool                       bResAllocated         = NV_FALSE;
320     NvU32                        gpuMask               = 0;
321     FB_ALLOC_INFO               *pFbAllocInfo          = NULL;
322     FB_ALLOC_PAGE_FORMAT        *pFbAllocPageFormat    = NULL;
323 
324     // Bulk of copy-construction is done by Memory class. Handle our members.
325     if (RS_IS_COPY_CTOR(pParams))
326     {
327         NV_ASSERT_OK_OR_RETURN(rmGpuGroupLockAcquire(pGpu->gpuInstance,
328                                                      GPU_LOCK_GRP_ALL,
329                                                      GPU_LOCK_FLAGS_SAFE_LOCK_UPGRADE,
330                                                      RM_LOCK_MODULES_MEM,
331                                                      &gpuMask));
332 
333         status = _virtmemCopyConstruct(pVirtualMemory, pCallContext, pParams);
334 
335         rmGpuGroupLockRelease(gpuMask, GPUS_LOCK_FLAGS_NONE);
336 
337         goto done;
338     }
339 
340     pVirtualMemory->hVASpace = RM_INVALID_VASPACE_HANDLE;
341     pVirtualMemory->bAllowUnicastMapping = NV_FALSE;
342     pVirtualMemory->bReserveVaOnAlloc = NV_FALSE;
343     pVirtualMemory->bFlaVAS = NV_FALSE;
344     pVirtualMemory->pDmaMappingList = NULL;
345 
346     // NV01_MEMORY_VIRTUAL does not allocate typed memory from the heap
347     if (pParams->externalClassId == NV01_MEMORY_VIRTUAL)
348         return NV_OK;
349 
350     NV_CHECK_OK_OR_RETURN(LEVEL_ERROR, stdmemValidateParams(pGpu, hClient, pAllocData));
351     NV_CHECK_OR_RETURN(LEVEL_ERROR, pAllocData->flags & NVOS32_ALLOC_FLAGS_VIRTUAL, NV_ERR_INVALID_ARGUMENT);
352 
353     stdmemDumpInputAllocParams(pAllocData, pCallContext);
354 
355     attr  = pAllocData->attr;
356     attr2 = pAllocData->attr2;
357 
358     pAllocRequest->classNum = NV50_MEMORY_VIRTUAL;
359     pAllocRequest->pUserParams = pAllocData;
360     pAllocRequest->hMemory = pResourceRef->hResource;
361     pAllocRequest->hClient = hClient;
362     pAllocRequest->hParent = hParent;
363     pAllocRequest->pGpu = pGpu;
364     pAllocRequest->internalflags = NVOS32_ALLOC_INTERNAL_FLAGS_CLIENTALLOC;
365     pAllocRequest->pHwResource = &hwResource;
366 
367     // Unsure if we need to keep separate copies, but keeping old behavior for now.
368     sizeOut = pAllocData->size;
369     offsetOut = pAllocData->offset;
370 
371     //
372     // Reserve memory for page tables in the case of non-lazy page table
373     // allocations.
374     //
375     // PageLevelMemReserve will reserve only if the PDB property for
376     // client managed page tables is set.
377     //
378     if (memmgrIsPmaInitialized(pMemoryManager) &&
379         !(pAllocData->flags & NVOS32_ALLOC_FLAGS_LAZY) &&
380         !(pAllocData->flags & NVOS32_ALLOC_FLAGS_EXTERNALLY_MANAGED))
381     {
382         NvU64 size;
383         NvU64 align;
384         NvU64 pageSizeLockMask;
385         Device *pDevice;
386 
387         NV_ASSERT_OK_OR_GOTO(status,
388             deviceGetByHandle(pRsClient, hParent, &pDevice),
389             done);
390 
391         SLI_LOOP_START(SLI_LOOP_FLAGS_BC_ONLY | SLI_LOOP_FLAGS_IGNORE_REENTRANCY)
392 
393         pAllocRequest->pGpu = pGpu;
394         size             = 0;
395         align            = 0;
396         pageSizeLockMask = 0;
397 
398         status = _virtmemQueryVirtAllocParams(pGpu, hClient, hParent,
399                                               pAllocData, &align, &size,
400                                               &pVAS, &pageSizeLockMask);
401         if (NV_OK != status)
402             SLI_LOOP_GOTO(done);
403 
404         status = vaspaceReserveMempool(pVAS, pGpu, pDevice,
405                                        size, pageSizeLockMask,
406                                        VASPACE_RESERVE_FLAGS_NONE);
407         if (NV_OK != status)
408             SLI_LOOP_GOTO(done);
409 
410         SLI_LOOP_END;
411     }
412 
413     if (RMCFG_FEATURE_RM_BASIC_LOCK_MODEL)
414     {
415         //
416         // Can't move locking up as PMA locks need to be taken first.
417         // Acquire the lock *only after* PMA is done allocating.
418         //
419         if (!rmDeviceGpuLockIsOwner(pGpu->gpuInstance) && !rmGpuLockIsOwner())
420         {
421             NV_ASSERT_OK_OR_GOTO(status,
422                 rmDeviceGpuLocksAcquire(pGpu, GPUS_LOCK_FLAGS_NONE,
423                     RM_LOCK_MODULES_MEM),
424                 done);
425 
426             bLockAcquired = NV_TRUE;
427         }
428     }
429 
430     {
431         //
432         // If using thwap to generate an allocation failure here, fail the
433         // alloc right away.
434         //
435         KernelRc *pKernelRc = GPU_GET_KERNEL_RC(pGpu);
436         if (pKernelRc != NULL &&
437             !krcTestAllowAlloc(pGpu, pKernelRc,
438                                NV_ROBUST_CHANNEL_ALLOCFAIL_HEAP))
439         {
440             status = NV_ERR_INSUFFICIENT_RESOURCES;
441             goto done;
442         }
443     }
444 
445     // Validate virtual address space
446     NV_CHECK_OK_OR_GOTO(status, LEVEL_SILENT,
447         vaspaceGetByHandleOrDeviceDefault(pRsClient, hParent, pAllocData->hVASpace, &pVAS),
448         done);
449 
450     pVirtualMemory->bFlaVAS = !!(vaspaceGetFlags(pVAS) & VASPACE_FLAGS_FLA);
451     pVirtualMemory->bOptimizePageTableMempoolUsage =
452         !!(vaspaceGetFlags(pVAS) & VASPACE_FLAGS_OPTIMIZE_PTETABLE_MEMPOOL_USAGE);
453 
454     if (IS_VIRTUAL(pGpu) || IS_GSP_CLIENT(pGpu))
455     {
456         NvBool bSriovFull = IS_VIRTUAL_WITH_SRIOV(pGpu) &&
457                             !gpuIsWarBug200577889SriovHeavyEnabled(pGpu);
458         NvBool bBar1VAS   = !!(vaspaceGetFlags(pVAS) & VASPACE_FLAGS_BAR_BAR1);
459 
460         //
461         // Skip RPC to the Host RM when local RM is managing page tables.  Special case
462         // for early SR-IOV that only manages BAR1 and FLA page tables in the guest.
463         //
464         bRpcAlloc = !(gpuIsSplitVasManagementServerClientRmEnabled(pGpu) ||
465                         (bSriovFull && (bBar1VAS || pVirtualMemory->bFlaVAS)));
466     }
467 
468     if (bRpcAlloc)
469     {
470         NV_CHECK_OK_OR_GOTO(status, LEVEL_SILENT,
471             memdescCreate(&pAllocRequest->pMemDesc, pGpu,
472                           pAllocRequest->pUserParams->size, 0, NV_TRUE,
473                           ADDR_VIRTUAL,
474                           NV_MEMORY_UNCACHED,
475                           MEMDESC_FLAGS_SKIP_RESOURCE_COMPUTE),
476             done);
477     }
478     else
479     {
480         pFbAllocInfo = portMemAllocNonPaged(sizeof(FB_ALLOC_INFO));
481         NV_ASSERT_TRUE_OR_GOTO(status, pFbAllocInfo != NULL, NV_ERR_NO_MEMORY, done);
482 
483         pFbAllocPageFormat = portMemAllocNonPaged(sizeof(FB_ALLOC_PAGE_FORMAT));
484         NV_ASSERT_TRUE_OR_GOTO(status, pFbAllocPageFormat != NULL, NV_ERR_NO_MEMORY, done);
485 
486         portMemSet(pFbAllocInfo, 0, sizeof(FB_ALLOC_INFO));
487         portMemSet(pFbAllocPageFormat, 0, sizeof(FB_ALLOC_PAGE_FORMAT));
488         pFbAllocInfo->pageFormat = pFbAllocPageFormat;
489 
490         memUtilsInitFBAllocInfo(pAllocRequest->pUserParams, pFbAllocInfo, hClient, hParent);
491 
492         // Call memmgr to get memory.
493         NV_CHECK_OK_OR_GOTO(status, LEVEL_SILENT,
494             memmgrAllocResources(pGpu, pMemoryManager, pAllocRequest, pFbAllocInfo),
495             done);
496 
497         NV_CHECK_OK_OR_GOTO(status, LEVEL_SILENT,
498             virtmemAllocResources(pGpu, pMemoryManager, pAllocRequest, pFbAllocInfo),
499             done);
500 
501         bResAllocated = NV_TRUE;
502     }
503 
504     NV_ASSERT(pAllocRequest->pMemDesc != NULL);
505 
506     // Copy final heap size/offset back to client struct
507     //
508     // What should we return? The system or the device physical address?
509     // Return the device physical address for now.
510     // This may change with the heap refactoring.
511     //
512     // Both the system and the device physical address can be queried using the nv0041CtrlCmdGetSurfacePhysAttr control call
513     offsetOut = memdescGetPhysAddr(pAllocRequest->pMemDesc, AT_GPU, 0);
514     sizeOut   = pAllocRequest->pMemDesc->Size;
515     pAllocData->limit = sizeOut - 1;
516 
517     // To handle < nv50
518     if (DRF_VAL(OS32, _ATTR2, _GPU_CACHEABLE, pAllocData->attr2) ==
519         NVOS32_ATTR2_GPU_CACHEABLE_DEFAULT)
520     {
521         pAllocData->attr2 = FLD_SET_DRF(OS32, _ATTR2, _GPU_CACHEABLE, _NO,
522                                         pAllocData->attr2);
523     }
524 
525     if (DRF_VAL(OS32, _ATTR2, _GPU_CACHEABLE, pAllocData->attr2) ==
526         NVOS32_ATTR2_GPU_CACHEABLE_YES)
527     {
528         gpuCacheAttrib = NV_MEMORY_CACHED;
529     }
530     else
531     {
532         gpuCacheAttrib = NV_MEMORY_UNCACHED;
533     }
534 
535     //
536     // Issue RPC if page tables are managed in the Host/GSP RM. This depends on
537     // the type of object we have and the VGPU/GSP mode. We issue this prior to
538     // memConstructCommon since the RPC fills in pAllocData->offset.
539     //
540     if (bRpcAlloc)
541     {
542         NV_RM_RPC_ALLOC_VIRTMEM(pGpu,
543                                 hClient,
544                                 hParent,
545                                 pAllocData->hVASpace,
546                                 pAllocRequest->hMemory,
547                                 &pAllocData->offset,
548                                 pAllocRequest->pMemDesc->Size,
549                                 attr,
550                                 attr2,
551                                 pAllocData->type,
552                                 pAllocData->flags,
553                                 pAllocData->height,
554                                 pAllocData->width,
555                                 pAllocData->format,
556                                 pAllocData->comprCovg,
557                                 pAllocData->zcullCovg,
558                                 pAllocData->rangeLo,
559                                 pAllocData->rangeHi,
560                                 pAllocData->alignment,
561                                 status);
562         NV_CHECK_OK_OR_GOTO(status, LEVEL_ERROR, status, done);
563 
564         // Update memory descriptor with results of the RPC
565         memdescDescribe(pAllocRequest->pMemDesc,
566                         memdescGetAddressSpace(pAllocRequest->pMemDesc),
567                         pAllocData->offset,
568                         pAllocRequest->pMemDesc->Size);
569 
570         // Assign offset back to caller
571         offsetOut = memdescGetPhysAddr(pAllocRequest->pMemDesc, AT_GPU, 0);
572     }
573 
574     //
575     // The idea is to allocate virtual address space and record it (lo, limit) in this mem
576     // object. Later call MapMemoryDma(hThisMem, hSomePhysMem) to back it.
577     //
578     NV_CHECK_OK_OR_GOTO(status, LEVEL_SILENT,
579         memConstructCommon(pMemory, pAllocRequest->classNum, pAllocData->flags,
580                            pAllocRequest->pMemDesc, pAllocData->owner, NULL, pAllocData->attr,
581                            pAllocData->attr2, 0, pAllocData->type, NVOS32_MEM_TAG_NONE, NULL),
582         done);
583     pMemory->bRpcAlloc = bRpcAlloc;
584 
585     pVirtualMemory->hVASpace = pAllocData->hVASpace;
586     pVirtualMemory->bReserveVaOnAlloc = NV_TRUE;
587 
588     if (pAllocData->hVASpace != NV01_NULL_OBJECT)
589     {
590         NV_CHECK_OK_OR_GOTO(status, LEVEL_SILENT,
591             clientGetResourceRef(pRsClient, pAllocData->hVASpace, &pVASpaceRef),
592             done);
593         if (pVASpaceRef != NULL)
594             refAddDependant(pVASpaceRef, pResourceRef);
595     }
596 
597     NV_ASSERT(pMemory->pMemDesc);
598     NV_ASSERT(memdescGetAddressSpace(pMemory->pMemDesc) == ADDR_VIRTUAL);
599     memdescSetGpuCacheAttrib(pMemory->pMemDesc, gpuCacheAttrib);
600 
601     pAllocData->size = sizeOut;
602     pAllocData->offset = offsetOut;
603 
604     stdmemDumpOutputAllocParams(pAllocData);
605 
606 done:
607     if (status != NV_OK)
608     {
609         if (pAllocRequest->pMemDesc != NULL)
610         {
611             if (pMemory->pMemDesc != NULL)
612             {
613                 memDestructCommon(pMemory);
614                 pMemory->pMemDesc = NULL;
615             }
616 
617             if (bResAllocated)
618             {
619                 memmgrFree(pGpu, pMemoryManager, NULL,
620                            hClient, hParent, pAllocData->hVASpace,
621                            pAllocData->owner,
622                            pAllocRequest->pMemDesc);
623             }
624 
625             if (bRpcAlloc)
626             {
627                 memdescDestroy(pAllocRequest->pMemDesc);
628             }
629         }
630         // Cleanup of vaspaceReserveMempool allocations is managed independently
631     }
632 
633     portMemFree(pFbAllocPageFormat);
634     portMemFree(pFbAllocInfo);
635 
636     if (bLockAcquired)
637     {
638         // UNLOCK: release GPUs lock
639         rmDeviceGpuLocksRelease(pGpu, GPUS_LOCK_FLAGS_NONE, NULL);
640     }
641 
642     return status;
643 }
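
/*
 * Illustrative sketch (not part of the driver): a minimal parameter block a
 * client might hand to the NV50_MEMORY_VIRTUAL allocation path that lands in
 * virtmemConstruct_IMPL above. The values are hypothetical; the only hard
 * requirement enforced above is NVOS32_ALLOC_FLAGS_VIRTUAL. The resulting VA
 * range is backed later by mapping physical memory into this object (see
 * virtmemMapTo_IMPL below).
 *
 *     NV_MEMORY_ALLOCATION_PARAMS allocParams = {0};
 *
 *     allocParams.owner     = 0x54455354;                  // hypothetical owner tag ("TEST")
 *     allocParams.type      = NVOS32_TYPE_IMAGE;
 *     allocParams.flags     = NVOS32_ALLOC_FLAGS_VIRTUAL;  // required for NV50_MEMORY_VIRTUAL
 *     allocParams.size      = 2 * 1024 * 1024;             // 2MB of VA to reserve
 *     allocParams.alignment = 0;                           // let RM choose the alignment
 *     allocParams.hVASpace  = NV01_NULL_OBJECT;            // use the device default VA space
 */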
644 
645 /*!
646  * @brief Handle destruction of VirtualMemory specific fields
647  */
648 void
649 virtmemDestruct_IMPL
650 (
651     VirtualMemory *pVirtualMemory
652 )
653 {
654     Memory             *pMemory         = staticCast(pVirtualMemory, Memory);
655     OBJGPU             *pGpu            = pMemory->pGpu;
656     MemoryManager      *pMemoryManager  = GPU_GET_MEMORY_MANAGER(pGpu);
657     NvHandle            hClient;
658     NvHandle            hParent;
659     NvHandle            hVASpace;
660     MEMORY_DESCRIPTOR  *pMemDesc;
661     NvU32               heapOwner;
662     NV_STATUS           status = NV_OK;
663 
664     // Save needed state from memory object before common destruction
665     hClient = RES_GET_CLIENT_HANDLE(pVirtualMemory);
666     hParent = RES_GET_PARENT_HANDLE(pVirtualMemory);
667     hVASpace = pVirtualMemory->hVASpace;
668     pMemDesc = pMemory->pMemDesc;
669     heapOwner = pMemory->HeapOwner;
670 
671     NV_ASSERT(pMemDesc);
672 
673     memDestructCommon(pMemory);
674 
675     //
676     // NV50_MEMORY_VIRTUAL may have an underlying heap allocation associated with the object
677     // to free, depending on which RM/VGPU context we are in. This is tracked at object
678     // creation time.
679     //
680     // If we RPCed an NV50_MEMORY_VIRTUAL or we have an NV01_MEMORY_VIRTUAL, then just destroy
681     // the memdesc and RPC the free if required.
682     //
683     if (pMemory->bRpcAlloc || pMemory->categoryClassId == NV01_MEMORY_VIRTUAL)
684     {
685         NV_ASSERT(pMemDesc->Allocated == 0);
686         memdescDestroy(pMemDesc);
687     }
688     else
689     {
690         NV_ASSERT(heapOwner != 0);
691 
692         // Get the relevant information from the client memory info and free it
693         status = memmgrFree(pGpu,
694                             pMemoryManager,
695                             NULL,
696                             hClient,
697                             hParent,
698                             hVASpace,
699                             heapOwner,
700                             pMemDesc);
701         if (status != NV_OK)
702         {
703             NV_PRINTF(LEVEL_ERROR,
704                       "VirtualMemory memmgrFree failed, client: %x, hVASpace: %x, gpu: %x\n",
705                       RES_GET_CLIENT_HANDLE(pVirtualMemory),
706                       hVASpace,
707                       pGpu->gpuInstance);
708         }
709     }
710 }
711 
712 NV_STATUS
713 virtmemAllocResources
714 (
715     OBJGPU                      *pGpu,
716     MemoryManager               *pMemoryManager,
717     MEMORY_ALLOCATION_REQUEST   *pAllocRequest,
718     FB_ALLOC_INFO               *pFbAllocInfo
719 )
720 {
721     NV_STATUS                    status          = NV_OK;
722     MEMORY_DESCRIPTOR           *pMemDesc        = NULL;
723     RsClient                    *pRsClient       = NULL;
724     NV_MEMORY_ALLOCATION_PARAMS *pVidHeapAlloc   = pAllocRequest->pUserParams;
725     NvHandle                     hVASpace        = pVidHeapAlloc->hVASpace;
726     NvBool                       bAllocedMemDesc = NV_FALSE;
727     NvBool                       bBar1VA         = NV_FALSE;
728     NvBool                       bFlaVA          = NV_FALSE;
729 
730     NV_ASSERT(!(pVidHeapAlloc->flags & NVOS32_ALLOC_FLAGS_WPR1) && !(pVidHeapAlloc->flags & NVOS32_ALLOC_FLAGS_WPR2));
731 
732     NV_CHECK_OK_OR_GOTO(status, LEVEL_ERROR, memUtilsAllocMemDesc(pGpu, pAllocRequest, pFbAllocInfo, &pMemDesc, NULL,
733                                                                   ADDR_VIRTUAL, NV_TRUE, &bAllocedMemDesc), failed);
734 
735     // Only a kernel client can request a protected allocation
736     if (pFbAllocInfo->pageFormat->flags & NVOS32_ALLOC_FLAGS_ALLOCATE_KERNEL_PRIVILEGED)
737     {
738         CALL_CONTEXT *pCallContext = resservGetTlsCallContext();
739         RS_PRIV_LEVEL privLevel;
740 
741         //
742         // This function has use cases where the call context is unavailable.
743         // In those cases, fall back to cached privileges.
744         //
745         if (pCallContext == NULL)
746         {
747             privLevel = rmclientGetCachedPrivilegeByHandle(pFbAllocInfo->hClient);
748         }
749         else
750         {
751             privLevel = pCallContext->secInfo.privLevel;
752         }
753 
754         if (privLevel >= RS_PRIV_LEVEL_KERNEL)
756         {
757             pFbAllocInfo->bIsKernelAlloc = NV_TRUE;
758         }
759         else
760         {
761             NV_PRINTF(LEVEL_ERROR, "NV_ERR_INSUFFICIENT_PERMISSIONS\n");
762             status = NV_ERR_INSUFFICIENT_PERMISSIONS;
763             goto failed;
764         }
765     }
766 
767     // Allocate a virtual surface
768     if (pVidHeapAlloc->flags & NVOS32_ALLOC_FLAGS_FIXED_ADDRESS_ALLOCATE)
769         pFbAllocInfo->offset = pVidHeapAlloc->offset - pFbAllocInfo->alignPad;
770 
771     //
772     // pFbAllocInfo->hClient=0 is sometimes passed and not always needed,
773     // do not immediately fail this call; only fail if the client needs to be used.
774     //
775     status = serverGetClientUnderLock(&g_resServ, pFbAllocInfo->hClient, &pRsClient);
776 
777     //
778     // vGPU:
779     //
780     // Since vGPU does all real hardware management in the
781     // host, if we are in guest OS (where IS_VIRTUAL(pGpu) is true),
782     // do an RPC to the host to do the hardware update.
783     // In case of SR-IOV, the VAS is managed by the guest. So, no need
784     // to communicate with the host for VA allocation.
785     //
786     if (IS_VIRTUAL_WITH_SRIOV(pGpu))
787     {
788         OBJVASPACE *pVAS = NULL;
789 
790         // Only try this if GetClient succeeded, else pass through the status from its failure.
791         if (pRsClient != NULL)
792             status = vaspaceGetByHandleOrDeviceDefault(pRsClient, pFbAllocInfo->hDevice, hVASpace, &pVAS);
793         if (NV_OK != status)
794             goto failed;
795 
796         bBar1VA = !!(vaspaceGetFlags(pVAS) & VASPACE_FLAGS_BAR_BAR1);
797         bFlaVA = !!(vaspaceGetFlags(pVAS) & VASPACE_FLAGS_FLA);
798     }
799 
800     // For Virtual FLA allocations, we don't have to RPC
801     if ((!IS_VIRTUAL(pGpu) && !IS_GSP_CLIENT(pGpu)) ||
802         bBar1VA || bFlaVA ||
803         gpuIsSplitVasManagementServerClientRmEnabled(pGpu))
804     {
805         OBJVASPACE     *pVAS  = NULL;
806         OBJGVASPACE    *pGVAS = NULL;
807         NvU64           align = pFbAllocInfo->align + 1;
808         VAS_ALLOC_FLAGS flags = {0};
809         NvU64           pageSizeLockMask = 0;
810         pFbAllocInfo->internalflags = pAllocRequest->internalflags;
811 
812         // Only try this if GetClient succeeded, else pass through the status from its failure.
813         if (pRsClient != NULL)
814             status = vaspaceGetByHandleOrDeviceDefault(pRsClient, pFbAllocInfo->hDevice, hVASpace, &pVAS);
815         if (NV_OK != status)
816             goto failed;
817 
818         //
819         // Feature requested for RM unlinked SLI:
820         // Clients can pass an allocation flag to the device or VA space constructor
821         // so that mappings and allocations will fail without an explicit address.
822         //
823         pGVAS = dynamicCast(pVAS, OBJGVASPACE);
824         if (pGVAS != NULL)
825         {
826             if ((pGVAS->flags & VASPACE_FLAGS_REQUIRE_FIXED_OFFSET) &&
827                 !(pVidHeapAlloc->flags & NVOS32_ALLOC_FLAGS_FIXED_ADDRESS_ALLOCATE))
828             {
829                 status = NV_ERR_INVALID_ARGUMENT;
830                 NV_PRINTF(LEVEL_ERROR, "The VA space requires all allocations to specify a fixed address\n");
831                 goto failed;
832             }
833         }
834 
835         status = vaspaceFillAllocParams(pVAS, pFbAllocInfo,
836                                         &pFbAllocInfo->size, &align,
837                                         &pVidHeapAlloc->rangeLo, &pVidHeapAlloc->rangeHi,
838                                         &pageSizeLockMask, &flags);
839         if (NV_OK != status)
840         {
841             NV_PRINTF(LEVEL_ERROR, "FillAllocParams failed.\n");
842             DBG_BREAKPOINT();
843         }
844         else
845         {
846             status = vaspaceAlloc(pVAS, pFbAllocInfo->size, align,
847                                   pVidHeapAlloc->rangeLo, pVidHeapAlloc->rangeHi,
848                                   pageSizeLockMask, flags, &pFbAllocInfo->offset);
849             if (NV_OK != status)
850             {
851                 NV_PRINTF(LEVEL_ERROR,
852                           "VA Space alloc failed! Status Code: 0x%x Size: 0x%llx RangeLo: 0x%llx,"
853                           " RangeHi: 0x%llx, pageSzLockMask: 0x%llx\n",
854                           status, pFbAllocInfo->size,
855                           pVidHeapAlloc->rangeLo, pVidHeapAlloc->rangeHi,
856                           pageSizeLockMask);
857                 status = NV_ERR_INSUFFICIENT_RESOURCES;
858                 goto failed;
859             }
860 
861             memdescDescribe(pMemDesc, ADDR_VIRTUAL,
862                             pFbAllocInfo->offset,
863                             pFbAllocInfo->size);
864 
865             // Return alignment info.
866             pFbAllocInfo->align        = align - 1;
867             pVidHeapAlloc->alignment   = align;
868         }
869     }
870     else
871     {
872         // Possibly dead code: IS_VIRTUAL and bSplitVAs are only enabled on legacy vGPU.
873         memdescDescribe(pMemDesc, ADDR_VIRTUAL, memdescGetPte(pMemDesc, AT_GPU, 0),
874             pMemDesc->Size);
875     }
876 
877     //
878     // Report default (any) page size for virtual allocations with no page size restriction.
879     // Actual page size will be determined at map time.
880     //
881     if (FLD_TEST_DRF(OS32, _ATTR, _PAGE_SIZE, _DEFAULT, pFbAllocInfo->pageFormat->attr))
882     {
883         pFbAllocInfo->retAttr = FLD_SET_DRF(OS32, _ATTR, _PAGE_SIZE, _DEFAULT, pFbAllocInfo->retAttr);
884     }
885 
886     // get possibly updated surface attributes
887     pVidHeapAlloc->attr = pFbAllocInfo->retAttr;
888     pVidHeapAlloc->attr2 = pFbAllocInfo->retAttr2;
889 
890     // update contiguity attribute to reflect memdesc
891     if (memdescGetContiguity(pAllocRequest->pMemDesc, AT_GPU))
892     {
893         pVidHeapAlloc->attr = FLD_SET_DRF(OS32, _ATTR, _PHYSICALITY,
894                                           _CONTIGUOUS,
895                                           pVidHeapAlloc->attr);
896     }
897     else
898     {
899         pVidHeapAlloc->attr = FLD_SET_DRF(OS32, _ATTR, _PHYSICALITY,
900                                           _NONCONTIGUOUS,
901                                           pVidHeapAlloc->attr);
902     }
903 
904     pVidHeapAlloc->offset = pFbAllocInfo->offset;
905 
906     if (pAllocRequest->pHwResource != NULL)
907     {
908         pAllocRequest->pHwResource->attr       = pFbAllocInfo->retAttr;
909         pAllocRequest->pHwResource->attr2      = pFbAllocInfo->retAttr2;
910         pAllocRequest->pHwResource->hwResId    = pFbAllocInfo->hwResId;
911         pAllocRequest->pHwResource->comprCovg  = pFbAllocInfo->comprCovg;
912         pAllocRequest->pHwResource->ctagOffset = pFbAllocInfo->ctagOffset;
913         pAllocRequest->pHwResource->hwResId    = pFbAllocInfo->hwResId;
914     }
915 
916     return NV_OK;
917 
918 failed:
919     memmgrFreeHwResources(pGpu, pMemoryManager, pFbAllocInfo);
920 
921     if (bAllocedMemDesc)
922     {
923         memdescDestroy(pAllocRequest->pMemDesc);
924         pAllocRequest->pMemDesc = NULL;
925     }
926 
927     return status;
928 }
929 
930 /*!
931  * @brief Interface to vaspaceReserveMempool to reserve PMA memory for page tables
932  */
933 NV_STATUS virtmemReserveMempool_IMPL
934 (
935     VirtualMemory *pVirtualMemory,
936     OBJGPU        *pGpu,
937     Device        *pDevice,
938     NvU64          size,
939     NvU64          pageSizeMask
940 )
941 {
942     RsClient   *pClient = RES_GET_CLIENT(pVirtualMemory);
943     OBJVASPACE *pVAS    = NULL;
944     NvU32       mempoolFlags = VASPACE_RESERVE_FLAGS_NONE;
945 
946     //
947     // Reject mappings for a legacy NV01_MEMORY_SYSTEM_DYNAMIC
948     // object silently.
949     //
950     if (pVirtualMemory->hVASpace == NV_MEMORY_VIRTUAL_SYSMEM_DYNAMIC_HVASPACE)
951     {
952         return NV_ERR_INVALID_OBJECT;
953     }
954 
955     if (pVirtualMemory->bOptimizePageTableMempoolUsage)
956     {
957         mempoolFlags = VASPACE_RESERVE_FLAGS_ALLOC_UPTO_TARGET_LEVEL_ONLY;
958     }
959 
960     NV_ASSERT_OK_OR_RETURN(
961         vaspaceGetByHandleOrDeviceDefault(pClient, RES_GET_HANDLE(pDevice),
962                                           pVirtualMemory->hVASpace, &pVAS));
963 
964     return vaspaceReserveMempool(pVAS, pGpu, pDevice,
965                                  size, pageSizeMask, mempoolFlags);
966 }
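
/*
 * Illustrative sketch (not part of the driver): a hypothetical caller that is
 * about to back a region of this VirtualMemory object could pre-reserve page
 * table memory from PMA first, so that the subsequent map does not have to
 * allocate page tables lazily. The size and page size mask below are
 * assumptions.
 *
 *     NV_STATUS status = virtmemReserveMempool(pVirtualMemory, pGpu, pDevice,
 *                                              mapSize,
 *                                              RM_PAGE_SIZE | RM_PAGE_SIZE_HUGE);
 *     if (status != NV_OK)
 *         return status;
 */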
967 
968 /*!
969  * @brief Does this VirtualMemory object use the specified hVASpace?
970  */
971 NvBool
972 virtmemMatchesVASpace_IMPL
973 (
974     VirtualMemory *pVirtualMemory,
975     NvHandle hClient,
976     NvHandle hVASpace
977 )
978 {
979     return (RES_GET_CLIENT_HANDLE(pVirtualMemory) == hClient) && (pVirtualMemory->hVASpace == hVASpace);
980 }
981 
982 /*!
983  * @brief Helper to look up a VirtualMemory object
984  */
985 NV_STATUS
986 virtmemGetByHandleAndDevice_IMPL
987 (
988     RsClient          *pClient,
989     NvHandle           hMemory,
990     NvHandle           hDevice,
991     VirtualMemory    **ppVirtualMemory
992 )
993 {
994     Memory *pMemory;
995 
996     NV_CHECK_OK_OR_RETURN(LEVEL_SILENT,
997         memGetByHandleAndDevice(pClient, hMemory, hDevice, &pMemory));
998 
999     *ppVirtualMemory = dynamicCast(pMemory, VirtualMemory);
1000 
1001     return (*ppVirtualMemory != NULL) ? NV_OK : NV_ERR_INVALID_OBJECT_HANDLE;
1002 }
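
/*
 * Illustrative usage sketch (hypothetical caller): resolve a memory handle to a
 * VirtualMemory object before operating on it.
 *
 *     VirtualMemory *pVirtualMemory = NULL;
 *
 *     NV_CHECK_OK_OR_RETURN(LEVEL_SILENT,
 *         virtmemGetByHandleAndDevice(pClient, hMemory, hDevice, &pVirtualMemory));
 */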
1003 
1004 /*!
1005  * @brief Create a CPU mapping in addition to the DMA mapping
1006  */
1007 static NV_STATUS
1008 _virtmemAllocKernelMapping
1009 (
1010     OBJGPU               *pGpu,
1011     OBJVASPACE           *pVAS,
1012     CLI_DMA_MAPPING_INFO *pDmaMappingInfo,
1013     NvU64                 offset,
1014     NvU64                 size,
1015     Memory               *pMemoryInfo
1016 )
1017 {
1018     NV_STATUS  status              = NV_OK;
1019     NvBool     bCoherentCpuMapping = pGpu->getProperty(pGpu, PDB_PROP_GPU_COHERENT_CPU_MAPPING);
1020     NvU32      gpuSubDevInst;
1021     RmPhysAddr bar1PhysAddr;
1022 
1023     SLI_LOOP_START(SLI_LOOP_FLAGS_NONE)
1024 
1025     gpuSubDevInst = gpumgrGetSubDeviceInstanceFromGpu(pGpu);
1026     if (bCoherentCpuMapping)
1027     {
1028         // Use a temp pointer to prevent overwriting the previous pointer by accident
1029         NvP64              tempCpuPtr    = NvP64_NULL;
1030         MEMORY_DESCRIPTOR *pMemDesc      = memdescGetMemDescFromGpu(pDmaMappingInfo->pMemDesc, pGpu);
1031         KernelBus         *pKernelBus    = GPU_GET_KERNEL_BUS(pGpu);
1032 
1033         NV_PRINTF(LEVEL_INFO,
1034                   "Allocating coherent link mapping. length=%lld, memDesc->size=%lld\n",
1035                   size, pDmaMappingInfo->pMemDesc->Size);
1036 
1037         NV_ASSERT(pGpu->getProperty(pGpu, PDB_PROP_GPU_ATS_SUPPORTED));
1038         NV_ASSERT(pDmaMappingInfo->pMemDesc->_flags & MEMDESC_FLAGS_PHYSICALLY_CONTIGUOUS);
1039 
1040         tempCpuPtr = kbusMapCoherentCpuMapping_HAL(pGpu, pKernelBus, pMemDesc);
1041         if (tempCpuPtr == NULL)
1042         {
1043             status = NV_ERR_GENERIC;
1044         }
1045         else
1046         {
1047             status = NV_OK;
1048             tempCpuPtr =  NvP64_PLUS_OFFSET(tempCpuPtr, offset);
1049         }
1050 
1051         pDmaMappingInfo->KernelVAddr[gpuSubDevInst] = NvP64_VALUE(tempCpuPtr);
1052     }
1053     else
1054     {
1055         //
1056         // Allocate GPU virtual address space for the video memory region
1057         // for those GPUs that support it.
1058         //
1059         pDmaMappingInfo->FbApertureLen[gpuSubDevInst] = pDmaMappingInfo->pMemDesc->Size;
1060         if (RMCFG_FEATURE_PLATFORM_GSP)
1061         {
1062             status = osMapSystemMemory(pMemoryInfo->pMemDesc,
1063                                        offset,
1064                                        pDmaMappingInfo->pMemDesc->Size,
1065                                        NV_TRUE /*Kernel*/,
1066                                        NV_PROTECT_READ_WRITE,
1067                                        (NvP64 *) &pDmaMappingInfo->KernelVAddr[gpuSubDevInst],
1068                                        (NvP64 *) &pDmaMappingInfo->KernelPriv);
1069 
1070             if (status != NV_OK)
1071             {
1072                 pDmaMappingInfo->FbApertureLen[gpuSubDevInst] = 0;
1073                 pDmaMappingInfo->FbAperture[gpuSubDevInst]    = 0;
1074                 pDmaMappingInfo->KernelPriv                   = 0;
1075                 SLI_LOOP_BREAK;
1076             }
1077         }
1078         else
1079         {
1080             KernelBus *pKernelBus = GPU_GET_KERNEL_BUS(pGpu);
1081             Device *pDevice = NULL;
1082             CALL_CONTEXT *pCallContext = resservGetTlsCallContext();
1083             if ((pCallContext != NULL) && (pCallContext->pClient != NULL))
1084             {
1085                 RsResourceRef *pDeviceRef = NULL;
1086 
1087                 status = refFindAncestorOfType(pCallContext->pResourceRef,
1088                                                classId(Device), &pDeviceRef);
1089                 if (status == NV_OK)
1090                 {
1091                     pDevice = dynamicCast(pDeviceRef->pResource, Device);
1092                 }
1093             }
1094 
1095             status = kbusMapFbAperture_HAL(pGpu, pKernelBus,
1096                                            pMemoryInfo->pMemDesc, offset,
1097                                            &pDmaMappingInfo->FbAperture[gpuSubDevInst],
1098                                            &pDmaMappingInfo->FbApertureLen[gpuSubDevInst],
1099                                            BUS_MAP_FB_FLAGS_MAP_UNICAST, pDevice);
1100 
1101             if (status != NV_OK)
1102             {
1103                 pDmaMappingInfo->FbApertureLen[gpuSubDevInst] = 0;
1104                 pDmaMappingInfo->FbAperture[gpuSubDevInst]    = 0;
1105                 SLI_LOOP_BREAK;
1106             }
1107 
1108             bar1PhysAddr = gpumgrGetGpuPhysFbAddr(pGpu) + pDmaMappingInfo->FbAperture[gpuSubDevInst];
1109             status = osMapPciMemoryKernelOld(pGpu, bar1PhysAddr,
1110                                              pDmaMappingInfo->pMemDesc->Size,
1111                                              NV_PROTECT_READ_WRITE,
1112                                              &pDmaMappingInfo->KernelVAddr[gpuSubDevInst],
1113                                              NV_MEMORY_WRITECOMBINED);
1114         }
1115     }
1116 
1117     if (status != NV_OK)
1118     {
1119         SLI_LOOP_BREAK;
1120     }
1121 
1122     SLI_LOOP_END
1123 
1124     if (status != NV_OK)
1125     {
1126         _virtmemFreeKernelMapping(pGpu, pDmaMappingInfo);
1127     }
1128 
1129     return status;
1130 }
1131 /*!
1132  * @brief Free CPU mapping
1133  */
1134 static void
1135 _virtmemFreeKernelMapping
1136 (
1137     OBJGPU               *pGpu,
1138     CLI_DMA_MAPPING_INFO *pDmaMappingInfo
1139 )
1140 {
1141     NvU32    gpuSubDevInst;
1142     NvBool   bCoherentCpuMapping = pGpu->getProperty(pGpu, PDB_PROP_GPU_COHERENT_CPU_MAPPING);
1143 
1144     SLI_LOOP_START(SLI_LOOP_FLAGS_BC_ONLY | SLI_LOOP_FLAGS_IGNORE_REENTRANCY)
1145 
1146     gpuSubDevInst = gpumgrGetSubDeviceInstanceFromGpu(pGpu);
1147 
1148     // Unmap a kernel CPU mapping if one exists
1149     if (pDmaMappingInfo->KernelVAddr[gpuSubDevInst] != NULL)
1150     {
1151         if (bCoherentCpuMapping)
1152         {
1153             KernelBus         *pKernelBus = GPU_GET_KERNEL_BUS(pGpu);
1154             MEMORY_DESCRIPTOR *pMemDesc   = memdescGetMemDescFromGpu(pDmaMappingInfo->pMemDesc, pGpu);
1155             kbusUnmapCoherentCpuMapping_HAL(pGpu, pKernelBus, pMemDesc);
1156         }
1157         else
1158         {
1159             osUnmapPciMemoryKernelOld(pGpu, pDmaMappingInfo->KernelVAddr[gpuSubDevInst]);
1160         }
1161 
1162         pDmaMappingInfo->KernelVAddr[gpuSubDevInst] = NULL;
1163     }
1164 
1165     // Unmap the FB aperture mapping if one exists
1166     if ((pDmaMappingInfo->FbApertureLen[gpuSubDevInst]) && (!bCoherentCpuMapping))
1167     {
1168         if (RMCFG_FEATURE_PLATFORM_GSP)
1169         {
1170             // This is a no-op in GSP, but document it here as code in case it changes.
1171             osUnmapSystemMemory(pDmaMappingInfo->pMemDesc,
1172                                 NV_TRUE /*Kernel*/,
1173                                 0 /*ProcessId*/,
1174                                 (NvP64)pDmaMappingInfo->FbAperture[gpuSubDevInst],
1175                                 NV_PTR_TO_NvP64(pDmaMappingInfo->KernelPriv));
1176         }
1177         else
1178         {
1179             KernelBus *pKernelBus = GPU_GET_KERNEL_BUS(pGpu);
1180             kbusUnmapFbAperture_HAL(pGpu,
1181                                     pKernelBus,
1182                                     pDmaMappingInfo->pMemDesc,
1183                                     pDmaMappingInfo->FbAperture[gpuSubDevInst],
1184                                     pDmaMappingInfo->FbApertureLen[gpuSubDevInst],
1185                                     BUS_MAP_FB_FLAGS_MAP_UNICAST);
1186         }
1187         pDmaMappingInfo->FbAperture[gpuSubDevInst] = 0;
1188         pDmaMappingInfo->FbApertureLen[gpuSubDevInst] = 0;
1189         pDmaMappingInfo->KernelPriv = 0;
1190     }
1191 
1192     SLI_LOOP_END
1193 }
1194 
1195 /*!
1196  * @brief Map an object into a VirtualMemory object
1197  */
1198 NV_STATUS
1199 virtmemMapTo_IMPL
1200 (
1201     VirtualMemory *pVirtualMemory,
1202     RS_RES_MAP_TO_PARAMS *pParams
1203 )
1204 {
1205     NV_STATUS   status                = NV_ERR_NOT_SUPPORTED;
1206     Memory         *pMemory           = staticCast(pVirtualMemory, Memory);
1207     OBJGPU         *pGpu              = pParams->pGpu;
1208     OBJGPU         *pSrcGpu           = pParams->pSrcGpu;
1209     RsClient       *pClient           = RES_GET_CLIENT(pVirtualMemory);
1210     MemoryManager  *pMemoryManager    = GPU_GET_MEMORY_MANAGER(pGpu);
1211     RsResourceRef  *pMemoryRef        = pParams->pMemoryRef;
1212     NvHandle        hClient           = pClient->hClient;
1213     NvHandle        hBroadcastDevice  = pParams->hBroadcastDevice;
1214     NvHandle        hVirtualMem       = RES_GET_HANDLE(pVirtualMemory);
1215     NvHandle        hMemoryDevice     = pParams->hMemoryDevice;
1216     NvU32           gpuMask           = pParams->gpuMask;
1217     NvU64           offset            = pParams->offset;    // offset into pMemoryRef to map
1218     NvU64           length            = pParams->length;
1219     NvU32           flags             = pParams->flags;
1220     NvU32           p2p               = DRF_VAL(OS46, _FLAGS, _P2P_ENABLE, pParams->flags);
1221 
1222     VirtMemAllocator     *pDma                  = GPU_GET_DMA(pGpu);
1223     MEMORY_DESCRIPTOR    *pSrcMemDesc           = pParams->pSrcMemDesc;
1224     NvU64                *pDmaOffset            = pParams->pDmaOffset;  // return VirtualMemory offset
1225     CLI_DMA_MAPPING_INFO *pDmaMappingInfo       = NULL;
1226     OBJVASPACE           *pVas                  = NULL;
1227     Memory               *pSrcMemory            = dynamicCast(pMemoryRef->pResource, Memory);
1228 
1229     NvU32       tgtAddressSpace   = ADDR_UNKNOWN;
1230     NvBool      bDmaMappingRegistered = NV_FALSE;
1231     NvBool      bFlaMapping           = pParams->bFlaMapping;
1232     NvBool      bIsIndirectPeer       = NV_FALSE;
1233     NvBool      bEncrypted;
1234     NvBool      bIsSysmem             = NV_FALSE;
1235     NvBool      bBar1P2P              = (p2p && kbusHasPcieBar1P2PMapping_HAL(pGpu,
1236                                                                               GPU_GET_KERNEL_BUS(pGpu),
1237                                                                               pSrcGpu,
1238                                                                               GPU_GET_KERNEL_BUS(pSrcGpu)));
1239     NvBool      bKernelMappingRequired = FLD_TEST_DRF(OS46, _FLAGS, _KERNEL_MAPPING, _ENABLE, flags);
1240 
1241     //
1242     // Allow unicast on NV01_MEMORY_VIRTUAL object, but maintain the broadcast
1243     // requirement for NV50_MEMORY_VIRTUAL.
1244     //
1245     if (pParams->bSubdeviceHandleProvided && !pVirtualMemory->bAllowUnicastMapping)
1246     {
1247         NV_PRINTF(LEVEL_ERROR, "Unicast mappings into virtual memory object not supported.\n");
1248         return NV_ERR_NOT_SUPPORTED;
1249     }
1250 
1251     status = vaspaceGetByHandleOrDeviceDefault(pClient, hBroadcastDevice, pVirtualMemory->hVASpace, &pVas);
1252     if (status != NV_OK)
1253         return status;
1254 
1255     //
1256     // Use the encryption setting of the virtual allocation.
1257     // This makes sense, since the same physical mem descriptor could have
1258     // more than one mapping, each with different encryption settings.
1259     //
1260     bEncrypted = memdescGetFlag(pMemory->pMemDesc, MEMDESC_FLAGS_ENCRYPTED);
1261 
1262     // Validate the offset and limit passed in.
1263     if (offset + length > pSrcMemDesc->Size)
1264         return NV_ERR_INVALID_BASE;
1265 
1266     status = intermapCreateDmaMapping(pClient, pVirtualMemory, &pDmaMappingInfo, flags);
1267     if (status != NV_OK)
1268         return status;
1269 
1270     if (bBar1P2P)
1271     {
1272         DMA_BAR1P2P_MAPPING_PRARAMS params = {0};
1273 
1274         params.pVas = pVas;
1275         params.pPeerGpu = pSrcGpu;
1276         params.pPeerMemDesc = pSrcMemDesc;
1277         params.flags = flags;
1278         params.offset = offset;
1279         params.length = length;
1280         params.pDmaMappingInfo = pDmaMappingInfo;
1281 
1282         status = dmaAllocBar1P2PMapping_HAL(pGpu, pDma, &params);
1283         if (status != NV_OK)
1284             goto done;
1285 
1286         // Adjust local variables for the BAR1 P2P mappings
1287         pSrcMemDesc = params.pMemDescOut;
1288         flags = params.flagsOut;
1289         offset = params.offsetOut;
1290     }
1291 
1292     //
1293     // Determine target address space.  If we're mapping fbmem from
1294     // one gpu for use by another, then we need to treat that memory as
1295     // ADDR_SYSMEM.
1296     //
1297     tgtAddressSpace = memdescGetAddressSpace(memdescGetMemDescFromGpu(pSrcMemDesc, pGpu));
1298     if ((pSrcGpu != pGpu) && (tgtAddressSpace == ADDR_FBMEM))
1299     {
1300         tgtAddressSpace = ADDR_SYSMEM;
1301 
1302         if (gpumgrCheckIndirectPeer(pGpu, pSrcGpu))
1303             bIsIndirectPeer = NV_TRUE;
1304     }
1305 
1306     // Different cases for vidmem & system memory/fabric memory.
1307     bIsSysmem = (tgtAddressSpace == ADDR_SYSMEM);
1308     bIsSysmem = bIsSysmem || (tgtAddressSpace == ADDR_EGM);
1309 
1310     //
1311     // Create a MEMORY_DESCRIPTOR describing this region of the memory
1312     // alloc in question
1313     //
1314     status = memdescCreateSubMem(&pDmaMappingInfo->pMemDesc, pSrcMemDesc, pGpu, offset, length);
1315     if (status != NV_OK)
1316         goto done;
1317 
1318     SLI_LOOP_START(SLI_LOOP_FLAGS_BC_ONLY | SLI_LOOP_FLAGS_IGNORE_REENTRANCY)
1319     memdescSetFlag(memdescGetMemDescFromGpu(pDmaMappingInfo->pMemDesc, pGpu),
1320                MEMDESC_FLAGS_ENCRYPTED,
1321                bEncrypted);
1322     SLI_LOOP_END
1323 
1324     if (FLD_TEST_DRF(OS46, _FLAGS, _PAGE_KIND, _VIRTUAL, flags))
1325     {
1326         NvU32 kind = memdescGetPteKind(pMemory->pMemDesc);
1327 
1328         NV_ASSERT(memdescGetFlag(pMemory->pMemDesc, MEMDESC_FLAGS_SET_KIND));
1329 
1330         SLI_LOOP_START(SLI_LOOP_FLAGS_BC_ONLY | SLI_LOOP_FLAGS_IGNORE_REENTRANCY);
1331         if (tgtAddressSpace == ADDR_SYSMEM && !memmgrComprSupported(pMemoryManager, ADDR_SYSMEM))
1332         {
1333             //
1334             // If system memory does not support compression and the virtual kind is compressible,
1335             // fall back to the uncompressed kind when mapping into system memory.
1336             //
1337             kind = memmgrGetUncompressedKind_HAL(pGpu, pMemoryManager, kind, 0);
1338         }
1339         memdescSetPteKind(memdescGetMemDescFromGpu(pDmaMappingInfo->pMemDesc, pGpu), kind);
1340         SLI_LOOP_END;
1341     }
1342 
1343     if (bIsSysmem ||
1344         (tgtAddressSpace == ADDR_FABRIC_MC) ||
1345         (tgtAddressSpace == ADDR_FABRIC_V2))
1346     {
1347         // if GPUs are indirect peers, create TCE mappings
1348         if (bIsIndirectPeer)
1349         {
1350             //
1351             // TODO: Ideally memdescMapIommu should be called on the FB memdesc with
1352             // pSrcGpu. That would clearly convey that the memory is owned by pSrcGpu
1353             // and that we are creating IOMMU mappings for pGpu. This effort is
1354             // tracked in bug 2043603.
1355             //
1356             status = memdescMapIommu(pDmaMappingInfo->pMemDesc, pGpu->busInfo.iovaspaceId);
1357             if (status != NV_OK)
1358             {
1359                 NV_PRINTF(LEVEL_ERROR, "DMA map pages failed for requested GPU!\n");
1360                 goto done;
1361             }
1362         }
1363 
1364         // Monolithic CPU RM or SPLIT_VAS_MGMT
1365         if (!pMemory->bRpcAlloc || gpuIsSplitVasManagementServerClientRmEnabled(pGpu))
1366         {
1367             pDmaMappingInfo->DmaOffset = *pDmaOffset; // in case this is 'in'
1368 
1369             // allocate mapping in VirtualMemory object
1370             status = dmaAllocMap(pGpu, pDma, pVas, pVirtualMemory, pSrcMemory, pDmaMappingInfo);
1371             if (status != NV_OK)
1372                 goto done;
1373 
1374             status = intermapRegisterDmaMapping(pClient, pVirtualMemory, pDmaMappingInfo, pDmaMappingInfo->DmaOffset, gpuMask);
1375             if (status != NV_OK)
1376             {
1377                 dmaFreeMap(pGpu, pDma, pVas,
1378                            pVirtualMemory, pDmaMappingInfo,
1379                            DRF_DEF(OS47, _FLAGS, _DEFER_TLB_INVALIDATION, _FALSE));
1380                 goto done;
1381             }
1382 
1383             bDmaMappingRegistered = NV_TRUE;
1384 
1385             // If a kernel mapping has been requested, create one
1386             if (bKernelMappingRequired)
1387             {
1388                 status = memdescMapOld(pDmaMappingInfo->pMemDesc,
1389                                        0,
1390                                        pDmaMappingInfo->pMemDesc->Size,
1391                                        NV_TRUE, NV_PROTECT_READ_WRITE,
1392                                        &pDmaMappingInfo->KernelVAddr[gpumgrGetSubDeviceInstanceFromGpu(gpumgrGetParentGPU(pGpu))],
1393                                        &pDmaMappingInfo->KernelPriv);
1394 
1395                 if (status != NV_OK)
1396                     goto done;
1397             }
1398 
1399             *pDmaOffset = pDmaMappingInfo->DmaOffset;
1400         } // Monolithic CPU RM or SPLIT_VAS_MGMT
1401     }
1402     else if (tgtAddressSpace == ADDR_FBMEM)
1403     {
1404         pDmaMappingInfo->DmaOffset = *pDmaOffset; // in case this is 'in'
1405 
1406         // Monolithic CPU RM or SPLIT_VAS_MGMT
1407         if (!pMemory->bRpcAlloc || gpuIsSplitVasManagementServerClientRmEnabled(pGpu))
1408         {
1409             // allocate mapping in VirtualMemory object
1410             status = dmaAllocMap(pGpu, pDma, pVas, pVirtualMemory, pSrcMemory, pDmaMappingInfo);
1411             if (status != NV_OK)
1412                 goto done;
1413 
1414             *pDmaOffset = pDmaMappingInfo->DmaOffset;
1415 
1416             status = intermapRegisterDmaMapping(pClient, pVirtualMemory, pDmaMappingInfo, pDmaMappingInfo->DmaOffset, gpuMask);
1417             if (status != NV_OK)
1418             {
1419                 dmaFreeMap(pGpu, pDma, pVas,
1420                            pVirtualMemory, pDmaMappingInfo,
1421                            DRF_DEF(OS47, _FLAGS, _DEFER_TLB_INVALIDATION, _FALSE));
1422                 goto done;
1423             }
1424 
1425             bDmaMappingRegistered = NV_TRUE;
1426 
1427             if (bKernelMappingRequired)
1428             {
1429                 status = _virtmemAllocKernelMapping(pGpu, pVas, pDmaMappingInfo, offset, length, pSrcMemory);
1430                 if (status != NV_OK)
1431                     goto done;
1432             }
1433 
1434             *pDmaOffset = pDmaMappingInfo->DmaOffset;
1435         } // Monolithic CPU RM or SPLIT_VAS_MGMT
1436     }
1437     else
1438     {
1439         // unknown (or mixed vidmem+sysmem?) mem case
1440         status = NV_ERR_INVALID_OBJECT_HANDLE;
1441         goto done;
1442     }
1443 
1444     if (RMCFG_CLASS_NV50_P2P &&
1445         !bFlaMapping &&
1446         (bBar1P2P || DRF_VAL(OS46, _FLAGS, _P2P_ENABLE, pDmaMappingInfo->Flags) == NVOS46_FLAGS_P2P_ENABLE_NOSLI))
1447     {
1448         //
1449         // If we are on SLI and mapping peer memory between two GPUs on the
1450         // same device, we do not rely on dynamic P2P mailbox setup.
1451         // SLI uses a static P2P mailbox and hence has no
1452         // P2P object associated with it.
1453         //
1454         if ((hBroadcastDevice == hMemoryDevice) && IsSLIEnabled(pGpu))
1455         {
1456             goto vgpu_send_rpc;
1457         }
1458 
1459         pDmaMappingInfo->bP2P = NV_TRUE;
1460     }
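    //
    // pDmaMappingInfo->bP2P is checked again at unmap time (see
    // virtmemUnmapFrom_IMPL below), where dmaFreeBar1P2PMapping_HAL tears the
    // P2P mapping back down.
    //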
1461 
1462 vgpu_send_rpc:
1463 
1464     if (pMemory->bRpcAlloc)
1465     {
1466         NV_RM_RPC_MAP_MEMORY_DMA(pGpu, hClient, hBroadcastDevice, hVirtualMem, pMemoryRef->hResource,
1467                                  offset, length, flags, pDmaOffset, status);
1468         if (status != NV_OK)
1469             goto done;
1470 
1471         if ((IS_VIRTUAL(pGpu) || IS_GSP_CLIENT(pGpu)) &&
1472             !gpuIsSplitVasManagementServerClientRmEnabled(pGpu))
1473         {
1474             //
1475             // vGPU doesn't understand subdevice handles.  But clients map memory
1476             // with subdevice handles and we don't want that to fail on vGPU.
1477             // Currently, we just pass down the broadcast device handle to the host
1478             // (which should be equivalent if SLI is disabled).  This will need to
1479             // be revisited if vGPU ever supports SLI.
1480             //
1481             NV_ASSERT(!IsSLIEnabled(pGpu));
1482 
1483             pDmaMappingInfo->DmaOffset = *pDmaOffset;
1484 
1485             status = intermapRegisterDmaMapping(pClient, pVirtualMemory, pDmaMappingInfo,
1486                                                 pDmaMappingInfo->DmaOffset, gpuMask);
1487             if (status != NV_OK)
1488                 goto done;
1489 
1490             bDmaMappingRegistered = NV_TRUE;
1491 
1492             if (tgtAddressSpace == ADDR_SYSMEM)
1493             {
1494                 // If a kernel mapping has been requested, create one
1495                 if (bKernelMappingRequired)
1496                 {
1497                     status = memdescMapOld(pDmaMappingInfo->pMemDesc,
1498                                            0,
1499                                            pDmaMappingInfo->pMemDesc->Size,
1500                                            NV_TRUE, NV_PROTECT_READ_WRITE,
1501                                            &pDmaMappingInfo->KernelVAddr[gpumgrGetSubDeviceInstanceFromGpu(gpumgrGetParentGPU(pGpu))],
1502                                            &pDmaMappingInfo->KernelPriv);
1503                     if (status != NV_OK)
1504                         goto done;
1505                 }
1506             }
1507         }
1508     }
1509 
1510 done:
1511     if (status != NV_OK)
1512     {
1513         if (pDmaMappingInfo != NULL)
1514         {
1515             if ((pDmaMappingInfo->pMemDesc != NULL) && bKernelMappingRequired)
1516             {
1517                 //
1518                 // If a kernel cookie exists and the mapping is in sysmem, free the sysmem
1519                 // mapping; for ADDR_FBMEM, _virtmemFreeKernelMapping determines whether a mapping was created.
1520                 //
1521                 if ((pDmaMappingInfo->KernelPriv != NULL) &&
1522                     (memdescGetAddressSpace(pDmaMappingInfo->pMemDesc) == ADDR_SYSMEM))
1523                 {
1524                     memdescUnmapOld(pDmaMappingInfo->pMemDesc, NV_TRUE, 0,
1525                                     pDmaMappingInfo->KernelVAddr[gpumgrGetSubDeviceInstanceFromGpu(gpumgrGetParentGPU(pGpu))],
1526                                     pDmaMappingInfo->KernelPriv);
1527                     pDmaMappingInfo->KernelPriv = NULL;
1528                 }
1529                 else if (memdescGetAddressSpace(pDmaMappingInfo->pMemDesc) == ADDR_FBMEM)
1530                 {
1531                     _virtmemFreeKernelMapping(pGpu, pDmaMappingInfo);
1532                 }
1533             }
1534 
1535             if (pDmaMappingInfo->pMemDesc != NULL && bIsIndirectPeer)
1536             {
1537                 memdescUnmapIommu(pDmaMappingInfo->pMemDesc, pGpu->busInfo.iovaspaceId);
1538             }
1539 
1540             dmaFreeBar1P2PMapping_HAL(pDma, pDmaMappingInfo);
1541 
1542             memdescDestroy(pDmaMappingInfo->pMemDesc);
1543             pDmaMappingInfo->pMemDesc = NULL;
1544 
1545             if (bDmaMappingRegistered)
1546             {
1547                 NV_ASSERT_OK(intermapDelDmaMapping(pClient, pVirtualMemory, *pDmaOffset, gpuMask));
1548             }
1549             else
1550             {
1551                 // Explicitly free the DMA mapping if mapping was not yet registered
1552                 intermapFreeDmaMapping(pDmaMappingInfo);
1553             }
1554         }
1555     }
1556 
1557     return status;
1558 }
1559 
1560 /*!
1561  * @brief Unmap object from VirtualMemory object
1562  */
1563 NV_STATUS
1564 virtmemUnmapFrom_IMPL
1565 (
1566     VirtualMemory *pVirtualMemory,
1567     RS_RES_UNMAP_FROM_PARAMS *pParams
1568 )
1569 {
1570     OBJGPU     *pGpu              = pParams->pGpu;
1571     Memory     *pMemory           = staticCast(pVirtualMemory, Memory);
1572     RsClient   *pClient           = RES_GET_CLIENT(pVirtualMemory);
1573     NvHandle    hClient           = pClient->hClient;
1574     NvHandle    hMemory           = pParams->hMemory;
1575     NvHandle    hVirtualMem       = RES_GET_HANDLE(pVirtualMemory);
1576     NvHandle    hBroadcastDevice  = pParams->hBroadcastDevice;
1577     NvU32       gpuMask           = pParams->gpuMask;
1578     NvU64       dmaOffset         = pParams->dmaOffset;
1579     OBJVASPACE *pVas              = NULL;
1580     NV_STATUS   status            = NV_OK;
1581     NvBool      bIsIndirectPeer   = NV_FALSE;
1582     CLI_DMA_MAPPING_INFO *pDmaMappingInfoLeft = NULL;
1583     NvBool                bDmaMappingInfoLeftRegistered = NV_FALSE;
1584     CLI_DMA_MAPPING_INFO *pDmaMappingInfoRight = NULL;
1585     NvBool                bDmaMappingInfoRightRegistered = NV_FALSE;
1586     CLI_DMA_MAPPING_INFO *pDmaMappingInfoUnmap = NULL;
1587 
1588     CLI_DMA_MAPPING_INFO *pDmaMappingInfo   = NULL;
1589 
1590     if (hMemory != NV01_NULL_OBJECT)
1591     {
1592         RsResourceRef  *pSrcMemoryRef;
1593         Memory         *pMemorySrc;
1594 
1595         if (clientGetResourceRef(pClient, hMemory, &pSrcMemoryRef) != NV_OK)
1596             return NV_ERR_OBJECT_NOT_FOUND;
1597 
1598         status = rmresCheckMemInterUnmap(dynamicCast(pSrcMemoryRef->pResource, RmResource), pParams->bSubdeviceHandleProvided);
1599 
1600         // Exit if the check failed or the class is invalid; otherwise continue.
1601         if (status != NV_OK)
1602             return status;
1603 
1604         pMemorySrc = dynamicCast(pSrcMemoryRef->pResource, Memory);
1605         if (pMemorySrc != NULL)
1606         {
1607             if (gpumgrCheckIndirectPeer(pMemorySrc->pGpu, pGpu))
1608                 bIsIndirectPeer = NV_TRUE;
1609         }
1610     }
1611 
1612     if (pParams->bSubdeviceHandleProvided && !pVirtualMemory->bAllowUnicastMapping)
1613     {
1614         NV_PRINTF(LEVEL_ERROR, "Unicast DMA mappings into virtual memory object not supported.\n");
1615         return NV_ERR_NOT_SUPPORTED;
1616     }
1617 
1618     status = vaspaceGetByHandleOrDeviceDefault(pClient, hBroadcastDevice, pVirtualMemory->hVASpace, &pVas);
1619     if (status != NV_OK)
1620         return status;
1621 
1622     // Get DMA mapping info.
1623     pDmaMappingInfo = intermapGetDmaMapping(pVirtualMemory, dmaOffset, gpuMask);
1624     NV_ASSERT_OR_RETURN(pDmaMappingInfo != NULL, NV_ERR_INVALID_OBJECT_HANDLE);
1625     NvBool bPartialUnmap = dmaOffset != pDmaMappingInfo->DmaOffset || pParams->size != pDmaMappingInfo->pMemDesc->Size;
1626     NV_ASSERT_OR_RETURN(!bPartialUnmap || (gpuMask & (gpuMask - 1)) == 0, NV_ERR_INVALID_ARGUMENT);
1627     NV_ASSERT_OR_RETURN(!bPartialUnmap || !bIsIndirectPeer, NV_ERR_INVALID_ARGUMENT);
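    //
    // Note: (gpuMask & (gpuMask - 1)) == 0 holds only when at most one bit is
    // set, so the asserts above restrict partial unmaps to a single-GPU mask
    // and reject them for indirect peers.
    //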
1628 
1629     if (FLD_TEST_DRF(OS46, _FLAGS, _KERNEL_MAPPING, _ENABLE, pDmaMappingInfo->Flags))
1630     {
1631         NV_ASSERT_OR_RETURN(!bPartialUnmap, NV_ERR_INVALID_ARGUMENT);
1632 
1633         //
1634         // If a kernel cookie exists and the mapping is in sysmem, free the sysmem
1635         // mapping; for ADDR_FBMEM, _virtmemFreeKernelMapping determines whether a mapping was created.
1636         //
1637         if ((pDmaMappingInfo->KernelPriv != NULL) &&
1638             (memdescGetAddressSpace(pDmaMappingInfo->pMemDesc) == ADDR_SYSMEM))
1639         {
1640             memdescUnmapOld(pDmaMappingInfo->pMemDesc, NV_TRUE, 0,
1641                             pDmaMappingInfo->KernelVAddr[gpumgrGetSubDeviceInstanceFromGpu(gpumgrGetParentGPU(pGpu))],
1642                             pDmaMappingInfo->KernelPriv);
1643             pDmaMappingInfo->KernelPriv = NULL;
1644         }
1645         else if (memdescGetAddressSpace(memdescGetMemDescFromGpu(pDmaMappingInfo->pMemDesc, pGpu)) == ADDR_FBMEM)
1646         {
1647             _virtmemFreeKernelMapping(pGpu, pDmaMappingInfo);
1648         }
1649     }
1650 
1651     // if this was peer mapped context dma, remove it from P2P object
1652     if (RMCFG_CLASS_NV50_P2P && pDmaMappingInfo->bP2P)
1653     {
1654         NV_ASSERT_OR_RETURN(!bPartialUnmap, NV_ERR_INVALID_ARGUMENT);
1655         dmaFreeBar1P2PMapping_HAL(GPU_GET_DMA(pGpu), pDmaMappingInfo);
1656     }
1657 
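    //
    // Partial unmap layout (illustrative):
    //
    //   existing mapping:  [DmaOffset ..................... DmaOffset + Size)
    //   unmap request:               [dmaOffset ... dmaOffset + pParams->size)
    //   result:            [  left  ][       unmapped      ][     right      ]
    //
    // Any non-empty left/right remainder is rebuilt as its own mapping below
    // and re-registered once the original mapping has been deleted.
    //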
1658     if (dmaOffset > pDmaMappingInfo->DmaOffset)
1659     {
1660         NV_ASSERT_OK_OR_GOTO(status,
1661             intermapCreateDmaMapping(pClient, pVirtualMemory, &pDmaMappingInfoLeft, pDmaMappingInfo->Flags),
1662             failed);
1663 
1664         pDmaMappingInfoLeft->DmaOffset          = pDmaMappingInfo->DmaOffset;
1665         pDmaMappingInfoLeft->bP2P               = pDmaMappingInfo->bP2P;
1666         pDmaMappingInfoLeft->addressTranslation = pDmaMappingInfo->addressTranslation;
1667         pDmaMappingInfoLeft->mapPageSize        = pDmaMappingInfo->mapPageSize;
1668 
1669         NV_ASSERT_OK_OR_GOTO(status,
1670             memdescCreateSubMem(&pDmaMappingInfoLeft->pMemDesc, pDmaMappingInfo->pMemDesc, pGpu,
1671                                 pDmaMappingInfoLeft->DmaOffset - pDmaMappingInfo->DmaOffset,
1672                                 dmaOffset - pDmaMappingInfoLeft->DmaOffset),
1673             failed);
1674     }
1675 
1676     if (dmaOffset + pParams->size < pDmaMappingInfo->DmaOffset + pDmaMappingInfo->pMemDesc->Size)
1677     {
1678         NV_ASSERT_OK_OR_GOTO(status,
1679             intermapCreateDmaMapping(pClient, pVirtualMemory, &pDmaMappingInfoRight, pDmaMappingInfo->Flags),
1680             failed);
1681 
1682         pDmaMappingInfoRight->DmaOffset          = dmaOffset + pParams->size;
1683         pDmaMappingInfoRight->bP2P               = pDmaMappingInfo->bP2P;
1684         pDmaMappingInfoRight->addressTranslation = pDmaMappingInfo->addressTranslation;
1685         pDmaMappingInfoRight->mapPageSize        = pDmaMappingInfo->mapPageSize;
1686 
1687         NV_ASSERT_OK_OR_GOTO(status,
1688             memdescCreateSubMem(&pDmaMappingInfoRight->pMemDesc, pDmaMappingInfo->pMemDesc, pGpu,
1689                 pDmaMappingInfoRight->DmaOffset - pDmaMappingInfo->DmaOffset,
1690                 pDmaMappingInfo->DmaOffset + pDmaMappingInfo->pMemDesc->Size - pDmaMappingInfoRight->DmaOffset),
1691             failed);
1692     }
1693 
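    //
    // If either remainder exists, build a temporary mapping info covering only
    // the range being unmapped so that dmaFreeMap() below releases just that
    // range; otherwise the original mapping is unmapped in full.
    //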
1694     pDmaMappingInfoUnmap = pDmaMappingInfo;
1695     if (pDmaMappingInfoLeft != NULL || pDmaMappingInfoRight != NULL)
1696     {
1697         NV_ASSERT_OK_OR_GOTO(status,
1698             intermapCreateDmaMapping(pClient, pVirtualMemory, &pDmaMappingInfoUnmap, pDmaMappingInfo->Flags),
1699             failed);
1700 
1701         pDmaMappingInfoUnmap->DmaOffset          = dmaOffset;
1702         pDmaMappingInfoUnmap->bP2P               = pDmaMappingInfo->bP2P;
1703         pDmaMappingInfoUnmap->addressTranslation = pDmaMappingInfo->addressTranslation;
1704         pDmaMappingInfoUnmap->mapPageSize        = pDmaMappingInfo->mapPageSize;
1705         pDmaMappingInfoUnmap->gpuMask            = pDmaMappingInfo->gpuMask;
1706 
1707         NV_ASSERT_OK_OR_GOTO(status,
1708             memdescCreateSubMem(&pDmaMappingInfoUnmap->pMemDesc, pDmaMappingInfo->pMemDesc, pGpu,
1709                                 pDmaMappingInfoUnmap->DmaOffset - pDmaMappingInfo->DmaOffset,
1710                                 pParams->size),
1711             failed);
1712     }
1713 
1714     if (!pMemory->bRpcAlloc || gpuIsSplitVasManagementServerClientRmEnabled(pGpu))
1715     {
1716         // free mapping in context dma
1717         dmaFreeMap(pGpu, GPU_GET_DMA(pGpu), pVas, pVirtualMemory, pDmaMappingInfoUnmap, pParams->flags);
1718 
1719         if ((memdescGetAddressSpace(memdescGetMemDescFromGpu(pDmaMappingInfo->pMemDesc, pGpu)) == ADDR_FBMEM) &&
1720              bIsIndirectPeer)
1721         {
1722             memdescUnmapIommu(pDmaMappingInfo->pMemDesc, pGpu->busInfo.iovaspaceId);
1723         }
1724     }
1725 
1726     // free memory descriptor
1727     memdescFree(pDmaMappingInfo->pMemDesc);
1728     memdescDestroy(pDmaMappingInfo->pMemDesc);
1729     pDmaMappingInfo->pMemDesc = NULL;
1730 
1731     // delete client dma mapping
1732     intermapDelDmaMapping(pClient, pVirtualMemory, pDmaMappingInfo->DmaOffset, gpuMask);
1733 
1734     if (pDmaMappingInfoLeft != NULL)
1735     {
1736         NV_ASSERT_OK_OR_GOTO(status,
1737             intermapRegisterDmaMapping(pClient, pVirtualMemory, pDmaMappingInfoLeft,
1738                                        pDmaMappingInfoLeft->DmaOffset, gpuMask),
1739             failed);
1740         bDmaMappingInfoLeftRegistered = NV_TRUE;
1741     }
1742 
1743     if (pDmaMappingInfoRight != NULL)
1744     {
1745         NV_ASSERT_OK_OR_GOTO(status,
1746             intermapRegisterDmaMapping(pClient, pVirtualMemory, pDmaMappingInfoRight,
1747                                        pDmaMappingInfoRight->DmaOffset, gpuMask),
1748             failed);
1749         bDmaMappingInfoRightRegistered = NV_TRUE;
1750     }
1751 
1752 failed:
1753     if (pDmaMappingInfoUnmap != NULL && pDmaMappingInfoUnmap != pDmaMappingInfo)
1754     {
1755         memdescFree(pDmaMappingInfoUnmap->pMemDesc);
1756         memdescDestroy(pDmaMappingInfoUnmap->pMemDesc);
1757         intermapFreeDmaMapping(pDmaMappingInfoUnmap);
1758     }
1759 
1760     if (status != NV_OK)
1761     {
1762         if (pDmaMappingInfoLeft != NULL)
1763         {
1764             if (bDmaMappingInfoLeftRegistered)
1765                 intermapDelDmaMapping(pClient, pVirtualMemory, pDmaMappingInfoLeft->DmaOffset, gpuMask);
1766             else
1767                 intermapFreeDmaMapping(pDmaMappingInfoLeft);
1768         }
1769 
1770         if (pDmaMappingInfoRight != NULL)
1771         {
1772             if (bDmaMappingInfoRightRegistered)
1773                 intermapDelDmaMapping(pClient, pVirtualMemory, pDmaMappingInfoRight->DmaOffset, gpuMask);
1774             else
1775                 intermapFreeDmaMapping(pDmaMappingInfoRight);
1776         }
1777 
1778     }
1779     //
1780     // vGPU:
1781     //
1782     // Since vGPU does all real hardware management in the
1783     // host, if we are in the guest OS (where IS_VIRTUAL(pGpu) is true),
1784     // issue an RPC to the host to perform the hardware update.
1785     //
1786     // vGPU doesn't understand subdevice handles.  But clients map memory
1787     // with subdevice handles and we don't want that to fail on vGPU.
1788     // Currently, we just pass down the broadcast device handle to the host
1789     // (which should be equivalent if SLI is disabled).  This will need to
1790     // be revisited if vGPU ever supports SLI.
1791     //
1792     NV_ASSERT((!IS_VIRTUAL(pGpu) && !IS_GSP_CLIENT(pGpu)) || !IsSLIEnabled(pGpu));
1793 
1794     if (pMemory->bRpcAlloc &&
1795         (NV01_NULL_OBJECT != hMemory) &&
1796         (resGetRefCount(staticCast(pVirtualMemory, RsResource)) || (hVirtualMem == hMemory)))
1797     {
1798         //
1799         // resGetRefCount(staticCast(pVirtualMemory, RsResource)) is zero when we get here from
1800         // RmFree -> clientFreeResourceTree_IMPL -> clientFreeResource_IMPL -> __nvoc_objDelete.
1801         //
1802         // memDestruct_IMPL -> CliDelDeviceMemory (i.e. hVirtualMem == hMemory) -> RmUnmapMemoryDma is a valid path
1803         // since RPC_FREE is issued later in memDestruct_IMPL.
1804         //
1805         // ifbDestruct_IMPL -> RmUnmapMemoryDma should not issue RPC_UNMAP_MEMORY_DMA since RPC_FREE was already invoked earlier in the call stack.
1806         //
1807         NV_RM_RPC_UNMAP_MEMORY_DMA(pGpu, hClient, hBroadcastDevice, hVirtualMem, hMemory, 0, dmaOffset, status);
1808     }
1809 
1810     return status;
1811 }
1812 
1813 /*!
1814  * @brief return address and size of a VirtualMemory object
1815  */
1816 void virtmemGetAddressAndSize_IMPL
1817 (
1818     VirtualMemory *pVirtualMemory,
1819     NvU64 *pVAddr,
1820     NvU64 *pSize
1821 )
1822 {
1823     MEMORY_DESCRIPTOR *pMemDesc = staticCast(pVirtualMemory, Memory)->pMemDesc;
1824 
1825     *pVAddr = memdescGetPhysAddr(pMemDesc, AT_GPU_VA, 0);
1826     *pSize  = memdescGetSize(pMemDesc);
1827 }
1828