1 /*
2  * SPDX-FileCopyrightText: Copyright (c) 2020-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
3  * SPDX-License-Identifier: MIT
4  *
5  * Permission is hereby granted, free of charge, to any person obtaining a
6  * copy of this software and associated documentation files (the "Software"),
7  * to deal in the Software without restriction, including without limitation
8  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
9  * and/or sell copies of the Software, and to permit persons to whom the
10  * Software is furnished to do so, subject to the following conditions:
11  *
12  * The above copyright notice and this permission notice shall be included in
13  * all copies or substantial portions of the Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
21  * DEALINGS IN THE SOFTWARE.
22  */
23 
24 #include "mem_mgr/video_mem.h"
25 #include "gpu/mem_mgr/mem_desc.h"
26 #include "gpu/mem_mgr/heap.h"
27 #include "gpu/mem_mgr/mem_mgr.h"
28 #include "gpu/mem_mgr/mem_utils.h"
29 #include "gpu/mem_sys/kern_mem_sys.h"
30 #include "kernel/gpu/mig_mgr/kernel_mig_manager.h"
31 #include "vgpu/rpc.h"
32 #include "core/locks.h"
33 #include "kernel/gpu/rc/kernel_rc.h"
34 #include "diagnostics/gpu_acct.h"
35 #include "Nvcm.h"
36 #include "gpu/bus/third_party_p2p.h"
37 #include "gpu/bus/kern_bus.h"
38 
39 #include "class/cl0040.h" // NV01_MEMORY_LOCAL_USER
40 
/*!
 * _vidmemQueryAlignment
 *
 * @brief
 *     Returns the size and alignment for this allocation.
 *
 * @param[in]  pAllocRequest  Pointer to the MEMORY_ALLOCATION_REQUEST.
 * @param[out] pSize          The size aligned to the HW/requested alignment.
 * @param[out] pAlign         The alignment required for this allocation.
 *
 * @returns
 *      NV_OK Operation is successful.
 */
55 static NV_STATUS
56 _vidmemQueryAlignment
57 (
58     MEMORY_ALLOCATION_REQUEST *pAllocRequest,
59     NvU64                     *pSize,
60     NvU64                     *pAlign
61 )
62 {
63     NV_MEMORY_ALLOCATION_PARAMS *pAllocData     = pAllocRequest->pUserParams;
64     OBJGPU                      *pGpu           = pAllocRequest->pGpu;
65     MemoryManager               *pMemoryManager = GPU_GET_MEMORY_MANAGER(pGpu);
66     NV_STATUS                    rmStatus       = NV_OK;
67     NvU64                        size           = pAllocData->size;
68     NvU64                        pageSize       = 0;
69     NvU64                        align          = 0;
70     NvU32                        retAttr        = pAllocData->attr;
71     NvU32                        retAttr2       = pAllocData->attr2;
72 
73     NV_ASSERT_OR_RETURN((NULL != pSize) && (NULL != pAlign),
74                       NV_ERR_INVALID_ARGUMENT);
75 
76     if ((pAllocData->flags & NVOS32_ALLOC_FLAGS_ALIGNMENT_HINT) ||
77         (pAllocData->flags & NVOS32_ALLOC_FLAGS_ALIGNMENT_FORCE))
78         align = pAllocData->alignment;
79     else
80         align = RM_PAGE_SIZE;
81 
82     // Fetch RM page size
83     pageSize = memmgrDeterminePageSize(pMemoryManager, pAllocRequest->hClient, size, pAllocData->format,
84                                        pAllocData->flags, &retAttr, &retAttr2);
85 
86     if (pageSize == 0)
87     {
88         rmStatus = NV_ERR_INVALID_STATE;
89         NV_ASSERT_OK_FAILED("memmgrDeterminePageSize", rmStatus);
90         return rmStatus;
91     }
92 
93     // Fetch memory alignment
94     NV_ASSERT_OK_OR_RETURN(memmgrAllocDetermineAlignment_HAL(pGpu, pMemoryManager,
95                     &size, &align, 0, pAllocData->flags, retAttr, retAttr2, 0));
96 
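    // The determine-alignment HAL tracks alignment in (alignment - 1) form,
    // hence the +1 conversion back to a byte alignment below.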
97     *pSize = size;
98     *pAlign = align + 1;
99 
100     return rmStatus;
101 }
102 
103 /*!
104  * _vidmemPmaAllocate
105  *
106  * @brief
107  *     Allocates memory on vidmem through PMA.
108  *
109  * @param[in] pHeap         Pointer to Heap object
110  * @param[in] pAllocRequest Pointer to the MEMORY_ALLOCATION_REQUEST.
111  *
112  * @returns
113  *      NV_OK    Operation is successful
114  *      NV_ERR_* Error code in case of errors.
115  */
116 static NV_STATUS
117 _vidmemPmaAllocate
118 (
119     Heap                      *pHeap,
120     MEMORY_ALLOCATION_REQUEST *pAllocRequest
121 )
122 {
123     NV_MEMORY_ALLOCATION_PARAMS *pAllocData     = pAllocRequest->pUserParams;
124     OBJGPU                      *pGpu           = pAllocRequest->pGpu;
125     MemoryManager               *pMemoryManager = GPU_GET_MEMORY_MANAGER(pGpu);
126     PMA                         *pPma           = &pHeap->pmaObject;
127     NvU64                        size           = 0;
128     NvU32                        pageCount;
129     NvU32                        pmaInfoSize;
130     NvU64                        pageSize;
131     NV_STATUS                    status;
132     NvU64                        sizeAlign    = 0;
133     PMA_ALLOCATION_OPTIONS       allocOptions = {0};
134     NvBool                       bContig;
135     NvU32                        subdevInst   = gpumgrGetSubDeviceInstanceFromGpu(pGpu);
136     NvBool                       bCompressed  = !FLD_TEST_DRF(OS32, _ATTR, _COMPR,
137                                                   _NONE, pAllocData->attr);
138     KernelBus                   *pKernelBus = GPU_GET_KERNEL_BUS(pGpu);
139     NvU32                        gfid;
140     NvU32                        pmaConfig    = PMA_QUERY_NUMA_ENABLED;
141 
142     status = pmaQueryConfigs(pPma, &pmaConfig);
143     NV_ASSERT(status == NV_OK);
144 
    //
    // On NUMA platforms, contiguous memory is allocated from the kernel at
    // page-order granularity, which can waste memory when the size is not
    // naturally aligned to a page order. Prefer non-contiguous allocation
    // when clients are okay with NON_CONTIG.
    //
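    // Illustrative example: a 3 MB contiguous request served at page-order
    // granularity rounds up to the next power-of-two order (4 MB), wasting
    // 1 MB, whereas a non-contig allocation hands back exactly 3 MB of pages.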
151     if ((status == NV_OK) && (pmaConfig & PMA_QUERY_NUMA_ENABLED))
152     {
153         bContig =
154             !FLD_TEST_DRF(OS32, _ATTR, _PHYSICALITY,
155                                 _ALLOW_NONCONTIGUOUS, pAllocData->attr) &&
156             !FLD_TEST_DRF(OS32, _ATTR, _PHYSICALITY,
157                           _NONCONTIGUOUS, pAllocData->attr);
158     }
159     else
160     {
161         bContig = !FLD_TEST_DRF(OS32, _ATTR,
162                                 _PHYSICALITY, _NONCONTIGUOUS,
163                                 pAllocData->attr);
164     }
165 
166     // LOCK: acquire device lock
167     status = rmDeviceGpuLocksAcquire(pGpu, GPUS_LOCK_FLAGS_NONE,
168                                      RM_LOCK_MODULES_MEM_PMA);
169     NV_ASSERT_OR_RETURN(status == NV_OK, status);
170 
171     if (bCompressed &&
172         (vgpuGetCallingContextGfid(pGpu, &gfid) == NV_OK) &&
173         pKernelBus->bar1[gfid].bStaticBar1Enabled)
174     {
175         // Override the attr to use 2MB page size
176         pAllocData->attr = FLD_SET_DRF(OS32, _ATTR, _PAGE_SIZE, _HUGE, pAllocData->attr);
177 
178         NV_PRINTF(LEVEL_INFO,
179                   "Overrode the page size to 2MB on this compressed vidmem for the static bar1\n");
180     }
181 
182     NV_PRINTF(LEVEL_INFO, "PMA input\n");
183     NV_PRINTF(LEVEL_INFO, "          Owner: 0x%x\n", pAllocData->owner);
184     NV_PRINTF(LEVEL_INFO, "        hMemory: 0x%x\n", pAllocRequest->hMemory);
185     NV_PRINTF(LEVEL_INFO, "           Type: 0x%x\n", pAllocData->type);
186     NV_PRINTF(LEVEL_INFO, "          Flags: 0x%x\n", pAllocData->flags);
187     NV_PRINTF(LEVEL_INFO, "          Begin: 0x%08llx\n", pAllocData->rangeLo);
188     NV_PRINTF(LEVEL_INFO, "            End: 0x%08llx\n", pAllocData->rangeHi);
189     NV_PRINTF(LEVEL_INFO, "         Height: 0x%x\n", pAllocData->height);
190     NV_PRINTF(LEVEL_INFO, "          Width: 0x%x\n", pAllocData->width);
191     NV_PRINTF(LEVEL_INFO, "          Pitch: 0x%x\n", pAllocData->pitch);
192     NV_PRINTF(LEVEL_INFO, "           Size: 0x%08llx\n", pAllocData->size);
193     NV_PRINTF(LEVEL_INFO, "      Alignment: 0x%08llx\n",
194               pAllocData->alignment);
195     NV_PRINTF(LEVEL_INFO, "         Offset: 0x%08llx\n", pAllocData->offset);
196     NV_PRINTF(LEVEL_INFO, "           Attr: 0x%x\n", pAllocData->attr);
197     NV_PRINTF(LEVEL_INFO, "          Attr2: 0x%x\n", pAllocData->attr2);
198     NV_PRINTF(LEVEL_INFO, "         Format: 0x%x\n", pAllocData->format);
199     NV_PRINTF(LEVEL_INFO, "      ComprCovg: 0x%x\n", pAllocData->comprCovg);
200     NV_PRINTF(LEVEL_INFO, "      ZCullCovg: 0x%x\n", pAllocData->zcullCovg);
201     NV_PRINTF(LEVEL_INFO, "     CtagOffset: 0x%x\n", pAllocData->ctagOffset);
202     NV_PRINTF(LEVEL_INFO, "       hVASpace: 0x%x\n", pAllocData->hVASpace);
203 
204     // Get the page size returned by RM.
205     pageSize = stdmemQueryPageSize(pMemoryManager, pAllocRequest->hClient, pAllocData);
206     NV_ASSERT_OR_RETURN(pageSize != 0, NV_ERR_INVALID_STATE);
207 
208     if (pageSize == RM_PAGE_SIZE)
209     {
210         //
211         // TODO Remove this after the suballocator is in place
212         // Minimum granularity of PMA is 64K.
213         //
214         pageSize = RM_PAGE_SIZE_64K;
215     }
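    //
    // Consequence of the 64K floor (illustrative): a 4K request is backed by
    // a full 64K PMA page, so pageCount below is computed against 64K pages.
    //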
216 
217     // Get the alignment returned by RM without actually allocating.
218     status = _vidmemQueryAlignment(pAllocRequest, &size, &sizeAlign);
219 
    //
    // Pass the "turn blacklist off" flag to the PMA allocation API.
    // No need for an FB check since PMA only manages FB.
    // Bug 2451834: gpuCheckPageRetirementSupport should not be called outside
    // the RM lock.
    //
226     if (pGpu->getProperty(pGpu, PDB_PROP_GPU_ALLOW_PAGE_RETIREMENT) &&
227            gpuCheckPageRetirementSupport_HAL(pGpu) &&
228            FLD_TEST_DRF(OS32, _ATTR2, _BLACKLIST, _OFF, pAllocData->attr2))
229     {
230         allocOptions.flags |= PMA_ALLOCATE_TURN_BLACKLIST_OFF;
231     }
232 
233     // UNLOCK: release device lock
234     rmDeviceGpuLocksRelease(pGpu, GPUS_LOCK_FLAGS_NONE, NULL);
235 
236     NV_ASSERT_OR_RETURN(NV_OK == status, status);
237 
238     // RM allocations are always pinned.
239     allocOptions.flags |= PMA_ALLOCATE_PINNED;
240 
241     if (pAllocData->flags & NVOS32_ALLOC_FLAGS_PERSISTENT_VIDMEM)
242     {
243         allocOptions.flags |= PMA_ALLOCATE_PERSISTENT;
244     }
245 
246     // Check for VPR region.
247     if (pAllocData->flags & NVOS32_ALLOC_FLAGS_PROTECTED)
248     {
249         allocOptions.flags |= PMA_ALLOCATE_PROTECTED_REGION;
250     }
251 
252     // Check memory alloc direction.
253     if (pAllocData->flags & NVOS32_ALLOC_FLAGS_FORCE_REVERSE_ALLOC)
254     {
255         allocOptions.flags |= PMA_ALLOCATE_REVERSE_ALLOC;
256     }
257 
258     // Fixed address allocations.
259     if (pAllocData->flags & NVOS32_ALLOC_FLAGS_FIXED_ADDRESS_ALLOCATE)
260     {
261         NvU64 offsetAlign = NV_MAX(sizeAlign, pageSize);
262 
263         allocOptions.flags |= PMA_ALLOCATE_SPECIFY_ADDRESS_RANGE;
264         allocOptions.physBegin = NV_ALIGN_DOWN(pAllocData->offset, offsetAlign);
265         allocOptions.physEnd   = NV_ALIGN_UP(allocOptions.physBegin + size, offsetAlign) - 1;
266         size = allocOptions.physEnd - allocOptions.physBegin + 1;
267     }
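    // Worked example of the rounding above (illustrative): offset = 0x180000,
    // size = 0x30000, offsetAlign = 0x10000 gives physBegin = 0x180000 and
    // physEnd = 0x1AFFFF, leaving size = physEnd - physBegin + 1 = 0x30000.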
268 
269     // Range based allocations.
270     if (pAllocData->flags & NVOS32_ALLOC_FLAGS_USE_BEGIN_END)
271     {
272         allocOptions.flags |= PMA_ALLOCATE_SPECIFY_ADDRESS_RANGE;
273         allocOptions.physBegin = NV_ALIGN_UP(pAllocData->rangeLo, ((NvU64)pageSize));
274         allocOptions.physEnd   = NV_ALIGN_DOWN(pAllocData->rangeHi, ((NvU64)pageSize));
275         allocOptions.physEnd = (allocOptions.physEnd > 0) ?
276                                           allocOptions.physEnd - 1 : 0;
277         NV_ASSERT_OR_RETURN(allocOptions.physBegin <= allocOptions.physEnd,
278                          NV_ERR_INVALID_ARGUMENT);
279     }
280 
281     // Skip scrubber, used only by scrubber construction path
282     if (pAllocData->internalflags & NVOS32_ALLOC_INTERNAL_FLAGS_SKIP_SCRUB)
283     {
284         allocOptions.flags |= PMA_ALLOCATE_NO_ZERO;
285     }
286 
287     // Pass along client requested alignment
288     allocOptions.flags |= PMA_ALLOCATE_FORCE_ALIGNMENT;
289     allocOptions.alignment = NV_MAX(sizeAlign, pageSize);
290 
291 
292     // Get the number of pages to be allocated by PMA
293     pageCount = (NvU32) NV_DIV_AND_CEIL(size, pageSize);
294 
295 retry_alloc:
296     // Evaluate the size of the PMA_ALLOC_INFO struct.
297     if (bContig)
298     {
299         allocOptions.flags |= PMA_ALLOCATE_CONTIGUOUS;
300         pmaInfoSize = sizeof(PMA_ALLOC_INFO);
301     }
302     else
303     {
304         pmaInfoSize = sizeof(PMA_ALLOC_INFO) + ((pageCount - 1) * sizeof(NvU64));
305     }
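    //
    // PMA_ALLOC_INFO ends in a one-entry pageArray, so a contiguous
    // allocation needs only the base struct while a non-contiguous one is
    // grown by (pageCount - 1) extra page addresses.
    //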
306 
307     // Alloc the tracking structure and store the values in it.
308     pAllocRequest->pPmaAllocInfo[subdevInst] = portMemAllocNonPaged(pmaInfoSize);
309     NV_ASSERT_OR_RETURN(NULL != pAllocRequest->pPmaAllocInfo[subdevInst], NV_ERR_NO_MEMORY);
310     portMemSet(pAllocRequest->pPmaAllocInfo[subdevInst], 0, pmaInfoSize);
311 
312     pAllocRequest->pPmaAllocInfo[subdevInst]->pageSize  = pageSize;
313     pAllocRequest->pPmaAllocInfo[subdevInst]->pageCount = pageCount;
314     pAllocRequest->pPmaAllocInfo[subdevInst]->allocSize = ((NvU64) pageCount) * pageSize;
315     pAllocRequest->pPmaAllocInfo[subdevInst]->bContig   = bContig;
316     pAllocRequest->pPmaAllocInfo[subdevInst]->refCount  = 1;
317     pAllocRequest->pPmaAllocInfo[subdevInst]->flags     = allocOptions.flags;
318 
319     NV_PRINTF(LEVEL_INFO, "\nNVRM:  Size requested: 0x%llx bytes\n", size);
320     NV_PRINTF(LEVEL_INFO, "       PageSize: 0x%llx bytes\n", pageSize);
321     NV_PRINTF(LEVEL_INFO, "      PageCount: 0x%x\n", pageCount);
322     NV_PRINTF(LEVEL_INFO, "    Actual Size: 0x%llx\n",
323               pAllocRequest->pPmaAllocInfo[subdevInst]->allocSize);
324     NV_PRINTF(LEVEL_INFO, "     Contiguous:  %s\n", bContig ? "YES" : "NO");
325 
326     // Get the allocation from PMA.
327     status = pmaAllocatePages(pPma, pageCount, pageSize, &allocOptions,
328                               pAllocRequest->pPmaAllocInfo[subdevInst]->pageArray);
329     if (NV_OK != status)
330     {
331         portMemFree(pAllocRequest->pPmaAllocInfo[subdevInst]);
332         pAllocRequest->pPmaAllocInfo[subdevInst] = NULL;
333 
334         if (bContig)
335         {
336             if (FLD_TEST_DRF(OS32, _ATTR, _PHYSICALITY, _ALLOW_NONCONTIGUOUS, pAllocData->attr) ||
337                 (FLD_TEST_DRF(OS32, _ATTR, _PHYSICALITY, _DEFAULT, pAllocData->attr) &&
338                    pHeap->getProperty(pHeap, PDB_PROP_HEAP_NONCONTIG_ALLOC_BY_DEFAULT)))
339             {
340                 bContig = NV_FALSE;
341                 allocOptions.flags &= ~PMA_ALLOCATE_CONTIGUOUS;
342                 NV_PRINTF(LEVEL_INFO,
343                           "pmaAllocatePages failed -- retrying as noncontiguous\n");
344                 goto retry_alloc;
345             }
346         }
347 
348         NV_PRINTF(LEVEL_WARNING, "pmaAllocatePages failed (%x)\n", status);
349     }
350     else
351     {
352         pAllocData->attr = (bContig ?
353             FLD_SET_DRF(OS32, _ATTR, _PHYSICALITY, _CONTIGUOUS,    pAllocData->attr) :
354             FLD_SET_DRF(OS32, _ATTR, _PHYSICALITY, _NONCONTIGUOUS, pAllocData->attr));
355     }
356 
357     return status;
358 }
359 
360 /*!
361  * vidmemPmaFree
362  *
363  * @brief
364  *     Frees the memory allocated by PMA
365  *
366  * @param[in] pGpu           Pointer to OBJGPU
367  * @param[in] pHeap          Pointer to Heap object
368  * @param[in] pPmaAllocInfo  Pointer to the PMA allocation tracking structure
 * @param[in] flags          Flags to modify PMA behavior
370  *
371  * @returns NONE
372  */
373 void
374 vidmemPmaFree
375 (
376     OBJGPU         *pGpu,
377     Heap           *pHeap,
378     PMA_ALLOC_INFO *pPmaAllocInfo,
379     NvU32           flags
380 )
381 {
382     PMA   *pPma  = &pHeap->pmaObject;
383     NvU32 pmaFreeFlags = flags;
384 
385     NV_ASSERT_OR_RETURN_VOID(NULL != pPmaAllocInfo);
386 
387     // Decrement the refcount and free only in case of zero references.
388     pPmaAllocInfo->refCount--;
389     if (pPmaAllocInfo->refCount != 0)
390     {
391         return;
392     }
393 
394     //
395     // Skip the scrubber if the memory is allocated with scrubber skipped.
396     // The only use case is scrubber internal allocations.
397     //
398     if (pPmaAllocInfo->flags & PMA_ALLOCATE_NO_ZERO)
399     {
400         pmaFreeFlags |= PMA_FREE_SKIP_SCRUB;
401     }
402 
403     if (pPmaAllocInfo->bContig)
404     {
405         pmaFreePages(pPma, pPmaAllocInfo->pageArray, 1,
406                      pPmaAllocInfo->allocSize, pmaFreeFlags);
407     }
408     else
409     {
410         pmaFreePages(pPma, pPmaAllocInfo->pageArray,
411                      pPmaAllocInfo->pageCount,
412                      pPmaAllocInfo->pageSize, pmaFreeFlags);
413     }
    portMemFree(pPmaAllocInfo);
416 }
417 
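/*!
 * vidmemGetHeap
 *
 * @brief
 *     Returns the heap backing this device's vidmem allocations: the
 *     per-device suballocator when bSubheap is set, the device's memory
 *     partition heap when MIG is in use, and the global heap otherwise.
 *
 * @param[in] pGpu      Pointer to OBJGPU
 * @param[in] pDevice   Pointer to Device
 * @param[in] bSubheap  NV_TRUE to select the PHYS_MEM_SUBALLOCATOR heap
 *
 * @returns Heap pointer on success, NULL on failure.
 */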
418 Heap*
419 vidmemGetHeap
420 (
421     OBJGPU  *pGpu,
422     Device  *pDevice,
423     NvBool   bSubheap
424 )
425 {
426     MemoryManager *pMemoryManager = GPU_GET_MEMORY_MANAGER(pGpu);
427     NV_STATUS      status         = NV_OK;
428 
429     if (bSubheap)
430     {
431         Heap *pHeap = memmgrGetDeviceSuballocator(pMemoryManager, bSubheap);
432 
433         NV_CHECK_OR_RETURN(LEVEL_ERROR, pHeap != NULL && pHeap->heapType == HEAP_TYPE_PHYS_MEM_SUBALLOCATOR, NULL);
434         return pHeap;
435     }
436 
437     if (IS_MIG_IN_USE(pGpu))
438     {
439         KernelMIGManager *pKernelMIGManager = GPU_GET_KERNEL_MIG_MANAGER(pGpu);
440         Heap *pMemoryPartitionHeap = NULL;
441 
442         status = kmigmgrGetMemoryPartitionHeapFromDevice(pGpu, pKernelMIGManager, pDevice,
443                                                          &pMemoryPartitionHeap);
444         if (status == NV_OK)
445         {
446             if (pMemoryPartitionHeap != NULL)
447                 return pMemoryPartitionHeap;
448         }
449         else
450         {
451             NV_PRINTF(LEVEL_ERROR,
452                 "failed to get memory partition heap for hClient = 0x%x, hDevice = 0x%x\n",
453                 RES_GET_CLIENT_HANDLE(pDevice), RES_GET_HANDLE(pDevice));
454             return NULL;
455         }
456     }
457 
458     return MEMORY_MANAGER_GET_HEAP(pMemoryManager);
459 }
460 
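/*!
 * vidmemCopyConstruct
 *
 * @brief
 *     Copy constructor for VideoMemory. No new memory is allocated; the
 *     source allocation's heap is referenced for each GPU's memdesc.
 *
 * @returns NV_OK on success.
 */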
461 static NV_STATUS
462 vidmemCopyConstruct
463 (
464     VideoMemory                     *pVideoMemory,
465     CALL_CONTEXT                    *pCallContext,
466     RS_RES_ALLOC_PARAMS_INTERNAL    *pParams
467 )
468 {
469     Memory    *pMemorySrc            = dynamicCast(pParams->pSrcRef->pResource, Memory);
470     OBJGPU    *pGpu                  = pMemorySrc->pGpu;
471     NV_STATUS  status;
472 
473     NV_ASSERT_OR_RETURN(!memdescGetCustomHeap(pMemorySrc->pMemDesc), NV_ERR_INVALID_ARGUMENT);
474 
475     SLI_LOOP_START(SLI_LOOP_FLAGS_BC_ONLY)
476         MEMORY_DESCRIPTOR *pSrcMemDesc = memdescGetMemDescFromGpu(pMemorySrc->pMemDesc, pGpu);
477         status = heapReference(pGpu, pSrcMemDesc->pHeap, pMemorySrc->HeapOwner,
478                                pSrcMemDesc);
479         NV_ASSERT(status == NV_OK);
480     SLI_LOOP_END
481 
482     return NV_OK;
483 }
484 
485 /*!
486  * vidmemConstruct
487  *
488  * @brief
489  *     This routine provides common allocation services used by the
490  *     following heap allocation functions:
491  *       NVOS32_FUNCTION_ALLOC_SIZE
492  *       NVOS32_FUNCTION_ALLOC_SIZE_RANGE
493  *       NVOS32_FUNCTION_ALLOC_TILED_PITCH_HEIGHT
494  *
495  * @param[in]  pVideoMemory      Pointer to VideoMemory object
496  * @param[in]  pCallContext      Pointer to the current CALL_CONTEXT.
497  * @param[in]  pParams           Pointer to the alloc params
498  *
499  * @return 'NV_OK'
500  *     Operation completed successfully.
501  * @return 'NV_ERR_NO_MEMORY'
502  *     There is not enough available memory to satisfy allocation request.
503  * @return 'NV_ERR_NOT_SUPPORTED'
504  *     Operation not supported on broken FB.
505  * @return 'NV_ERR_INSUFFICIENT_RESOURCES'
506  *     Not enough available resources to satisfy allocation request.
507  */
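/*
 * Illustrative only: a minimal client-side request that reaches this path.
 * The handles and the owner tag are hypothetical and fields not shown are
 * left at their defaults; the client allocates an object of class
 * NV01_MEMORY_LOCAL_USER under its device with parameters along these lines:
 *
 *     NV_MEMORY_ALLOCATION_PARAMS params = {0};
 *     params.owner = 0x56494452;                               // 'VIDR', arbitrary owner tag
 *     params.type  = NVOS32_TYPE_IMAGE;
 *     params.size  = 2 * 1024 * 1024;                          // 2 MB of vidmem
 *     params.attr  = DRF_DEF(OS32, _ATTR, _LOCATION, _VIDMEM); // physical FB allocation
 */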
508 NV_STATUS
509 vidmemConstruct_IMPL
510 (
511     VideoMemory                  *pVideoMemory,
512     CALL_CONTEXT                 *pCallContext,
513     RS_RES_ALLOC_PARAMS_INTERNAL *pParams
514 )
515 {
516     Memory                      *pMemory               = staticCast(pVideoMemory, Memory);
517     NV_MEMORY_ALLOCATION_PARAMS *pAllocData            = pParams->pAllocParams;
518     NvHandle                     hClient               = pCallContext->pClient->hClient;
519     NvHandle                     hParent               = pCallContext->pResourceRef->pParentRef->hResource;
520     MEMORY_ALLOCATION_REQUEST    allocRequest          = {0};
521     MEMORY_ALLOCATION_REQUEST   *pAllocRequest         = &allocRequest;
522     OBJGPU                      *pGpu                  = pMemory->pGpu;
523     MemoryManager               *pMemoryManager        = GPU_GET_MEMORY_MANAGER(pGpu);
524     Heap                        *pHeap;
525     NvBool                       bSubheap              = NV_FALSE;
526     MEMORY_DESCRIPTOR           *pTopLevelMemDesc      = NULL;
527     MEMORY_DESCRIPTOR           *pTempMemDesc          = NULL;
528     HWRESOURCE_INFO              hwResource;
529     RsClient                    *pRsClient             = pCallContext->pClient;
530     RsResourceRef               *pResourceRef          = pCallContext->pResourceRef;
531     RsResourceRef               *pDeviceRef;
532     Device                      *pDevice;
533     NvU32                        gpuCacheAttrib;
534     NvBool                       bIsPmaAlloc           = NV_FALSE;
535     NvU64                        sizeOut;
536     NvU64                        offsetOut;
537     NvU64                        offsetOutTemp;
538     NvBool                       bLockAcquired         = NV_FALSE;
539     NvU32                        attr                  = 0;
540     NvU32                        attr2                 = 0;
541     FB_ALLOC_INFO               *pFbAllocInfo          = NULL;
542     FB_ALLOC_PAGE_FORMAT        *pFbAllocPageFormat    = NULL;
543     NV_STATUS                    rmStatus              = NV_OK;
544 
545     NV_ASSERT_OK_OR_RETURN(
546         refFindAncestorOfType(pResourceRef, classId(Device), &pDeviceRef));
547 
548     pDevice = dynamicCast(pDeviceRef->pResource, Device);
549 
550     if (RS_IS_COPY_CTOR(pParams))
551     {
552         if (!rmDeviceGpuLockIsOwner(pGpu->gpuInstance) && !rmGpuLockIsOwner())
553         {
554             NV_ASSERT_OK_OR_GOTO(rmStatus,
555                                  rmDeviceGpuLocksAcquire(pGpu, GPUS_LOCK_FLAGS_NONE, RM_LOCK_MODULES_MEM),
556                                  done);
557 
558             bLockAcquired = NV_TRUE;
559         }
560 
561         rmStatus = vidmemCopyConstruct(pVideoMemory, pCallContext, pParams);
562         goto done;
563     }
564 
565     NV_CHECK_OK_OR_RETURN(LEVEL_WARNING, stdmemValidateParams(pGpu, hClient, pAllocData));
566     NV_CHECK_OR_RETURN(LEVEL_WARNING,
567                        DRF_VAL(OS32, _ATTR, _LOCATION, pAllocData->attr) == NVOS32_ATTR_LOCATION_VIDMEM &&
568                            !(pAllocData->flags & NVOS32_ALLOC_FLAGS_VIRTUAL),
569                        NV_ERR_INVALID_ARGUMENT);
570 
571     stdmemDumpInputAllocParams(pAllocData, pCallContext);
572 
573     bSubheap = FLD_TEST_DRF(OS32, _ATTR2, _ALLOCATE_FROM_SUBHEAP, _YES, pAllocData->attr2);
574     pHeap = vidmemGetHeap(pGpu, pDevice, bSubheap);
575     NV_CHECK_OR_RETURN(LEVEL_INFO, pHeap != NULL, NV_ERR_INVALID_STATE);
576 
577     if (gpuIsCCorApmFeatureEnabled(pGpu) &&
578         !FLD_TEST_DRF(OS32, _ATTR2, _MEMORY_PROTECTION, _UNPROTECTED, pAllocData->attr2))
579     {
580         pAllocData->flags |= NVOS32_ALLOC_FLAGS_PROTECTED;
581     }
582     else if (gpuIsCCFeatureEnabled(pGpu) &&
583              FLD_TEST_DRF(OS32, _ATTR2, _MEMORY_PROTECTION, _UNPROTECTED, pAllocData->attr2))
584     {
585         // CC-TODO: Remove this once non-CPR regions are created
586         NV_PRINTF(LEVEL_ERROR, "Non-CPR region not yet created\n");
587         NV_ASSERT_OR_RETURN(0, NV_ERR_INVALID_ARGUMENT);
588     }
589     else if (!gpuIsCCorApmFeatureEnabled(pGpu) &&
590              FLD_TEST_DRF(OS32, _ATTR2, _MEMORY_PROTECTION, _PROTECTED, pAllocData->attr2))
591     {
        NV_PRINTF(LEVEL_ERROR, "Protected memory not enabled but PROTECTED flag is set by client\n");
593         return NV_ERR_INVALID_ARGUMENT;
594     }
595 
596     pAllocRequest->classNum = NV01_MEMORY_LOCAL_USER;
597     pAllocRequest->pUserParams = pAllocData;
598     pAllocRequest->hMemory = pResourceRef->hResource;
599     pAllocRequest->hClient = hClient;
600     pAllocRequest->hParent = hParent;
601     pAllocRequest->pGpu = pGpu;
602     pAllocRequest->internalflags = NVOS32_ALLOC_INTERNAL_FLAGS_CLIENTALLOC;
603     pAllocRequest->pHwResource = &hwResource;
604 
605     // Unsure if we need to keep separate copies, but keeping old behavior for now.
606     sizeOut = pAllocData->size;
607     offsetOut = pAllocData->offset;
608 
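    //
    // Route to PMA only when PMA is initialized and this is not a subheap or
    // WPR allocation; a fixed-address request must additionally fall entirely
    // within a PMA-managed range.
    //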
609     bIsPmaAlloc = memmgrIsPmaInitialized(pMemoryManager) &&
610                   !bSubheap &&
611                   !(pAllocData->flags & NVOS32_ALLOC_FLAGS_WPR1) &&
612                   !(pAllocData->flags & NVOS32_ALLOC_FLAGS_WPR2) &&
613                   (!(pAllocData->flags & NVOS32_ALLOC_FLAGS_FIXED_ADDRESS_ALLOCATE) ||
614                       heapIsPmaManaged(pGpu, pHeap, pAllocData->offset, pAllocData->offset+pAllocData->size-1));
615 
616     // Scrub-on-free is not supported by heap. Make sure clients don't get unscrubbed allocations
617     NV_CHECK_OR_RETURN(LEVEL_WARNING,
618         !memmgrIsScrubOnFreeEnabled(pMemoryManager) || bIsPmaAlloc || bSubheap,
619         NV_ERR_INVALID_STATE);
620 
621     // Get the allocation from PMA if enabled.
622     if (bIsPmaAlloc)
623     {
624         SLI_LOOP_START(SLI_LOOP_FLAGS_BC_ONLY | SLI_LOOP_FLAGS_IGNORE_REENTRANCY)
625         pAllocRequest->pGpu = pGpu;
626         rmStatus = _vidmemPmaAllocate(vidmemGetHeap(pGpu, pDevice, NV_FALSE), pAllocRequest);
627         if (NV_OK != rmStatus)
628             SLI_LOOP_GOTO(done);
629         SLI_LOOP_END;
630     }
631 
632     if (RMCFG_FEATURE_RM_BASIC_LOCK_MODEL)
633     {
634         //
635         // Can't move locking up as PMA locks need to be taken first.
636         // Acquire the lock *only after* PMA is done allocating.
637         //
638         if (!rmDeviceGpuLockIsOwner(pGpu->gpuInstance) && !rmGpuLockIsOwner())
639         {
640             rmStatus = rmDeviceGpuLocksAcquire(pGpu, GPUS_LOCK_FLAGS_NONE,
641                                                RM_LOCK_MODULES_MEM);
642             NV_ASSERT_OR_GOTO(NV_OK == rmStatus, done);
643 
644             bLockAcquired = NV_TRUE;
645         }
646         else
647         {
648             NV_ASSERT(0);
649         }
650     }
651 
652     {
653         //
654         // If using thwap to generate an allocation failure here, fail the
655         // alloc right away.
656         //
657         KernelRc *pKernelRc = GPU_GET_KERNEL_RC(pGpu);
658         if (pKernelRc != NULL &&
659             !krcTestAllowAlloc(pGpu, pKernelRc,
660                                NV_ROBUST_CHANNEL_ALLOCFAIL_HEAP))
661         {
662             rmStatus = NV_ERR_INSUFFICIENT_RESOURCES;
663             goto done;
664         }
665     }
666 
    // Don't allow FB allocations if FB is broken, unless this is a virtual
    // allocation or we are running in L2-cache-only mode.
668     if (pGpu->getProperty(pGpu, PDB_PROP_GPU_BROKEN_FB) &&
669         !gpuIsCacheOnlyModeEnabled(pGpu))
670     {
671         NV_ASSERT_FAILED("Video memory requested despite BROKEN FB");
672         rmStatus = NV_ERR_NOT_SUPPORTED;
673         goto done;
674     }
675 
676     if (gpuIsDebuggerActive_HAL(pGpu))
677     {
678         // Bug 643431 - WAR for GR WFI timeouts when debugger is active
679         rmStatus = NV_ERR_BUSY_RETRY;
680         goto done;
681     }
682 
683     pFbAllocInfo = portMemAllocNonPaged(sizeof(FB_ALLOC_INFO));
684     NV_ASSERT_TRUE_OR_GOTO(rmStatus, pFbAllocInfo != NULL, NV_ERR_NO_MEMORY, done);
685 
686     pFbAllocPageFormat = portMemAllocNonPaged(sizeof(FB_ALLOC_PAGE_FORMAT));
687     NV_ASSERT_TRUE_OR_GOTO(rmStatus, pFbAllocPageFormat != NULL, NV_ERR_NO_MEMORY, done);
688 
689     // Call heapAlloc to get memory.
690     if (gpumgrGetBcEnabledStatus(pGpu))
691     {
692         MEMORY_DESCRIPTOR *pPrev = NULL;
693 
        // vGPU doesn't run in SLI, so there is no need to set subheap flags in the memdesc.
695         NV_ASSERT(!bSubheap);
696 
697         // Create dummy top level memdesc
698         rmStatus = memdescCreate(&pTopLevelMemDesc, pGpu, RM_PAGE_SIZE, 0,
699             NV_TRUE,
700             ADDR_FBMEM,
701             NV_MEMORY_UNCACHED,
702             MEMDESC_FLAGS_DUMMY_TOPLEVEL);
703         if (rmStatus != NV_OK)
704             goto done;
705         pPrev = pTopLevelMemDesc;
706         pTopLevelMemDesc->_subDeviceAllocCount = gpumgrGetSubDeviceCountFromGpu(pGpu); // very important to have this here
707         pTopLevelMemDesc->_flags |=  MEMDESC_FLAGS_ALLOC_PER_SUBDEVICE;
708 
709         offsetOutTemp = ~((NvU64)0);
710         offsetOut = 0;
711         sizeOut = 0;
712         SLI_LOOP_START(SLI_LOOP_FLAGS_BC_ONLY | SLI_LOOP_FLAGS_IGNORE_REENTRANCY)
713         {
714             // Call heapAlloc to get memory.
            pAllocRequest->pMemDesc = NULL; // must be NULL so the alloc path creates a fresh memdesc
716 
717             portMemSet(pFbAllocInfo, 0, sizeof(FB_ALLOC_INFO));
718             portMemSet(pFbAllocPageFormat, 0, sizeof(FB_ALLOC_PAGE_FORMAT));
719             pFbAllocInfo->pageFormat = pFbAllocPageFormat;
720 
721             memUtilsInitFBAllocInfo(pAllocRequest->pUserParams, pFbAllocInfo, hClient, hParent);
722 
723             rmStatus = memmgrAllocResources(pGpu, pMemoryManager, pAllocRequest, pFbAllocInfo);
724             if (rmStatus != NV_OK)
725                 SLI_LOOP_GOTO(done);
726 
727             rmStatus = vidmemAllocResources(pGpu, pMemoryManager, pAllocRequest, pFbAllocInfo,
728                                             vidmemGetHeap(pGpu, pDevice, NV_FALSE));
729             if (rmStatus != NV_OK)
730                 SLI_LOOP_GOTO(done);
731 
732             NV_ASSERT(pAllocRequest->pMemDesc);
733 
            //
            // Mirror the contiguity flags, size and alignment of the
            // heapAlloc'ed subdevice memdesc onto the dummy top-level memdesc
            // we created.
            //
738             pTopLevelMemDesc->Alignment  = pAllocRequest->pMemDesc->Alignment;
739             pTopLevelMemDesc->_flags     = pAllocRequest->pMemDesc->_flags | MEMDESC_FLAGS_ALLOC_PER_SUBDEVICE | MEMDESC_FLAGS_DUMMY_TOPLEVEL;
740             pTopLevelMemDesc->Size       = pAllocRequest->pMemDesc->Size;
741             pTopLevelMemDesc->ActualSize = pAllocRequest->pMemDesc->ActualSize;
742             pTopLevelMemDesc->_pageSize  = pAllocRequest->pMemDesc->_pageSize;
743             pTopLevelMemDesc->pHeap      = pAllocRequest->pMemDesc->pHeap;
744 
            // Add pAllocRequest->pMemDesc for this subdevice to the linked list
746             pPrev->_pNext         = pAllocRequest->pMemDesc;
747             pPrev                 = pAllocRequest->pMemDesc;
748 
749             //
750             // After Bugs 1967134, 1511574, 1448340, 1761278, 1993033 are implemented, remove the code below and
751             // always set offsetOut = ~0 for the broadcast case.
752             // Then remove the interface to remove the physical offset.
753             //
754             if (offsetOutTemp == ~((NvU64)0)) // 1st
755                 offsetOut = offsetOutTemp = memdescGetPhysAddr(pAllocRequest->pMemDesc, AT_GPU, 0);
756             else if (offsetOut != ~((NvU64)0))
757             {
758                 offsetOutTemp = memdescGetPhysAddr(pAllocRequest->pMemDesc, AT_GPU, 0);
759                 if (offsetOut != offsetOutTemp)
760                 {
761                     offsetOut = ~((NvU64)0);
762                 }
763             }
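            // Net effect: offsetOut holds the common physical offset when all
            // subdevice allocations match, and ~0 when they diverge.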
764 
765             NV_ASSERT(!sizeOut || pAllocRequest->pMemDesc->Size == sizeOut);
766             sizeOut = pAllocRequest->pMemDesc->Size;
767         }
768         SLI_LOOP_END;
769 
770         pTempMemDesc = memdescGetMemDescFromGpu(pTopLevelMemDesc, pGpu);
771     }
772     else
773     {
774 
775         portMemSet(pFbAllocInfo, 0, sizeof(FB_ALLOC_INFO));
776         portMemSet(pFbAllocPageFormat, 0, sizeof(FB_ALLOC_PAGE_FORMAT));
777         pFbAllocInfo->pageFormat = pFbAllocPageFormat;
778 
779         memUtilsInitFBAllocInfo(pAllocRequest->pUserParams, pFbAllocInfo, hClient, hParent);
780 
781         rmStatus = memmgrAllocResources(pGpu, pMemoryManager, pAllocRequest, pFbAllocInfo);
782         if (rmStatus != NV_OK)
783             goto done;
784 
785         rmStatus = vidmemAllocResources(pGpu, pMemoryManager, pAllocRequest, pFbAllocInfo, pHeap);
786         if (rmStatus != NV_OK)
787             goto done;
788 
789         NV_ASSERT(pAllocRequest->pMemDesc);
790 
791         pTempMemDesc = pTopLevelMemDesc = pAllocRequest->pMemDesc;
792         offsetOut = memdescGetPhysAddr(pTempMemDesc, AT_GPU, 0);
793         sizeOut   = pTempMemDesc->Size;
794 
795         if (bSubheap)
796             memdescSetFlag(pTempMemDesc, MEMDESC_FLAGS_OWNED_BY_CURRENT_DEVICE, NV_TRUE);
797     }
798 
799     pAllocData->limit = sizeOut - 1;
800 
801     if (bIsPmaAlloc)
802     {
803         // Cache the PMA_ALLOC_INFO structure.
804         SLI_LOOP_START(SLI_LOOP_FLAGS_BC_ONLY | SLI_LOOP_FLAGS_IGNORE_REENTRANCY)
805         memdescGetMemDescFromGpu(pTopLevelMemDesc, pGpu)->pPmaAllocInfo = pAllocRequest->pPmaAllocInfo[gpumgrGetSubDeviceInstanceFromGpu(pGpu)];
806         SLI_LOOP_END;
807     }
808 
809     //
810     // Set the unprotected flag in memdesc. Some control calls will use
811     // this flag to determine if this memory lies in the protected or
812     // unprotected region and use that to gather statistics like total
813     // protected and unprotected memory usage by different clients, etc
814     //
815     if (gpuIsCCorApmFeatureEnabled(pGpu) &&
816         FLD_TEST_DRF(OS32, _ATTR2, _MEMORY_PROTECTION, _UNPROTECTED, pAllocData->attr2))
817     {
818         SLI_LOOP_START(SLI_LOOP_FLAGS_BC_ONLY | SLI_LOOP_FLAGS_IGNORE_REENTRANCY)
819         memdescSetFlag(memdescGetMemDescFromGpu(pTopLevelMemDesc, pGpu),
820                        MEMDESC_FLAGS_ALLOC_IN_UNPROTECTED_MEMORY, NV_TRUE);
821         SLI_LOOP_END;
822     }
823 
    //
    // Video memory is always locally transparently cached.  It does not require
    // any cache management.  Mark it cached unconditionally.  Non-coherent peer
    // caching is handled with an override at mapping time.
    //
829     if (DRF_VAL(OS32, _ATTR2, _GPU_CACHEABLE, pAllocData->attr2) ==
830         NVOS32_ATTR2_GPU_CACHEABLE_DEFAULT)
831     {
832         pAllocData->attr2 = FLD_SET_DRF(OS32, _ATTR2, _GPU_CACHEABLE, _YES,
833                                         pAllocData->attr2);
834     }
835     gpuCacheAttrib = NV_MEMORY_CACHED;
836 
    // ClientDB can set the page size for the memdesc.
    // With GPU SMMU mapping, this needs to be set on the SMMU memdesc,
    // so SMMU allocation should happen before memConstructCommon().
    // Eventually SMMU allocation will be part of memdescAlloc().
841 
842     //
843     // There are a few cases where the heap will return an existing
844     // memdesc.  Only update attributes if it is new.
845     //
846     // @todo attr tracking should move into heapAlloc
847     //
848     if (pTempMemDesc->RefCount == 1)
849     {
850         SLI_LOOP_START(SLI_LOOP_FLAGS_BC_ONLY | SLI_LOOP_FLAGS_IGNORE_REENTRANCY);
851         memdescSetGpuCacheAttrib(memdescGetMemDescFromGpu(pTopLevelMemDesc, pGpu), gpuCacheAttrib);
852         SLI_LOOP_END;

        //
        // An SMMU mapping will be added to FB allocations in the following cases:
        // 1. RM clients forcing SMMU mapping via flags.
        //    GPU arch verification with VPR is one such use case.
        //
859         if (FLD_TEST_DRF(OS32, _ATTR2, _SMMU_ON_GPU, _ENABLE, pAllocData->attr2))
860         {
861             NV_ASSERT_FAILED("SMMU mapping allocation is not supported for ARMv7");
862             rmStatus = NV_ERR_NOT_SUPPORTED;
863 
864             memdescFree(pTopLevelMemDesc);
865             memdescDestroy(pTopLevelMemDesc);
866             goto done;
867         }
868     }
869 
870     rmStatus = memConstructCommon(pMemory, pAllocRequest->classNum, pAllocData->flags,
871                                   pTopLevelMemDesc, pAllocData->owner, pHeap, pAllocData->attr,
872                                   pAllocData->attr2, 0, pAllocData->type,
873                                   pAllocData->tag, &hwResource);
874     if (rmStatus != NV_OK)
875     {
876         memdescFree(pTopLevelMemDesc);
877         memdescDestroy(pTopLevelMemDesc);
878 
879         goto done;
880     }
881     NV_ASSERT(pMemory->pMemDesc);
882     NV_ASSERT(pMemory->pHeap);
883 
884     //
885     // vGPU:
886     //
887     // Since vGPU does all real hardware management in the
888     // host, if we are in guest OS (where IS_VIRTUAL(pGpu) is true),
889     // do an RPC to the host to do the hardware update.
890     //
    // XXX: This is a hack for now. No HW resources are assumed to be used in
    // the call; the host is only requested to make an alias to the allocated heap.
893 
894     if (!IS_GSP_CLIENT(pGpu))
895     {
896         NV_RM_RPC_ALLOC_VIDMEM(pGpu,
897                                hClient,
898                                hParent,
899                                pAllocRequest->hMemory,
900                                pTopLevelMemDesc,
901                                sizeOut,
902                                attr,
903                                attr2,
904                                pAllocData->type,
905                                pAllocData->flags,
906                                pAllocData->height,
907                                pAllocData->width,
908                                pAllocData->format,
909                                pAllocData->comprCovg,
910                                pAllocData->zcullCovg,
911                                pAllocData->alignment,
912                                pAllocData->pitch,
913                                pAllocData->ctagOffset,
914                                rmStatus);
915 
916         if (rmStatus != NV_OK)
917         {
918             memDestructCommon(pMemory);
919             memdescFree(pTopLevelMemDesc);
920             memdescDestroy(pTopLevelMemDesc);
921             pTopLevelMemDesc = NULL;
922             goto done;
923         }
924 
925         pMemory->bRpcAlloc = NV_TRUE;
926     }
927 
928     if (RMCFG_MODULE_GPUACCT)
929     {
930         OBJGPU *pGpu = pMemory->pGpu;
931         OBJSYS *pSys = SYS_GET_INSTANCE();
932         GpuAccounting *pGpuAcct = SYS_GET_GPUACCT(pSys);
933         RmClient *pClient = dynamicCast(pRsClient, RmClient);
934         NvU64 fbUsage;
935         NV2080_CTRL_GPU_PID_INFO_DATA pidInfoData;
936         NV2080_CTRL_SMC_SUBSCRIPTION_INFO smcInfo;
937 
938         if (pGpu->getProperty(pGpu, PDB_PROP_GPU_ACCOUNTING_ON) &&
939             (pMemory->pSubDevice == NULL) &&  // Skipping for subdevice memory allocations. Was this intentional?
940             (pCallContext->secInfo.privLevel < RS_PRIV_LEVEL_KERNEL))
941         {
942             KernelMIGManager *pKernelMIGManager = GPU_GET_KERNEL_MIG_MANAGER(pGpu);
943             NvBool bSmcGpuPartitioningEnabled = IS_MIG_IN_USE(pGpu);
944             MIG_INSTANCE_REF partitionRef = kmigmgrMakeNoMIGReference();
945             NvBool bGlobalInfo = NV_TRUE;
946             smcInfo.computeInstanceId = PARTITIONID_INVALID;
947             smcInfo.gpuInstanceId = PARTITIONID_INVALID;
948             //
949             // With SMC GPU partitioning enabled, get associated partition ref and
950             // only account for partitionLocal usages
951             //
952             if (bSmcGpuPartitioningEnabled)
953             {
954                 NV_CHECK_OK_OR_GOTO(rmStatus, LEVEL_ERROR,
955                                     kmigmgrGetInstanceRefFromDevice(pGpu, pKernelMIGManager,
956                                                                     pDevice, &partitionRef),
957                                     done);
958                 bGlobalInfo = NV_FALSE;
959             }
960             portMemSet(&pidInfoData, 0, sizeof(NV2080_CTRL_GPU_PID_INFO_DATA));
961 
962             gpuFindClientInfoWithPidIterator(pGpu, pClient->ProcID,
963                                              pClient->SubProcessID,
964                                              classId(Memory),
965                                              &pidInfoData,
966                                              &smcInfo,
967                                              &partitionRef,
968                                              bGlobalInfo);
969 
970             // Only account for memory owned by the process.
971             fbUsage = pidInfoData.vidMemUsage.memPrivate +
972                 pidInfoData.vidMemUsage.memSharedOwned;
973 
974             gpuacctUpdateProcPeakFbUsage(pGpuAcct, pGpu->gpuInstance,
                pClient->ProcID, pClient->SubProcessID, fbUsage);
976         }
977     }
978 
979     pAllocData->size = sizeOut;
980     pAllocData->offset = offsetOut;
981 
982     stdmemDumpOutputAllocParams(pAllocData);
983 
984 done:
985     if (bSubheap && pTempMemDesc != NULL && rmStatus != NV_OK)
986         heapRemoveRef(pHeap);
987 
988     portMemFree(pFbAllocPageFormat);
989     portMemFree(pFbAllocInfo);
990 
991     if (bLockAcquired)
992     {
993         // UNLOCK: release GPUs lock
994         rmDeviceGpuLocksRelease(pGpu, GPUS_LOCK_FLAGS_NONE, NULL);
995     }
996 
997     if (bIsPmaAlloc && NV_OK != rmStatus)
998     {
999         SLI_LOOP_START(SLI_LOOP_FLAGS_BC_ONLY | SLI_LOOP_FLAGS_IGNORE_REENTRANCY)
1000 
1001         if (pAllocRequest->pPmaAllocInfo[gpumgrGetSubDeviceInstanceFromGpu(pGpu)])
1002             vidmemPmaFree(pGpu, vidmemGetHeap(pGpu, pDevice, NV_FALSE),
1003                           pAllocRequest->pPmaAllocInfo[gpumgrGetSubDeviceInstanceFromGpu(pGpu)], 0);
1004         SLI_LOOP_END;
1005     }
1006 
1007     return rmStatus;
1008 }
1009 
1010 void
1011 vidmemDestruct_IMPL
1012 (
1013     VideoMemory        *pVideoMemory
1014 )
1015 {
1016     Memory             *pMemory        = staticCast(pVideoMemory, Memory);
1017     OBJGPU             *pGpu           = pMemory->pGpu;
1018     MEMORY_DESCRIPTOR  *pMemDesc       = pMemory->pMemDesc;
1019 
1020     // Free any association of the memory with existing third-party p2p object
1021     CliUnregisterMemoryFromThirdPartyP2P(pMemory);
1022 
1023     memDestructCommon(pMemory);
1024 
    // Free the video memory based on how it was allocated: a non-zero
    // heapOwner indicates it was heapAlloc-ed.
1027     if (!memdescGetCustomHeap(pMemDesc))
1028     {
1029         MemoryManager      *pMemoryManager = GPU_GET_MEMORY_MANAGER(pGpu);
1030         NvHandle            hClient        = RES_GET_CLIENT_HANDLE(pVideoMemory);
1031         NvHandle            hParent        = RES_GET_PARENT_HANDLE(pVideoMemory);
1032         NvU32               heapOwner      = pMemory->HeapOwner;
1033         NV_STATUS           status;
1034 
1035         NV_PRINTF(LEVEL_INFO, "Function: FREE\n");
1036         NV_PRINTF(LEVEL_INFO, "   Owner: 0x%x\n", heapOwner);
1037         NV_PRINTF(LEVEL_INFO, " hMemory: 0x%x\n", RES_GET_HANDLE(pVideoMemory));
1038 
1039         //
1040         // memHandle (and the block's size/type) is returned, but not
1041         // needed ... the caller already has the correct handle to pass
1042         // to memDestructCommon
1043         //
1044         if (gpumgrGetBcEnabledStatus(pGpu) &&
1045             (memdescGetAddressSpace(memdescGetMemDescFromGpu(pMemDesc, pGpu)) == ADDR_FBMEM))
1046         {
1047             MEMORY_DESCRIPTOR *pNextMemDesc = NULL, *pSubdevMemDesc = NULL;
1048             pSubdevMemDesc = pMemDesc->_pNext;
1049 
1050             NV_ASSERT(pMemDesc->_subDeviceAllocCount > 1);
1051             NV_ASSERT(!IS_MIG_IN_USE(pGpu));
1052 
1053             SLI_LOOP_START(SLI_LOOP_FLAGS_BC_ONLY);
1054             if (pSubdevMemDesc == NULL)
1055             {
1056                 NV_ASSERT(0);
1057                 SLI_LOOP_BREAK;
1058             }
1059             // Unlink per-gpu memdesc for SLI client allocations before freeing heap
1060             pNextMemDesc = pSubdevMemDesc->_pNext;
1061 
1062             status = memmgrFree(pGpu,
1063                                 pMemoryManager,
1064                                 pSubdevMemDesc->pHeap,
1065                                 hClient,
1066                                 hParent, // device or subdevice
1067                                 0,
1068                                 heapOwner,
1069                                 pSubdevMemDesc);
1070             NV_ASSERT(status == NV_OK);
1071 
1072             pSubdevMemDesc = pNextMemDesc;
1073             SLI_LOOP_END;
1074 
1075             memdescFree(pMemDesc);
1076             memdescDestroy(pMemDesc);
1077         }
1078         else
1079         {
1080             Heap *pHeap = pMemDesc->pHeap;
1081 
1082             NV_ASSERT(pMemDesc->_subDeviceAllocCount == 1);
1083             status = memmgrFree(pGpu,
1084                                 pMemoryManager,
1085                                 pHeap,
1086                                 hClient,
1087                                 hParent, // device or subdevice
1088                                 0,
1089                                 heapOwner,
1090                                 pMemDesc);
            NV_ASSERT(status == NV_OK);
        }
1094     }
1095 }
1096 
1097 NV_STATUS
1098 vidmemAllocResources
1099 (
1100     OBJGPU                      *pGpu,
1101     MemoryManager               *pMemoryManager,
1102     MEMORY_ALLOCATION_REQUEST   *pAllocRequest,
1103     FB_ALLOC_INFO               *pFbAllocInfo,
1104     Heap                        *pHeap
1105 )
1106 {
1107     NV_STATUS                    status               = NV_OK;
1108     KernelMemorySystem          *pKernelMemorySystem  = GPU_GET_KERNEL_MEMORY_SYSTEM(pGpu);
1109     NV_MEMORY_ALLOCATION_PARAMS *pVidHeapAlloc        = pAllocRequest->pUserParams;
1110     NvU64                        requestedSize        = pVidHeapAlloc->size;
1111     HWRESOURCE_INFO             *pHwResource          = NULL;
1112     MEMORY_DESCRIPTOR           *pMemDesc             = NULL;
1113     NvBool                       bAllocedMemDesc      = NV_FALSE;
1114     NvBool                       bAllocedMemory       = NV_FALSE;
1115     NvBool                       bNoncontigAllowed    = NV_FALSE;
1116     NvBool                       bNoncontigAllocation = NV_FALSE;
1117     NvHandle                     hVASpace             = pVidHeapAlloc->hVASpace;
1118     NvBool                       bIsPmaOwned          = NV_FALSE;
1119     NvU32                        subdeviceInst        = gpumgrGetSubDeviceInstanceFromGpu(pGpu);
1120     NvBool                       bContig              = NV_TRUE;
1121 
1122     const MEMORY_SYSTEM_STATIC_CONFIG *pMemorySystemConfig =
1123         kmemsysGetStaticConfig(pGpu, pKernelMemorySystem);
1124 
1125     // Most vidmem allocations external to RM get routed to PMA.
1126     bIsPmaOwned = (pAllocRequest->pPmaAllocInfo[subdeviceInst] != NULL);
1127 
    // Bail out if pMemorySystemConfig has not been initialized yet (possible on offload clients).
1129     if (pMemorySystemConfig == NULL)
1130     {
1131         status = NV_ERR_INVALID_STATE;
1132         NV_ASSERT(0);
1133         goto failed;
1134     }
1135 
1136     if (pVidHeapAlloc->flags & NVOS32_ALLOC_FLAGS_VIRTUAL_ONLY)
1137     {
1138         NV_PRINTF(LEVEL_WARNING,
1139                   "Virtual-only flag used with physical allocation\n");
1140         status = NV_ERR_INVALID_ARGUMENT;
1141         goto failed;
1142     }
    //
    // On NUMA systems the memory allocation comes from the kernel, and the
    // kernel doesn't support fixed-address allocation.
    //
1147     if ((pVidHeapAlloc->flags & NVOS32_ALLOC_FLAGS_FIXED_ADDRESS_ALLOCATE) &&
1148         bIsPmaOwned &&
1149         osNumaOnliningEnabled(pGpu->pOsGpuInfo))
1150     {
1151         NV_PRINTF(LEVEL_WARNING,
1152                   "NVOS32_ALLOC_FLAGS_FIXED_ADDRESS_ALLOCATE for PMA cannot be "
1153                   "accommodated for NUMA systems\n");
1154         status = NV_ERR_INVALID_ARGUMENT;
1155         goto failed;
1156     }
1157     if (FLD_TEST_DRF(OS32, _ATTR2, _32BIT_POINTER, _ENABLE, pVidHeapAlloc->attr2))
1158     {
1159         NV_PRINTF(LEVEL_WARNING,
1160                   "Virtual-only 32-bit pointer attr used with physical allocation\n");
1161         status = NV_ERR_INVALID_ARGUMENT;
1162         goto failed;
1163     }
1164     if (hVASpace != 0)
1165     {
1166         NV_PRINTF(LEVEL_WARNING,
1167                   "VA space handle used with physical allocation\n");
1168         status = NV_ERR_INVALID_ARGUMENT;
1169         goto failed;
1170     }
1171 
    //
    // The heap used to silently ignore non-contiguous vidmem allocation
    // requests. Honoring them everywhere caused DVS Extended Sanity failures
    // and regressions on Windows, which makes some allocations with the
    // non-contig flag while expecting contiguous memory. So enable the
    // non-contig-by-default path only on verification platforms.
    //
1177     bContig = FLD_TEST_DRF(OS32, _ATTR, _PHYSICALITY, _DEFAULT, pVidHeapAlloc->attr) ?
1178         !pHeap->getProperty(pHeap, PDB_PROP_HEAP_NONCONTIG_ALLOC_BY_DEFAULT) :
1179         !FLD_TEST_DRF(OS32, _ATTR, _PHYSICALITY, _NONCONTIGUOUS, pVidHeapAlloc->attr);
1180 
1181     NV_CHECK_OK_OR_GOTO(status, LEVEL_WARNING, memUtilsAllocMemDesc(pGpu, pAllocRequest, pFbAllocInfo, &pMemDesc, pHeap,
1182                                                                   ADDR_FBMEM, bContig, &bAllocedMemDesc), failed);
1183 
1184 #ifndef NV_DISABLE_NONCONTIG_ALLOC
1185     bNoncontigAllowed =
1186         (!bContig || FLD_TEST_DRF(OS32, _ATTR, _PHYSICALITY, _ALLOW_NONCONTIGUOUS, pVidHeapAlloc->attr)) &&
1187         pMemoryManager->bAllowNoncontiguousAllocation &&
1188         !FLD_TEST_DRF(OS32, _ATTR, _FORMAT, _SWIZZLED, pVidHeapAlloc->attr);
1189 #endif
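    // Net effect (when non-contig allocation is compiled in): non-contig is
    // permitted when the request is, or may fall back to, non-contiguous,
    // the platform allows it, and the surface is not swizzled.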
1190 
1191     if (bIsPmaOwned)
1192     {
1193         pFbAllocInfo->offset = pMemDesc->_pteArray[0];
1194 
1195         if (pMemoryManager->bEnableDynamicGranularityPageArrays == NV_TRUE)
1196         {
            //
            // Set the pageArray granularity if dynamic memdesc page size is
            // enabled; this keeps the page-count calculation consistent.
            //
1201             pMemDesc->pageArrayGranularity = pAllocRequest->pPmaAllocInfo[subdeviceInst]->pageSize;
1202         }
1203 
        if (bContig)
        {
            NV_PRINTF(LEVEL_INFO, "---> PMA Path taken contiguous\n");
            pVidHeapAlloc->attr = FLD_SET_DRF(OS32, _ATTR, _PHYSICALITY,
                                              _CONTIGUOUS,
                                              pVidHeapAlloc->attr);

            memdescDescribe(pAllocRequest->pMemDesc, ADDR_FBMEM,
                            pAllocRequest->pPmaAllocInfo[subdeviceInst]->pageArray[0],
                            pFbAllocInfo->adjustedSize);
        }
1215         else
1216         {
1217             NV_PRINTF(LEVEL_INFO, "---> PMA Path taken discontiguous\n");
1218             NV_ASSERT(!bContig && bNoncontigAllowed);
            pVidHeapAlloc->attr = FLD_SET_DRF(OS32, _ATTR, _PHYSICALITY,
                                              _NONCONTIGUOUS,
                                              pVidHeapAlloc->attr);
            memdescFillPages(pAllocRequest->pMemDesc, 0,
                             pAllocRequest->pPmaAllocInfo[subdeviceInst]->pageArray,
                             pAllocRequest->pPmaAllocInfo[subdeviceInst]->pageCount,
                             pAllocRequest->pPmaAllocInfo[subdeviceInst]->pageSize);
1226         }
1227     }
1228     else
1229     {
1230         OBJHEAP_ALLOC_DATA allocData = { 0 };
1231 
1232         bNoncontigAllocation = !bContig;
1233 
1234         allocData.alignment = pVidHeapAlloc->alignment;
1235         allocData.allocSize = pFbAllocInfo->size + pFbAllocInfo->pad;
1236 
1237         status = heapAlloc(pGpu,
1238                            pFbAllocInfo->hClient,
1239                            pHeap,
1240                            pAllocRequest,
1241                            pAllocRequest->hMemory,
1242                            &allocData,
1243                            pFbAllocInfo,
1244                            &pHwResource,
1245                            &bNoncontigAllocation,
1246                            bNoncontigAllowed,
1247                            bAllocedMemDesc);
1248 
1249         // heapAlloc might create a new memdesc for compbit/discontig case
1250         pMemDesc = pAllocRequest->pMemDesc;
1251 
1252         if (status != NV_OK)
1253         {
1254             goto failed;
1255         }
1256 
1257         bAllocedMemory = NV_TRUE;
1258     }
1259 
1260     if (!bIsPmaOwned && (pVidHeapAlloc->type != NVOS32_TYPE_PMA))
1261     {
1262         NvU64 size, numBlocks;
1263         NvU32 i;
1264 
1265         //
1266         // Pre-fill cache to prevent FB read accesses if in cache only mode and not doing one time pre-fill
1267         // Also, only need to fill the *requested* size and not the actual allocation size
1268         // These might not work with noncontig allocation since they assume
1269         // physically contig memory
1270         //
1271         if (!bNoncontigAllocation &&
1272             gpuIsCacheOnlyModeEnabled(pGpu) &&
1273             !pMemorySystemConfig->bL2PreFill)
1274         {
1275             NV_STATUS preFillStatus;
1276 
1277             requestedSize = (requestedSize == 0) ? pVidHeapAlloc->size : requestedSize;
1278             preFillStatus = kmemsysPreFillCacheOnlyMemory_HAL(pGpu, pKernelMemorySystem,
1279                     pFbAllocInfo->offset, requestedSize);
1280             NV_ASSERT(preFillStatus == NV_OK);
1281         }
1282 
1283         if (memdescGetContiguity(pMemDesc, AT_GPU))
1284         {
1285             size = pFbAllocInfo->adjustedSize;
1286             numBlocks = 1; // One contiguous page
1287         }
1288         else
1289         {
1290             // Only 4k-sized noncontig pages supported currently
1291             size = RM_PAGE_SIZE;
1292             numBlocks = pMemDesc->PageCount;
1293         }
1294 
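        //
        // Contiguous memory is scrubbed in a single pass covering the whole
        // adjusted size; non-contiguous memory is scrubbed one 4K page at a
        // time.
        //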
1295         for (i = 0; i < numBlocks; i++)
1296         {
1297             // Ensures memory is fully initialized
1298             memmgrScrubMemory_HAL(pGpu, pMemoryManager, memdescGetPte(pMemDesc, AT_GPU, i), size);
1299         }
1300     }
1301 
1302     // get possibly updated surface attributes
1303     pVidHeapAlloc->attr = pFbAllocInfo->retAttr;
1304     pVidHeapAlloc->attr2 = pFbAllocInfo->retAttr2;
1305 
1306     // update contiguity attribute to reflect memdesc
1307     if (memdescGetContiguity(pAllocRequest->pMemDesc, AT_GPU))
1308     {
1309         pVidHeapAlloc->attr = FLD_SET_DRF(OS32, _ATTR, _PHYSICALITY,
1310                                           _CONTIGUOUS,
1311                                           pVidHeapAlloc->attr);
1312     }
1313     else
1314     {
1315         pVidHeapAlloc->attr = FLD_SET_DRF(OS32, _ATTR, _PHYSICALITY,
1316                                           _NONCONTIGUOUS,
1317                                           pVidHeapAlloc->attr);
1318     }
1319 
1320     // TODO remove once we don't have hwResource in MEM_BLOCK
1321     if (pHwResource != NULL)
1322     {
1323         pHwResource->attr       = pFbAllocInfo->retAttr;
1324         pHwResource->attr2      = pFbAllocInfo->retAttr2;
1325         pHwResource->hwResId    = pFbAllocInfo->hwResId;
1326         pHwResource->comprCovg  = pFbAllocInfo->comprCovg;
1327         pHwResource->ctagOffset = pFbAllocInfo->ctagOffset;
1328     }
1329 
1330     pVidHeapAlloc->offset = pFbAllocInfo->offset;
1331 
    if (pAllocRequest->pHwResource != NULL)
    {
        pAllocRequest->pHwResource->attr       = pFbAllocInfo->retAttr;
        pAllocRequest->pHwResource->attr2      = pFbAllocInfo->retAttr2;
        pAllocRequest->pHwResource->hwResId    = pFbAllocInfo->hwResId;
        pAllocRequest->pHwResource->comprCovg  = pFbAllocInfo->comprCovg;
        pAllocRequest->pHwResource->ctagOffset = pFbAllocInfo->ctagOffset;
    }
1341 
1342     return NV_OK;
1343 
1344 failed:
1345     if (bAllocedMemory)
1346     {
1347         memmgrFree(pGpu, pMemoryManager, pHeap,
1348                    pFbAllocInfo->hClient, pFbAllocInfo->hDevice, 0,
1349                    pVidHeapAlloc->owner,
1350                    pMemDesc);
1351     }
1352     else
1353     {
1354         memmgrFreeHwResources(pGpu, pMemoryManager, pFbAllocInfo);
1355     }
1356 
1357     if (bAllocedMemDesc)
1358     {
1359         memdescDestroy(pAllocRequest->pMemDesc);
1360         pAllocRequest->pMemDesc = NULL;
1361     }
1362 
1363     return status;
1364 }
1365 
1366 NV_STATUS
1367 vidmemCheckCopyPermissions_IMPL
1368 (
1369     VideoMemory        *pVideoMemory,
1370     OBJGPU             *pDstGpu,
1371     Device             *pDstDevice
1372 )
1373 {
1374     Memory           *pMemory               = staticCast(pVideoMemory, Memory);
1375     OBJGPU           *pSrcGpu               = pMemory->pGpu;
1376     NvHandle          hSrcClient            = RES_GET_CLIENT_HANDLE(pVideoMemory);
1377     NvHandle          hDstClient            = RES_GET_CLIENT_HANDLE(pDstDevice);
1378     KernelMIGManager *pSrcKernelMIGManager  = GPU_GET_KERNEL_MIG_MANAGER(pSrcGpu);
1379     KernelMIGManager *pDstKernelMIGManager  = GPU_GET_KERNEL_MIG_MANAGER(pDstGpu);
1380     NvBool            bSrcClientKernel      = (rmclientGetCachedPrivilegeByHandle(hSrcClient) >= RS_PRIV_LEVEL_KERNEL);
1381     NvBool            bDstClientKernel      = (rmclientGetCachedPrivilegeByHandle(hDstClient) >= RS_PRIV_LEVEL_KERNEL);
1382 
1383     //
1384     // XXX: In case of MIG memory, duping across GPU instances is not allowed
1385     // Bug 2815350 - Due to this bug, allow kernel clients to bypass this check
1386     //
1387     if (!bDstClientKernel && (IS_MIG_IN_USE(pSrcGpu) || IS_MIG_IN_USE(pDstGpu)))
1388     {
        //
        // Due to Bug 2815350 we have to make an exception for kernel clients,
        // so we can't use a direct instanceRef check. Rather than defaulting
        // to heap-based checks, keep the instanceRef check in the else branch,
        // as that is all we should need once Bug 2815350 is fixed.
        // Both clients kernel - force the subscription check; no exception needed.
        // Only SrcClientKernel - enforce the subscription check on dstClient.
        //      DstClientKernel - skip enforcement, as kernel clients are
        //                        allowed to dup without restriction.
        //
1400 
1401         if (bSrcClientKernel)
1402         {
1403             // Get memory partition heap from both clients and compare
1404             Heap *pDstClientHeap = NULL;
1405             NV_CHECK_OK_OR_RETURN(LEVEL_WARNING,
1406                                   kmigmgrGetMemoryPartitionHeapFromDevice(pDstGpu, pDstKernelMIGManager,
1407                                                                           pDstDevice, &pDstClientHeap));
1408 
1409             // Make sure memory is coming from same heaps
1410             if (pDstClientHeap != pMemory->pHeap)
1411             {
                NV_PRINTF(LEVEL_WARNING,
                          "Duping outside of GPU instance not allowed with MIG\n");
1414                 return NV_ERR_NOT_SUPPORTED;
1415             }
1416         }
1417         else
1418         {
1419             //
            //
            // Due to Bug 2815350 we have to make an exception for kernel
            // clients, so we can't use a direct instanceRef check.
            //
1423             MIG_INSTANCE_REF srcInstRef;
1424             MIG_INSTANCE_REF dstInstRef;
1425             RsResourceRef *pSrcDeviceRef;
1426             Device *pSrcDevice;
1427 
1428             NV_ASSERT_OK_OR_RETURN(
1429                 refFindAncestorOfType(RES_GET_REF(pMemory), classId(Device), &pSrcDeviceRef));
1430 
1431             pSrcDevice = dynamicCast(pSrcDeviceRef->pResource, Device);
1432 
1433             // Check instance subscription of source and destination clients
1434             NV_CHECK_OK_OR_RETURN(LEVEL_WARNING,
1435                                   kmigmgrGetInstanceRefFromDevice(pSrcGpu, pSrcKernelMIGManager,
1436                                                                   pSrcDevice, &srcInstRef));
1437             NV_CHECK_OK_OR_RETURN(LEVEL_WARNING,
1438                                   kmigmgrGetInstanceRefFromDevice(pDstGpu, pDstKernelMIGManager,
1439                                                                   pDstDevice, &dstInstRef));
1440 
            //
            // Memory duping is allowed across compute instances, so ignore
            // compute instance differences.
            //
1445             srcInstRef = kmigmgrMakeGIReference(srcInstRef.pKernelMIGGpuInstance);
1446             dstInstRef = kmigmgrMakeGIReference(dstInstRef.pKernelMIGGpuInstance);
1447             if (!kmigmgrAreMIGReferencesSame(&srcInstRef, &dstInstRef))
1448             {
                NV_PRINTF(LEVEL_WARNING,
                          "GPU instance subscriptions differ between source and destination clients\n");
1451                 return NV_ERR_NOT_SUPPORTED;
1452             }
1453         }
1454     }
1455 
1456     return NV_OK;
1457 }
1458