1 /*
2  * SPDX-FileCopyrightText: Copyright (c) 2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
3  * SPDX-License-Identifier: MIT
4  *
5  * Permission is hereby granted, free of charge, to any person obtaining a
6  * copy of this software and associated documentation files (the "Software"),
7  * to deal in the Software without restriction, including without limitation
8  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
9  * and/or sell copies of the Software, and to permit persons to whom the
10  * Software is furnished to do so, subject to the following conditions:
11  *
12  * The above copyright notice and this permission notice shall be included in
13  * all copies or substantial portions of the Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
21  * DEALINGS IN THE SOFTWARE.
22  */
23 
24 /*!
25  * @file
26  * @brief  Collection of interfaces for HWPM streamout
27  */
28 
29 /* ------------------------ Includes --------------------------------------- */
30 #include "gpu/hwpm/kern_hwpm.h"
31 #include "lib/ref_count.h"
32 #include "gpu/gpu.h"
33 #include "gpu/mmu/kern_gmmu.h"
34 #include "mem_mgr/vaspace.h"
35 #include "mem_mgr/virt_mem_mgr.h"
36 #include "gpu/mem_mgr/mem_mgr.h"
37 #include "gpu/mem_mgr/virt_mem_allocator.h"
38 #include "resserv/rs_client.h"
39 #include "vgpu/rpc.h"
40 #include "virtualization/hypervisor/hypervisor.h"
41 #include "gpu/bus/kern_bus.h"
42 
43 #include "ctrl/ctrl90cc.h"
44 #include "ctrl/ctrlb0cc.h"
45 #include "class/cl90f1.h" //FERMI_VASPACE_A
46 #include "rmapi/rs_utils.h"
47 #include "nvrm_registry.h"
48 
49 /* ------------------------ Macros ----------------------------------------- */
50 #define PERF_PER_CTX_VASPACE_SIZE   (4*1024*1024*1024ULL)
51 
52 /* ------------------------ Public Functions  ------------------------------ */
53 
54 static NV_STATUS
_hwpmStreamoutAllocPmaMapping(OBJGPU * pGpu,KernelHwpm * pKernelHwpm,OBJVASPACE * pPmaVAS,MEMORY_DESCRIPTOR * pMemDesc,NvU64 virtualAddress)55 _hwpmStreamoutAllocPmaMapping
56 (
57     OBJGPU            *pGpu,
58     KernelHwpm        *pKernelHwpm,
59     OBJVASPACE        *pPmaVAS,
60     MEMORY_DESCRIPTOR *pMemDesc,
61     NvU64              virtualAddress
62 )
63 {
64     VirtMemAllocator *pDma = GPU_GET_DMA(pGpu);
65     NvU32 flags = DRF_DEF(OS46, _FLAGS, _DMA_UNICAST_REUSE_ALLOC, _TRUE);
66 
67     if (RMCFG_FEATURE_PLATFORM_GSP ||
68         (memdescGetCpuCacheAttrib(pMemDesc) == NV_MEMORY_CACHED))
69     {
70         flags |= FLD_SET_DRF(OS46, _FLAGS, _CACHE_SNOOP, _ENABLE, flags);
71     }
72 
73     // Map it in PMA VA
74     return dmaAllocMapping_HAL(pGpu, pDma, pPmaVAS, pMemDesc, &virtualAddress, flags, NULL, KMIGMGR_SWIZZID_INVALID);
75 }
76 
77 static NV_STATUS
_hwpmStreamoutAllocCpuMapping(OBJGPU * pGpu,KernelHwpm * pKernelHwpm,OBJVASPACE * pPmaVAS,MEMORY_DESCRIPTOR * pMemDesc,NvP64 * ppCpuAddr,NvP64 * ppPriv)78 _hwpmStreamoutAllocCpuMapping
79 (
80     OBJGPU            *pGpu,
81     KernelHwpm        *pKernelHwpm,
82     OBJVASPACE        *pPmaVAS,
83     MEMORY_DESCRIPTOR *pMemDesc,
84     NvP64             *ppCpuAddr,
85     NvP64             *ppPriv
86 )
87 {
88     NvP64 pAddr64 = NvP64_NULL;
89     NvU8 *pBuf = NULL;
90     NV_STATUS status = NV_OK;
91 
92     if (memdescGetAddressSpace(pMemDesc) == ADDR_FBMEM)
93     {
94         pBuf = kbusMapRmAperture_HAL(pGpu, pMemDesc);
95         if (pBuf == NULL)
96         {
97             NV_PRINTF(LEVEL_ERROR, "busMapRmAperture_HAL failed\n");
98             status = NV_ERR_GENERIC;
99             goto _hwpmStreamoutAllocCpuMapping_fail;
100         }
101         pAddr64 = NV_PTR_TO_NvP64(pBuf);
102     }
103     else
104     {
105         status = memdescMap(pMemDesc, 0, pMemDesc->Size, NV_TRUE, NV_PROTECT_READ_WRITE,
106             &pAddr64, ppPriv);
107         if (status != NV_OK)
108         {
109             NV_PRINTF(LEVEL_ERROR, "memdescMap failed: 0x%x\n", status);
110             goto _hwpmStreamoutAllocCpuMapping_fail;
111         }
112     }
113 
114     *ppCpuAddr = pAddr64;
115 
116     return NV_OK;
117 
118 _hwpmStreamoutAllocCpuMapping_fail:
119     NV_PRINTF(LEVEL_ERROR, "Error: 0x%x\n", status);
120 
121     return status;
122 }
123 
124 static void
_hwpmStreamoutFreeCpuMapping(OBJGPU * pGpu,KernelHwpm * pKernelHwpm,OBJVASPACE * pPmaVAS,MEMORY_DESCRIPTOR * pMemDesc,NvP64 pCpuAddr,NvP64 pPriv)125 _hwpmStreamoutFreeCpuMapping
126 (
127     OBJGPU            *pGpu,
128     KernelHwpm        *pKernelHwpm,
129     OBJVASPACE        *pPmaVAS,
130     MEMORY_DESCRIPTOR *pMemDesc,
131     NvP64              pCpuAddr,
132     NvP64              pPriv
133 )
134 {
135     NvU8 *pCpuAddrTmp = NvP64_VALUE(pCpuAddr);
136 
137     if (pCpuAddrTmp != NULL)
138     {
139         if (memdescGetAddressSpace(pMemDesc) == ADDR_FBMEM)
140         {
141             kbusUnmapRmAperture_HAL(pGpu, pMemDesc, &pCpuAddrTmp, NV_TRUE);
142         }
143         else
144         {
145             memdescUnmap(pMemDesc, NV_TRUE, osGetCurrentProcess(), pCpuAddr, pPriv);
146         }
147     }
148 }
149 
/*!
 * @brief: Allocates a PMA stream for HWPM to streamout records.
 *
 * Each PMA stream can have up to 4GB of VA space, and a PMA stream cannot
 * straddle a 4GB boundary.
 * The caller needs to provide two memory buffers: one for streaming records,
 * and the other for streaming mem bytes (unread bytes).
 */
158 NV_STATUS
khwpmStreamoutAllocPmaStream_IMPL(OBJGPU * pGpu,KernelHwpm * pKernelHwpm,NvU64 profilerId,MEMORY_DESCRIPTOR * pRecordBufDesc,MEMORY_DESCRIPTOR * pNumBytesBufDesc,NvU32 pmaChIdx,HWPM_PMA_STREAM * pPmaStream)159 khwpmStreamoutAllocPmaStream_IMPL
160 (
161     OBJGPU            *pGpu,
162     KernelHwpm        *pKernelHwpm,
163     NvU64              profilerId,
164     MEMORY_DESCRIPTOR *pRecordBufDesc,
165     MEMORY_DESCRIPTOR *pNumBytesBufDesc,
166     NvU32              pmaChIdx,
167     HWPM_PMA_STREAM   *pPmaStream
168 )
169 {
170     OBJVASPACE       *pPmaVAS;
171     NvU64             virtSize;
172     NvU64             virtualAddress = 0;
173     NvU64             virtualAddress2 = 0;
174     NvU64             vaAlign;
175     NvU64             pageSize;
176     NvU64             vaSizeRequested;
177     VAS_ALLOC_FLAGS   allocFlags = {0};
178     NV_STATUS         status = NV_OK;
179     NvP64             pCpuAddr = NvP64_NULL;
180     NvP64             pPriv = NvP64_NULL;
181     NvU32             gfid;
182     const NvU32       bpcIdx = pmaChIdx / pKernelHwpm->maxChannelPerCblock;
183     OBJREFCNT        *pRefcnt;
184     NvBool            bRefCnted = NV_FALSE;
185 
186     NV_CHECK_OR_RETURN(LEVEL_ERROR, (bpcIdx < pKernelHwpm->maxCblocks), NV_ERR_INVALID_ARGUMENT);
187 
188     pRefcnt = pKernelHwpm->streamoutState[bpcIdx].pPmaVasRefcnt;
189     if (pRefcnt == NULL)
190     {
191         return NV_ERR_INVALID_STATE;
192     }
193 
194     NV_ASSERT_OK_OR_RETURN(refcntRequestReference(pRefcnt, profilerId, REFCNT_STATE_ENABLED, NV_FALSE));
195     bRefCnted = NV_TRUE;
196 
197     pPmaVAS = pKernelHwpm->streamoutState[bpcIdx].pPmaVAS;
198     if (pPmaVAS == NULL)
199     {
200         status = NV_ERR_INVALID_STATE;
201         goto hwpmStreamoutAllocPmaStream_fail;
202     }
203 
204     if (pPmaStream->bValid)
205     {
206         status = NV_ERR_INVALID_STATE;
207         goto hwpmStreamoutAllocPmaStream_fail;
208     }
209 
210     pageSize = vaspaceGetBigPageSize(pPmaVAS);
211     vaSizeRequested = RM_ALIGN_UP(pRecordBufDesc->Size, pageSize) + RM_ALIGN_UP(pNumBytesBufDesc->Size, pageSize);
212     if (vaSizeRequested > PERF_PER_CTX_VASPACE_SIZE)
213     {
214         status = NV_ERR_INVALID_ARGUMENT;
215         goto hwpmStreamoutAllocPmaStream_fail;
216     }
217 
218     allocFlags.bLazy = NV_TRUE;
219     virtSize = PERF_PER_CTX_VASPACE_SIZE;
220     vaAlign = virtSize;
221     status = vaspaceAlloc(pPmaVAS, virtSize, vaAlign,
222                           vaspaceGetVaStart(pPmaVAS), vaspaceGetVaLimit(pPmaVAS),
223                           0, allocFlags, &virtualAddress);
224     if (status != NV_OK)
225     {
226         NV_PRINTF(LEVEL_ERROR, "vaspaceAlloc failed: 0x%08x\n", status);
227         goto hwpmStreamoutAllocPmaStream_fail;
228     }
229 
230     status = _hwpmStreamoutAllocPmaMapping(pGpu, pKernelHwpm, pPmaVAS, pRecordBufDesc, virtualAddress);
231     if (status != NV_OK)
232     {
233         NV_PRINTF(LEVEL_ERROR,
234                   "Failed to map records buffer to pma vaspace: 0x%08x\n",
235                   status);
236         goto hwpmStreamoutAllocPmaStream_fail;
237     }
238 
239     // memBytes va start right after record buffer va.
240     virtualAddress2 = virtualAddress + RM_ALIGN_UP(pRecordBufDesc->Size, pageSize);
241     status = _hwpmStreamoutAllocPmaMapping(pGpu, pKernelHwpm, pPmaVAS, pNumBytesBufDesc, virtualAddress2);
242     if (status != NV_OK)
243     {
244         NV_PRINTF(LEVEL_ERROR,
245                   "Failed to map available bytes buffer to pma vaspace: 0x%08x\n",
246                   status);
247         goto hwpmStreamoutAllocPmaStream_fail;
248     }
249 
250     pPmaStream->flags = 0;
251 
252     // CPU mapping of membytes buffer is not required on vGPU host, will be handled on guest
253     NV_ASSERT_OK_OR_GOTO(status, vgpuGetCallingContextGfid(pGpu, &gfid),
254         hwpmStreamoutAllocPmaStream_fail);
255     if (!(hypervisorIsVgxHyper() || (RMCFG_FEATURE_PLATFORM_GSP && IS_GFID_VF(gfid))))
256     {
257         // If this is a WSL profiler using DMA remapping support, we already have
258         // the CPUVA in pNumBytesBufDesc->_pMemData
259         if (pNumBytesBufDesc->_pMemData)
260         {
261             pCpuAddr = pNumBytesBufDesc->_pMemData;
262             pPriv = pNumBytesBufDesc->_pMemData;
263 
264             // Set the _pMemData fields to NULL. Otherwise, osDestroyMemCreatedFromOsDescriptor()
265             // will interpret the _pMemData as a pointer to a SYS_MEM_INFO structure.
266             pRecordBufDesc->_pMemData = NvP64_NULL;
267             pNumBytesBufDesc->_pMemData = NvP64_NULL;
268             pPmaStream->flags |= NV_HWPM_STREAM_FLAGS_CPUVA_EXTERNAL;
269             status = NV_OK;
270         }
271         else
272         {
273             status = _hwpmStreamoutAllocCpuMapping(pGpu, pKernelHwpm, pPmaVAS, pNumBytesBufDesc, &pCpuAddr, &pPriv);
274             if (status != NV_OK)
275             {
276                 NV_PRINTF(LEVEL_ERROR,
277                           "Failed to map available bytes buffer to cpu vaspace: 0x%08x\n",
278                           status);
279                 goto hwpmStreamoutAllocPmaStream_fail;
280             }
281         }
282     }
283 
284     pPmaStream->pRecordBufDesc = pRecordBufDesc;
285     pPmaStream->pNumBytesBufDesc = pNumBytesBufDesc;
286     pPmaStream->pNumBytesCpuAddr = pCpuAddr;
287     pPmaStream->pNumBytesCpuAddrPriv = pPriv;
288     pPmaStream->vaddr = virtualAddress;
289     pPmaStream->vaddrRecordBuf = virtualAddress;
290     pPmaStream->vaddrNumBytesBuf = virtualAddress2;
291     pPmaStream->size = pRecordBufDesc->Size;
292     pPmaStream->pmaChannelIdx = pmaChIdx;
293     pPmaStream->bValid = NV_TRUE;
294 
295     return NV_OK;
296 
297 hwpmStreamoutAllocPmaStream_fail:
298     if (pCpuAddr != NvP64_NULL)
299     {
300         _hwpmStreamoutFreeCpuMapping(pGpu, pKernelHwpm, pPmaVAS, pNumBytesBufDesc, pCpuAddr, pPriv);
301     }
302 
303     if (virtualAddress != 0)
304     {
305         // free va allocations and mappings
306         vaspaceFree(pPmaVAS, virtualAddress);
307     }
308 
309     if (bRefCnted)
310     {
311         status = refcntReleaseReferences(pRefcnt, profilerId, NV_FALSE);
312         if (status != NV_OK)
313         {
314             NV_PRINTF(LEVEL_ERROR, "Releasing pPmaVasRefcnt failed on pmChIdx-%d.\n", pmaChIdx);
315         }
316     }
317 
318     return status;
319 }
320 
321 /*!
322  * @brief: Frees a PMA stream.
323  */
NV_STATUS
khwpmStreamoutFreePmaStream_IMPL
(
    OBJGPU          *pGpu,
    KernelHwpm      *pKernelHwpm,
    NvU64            profilerId,
    HWPM_PMA_STREAM *pPmaStream,
    NvU32            pmaChIdx
)
{
    NV_STATUS   status = NV_OK;
    OBJVASPACE *pPmaVAS;
    // Each cblock owns one PMA VAS; derive the cblock index from the channel.
    const NvU32 bpcIdx = pmaChIdx / pKernelHwpm->maxChannelPerCblock;
    OBJREFCNT  *pRefcnt;

    NV_ASSERT_OR_RETURN(bpcIdx < pKernelHwpm->maxCblocks, NV_ERR_INVALID_ARGUMENT);

    pRefcnt = pKernelHwpm->streamoutState[bpcIdx].pPmaVasRefcnt;
    if (pRefcnt == NULL)
    {
        return NV_ERR_INVALID_STATE;
    }

    pPmaVAS = pKernelHwpm->streamoutState[bpcIdx].pPmaVAS;
    if (pPmaVAS == NULL)
    {
        return NV_ERR_INVALID_STATE;
    }

    // Only streams previously populated by khwpmStreamoutAllocPmaStream can
    // be freed.
    if (!pPmaStream->bValid)
    {
        return NV_ERR_INVALID_ARGUMENT;
    }

    // If there is a CPU address and it was not allocated externally, free
    // the internal CPU mapping.
    if ((pPmaStream->pNumBytesCpuAddr != NvP64_NULL) &&
        (!(pPmaStream->flags & NV_HWPM_STREAM_FLAGS_CPUVA_EXTERNAL)))
    {
        _hwpmStreamoutFreeCpuMapping(pGpu, pKernelHwpm, pPmaVAS, pPmaStream->pNumBytesBufDesc, pPmaStream->pNumBytesCpuAddr,
            pPmaStream->pNumBytesCpuAddrPriv);
    }
    pPmaStream->pNumBytesCpuAddr = NvP64_NULL;
    pPmaStream->pNumBytesCpuAddrPriv = NvP64_NULL;

    // Release the VA window reserved at alloc time (this also covers the
    // record and membytes mappings placed inside it).
    if (pPmaStream->vaddr != 0)
    {
        vaspaceFree(pPmaVAS, pPmaStream->vaddr);
    }
    pPmaStream->vaddr = 0;
    pPmaStream->size = 0;

    // Destroy both buffer descriptors and scrub the stream bookkeeping.
    memdescDestroy(pPmaStream->pRecordBufDesc);
    pPmaStream->pRecordBufDesc = NULL;
    pPmaStream->vaddrRecordBuf = 0;

    memdescDestroy(pPmaStream->pNumBytesBufDesc);
    pPmaStream->pNumBytesBufDesc = NULL;
    pPmaStream->vaddrNumBytesBuf = 0;

    pPmaStream->pmaChannelIdx = INVALID_PMA_CHANNEL_IDX;
    pPmaStream->bValid = NV_FALSE;

    // Drop the VAS reference taken when the stream was allocated.
    status = refcntReleaseReferences(pRefcnt, profilerId, NV_FALSE);
    if (status != NV_OK)
    {
        NV_PRINTF(LEVEL_ERROR, "Releasing pPmaVasRefcnt failed on pmChIdx-%d.\n", pmaChIdx);
    }

    return status;
}
395 
/*!
 * @brief Allocates and zero-fills the instance block surface backing the
 *        HWPM PMA VA space for the given cblock.
 *
 * On success the descriptor is stored in
 * pKernelHwpm->streamoutState[bpcIdx].pInstBlkMemDesc; on failure any partial
 * allocation is released.
 */
static NV_STATUS
khwpmInstBlkConstruct(OBJGPU *pGpu, KernelHwpm *pKernelHwpm, NvU32 bpcIdx)
{
    MemoryManager     *pMemoryManager  = GPU_GET_MEMORY_MANAGER(pGpu);
    NV_STATUS          status;
    NvU32              allocFlags      = MEMDESC_FLAGS_NONE;
    MEMORY_DESCRIPTOR *pInstBlkMemDesc = NULL;
    // Defaults: vidmem, write-combined; may be overridden by registry below.
    NvU32              addrSpace       = ADDR_FBMEM;
    NvU32              attr            = NV_MEMORY_WRITECOMBINED;

    // Honor the NV_REG_STR_RM INST_LOC_4 _HWPM_PMA registry override for the
    // instance block's placement and cache attribute.
    memdescOverrideInstLoc(DRF_VAL(_REG_STR_RM, _INST_LOC_4, _HWPM_PMA, pGpu->instLocOverrides4),
                           "HWPM PMA instblk", &addrSpace, &attr);

    if ((status = memdescCreate(&pInstBlkMemDesc, pGpu,
                                GF100_BUS_INSTANCEBLOCK_SIZE,
                                GF100_BUS_INSTANCEBLOCK_SIZE,
                                NV_TRUE,
                                addrSpace,
                                attr,
                                allocFlags)) != NV_OK)
    {
        goto constructInstBlkMemDesc_fail;
    }

    // Sysmem-resident instance memory may need to be GPU-cached on some
    // configurations.
    if ((memdescGetAddressSpace(pInstBlkMemDesc) == ADDR_SYSMEM) &&
        (gpuIsInstanceMemoryAlwaysCached(pGpu)))
    {
        memdescSetGpuCacheAttrib(pInstBlkMemDesc, NV_MEMORY_CACHED);
    }

    // memdescTagAlloc is a macro that performs the allocation and writes the
    // result into "status".
    memdescTagAlloc(status, NV_FB_ALLOC_RM_INTERNAL_OWNER_UNNAMED_TAG_59,
                    pInstBlkMemDesc);
    if (status != NV_OK)
    {
        NV_PRINTF(LEVEL_ERROR, "couldn't allocate PERF instblk!\n");
        goto constructInstBlkMemDesc_fail;
    }

    // Instance blocks must start out zeroed before kgmmuInstBlkInit fills them.
    status = memmgrMemDescMemSet(pMemoryManager, pInstBlkMemDesc, 0, TRANSFER_FLAGS_NONE);
    if (NV_OK != status)
    {
        goto constructInstBlkMemDesc_fail;
    }

    pKernelHwpm->streamoutState[bpcIdx].pInstBlkMemDesc = pInstBlkMemDesc;

    return NV_OK;

constructInstBlkMemDesc_fail:
    if (pInstBlkMemDesc != NULL)
    {
        // memdescFree is safe even if the backing alloc did not happen yet.
        memdescFree(pInstBlkMemDesc);
        memdescDestroy(pInstBlkMemDesc);
    }
    return status;
}
452 
453 static NV_STATUS
khwpmStreamoutInstBlkDestruct(OBJGPU * pGpu,KernelHwpm * pKernelHwpm,NvU32 bpcIdx)454 khwpmStreamoutInstBlkDestruct(OBJGPU *pGpu, KernelHwpm *pKernelHwpm, NvU32 bpcIdx)
455 {
456     // Free the Instblk Surface resources
457     if (pKernelHwpm->streamoutState[bpcIdx].pInstBlkMemDesc != NULL)
458     {
459         memdescFree(pKernelHwpm->streamoutState[bpcIdx].pInstBlkMemDesc);
460         memdescDestroy(pKernelHwpm->streamoutState[bpcIdx].pInstBlkMemDesc);
461         pKernelHwpm->streamoutState[bpcIdx].pInstBlkMemDesc = NULL;
462     }
463 
464     return NV_OK;
465 }
466 
467 /*!
468  * @brief: Creates VA Space and Inst block corresponding to a
469  * given Channel ID (for full SRIOV guest)
470  */
NV_STATUS
khwpmStreamoutCreatePmaVaSpace_IMPL
(
    OBJGPU     *pGpu,
    KernelHwpm *pKernelHwpm,
    NvU32       bpcIdx
)
{
    OBJVMM     *pVmm               = SYS_GET_VMM(SYS_GET_INSTANCE());
    KernelGmmu *pKernelGmmu        = GPU_GET_KERNEL_GMMU(pGpu);
    OBJVASPACE *pVAS               = NULL;
    // VA range for this VAS comes from the KernelHwpm-wide base/size settings.
    NvU64       vaSpaceStartAddr   = pKernelHwpm->vaSpaceBase;
    NvU64       vaRangeLimit       = pKernelHwpm->vaSpaceBase + pKernelHwpm->vaSpaceSize - 1;
    // Tracks how far setup progressed so the fail path unwinds correctly.
    NvBool      bRootPageDirPinned = NV_FALSE;
    NvU32       flags;
    NV_STATUS   status;
    INST_BLK_INIT_PARAMS params = {0};

    NV_CHECK_OR_RETURN(LEVEL_ERROR, (bpcIdx < pKernelHwpm->maxCblocks), NV_ERR_INVALID_ARGUMENT);
    // A VAS must not already exist for this cblock.
    NV_ASSERT_OR_RETURN((pKernelHwpm->streamoutState[bpcIdx].pPmaVAS == NULL), NV_ERR_INVALID_STATE);

    // The instance block must exist before it can be pointed at the new VAS.
    status = khwpmInstBlkConstruct(pGpu, pKernelHwpm, bpcIdx);
    if (status != NV_OK)
    {
        NV_PRINTF(LEVEL_ERROR,
                  "Failed to construct PMA Instance block. Status 0x%x\n",
                  status);
        goto khwpmStreamoutCreatePmaVaSpace_fail;
    }

    // Initialize a heap for PERF VASpace
    flags = DRF_DEF(_VASPACE, _FLAGS, _BIG_PAGE_SIZE, _DEFAULT) |
            VASPACE_FLAGS_ENABLE_VMM |
            VASPACE_FLAGS_HWPM;

    // Scope the VAS to just this GPU instance.
    status = vmmCreateVaspace(pVmm, FERMI_VASPACE_A, 0, NVBIT(pGpu->gpuInstance),
                              vaSpaceStartAddr, vaRangeLimit, 0, 0, NULL,
                              flags, &pVAS);
    if (status != NV_OK)
    {
        NV_PRINTF(LEVEL_ERROR,
                  "Could not construct PMA vaspace object. Status 0x%x\n",
                  status);
        // Ensure the fail path does not try to destroy a half-created VAS.
        pVAS = NULL;
        goto khwpmStreamoutCreatePmaVaSpace_fail;
    }

    // Pin the root page directory so its PDB address stays stable for the
    // instance block.
    status = vaspacePinRootPageDir(pVAS, pGpu);
    if (status != NV_OK)
    {
        NV_PRINTF(LEVEL_ERROR, "Error Locking down VA space root.\n");
        goto khwpmStreamoutCreatePmaVaSpace_fail;
    }
    bRootPageDirPinned = NV_TRUE;

    // Point the instance block at the new VAS's page directory base.
    status = kgmmuInstBlkInit(pKernelGmmu, pKernelHwpm->streamoutState[bpcIdx].pInstBlkMemDesc,
                              pVAS, FIFO_PDB_IDX_BASE, &params);
    if (status != NV_OK)
    {
        NV_PRINTF(LEVEL_ERROR,
                  "Error initializing HWPM PMA Instance Block.\n");
        goto khwpmStreamoutCreatePmaVaSpace_fail;
    }

    pKernelHwpm->streamoutState[bpcIdx].pPmaVAS = pVAS;

    return NV_OK;

khwpmStreamoutCreatePmaVaSpace_fail:
    // Unwind in reverse order of setup: unpin, destroy VAS, free inst block.
    if (bRootPageDirPinned)
    {
        vaspaceUnpinRootPageDir(pVAS, pGpu);
    }
    if (pVAS != NULL)
    {
        vmmDestroyVaspace(pVmm, pVAS);
    }
    khwpmStreamoutInstBlkDestruct(pGpu, pKernelHwpm, bpcIdx);
    pKernelHwpm->streamoutState[bpcIdx].pPmaVAS = NULL;

    return status;
}
553 
554 /*!
555  * @brief: Frees VA Space and Inst block (for full SRIOV guest)
556  */
557 NV_STATUS
khwpmStreamoutFreePmaVaSpace_IMPL(OBJGPU * pGpu,KernelHwpm * pKernelHwpm,NvU32 bpcIdx)558 khwpmStreamoutFreePmaVaSpace_IMPL
559 (
560     OBJGPU     *pGpu,
561     KernelHwpm *pKernelHwpm,
562     NvU32       bpcIdx
563 )
564 {
565     OBJSYS     *pSys = SYS_GET_INSTANCE();
566     OBJVMM     *pVmm = SYS_GET_VMM(pSys);
567     OBJVASPACE *pVAS;
568 
569     NV_CHECK_OR_RETURN(LEVEL_ERROR, (bpcIdx < pKernelHwpm->maxCblocks), NV_ERR_INVALID_ARGUMENT);
570 
571     pVAS = pKernelHwpm->streamoutState[bpcIdx].pPmaVAS;
572     if (pVAS != NULL)
573     {
574         vaspaceUnpinRootPageDir(pVAS, pGpu);
575         vmmDestroyVaspace(pVmm, pVAS);
576         khwpmStreamoutInstBlkDestruct(pGpu, pKernelHwpm, bpcIdx);
577         pKernelHwpm->streamoutState[bpcIdx].pPmaVAS = NULL;
578     }
579 
580     // Reset GFID in SRIOV mode
581     if (gpuIsSriovEnabled(pGpu))
582     {
583         khwpmPmaStreamSriovSetGfid_HAL(pGpu, pKernelHwpm, bpcIdx, 0);
584     }
585 
586     return NV_OK;
587 }
588