/*
 * SPDX-FileCopyrightText: Copyright (c) 2021-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 * SPDX-License-Identifier: MIT
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 */

#include "gpu/hwpm/profiler_v2.h"
#include "gpu/hwpm/kern_hwpm.h"
#include "vgpu/rpc.h"

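/*
 * Device-level profiling is permitted for callers at or above
 * RS_PRIV_LEVEL_USER_ROOT, or for any caller when RM profiling is not
 * restricted to privileged clients on this GPU.
 */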
static NV_INLINE NvBool
_isDeviceProfilingPermitted(OBJGPU *pGpu, ProfilerBase *pProf, API_SECURITY_INFO *pSecInfo)
{
    if (pSecInfo->privLevel >= RS_PRIV_LEVEL_USER_ROOT)
    {
        return NV_TRUE;
    }

    if (!gpuIsRmProfilingPrivileged(pGpu))
    {
        return NV_TRUE;
    }

    return NV_FALSE;
}

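/*
 * When MIG partitioning is in use and the device is not using device-level
 * profiling, memory profiling is permitted only if the client's compute
 * instance spans its entire GPU instance (i.e. their GPC counts match).
 */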
static NvBool
_isMemoryProfilingPermitted(OBJGPU *pGpu, ProfilerBase *pProf)
{
    NvBool bSmcGpuPartitioningEnabled = IS_MIG_IN_USE(pGpu);
    KernelMIGManager *pKernelMIGManager = GPU_GET_KERNEL_MIG_MANAGER(pGpu);
    Device *pDevice = GPU_RES_GET_DEVICE(pProf);

    if (bSmcGpuPartitioningEnabled && !kmigmgrIsDeviceUsingDeviceProfiling(pGpu, pKernelMIGManager, pDevice))
    {
        MIG_INSTANCE_REF ref;

        if (kmigmgrGetInstanceRefFromDevice(pGpu, pKernelMIGManager, pDevice, &ref) != NV_OK)
            return NV_FALSE;

        if (!kmigmgrIsMIGReferenceValid(&ref))
            return NV_FALSE;

        NV_ASSERT_OR_RETURN((ref.pKernelMIGGpuInstance != NULL) && (ref.pMIGComputeInstance != NULL), NV_FALSE);
        return (ref.pKernelMIGGpuInstance->resourceAllocation.gpcCount ==
                ref.pMIGComputeInstance->resourceAllocation.gpcCount);
    }

    return NV_TRUE;
}

NV_STATUS
profilerBaseConstruct_IMPL
(
    ProfilerBase *pProf,
    CALL_CONTEXT *pCallContext,
    RS_RES_ALLOC_PARAMS_INTERNAL *pParams
)
{
    return profilerBaseConstructState_HAL(pProf, pCallContext, pParams);
}

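/*
 * Default state construction: record the requester ID for this profiler
 * object from the client and object handles, and clear bMmaBoostDisabled.
 */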
NV_STATUS
profilerBaseConstructState_IMPL
(
    ProfilerBase *pProf,
    CALL_CONTEXT *pCallContext,
    RS_RES_ALLOC_PARAMS_INTERNAL *pParams
)
{
    RsClient *pRsClient = pCallContext->pClient;

    pProf->profilerId = NV_REQUESTER_CLIENT_OBJECT(pRsClient->hClient, pCallContext->pResourceRef->hResource);
    pProf->bMmaBoostDisabled = NV_FALSE;

    return NV_OK;
}

void
profilerBaseDestruct_IMPL
(
    ProfilerBase *pProf
)
{
    profilerBaseDestructState_HAL(pProf);
}

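/*
 * Poll the membytes buffer for the given PMA channel until its value changes
 * from NVB0CC_AVAILABLE_BYTES_DEFAULT_VALUE or the default GPU timeout expires.
 */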
static NV_STATUS
_profilerPollForUpdatedMembytes(ProfilerBase *pProfBase, OBJGPU *pGpu, KernelHwpm *pKernelHwpm, NvU32 pmaChIdx)
{
    NV_STATUS status = NV_OK;
    RMTIMEOUT timeout = {0};
    volatile NvU32 *pMemBytesAddr = NvP64_VALUE(pProfBase->pPmaStreamList[pmaChIdx].pNumBytesCpuAddr);

    if (pMemBytesAddr == NULL)
    {
        NV_PRINTF(LEVEL_ERROR, "Invalid MEM_BYTES_ADDR.\n");
        return NV_ERR_INVALID_STATE;
    }

    threadStateResetTimeout(pGpu);
    gpuSetTimeout(pGpu, GPU_TIMEOUT_DEFAULT, &timeout, 0);

    while (*pMemBytesAddr == NVB0CC_AVAILABLE_BYTES_DEFAULT_VALUE)
    {
        if (status == NV_ERR_TIMEOUT)
        {
            NV_PRINTF(LEVEL_ERROR,
                      "timeout occurred while waiting for PM streamout to idle.\n");
            break;
        }
        osSpinLoop();
        status = gpuCheckTimeout(pGpu, &timeout);
    }

    NV_PRINTF(LEVEL_INFO, "status=0x%08x, *MEM_BYTES_ADDR=0x%08x.\n", status,
              *pMemBytesAddr);

    return status;
}

/*
 * This function does the following:
 * 1. Initialize the membytes buffer on the guest, ensuring no membytes streamout is in progress.
 * 2. Issue an RPC to the vGPU host to idle the PMA channel and trigger membytes streaming.
 * 3. If required, wait on the guest until the updated membytes value is received.
 */
NV_STATUS profilerBaseQuiesceStreamout_IMPL(ProfilerBase *pProf, OBJGPU *pGpu, KernelHwpm *pKernelHwpm, NvU32 pmaChIdx)
{
    NV_STATUS rmStatus = NV_OK;
    CALL_CONTEXT *pCallContext = resservGetTlsCallContext();
    NVB0CC_CTRL_INTERNAL_QUIESCE_PMA_CHANNEL_PARAMS pmaIdleParams = {0};

    if (pProf->pPmaStreamList == NULL)
        return NV_ERR_INVALID_STATE;

    volatile NvU32 *pMemBytesAddr = NvP64_VALUE(pProf->pPmaStreamList[pmaChIdx].pNumBytesCpuAddr);

    if (pMemBytesAddr == NULL)
    {
        NV_PRINTF(LEVEL_ERROR, "Invalid MEM_BYTES_ADDR.\n");
        return NV_ERR_INVALID_STATE;
    }

    // Check if any membytes streamout is in progress
    if (*pMemBytesAddr == NVB0CC_AVAILABLE_BYTES_DEFAULT_VALUE)
    {
        // Complete any pending membytes streamout
        rmStatus = _profilerPollForUpdatedMembytes(pProf, pGpu, pKernelHwpm, pmaChIdx);
    }

    *pMemBytesAddr = NVB0CC_AVAILABLE_BYTES_DEFAULT_VALUE;

    pmaIdleParams.pmaChannelIdx = pmaChIdx;

    // Issue RPC to quiesce PMA channel
    NV_RM_RPC_CONTROL(pGpu,
                      pCallContext->pClient->hClient,
                      pCallContext->pResourceRef->hResource,
                      NVB0CC_CTRL_CMD_INTERNAL_QUIESCE_PMA_CHANNEL,
                      &pmaIdleParams, sizeof(NVB0CC_CTRL_INTERNAL_QUIESCE_PMA_CHANNEL_PARAMS),
                      rmStatus);

    if (rmStatus != NV_OK)
    {
        NV_PRINTF(LEVEL_ERROR, "Waiting for PMA to be idle failed with error 0x%x\n",
                  rmStatus);
        return rmStatus;
    }

    // If membytes streaming was triggered, wait on the guest for it to complete
    if (pmaIdleParams.bMembytesPollingRequired)
    {
        rmStatus = _profilerPollForUpdatedMembytes(pProf, pGpu, pKernelHwpm, pmaChIdx);
    }

    return rmStatus;
}

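/*
 * vGPU guest teardown:
 * 1. Quiesce streamout on every valid PMA channel.
 * 2. Issue RPC to the vGPU host to free the Profiler object allocated there.
 * 3. Release the guest-side membytes mappings, descriptors and tracking lists.
 */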
void
profilerBaseDestructState_VF
(
    ProfilerBase *pProf
)
{
    OBJGPU *pGpu = GPU_RES_GET_GPU(pProf);
    KernelHwpm *pKernelHwpm = GPU_GET_KERNEL_HWPM(pGpu);
    NvU32 pmaChIdx;
    CALL_CONTEXT *pCallContext = resservGetTlsCallContext();
    NV_STATUS rmStatus = NV_OK;

    if (pProf->pPmaStreamList == NULL)
        return;

    // Quiesce streamout on the guest, then issue RPC to free the Profiler
    // object on the host, which will handle the rest of the teardown
    for (pmaChIdx = 0; pmaChIdx < pKernelHwpm->maxPmaChannels; pmaChIdx++)
    {
        if (!pProf->pPmaStreamList[pmaChIdx].bValid)
        {
            continue;
        }

        rmStatus = profilerBaseQuiesceStreamout(pProf, pGpu, pKernelHwpm, pmaChIdx);
    }

    // Issue RPC to vGPU host to free Profiler object allocated on host
    NV_RM_RPC_FREE(pGpu,
                   pCallContext->pClient->hClient,
                   pCallContext->pResourceRef->pParentRef->hResource,
                   pCallContext->pResourceRef->hResource,
                   rmStatus);

    // Free membytes CPU mapping on guest
    for (pmaChIdx = 0; pmaChIdx < pKernelHwpm->maxPmaChannels; pmaChIdx++)
    {
        if (!pProf->pPmaStreamList[pmaChIdx].bValid)
        {
            continue;
        }

        if (IS_SRIOV_FULL_GUEST(pGpu))
        {
            khwpmStreamoutFreePmaStream(pGpu, pKernelHwpm, pProf->profilerId,
                                        &pProf->pPmaStreamList[pmaChIdx], pmaChIdx);
            continue;
        }

        if (pProf->pPmaStreamList[pmaChIdx].pNumBytesCpuAddr != NvP64_NULL)
        {
            memdescUnmap(pProf->pPmaStreamList[pmaChIdx].pNumBytesBufDesc, NV_TRUE, osGetCurrentProcess(),
                         pProf->pPmaStreamList[pmaChIdx].pNumBytesCpuAddr,
                         pProf->pPmaStreamList[pmaChIdx].pNumBytesCpuAddrPriv);
        }

        if (pProf->pPmaStreamList[pmaChIdx].pNumBytesBufDesc != NULL)
        {
            memdescFree(pProf->pPmaStreamList[pmaChIdx].pNumBytesBufDesc);
            memdescDestroy(pProf->pPmaStreamList[pmaChIdx].pNumBytesBufDesc);
        }

        pProf->pPmaStreamList[pmaChIdx].bValid = NV_FALSE;
    }

    portMemFree(pProf->pPmaStreamList);
    portMemFree(pProf->pBindPointAllocated);
}

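/*
 * Query the client's profiling permissions and fail the allocation with
 * NV_ERR_INSUFFICIENT_PERMISSIONS if no form of profiling is permitted;
 * otherwise construct the HAL-specific state.
 */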
NV_STATUS
profilerDevConstruct_IMPL
(
    ProfilerDev *pProfDev,
    CALL_CONTEXT *pCallContext,
    RS_RES_ALLOC_PARAMS_INTERNAL *pParams
)
{
    PROFILER_CLIENT_PERMISSIONS clientPermissions = {0};

    if (!profilerDevQueryCapabilities_HAL(pProfDev, pCallContext, pParams,
                                            &clientPermissions))
    {
        return NV_ERR_INSUFFICIENT_PERMISSIONS;
    }

    return profilerDevConstructState_HAL(pProfDev, pCallContext, pParams, clientPermissions);
}

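/*
 * Determine which profiling capabilities (memory, admin, device) the client
 * may use. Returns NV_TRUE if at least one form of profiling is permitted.
 */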
NvBool
profilerDevQueryCapabilities_IMPL
(
    ProfilerDev *pProfDev,
    CALL_CONTEXT *pCallContext,
    RS_RES_ALLOC_PARAMS_INTERNAL *pParams,
    PROFILER_CLIENT_PERMISSIONS *pClientPermissions
)
{
    OBJGPU              *pGpu                   = GPU_RES_GET_GPU(pProfDev);
    ProfilerBase        *pProfBase              = staticCast(pProfDev, ProfilerBase);
    API_SECURITY_INFO   *pSecInfo               = pParams->pSecInfo;
    NvBool               bAnyProfilingPermitted = NV_FALSE;

    pClientPermissions->bMemoryProfilingPermitted =
        _isMemoryProfilingPermitted(pGpu, pProfBase);

    pClientPermissions->bAdminProfilingPermitted = NV_FALSE;
    if (pSecInfo->privLevel >= RS_PRIV_LEVEL_USER_ROOT)
    {
        bAnyProfilingPermitted = NV_TRUE;
        pClientPermissions->bAdminProfilingPermitted = NV_TRUE;
    }

    pClientPermissions->bDevProfilingPermitted =
        _isDeviceProfilingPermitted(pGpu, pProfBase, pSecInfo);

    if (pClientPermissions->bDevProfilingPermitted)
    {
        bAnyProfilingPermitted = NV_TRUE;
    }

    return bAnyProfilingPermitted;
}

/*
 * To be called on the vGPU guest only.
 * The Profiler object is not fully initialized on the vGPU guest;
 * the request is passed on to the vGPU host.
 * Initialize pPmaStreamList on the guest to store details of each PMA stream.
 */
static NV_STATUS
_profilerDevConstructVgpuGuest
(
    ProfilerBase *pProfBase,
    RS_RES_ALLOC_PARAMS_INTERNAL *pParams
)
{
    OBJGPU *pGpu = GPU_RES_GET_GPU(pProfBase);
    HWPM_PMA_STREAM *pPmaStreamList = NULL;
    NvBool *pBindPointAllocated = NULL;

    // Allocate the pPmaStreamList to store info about memaddr buffer CPU mapping
    pPmaStreamList = portMemAllocNonPaged(sizeof(HWPM_PMA_STREAM) * pGpu->pKernelHwpm->maxPmaChannels);
    if (pPmaStreamList == NULL)
    {
        return NV_ERR_NO_MEMORY;
    }

    portMemSet(pPmaStreamList, 0, sizeof(HWPM_PMA_STREAM) * pGpu->pKernelHwpm->maxPmaChannels);

    pBindPointAllocated = portMemAllocNonPaged(sizeof(NvBool) * pGpu->pKernelHwpm->maxPmaChannels);
    if (pBindPointAllocated == NULL)
    {
        portMemFree(pPmaStreamList);
        return NV_ERR_NO_MEMORY;
    }

    portMemSet(pBindPointAllocated, NV_FALSE, sizeof(NvBool) * pGpu->pKernelHwpm->maxPmaChannels);

    pProfBase->pPmaStreamList = pPmaStreamList;
    pProfBase->pBindPointAllocated = pBindPointAllocated;

    return NV_OK;
}

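/*
 * vGPU guest state construction: set up the guest-side PMA stream tracking
 * structures, then issue an RPC to allocate the Profiler object on the host.
 */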
NV_STATUS
profilerDevConstructState_VF
(
    ProfilerDev *pProfDev,
    CALL_CONTEXT *pCallContext,
    RS_RES_ALLOC_PARAMS_INTERNAL *pParams,
    PROFILER_CLIENT_PERMISSIONS clientPermissions
)
{
    OBJGPU          *pGpu       = GPU_RES_GET_GPU(pProfDev);
    ProfilerBase    *pProfBase  = staticCast(pProfDev, ProfilerBase);
    NV_STATUS        rmStatus   = NV_OK;

    NV_ASSERT_OK_OR_GOTO(rmStatus,
                         _profilerDevConstructVgpuGuest(pProfBase, pParams),
                         profilerDevConstruct_VF_exit);

    // Issue RPC to allocate Profiler object on vGPU host as well
    NV_RM_RPC_ALLOC_OBJECT(pGpu,
                           pCallContext->pClient->hClient,
                           pCallContext->pResourceRef->pParentRef->hResource,
                           pCallContext->pResourceRef->hResource,
                           MAXWELL_PROFILER_DEVICE,
                           pParams->pAllocParams,
                           pParams->paramsSize,
                           rmStatus);

profilerDevConstruct_VF_exit:
    return rmStatus;
}

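/*
 * Default state construction: run the prologue, interlude and epilogue HAL
 * stages in order; each stage must succeed for the allocation to complete.
 */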
NV_STATUS
profilerDevConstructState_IMPL
(
    ProfilerDev *pProfDev,
    CALL_CONTEXT *pCallContext,
    RS_RES_ALLOC_PARAMS_INTERNAL *pAllocParams,
    PROFILER_CLIENT_PERMISSIONS clientPermissions
)
{
    NV_CHECK_OK_OR_RETURN(LEVEL_ERROR, profilerDevConstructStatePrologue_HAL(pProfDev,
                            pCallContext, pAllocParams));

    NV_CHECK_OK_OR_RETURN(LEVEL_ERROR, profilerDevConstructStateInterlude_HAL(pProfDev,
                            pCallContext, pAllocParams, clientPermissions));

    NV_CHECK_OK_OR_RETURN(LEVEL_ERROR, profilerDevConstructStateEpilogue_HAL(pProfDev,
                            pCallContext, pAllocParams));

    return NV_OK;
}

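/*
 * Firmware-client (GSP offload) prologue: forward the Profiler object
 * allocation to GSP-RM via RPC.
 */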
NV_STATUS
profilerDevConstructStatePrologue_FWCLIENT
(
    ProfilerDev *pProfDev,
    CALL_CONTEXT *pCallContext,
    RS_RES_ALLOC_PARAMS_INTERNAL *pAllocParams
)
{
    OBJGPU     *pGpu        = GPU_RES_GET_GPU(pProfDev);
    NvHandle    hClient     = RES_GET_CLIENT_HANDLE(pProfDev);
    NvHandle    hParent     = RES_GET_PARENT_HANDLE(pProfDev);
    NvHandle    hObject     = RES_GET_HANDLE(pProfDev);
    NvU32       class       = RES_GET_EXT_CLASS_ID(pProfDev);
    NV_STATUS   status      = NV_OK;

    NV_RM_RPC_ALLOC_OBJECT(pGpu, hClient, hParent, hObject, class,
        pAllocParams->pAllocParams, pAllocParams->paramsSize, status);

    return status;
}

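/*
 * Pass the computed client permissions to physical RM via the
 * NVB0CC_CTRL_CMD_INTERNAL_PERMISSIONS_INIT internal control call.
 */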
NV_STATUS
profilerDevConstructStateInterlude_IMPL
(
    ProfilerDev *pProfDev,
    CALL_CONTEXT *pCallContext,
    RS_RES_ALLOC_PARAMS_INTERNAL *pAllocParams,
    PROFILER_CLIENT_PERMISSIONS clientPermissions
)
{
    OBJGPU         *pGpu        = GPU_RES_GET_GPU(pProfDev);
    RM_API         *pRmApi      = GPU_GET_PHYSICAL_RMAPI(pGpu);
    NvHandle        hClient     = RES_GET_CLIENT_HANDLE(pProfDev);
    NvHandle        hObject     = RES_GET_HANDLE(pProfDev);

    NVB0CC_CTRL_INTERNAL_PERMISSIONS_INIT_PARAMS params = {0};

    params.bDevProfilingPermitted = clientPermissions.bDevProfilingPermitted;
    params.bAdminProfilingPermitted = clientPermissions.bAdminProfilingPermitted;
    params.bMemoryProfilingPermitted = clientPermissions.bMemoryProfilingPermitted;

    return pRmApi->Control(pRmApi,
                           hClient,
                           hObject,
                           NVB0CC_CTRL_CMD_INTERNAL_PERMISSIONS_INIT,
                           &params, sizeof(params));
}

void
profilerDevDestruct_IMPL
(
    ProfilerDev *pProfDev
)
{
    profilerDevDestructState_HAL(pProfDev);
}

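/*
 * Firmware-client teardown: issue an RPC to free the Profiler object that was
 * allocated on GSP-RM during construction.
 */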
void
profilerDevDestructState_FWCLIENT
(
    ProfilerDev *pProfDev
)
{
    NvHandle                     hClient;
    NvHandle                     hParent;
    NvHandle                     hObject;
    RS_RES_FREE_PARAMS_INTERNAL *pParams;
    CALL_CONTEXT                *pCallContext;
    OBJGPU                      *pGpu            = GPU_RES_GET_GPU(pProfDev);
    NV_STATUS                    status          = NV_OK;

    resGetFreeParams(staticCast(pProfDev, RsResource), &pCallContext, &pParams);
    hClient = pCallContext->pClient->hClient;
    hParent = pCallContext->pResourceRef->pParentRef->hResource;
    hObject = pCallContext->pResourceRef->hResource;

    NV_RM_RPC_FREE(pGpu, hClient, hParent, hObject, status);
}