/*
 * SPDX-FileCopyrightText: Copyright (c) 2021-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 * SPDX-License-Identifier: MIT
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 */
23
24 #include "gpu/hwpm/profiler_v2.h"
25 #include "gpu/hwpm/kern_hwpm.h"
26 #include "vgpu/rpc.h"
27
28 static NV_INLINE NvBool
_isDeviceProfilingPermitted(OBJGPU * pGpu,ProfilerBase * pProf,API_SECURITY_INFO * pSecInfo)29 _isDeviceProfilingPermitted(OBJGPU *pGpu, ProfilerBase *pProf, API_SECURITY_INFO *pSecInfo)
30 {
31 if (pSecInfo->privLevel >= RS_PRIV_LEVEL_USER_ROOT)
32 {
33 return NV_TRUE;
34 }
35
36 if (!gpuIsRmProfilingPrivileged(pGpu))
37 {
38 return NV_TRUE;
39 }
40
41 return NV_FALSE;
42 }
43
44 static NvBool
_isMemoryProfilingPermitted(OBJGPU * pGpu,ProfilerBase * pProf)45 _isMemoryProfilingPermitted(OBJGPU *pGpu, ProfilerBase *pProf)
46 {
47 NvBool bSmcGpuPartitioningEnabled = IS_MIG_IN_USE(pGpu);
48 KernelMIGManager *pKernelMIGManager = GPU_GET_KERNEL_MIG_MANAGER(pGpu);
49 Device *pDevice = GPU_RES_GET_DEVICE(pProf);
50
51 if (bSmcGpuPartitioningEnabled && !kmigmgrIsDeviceUsingDeviceProfiling(pGpu, pKernelMIGManager, pDevice))
52 {
53 MIG_INSTANCE_REF ref;
54
55 if (kmigmgrGetInstanceRefFromDevice(pGpu, pKernelMIGManager, pDevice, &ref) != NV_OK)
56 return NV_FALSE;
57
58 if (!kmigmgrIsMIGReferenceValid(&ref))
59 return NV_FALSE;
60
61 NV_ASSERT_OR_RETURN((ref.pKernelMIGGpuInstance != NULL) && (ref.pMIGComputeInstance != NULL), NV_FALSE);
62 return (ref.pKernelMIGGpuInstance->resourceAllocation.gpcCount ==
63 ref.pMIGComputeInstance->resourceAllocation.gpcCount);
64
65 }
66
67 return NV_TRUE;
68 }
69
70 NV_STATUS
profilerBaseConstruct_IMPL(ProfilerBase * pProf,CALL_CONTEXT * pCallContext,RS_RES_ALLOC_PARAMS_INTERNAL * pParams)71 profilerBaseConstruct_IMPL
72 (
73 ProfilerBase *pProf,
74 CALL_CONTEXT *pCallContext,
75 RS_RES_ALLOC_PARAMS_INTERNAL *pParams
76 )
77 {
78 return profilerBaseConstructState_HAL(pProf, pCallContext, pParams);
79 }
80
81 NV_STATUS
profilerBaseConstructState_IMPL(ProfilerBase * pProf,CALL_CONTEXT * pCallContext,RS_RES_ALLOC_PARAMS_INTERNAL * pParams)82 profilerBaseConstructState_IMPL
83 (
84 ProfilerBase *pProf,
85 CALL_CONTEXT *pCallContext,
86 RS_RES_ALLOC_PARAMS_INTERNAL *pParams
87 )
88 {
89 RsClient *pRsClient = pCallContext->pClient;
90
91 pProf->profilerId = NV_REQUESTER_CLIENT_OBJECT(pRsClient->hClient, pCallContext->pResourceRef->hResource);
92 pProf->bMmaBoostDisabled = NV_FALSE;
93
94 return NV_OK;
95 }
96
97 void
profilerBaseDestruct_IMPL(ProfilerBase * pProf)98 profilerBaseDestruct_IMPL
99 (
100 ProfilerBase *pProf
101 )
102 {
103 profilerBaseDestructState_HAL(pProf);
104 }
105
106 static NV_STATUS
_profilerPollForUpdatedMembytes(ProfilerBase * pProfBase,OBJGPU * pGpu,KernelHwpm * pKernelHwpm,NvU32 pmaChIdx)107 _profilerPollForUpdatedMembytes(ProfilerBase *pProfBase, OBJGPU *pGpu, KernelHwpm *pKernelHwpm, NvU32 pmaChIdx)
108 {
109 NV_STATUS status = NV_OK;
110 RMTIMEOUT timeout = {0};
111 volatile NvU32 *pMemBytesAddr = NvP64_VALUE(pProfBase->pPmaStreamList[pmaChIdx].pNumBytesCpuAddr);
112
113 if (pMemBytesAddr == NULL)
114 {
115 NV_PRINTF(LEVEL_ERROR, "Invalid MEM_BYTES_ADDR.\n");
116 return NV_ERR_INVALID_STATE;
117 }
118
119 threadStateResetTimeout(pGpu);
120 gpuSetTimeout(pGpu, GPU_TIMEOUT_DEFAULT, &timeout, 0);
121
122 while (*pMemBytesAddr == NVB0CC_AVAILABLE_BYTES_DEFAULT_VALUE)
123 {
124 if (status == NV_ERR_TIMEOUT)
125 {
126 NV_PRINTF(LEVEL_ERROR,
127 "timeout occurred while waiting for PM streamout to idle.\n");
128 break;
129 }
130 osSpinLoop();
131 status = gpuCheckTimeout(pGpu, &timeout);
132 }
133
134 NV_PRINTF(LEVEL_INFO, "status=0x%08x, *MEM_BYTES_ADDR=0x%08x.\n", status,
135 *pMemBytesAddr);
136
137 return status;
138 }
139
140 /*
141 * This function does the following:
142 * 1. Initialize membytes buffer on guest, ensuring no membytes streamout is in progress.
143 * 2. Issue RPC to vGPU host to idle PMA channel and trigger membytes streaming.
144 * 3. If required, wait on guest until updated membytes value is received
145 */
profilerBaseQuiesceStreamout_IMPL(ProfilerBase * pProf,OBJGPU * pGpu,KernelHwpm * pKernelHwpm,NvU32 pmaChIdx)146 NV_STATUS profilerBaseQuiesceStreamout_IMPL(ProfilerBase *pProf, OBJGPU *pGpu, KernelHwpm *pKernelHwpm, NvU32 pmaChIdx)
147 {
148 NV_STATUS rmStatus = NV_OK;
149 CALL_CONTEXT *pCallContext = resservGetTlsCallContext();
150 NVB0CC_CTRL_INTERNAL_QUIESCE_PMA_CHANNEL_PARAMS pmaIdleParams = {0};
151
152 if (pProf->pPmaStreamList == NULL)
153 return NV_ERR_INVALID_STATE;
154
155 volatile NvU32 *pMemBytesAddr = NvP64_VALUE(pProf->pPmaStreamList[pmaChIdx].pNumBytesCpuAddr);
156
157 if (pMemBytesAddr == NULL)
158 {
159 NV_PRINTF(LEVEL_ERROR, "Invalid MEM_BYTES_ADDR.\n");
160 return NV_ERR_INVALID_STATE;
161 }
162
163 // Check if any membytes streamout is in progress
164 if (*pMemBytesAddr == NVB0CC_AVAILABLE_BYTES_DEFAULT_VALUE)
165 {
166 // Complete any pending membytes streamout
167 rmStatus = _profilerPollForUpdatedMembytes(pProf, pGpu, pKernelHwpm, pmaChIdx);
168 }
169
170 *pMemBytesAddr = NVB0CC_AVAILABLE_BYTES_DEFAULT_VALUE;
171
172 pmaIdleParams.pmaChannelIdx = pmaChIdx;
173
174 // Issue RPC to quiesce PMA channel
175 NV_RM_RPC_CONTROL(pGpu,
176 pCallContext->pClient->hClient,
177 pCallContext->pResourceRef->hResource,
178 NVB0CC_CTRL_CMD_INTERNAL_QUIESCE_PMA_CHANNEL,
179 &pmaIdleParams, sizeof(NVB0CC_CTRL_INTERNAL_QUIESCE_PMA_CHANNEL_PARAMS),
180 rmStatus);
181
182 if (rmStatus != NV_OK)
183 {
184 NV_PRINTF(LEVEL_ERROR, "Waiting for PMA to be idle failed with error 0x%x\n",
185 rmStatus);
186 return rmStatus;
187 }
188
189 // If membytes streaming was triggered, wait on guest for it to complete
190 if (pmaIdleParams.bMembytesPollingRequired)
191 {
192 rmStatus = _profilerPollForUpdatedMembytes(pProf, pGpu, pKernelHwpm, pmaChIdx);
193 }
194
195 return rmStatus;
196 }
197
198 void
profilerBaseDestructState_VF(ProfilerBase * pProf)199 profilerBaseDestructState_VF
200 (
201 ProfilerBase *pProf
202 )
203 {
204 OBJGPU *pGpu = GPU_RES_GET_GPU(pProf);
205 KernelHwpm *pKernelHwpm = GPU_GET_KERNEL_HWPM(pGpu);
206 NvU32 pmaChIdx;
207 CALL_CONTEXT *pCallContext = resservGetTlsCallContext();
208 NV_STATUS rmStatus = NV_OK;
209
210 if (pProf->pPmaStreamList == NULL)
211 return;
212
213 // Handle quiesce streamout on guest, then issue RPC to free Profiler
214 // object on host, which will handle rest of the teardown
215 for (pmaChIdx = 0; pmaChIdx < pKernelHwpm->maxPmaChannels; pmaChIdx++)
216 {
217 if (!pProf->pPmaStreamList[pmaChIdx].bValid)
218 {
219 continue;
220 }
221
222 rmStatus = profilerBaseQuiesceStreamout(pProf, pGpu, pKernelHwpm, pmaChIdx);
223 }
224
225 // Issue RPC to vGPU host to free Profiler object allocated on host
226 NV_RM_RPC_FREE(pGpu,
227 pCallContext->pClient->hClient,
228 pCallContext->pResourceRef->pParentRef->hResource,
229 pCallContext->pResourceRef->hResource,
230 rmStatus);
231
232 // Free membytes CPU mapping on guest
233 for (pmaChIdx = 0; pmaChIdx < pKernelHwpm->maxPmaChannels; pmaChIdx++)
234 {
235 if (!pProf->pPmaStreamList[pmaChIdx].bValid)
236 {
237 continue;
238 }
239
240 if (IS_SRIOV_FULL_GUEST(pGpu))
241 {
242 khwpmStreamoutFreePmaStream(pGpu, pKernelHwpm, pProf->profilerId,
243 &pProf->pPmaStreamList[pmaChIdx], pmaChIdx);
244 continue;
245 }
246
247 if (pProf->pPmaStreamList[pmaChIdx].pNumBytesCpuAddr != NvP64_NULL )
248 {
249 memdescUnmap(pProf->pPmaStreamList[pmaChIdx].pNumBytesBufDesc, NV_TRUE, osGetCurrentProcess(),
250 pProf->pPmaStreamList[pmaChIdx].pNumBytesCpuAddr,
251 pProf->pPmaStreamList[pmaChIdx].pNumBytesCpuAddrPriv);
252 }
253
254 if (pProf->pPmaStreamList[pmaChIdx].pNumBytesBufDesc != NULL )
255 {
256 memdescFree(pProf->pPmaStreamList[pmaChIdx].pNumBytesBufDesc);
257 memdescDestroy(pProf->pPmaStreamList[pmaChIdx].pNumBytesBufDesc);
258 }
259
260 pProf->pPmaStreamList[pmaChIdx].bValid = NV_FALSE;
261 }
262
263 portMemFree(pProf->pPmaStreamList);
264 portMemFree(pProf->pBindPointAllocated);
265 }
266
267 NV_STATUS
profilerDevConstruct_IMPL(ProfilerDev * pProfDev,CALL_CONTEXT * pCallContext,RS_RES_ALLOC_PARAMS_INTERNAL * pParams)268 profilerDevConstruct_IMPL
269 (
270 ProfilerDev *pProfDev,
271 CALL_CONTEXT *pCallContext,
272 RS_RES_ALLOC_PARAMS_INTERNAL *pParams
273 )
274 {
275 PROFILER_CLIENT_PERMISSIONS clientPermissions = {0};
276
277 if (!profilerDevQueryCapabilities_HAL(pProfDev, pCallContext, pParams,
278 &clientPermissions))
279 {
280 return NV_ERR_INSUFFICIENT_PERMISSIONS;
281 }
282
283 return profilerDevConstructState_HAL(pProfDev, pCallContext, pParams, clientPermissions);
284 }
285
286 NvBool
profilerDevQueryCapabilities_IMPL(ProfilerDev * pProfDev,CALL_CONTEXT * pCallContext,RS_RES_ALLOC_PARAMS_INTERNAL * pParams,PROFILER_CLIENT_PERMISSIONS * pClientPermissions)287 profilerDevQueryCapabilities_IMPL
288 (
289 ProfilerDev *pProfDev,
290 CALL_CONTEXT *pCallContext,
291 RS_RES_ALLOC_PARAMS_INTERNAL *pParams,
292 PROFILER_CLIENT_PERMISSIONS *pClientPermissions
293 )
294 {
295 OBJGPU *pGpu = GPU_RES_GET_GPU(pProfDev);
296 ProfilerBase *pProfBase = staticCast(pProfDev, ProfilerBase);
297 API_SECURITY_INFO *pSecInfo = pParams->pSecInfo;
298 NvBool bAnyProfilingPermitted = NV_FALSE;
299
300 pClientPermissions->bMemoryProfilingPermitted =
301 _isMemoryProfilingPermitted(pGpu, pProfBase);
302
303 pClientPermissions->bAdminProfilingPermitted = NV_FALSE;
304 if (pSecInfo->privLevel >= RS_PRIV_LEVEL_USER_ROOT)
305 {
306 bAnyProfilingPermitted = NV_TRUE;
307 pClientPermissions->bAdminProfilingPermitted = NV_TRUE;
308 }
309
310 pClientPermissions->bDevProfilingPermitted =
311 _isDeviceProfilingPermitted(pGpu, pProfBase, pSecInfo);
312
313 if (pClientPermissions->bDevProfilingPermitted)
314 {
315 bAnyProfilingPermitted = NV_TRUE;
316 }
317
318 return bAnyProfilingPermitted;
319 }
320
321 /*
322 * To be called on vGPU guest only
323 * Profiler object will not be fully initialized on vGPU guest,
324 * this request will be passed on to vGPU host.
325 * Initialize pPmaStreamList on guest to store details PMA stream
326 */
327 static NV_STATUS
_profilerDevConstructVgpuGuest(ProfilerBase * pProfBase,RS_RES_ALLOC_PARAMS_INTERNAL * pParams)328 _profilerDevConstructVgpuGuest
329 (
330 ProfilerBase *pProfBase,
331 RS_RES_ALLOC_PARAMS_INTERNAL *pParams
332 )
333 {
334 OBJGPU *pGpu = GPU_RES_GET_GPU(pProfBase);
335 HWPM_PMA_STREAM *pPmaStreamList = NULL;
336 NvBool *pBindPointAllocated = NULL;
337
338 // Allocate the pPmaStreamList to store info about memaddr buffer CPU mapping
339 pPmaStreamList = portMemAllocNonPaged(sizeof(HWPM_PMA_STREAM) * pGpu->pKernelHwpm->maxPmaChannels);
340 if (pPmaStreamList == NULL)
341 {
342 return NV_ERR_NO_MEMORY;
343 }
344
345 portMemSet(pPmaStreamList, 0, sizeof(HWPM_PMA_STREAM) * pGpu->pKernelHwpm->maxPmaChannels);
346
347 pBindPointAllocated = portMemAllocNonPaged(sizeof(NvBool) * pGpu->pKernelHwpm->maxPmaChannels);
348 if (pBindPointAllocated == NULL)
349 {
350 portMemFree(pPmaStreamList);
351 return NV_ERR_NO_MEMORY;
352 }
353
354 portMemSet(pBindPointAllocated, NV_FALSE, sizeof(NvBool) * pGpu->pKernelHwpm->maxPmaChannels);
355
356 pProfBase->pPmaStreamList = pPmaStreamList;
357 pProfBase->pBindPointAllocated = pBindPointAllocated;
358
359 return NV_OK;
360 }
361
362 NV_STATUS
profilerDevConstructState_VF(ProfilerDev * pProfDev,CALL_CONTEXT * pCallContext,RS_RES_ALLOC_PARAMS_INTERNAL * pParams,PROFILER_CLIENT_PERMISSIONS clientPermissions)363 profilerDevConstructState_VF
364 (
365 ProfilerDev *pProfDev,
366 CALL_CONTEXT *pCallContext,
367 RS_RES_ALLOC_PARAMS_INTERNAL *pParams,
368 PROFILER_CLIENT_PERMISSIONS clientPermissions
369 )
370 {
371 OBJGPU *pGpu = GPU_RES_GET_GPU(pProfDev);
372 ProfilerBase *pProfBase = staticCast(pProfDev, ProfilerBase);
373 NV_STATUS rmStatus = NV_OK;
374
375 NV_ASSERT_OK_OR_GOTO(rmStatus,
376 _profilerDevConstructVgpuGuest(pProfBase, pParams),
377 profilerDevConstruct_VF_exit);
378
379 // Issue RPC to allocate Profiler object on vGPU host as well
380 NV_RM_RPC_ALLOC_OBJECT(pGpu,
381 pCallContext->pClient->hClient,
382 pCallContext->pResourceRef->pParentRef->hResource,
383 pCallContext->pResourceRef->hResource,
384 MAXWELL_PROFILER_DEVICE,
385 pParams->pAllocParams,
386 pParams->paramsSize,
387 rmStatus);
388
389 profilerDevConstruct_VF_exit:
390 return rmStatus;
391 }
392
393 NV_STATUS
profilerDevConstructState_IMPL(ProfilerDev * pProfDev,CALL_CONTEXT * pCallContext,RS_RES_ALLOC_PARAMS_INTERNAL * pAllocParams,PROFILER_CLIENT_PERMISSIONS clientPermissions)394 profilerDevConstructState_IMPL
395 (
396 ProfilerDev *pProfDev,
397 CALL_CONTEXT *pCallContext,
398 RS_RES_ALLOC_PARAMS_INTERNAL *pAllocParams,
399 PROFILER_CLIENT_PERMISSIONS clientPermissions
400 )
401 {
402 NV_CHECK_OK_OR_RETURN(LEVEL_ERROR, profilerDevConstructStatePrologue_HAL(pProfDev,
403 pCallContext, pAllocParams));
404
405 NV_CHECK_OK_OR_RETURN(LEVEL_ERROR, profilerDevConstructStateInterlude_HAL(pProfDev,
406 pCallContext, pAllocParams, clientPermissions));
407
408 NV_CHECK_OK_OR_RETURN(LEVEL_ERROR, profilerDevConstructStateEpilogue_HAL(pProfDev,
409 pCallContext, pAllocParams));
410
411 return NV_OK;
412 }
413
414 NV_STATUS
profilerDevConstructStatePrologue_FWCLIENT(ProfilerDev * pProfDev,CALL_CONTEXT * pCallContext,RS_RES_ALLOC_PARAMS_INTERNAL * pAllocParams)415 profilerDevConstructStatePrologue_FWCLIENT
416 (
417 ProfilerDev *pProfDev,
418 CALL_CONTEXT *pCallContext,
419 RS_RES_ALLOC_PARAMS_INTERNAL *pAllocParams
420 )
421 {
422 OBJGPU *pGpu = GPU_RES_GET_GPU(pProfDev);
423 NvHandle hClient = RES_GET_CLIENT_HANDLE(pProfDev);
424 NvHandle hParent = RES_GET_PARENT_HANDLE(pProfDev);
425 NvHandle hObject = RES_GET_HANDLE(pProfDev);
426 NvU32 class = RES_GET_EXT_CLASS_ID(pProfDev);
427 NV_STATUS status = NV_OK;
428
429 NV_RM_RPC_ALLOC_OBJECT(pGpu, hClient, hParent, hObject, class,
430 pAllocParams->pAllocParams, pAllocParams->paramsSize, status);
431
432 return status;
433 }
434
435 NV_STATUS
profilerDevConstructStateInterlude_IMPL(ProfilerDev * pProfDev,CALL_CONTEXT * pCallContext,RS_RES_ALLOC_PARAMS_INTERNAL * pAllocParams,PROFILER_CLIENT_PERMISSIONS clientPermissions)436 profilerDevConstructStateInterlude_IMPL
437 (
438 ProfilerDev *pProfDev,
439 CALL_CONTEXT *pCallContext,
440 RS_RES_ALLOC_PARAMS_INTERNAL *pAllocParams,
441 PROFILER_CLIENT_PERMISSIONS clientPermissions
442 )
443 {
444 OBJGPU *pGpu = GPU_RES_GET_GPU(pProfDev);
445 RM_API *pRmApi = GPU_GET_PHYSICAL_RMAPI(pGpu);
446 NvHandle hClient = RES_GET_CLIENT_HANDLE(pProfDev);
447 NvHandle hObject = RES_GET_HANDLE(pProfDev);
448
449 NVB0CC_CTRL_INTERNAL_PERMISSIONS_INIT_PARAMS params = {0};
450
451 params.bDevProfilingPermitted = clientPermissions.bDevProfilingPermitted;
452 params.bAdminProfilingPermitted = clientPermissions.bAdminProfilingPermitted;
453 params.bMemoryProfilingPermitted = clientPermissions.bMemoryProfilingPermitted;
454
455 return pRmApi->Control(pRmApi,
456 hClient,
457 hObject,
458 NVB0CC_CTRL_CMD_INTERNAL_PERMISSIONS_INIT,
459 ¶ms, sizeof(params));
460 }
461
462 void
profilerDevDestruct_IMPL(ProfilerDev * pProfDev)463 profilerDevDestruct_IMPL
464 (
465 ProfilerDev *pProfDev
466 )
467 {
468 profilerDevDestructState_HAL(pProfDev);
469 }
470
471 void
profilerDevDestructState_FWCLIENT(ProfilerDev * pProfDev)472 profilerDevDestructState_FWCLIENT
473 (
474 ProfilerDev *pProfDev
475 )
476 {
477 NvHandle hClient;
478 NvHandle hParent;
479 NvHandle hObject;
480 RS_RES_FREE_PARAMS_INTERNAL *pParams;
481 CALL_CONTEXT *pCallContext;
482 OBJGPU *pGpu = GPU_RES_GET_GPU(pProfDev);
483 NV_STATUS status = NV_OK;
484
485 resGetFreeParams(staticCast(pProfDev, RsResource), &pCallContext, &pParams);
486 hClient = pCallContext->pClient->hClient;
487 hParent = pCallContext->pResourceRef->pParentRef->hResource;
488 hObject = pCallContext->pResourceRef->hResource;
489
490 NV_RM_RPC_FREE(pGpu, hClient, hParent, hObject, status);
491 }
492