1 /*
2 * SPDX-FileCopyrightText: Copyright (c) 2012-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
3 * SPDX-License-Identifier: MIT
4 *
5 * Permission is hereby granted, free of charge, to any person obtaining a
6 * copy of this software and associated documentation files (the "Software"),
7 * to deal in the Software without restriction, including without limitation
8 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
9 * and/or sell copies of the Software, and to permit persons to whom the
10 * Software is furnished to do so, subject to the following conditions:
11 *
12 * The above copyright notice and this permission notice shall be included in
13 * all copies or substantial portions of the Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
21 * DEALINGS IN THE SOFTWARE.
22 */
23
24 #include "core/core.h"
25
26 #include "os/os.h"
27 #include "gpu_mgr/gpu_mgr.h"
28 #include "virtualization/hypervisor/hypervisor.h"
29
30 #include "vgpu/rpc.h"
31 #include "gpu/nvenc/nvencsession.h"
32 #include "rmapi/rs_utils.h"
33 #include "core/locks.h"
34
35 static NvU32 sessionCounter = 0x01;
36
37 static void _gpuNvEncSessionDataProcessingCallback(POBJGPU pGpu, void *data);
38
39 NV_STATUS
nvencsessionConstruct_IMPL(NvencSession * pNvencSession,CALL_CONTEXT * pCallContext,RS_RES_ALLOC_PARAMS_INTERNAL * pParams)40 nvencsessionConstruct_IMPL
41 (
42 NvencSession *pNvencSession,
43 CALL_CONTEXT *pCallContext,
44 RS_RES_ALLOC_PARAMS_INTERNAL *pParams
45 )
46 {
47 NVA0BC_ALLOC_PARAMETERS *pNvA0BCAllocParams = pParams->pAllocParams;
48 NvHandle hNvencSessionHandle = pCallContext->pResourceRef->hResource;
49 PNVENC_SESSION_LIST_ITEM pNvencSessionListItem = NULL;
50 OBJGPU *pGpu = GPU_RES_GET_GPU(pNvencSession);
51 RsClient *pRsClient = pCallContext->pClient;
52 NvHandle hClient = pRsClient->hClient;
53 RmClient *pClient = dynamicCast(pRsClient, RmClient);
54 NV_STATUS status = NV_OK;
55
56 if (listCount(&(pGpu->nvencSessionList)) >= NV2080_CTRL_GPU_NVENC_SESSION_INFO_MAX_COPYOUT_ENTRIES)
57 {
58 NV_PRINTF(LEVEL_WARNING,
59 "Creating NVENC session above max copyout limit.\n");
60 }
61
62 if (pClient == NULL)
63 {
64 return NV_ERR_INVALID_CLIENT;
65 }
66
67 if (IS_VIRTUAL(pGpu))
68 {
69 NV_RM_RPC_ALLOC_OBJECT(pGpu,
70 pParams->hClient,
71 pParams->hParent,
72 pParams->hResource,
73 pParams->externalClassId,
74 pNvA0BCAllocParams,
75 sizeof(*pNvA0BCAllocParams),
76 status);
77
78 if (status != NV_OK)
79 {
80 return status;
81 }
82 }
83
84 if (pNvA0BCAllocParams->version == NVA0BC_ALLOC_PARAMS_VER_1)
85 {
86 pNvencSession->version = NVA0BC_ALLOC_PARAMS_VER_1;
87 }
88 else
89 {
90 pNvencSession->version = NVA0BC_ALLOC_PARAMS_VER_0;
91 }
92
93 if ((pNvencSession->version == NVA0BC_ALLOC_PARAMS_VER_1) &&
94 (!hypervisorIsVgxHyper()))
95 {
96 status = memGetByHandle(pRsClient, pNvA0BCAllocParams->hMem, &(pNvencSession->pMemory));
97 if (status != NV_OK)
98 {
99 NV_PRINTF(LEVEL_ERROR,
100 "Unable to find mem corresponding to handle : 0x%0x.\n",
101 pNvA0BCAllocParams->hMem);
102 pNvencSession->pMemory = NULL;
103 return status;
104 }
105
106 status = memdescMap(pNvencSession->pMemory->pMemDesc, // Descriptor
107 0, // Offset
108 NVENC_SESSION_INFO_SIZE_V1, // Length
109 NV_TRUE, // Kernel
110 NV_PROTECT_READABLE, // Protect
111 &(pNvencSession->pSessionStatsBuffer), // pAddress
112 &(pNvencSession->pPriv)); // pPriv
113 if (status != NV_OK)
114 {
115 NV_PRINTF(LEVEL_ERROR,
116 "Error mapping memory to CPU VA space, error : 0x%0x.\n",
117 status);
118 pNvencSession->pMemory = NULL;
119 return status;
120 }
121 }
122 else
123 {
124 pNvencSession->pMemory = NULL;
125 pNvencSession->pSessionStatsBuffer = NvP64_NULL;
126 pNvencSession->pPriv = NvP64_NULL;
127 }
128
129 pNvencSessionListItem = listAppendNew(&(pGpu->nvencSessionList));
130
131 if (pNvencSessionListItem == NULL)
132 {
133 return NV_ERR_NO_MEMORY;
134 }
135
136 portMemSet(pNvencSessionListItem, 0, sizeof(NVENC_SESSION_LIST_ITEM));
137
138 /*
139 * When this class is allocated for NMOS or vGPU VM, subProcessId is 0x00.
140 * However when allocated for host vGPU NVRM, processId represents plugin
141 * and subProcessId represent NVENC session process Id within that VM.
142 */
143
144 pNvencSession->handle = hNvencSessionHandle;
145 pNvencSession->nvencSessionEntry.sessionId = sessionCounter++;
146 pNvencSession->nvencSessionEntry.processId = pClient->ProcID;
147 pNvencSession->nvencSessionEntry.subProcessId = pClient->SubProcessID;
148 pNvencSession->nvencSessionEntry.codecType = pNvA0BCAllocParams->codecType;
149 pNvencSession->nvencSessionEntry.hResolution = pNvA0BCAllocParams->hResolution;
150 pNvencSession->nvencSessionEntry.vResolution = pNvA0BCAllocParams->vResolution;
151 pNvencSession->nvencSessionEntry.averageEncodeFps = 0;
152 pNvencSession->nvencSessionEntry.averageEncodeLatency = 0;
153
154 pNvencSessionListItem->hClient = hClient;
155 pNvencSessionListItem->sessionPtr = pNvencSession;
156
157 if ((pNvencSession->version == NVA0BC_ALLOC_PARAMS_VER_1) &&
158 (!hypervisorIsVgxHyper()) &&
159 (listCount(&(pGpu->nvencSessionList)) == 1))
160 {
161 // Register 1Hz timer callback for this GPU.
162 pGpu->bNvEncSessionDataProcessingWorkItemPending = NV_FALSE;
163 status = osSchedule1HzCallback(pGpu,
164 _gpuNvEncSessionDataProcessingCallback,
165 NULL,
166 NV_OS_1HZ_REPEAT);
167 }
168
169 return NV_OK;
170 }
171
172 void
nvencsessionDestruct_IMPL(NvencSession * pNvencSession)173 nvencsessionDestruct_IMPL
174 (
175 NvencSession *pNvencSession
176 )
177 {
178 RS_RES_FREE_PARAMS_INTERNAL *pParams;
179 PNVENC_SESSION_LIST_ITEM pNvencSessionListItem;
180 PNVENC_SESSION_LIST_ITEM pNvencSessionListItemNext;
181 OBJGPU *pGpu = GPU_RES_GET_GPU(pNvencSession);
182 CALL_CONTEXT *pCallContext;
183 RsClient *pRsClient;
184 NvHandle hClient;
185 NvHandle hParent;
186 NvHandle hNvencSessionHandle;
187 NV_STATUS status = NV_OK;
188
189 resGetFreeParams(staticCast(pNvencSession, RsResource), &pCallContext, &pParams);
190
191 pRsClient = pCallContext->pClient;
192 hClient = pRsClient->hClient;
193 hParent = pCallContext->pResourceRef->pParentRef->hResource;
194 hNvencSessionHandle = pCallContext->pResourceRef->hResource;
195
196 if (pNvencSession->pMemory)
197 {
198 memdescUnmap(pNvencSession->pMemory->pMemDesc,
199 NV_TRUE,
200 osGetCurrentProcess(),
201 pNvencSession->pSessionStatsBuffer,
202 pNvencSession->pPriv);
203 }
204
205 if (IS_VIRTUAL(pGpu))
206 {
207 NV_RM_RPC_FREE(pGpu, hClient, hParent, hNvencSessionHandle, status);
208 NV_ASSERT(NV_OK == status);
209 }
210
211 for (pNvencSessionListItem = listHead(&(pGpu->nvencSessionList));
212 pNvencSessionListItem != NULL;
213 pNvencSessionListItem = pNvencSessionListItemNext)
214 {
215 pNvencSessionListItemNext = listNext(&(pGpu->nvencSessionList), pNvencSessionListItem);
216
217 if (pNvencSessionListItem != NULL && (pNvencSessionListItem->sessionPtr == pNvencSession))
218 {
219 listRemove(&(pGpu->nvencSessionList), pNvencSessionListItem);
220 }
221 }
222
223 if ((pNvencSession->version == NVA0BC_ALLOC_PARAMS_VER_1) &&
224 (!hypervisorIsVgxHyper()) &&
225 (listCount(&(pGpu->nvencSessionList)) == 0))
226 {
227 // Stop 1Hz callback for this GPU.
228 osRemove1HzCallback(pGpu, _gpuNvEncSessionDataProcessingCallback, NULL);
229 }
230
231 pParams->status = status;
232 }
233
234 static void
_gpuNvEncSessionProcessBuffer(POBJGPU pGpu,NvencSession * pNvencSession)235 _gpuNvEncSessionProcessBuffer(POBJGPU pGpu, NvencSession *pNvencSession)
236 {
237 NvU32 frameCount;
238 NvU32 currIndex;
239 NvU32 latestFrameIndex;
240 NvU32 latestFrameId;
241 NvU64 latestFrameEndTS;
242 NvU64 processedFrameCount;
243 NvU64 timeTakenToEncodeNs;
244 NvS64 timeDiffFrameTS;
245 NVENC_SESSION_INFO_V1 *pSessionInfoBuffer;
246 NVENC_SESSION_INFO_V1 *pLocalSessionInfoBuffer;
247 NVENC_SESSION_INFO_ENTRY_V1 *pSubmissionTSEntry;
248 NVENC_SESSION_INFO_ENTRY_V1 *pStartTSEntry;
249 NVENC_SESSION_INFO_ENTRY_V1 *pEndTSEntry;
250 NVENC_SESSION_INFO_REGION_1_ENTRY_V1 *pRegion1;
251
252 pSessionInfoBuffer = (NVENC_SESSION_INFO_V1 *)NvP64_VALUE(pNvencSession->pSessionStatsBuffer);
253 if (pSessionInfoBuffer == NULL)
254 {
255 NV_PRINTF(LEVEL_INFO, "GPU : 0x%0x, NvEnc session stats buffer pointer is null.\n", pGpu->gpuId);
256 return;
257 }
258
259 pLocalSessionInfoBuffer = portMemAllocNonPaged(sizeof(NVENC_SESSION_INFO_V1));
260 if (pLocalSessionInfoBuffer == NULL)
261 {
262 NV_PRINTF(LEVEL_INFO, "GPU : 0x%0x, Failed to allocate memory for local stats buffer.\n", pGpu->gpuId);
263 return;
264 }
265
266 // Reset local buffer.
267 portMemSet(pLocalSessionInfoBuffer, 0, sizeof(NVENC_SESSION_INFO_V1));
268
269 // Copy all the frame data from memory allocated by UMD to local buffer.
270 portMemCopy(pLocalSessionInfoBuffer, sizeof(NVENC_SESSION_INFO_V1), pSessionInfoBuffer, sizeof(NVENC_SESSION_INFO_V1));
271
272 pRegion1 = pLocalSessionInfoBuffer->region1.frameInfo;
273
274 // Detect if UMD has not written all NVENC_SESSION_INFO_REGION_1_MAX_ENTRIES_COUNT_V1 entries.
275 // If yes, then pick the index next to last processed index from previous callback.
276 // If not, then we need to find lowest frame no from where we should start reading the timestamp data.
277 if (pRegion1[pNvencSession->lastProcessedIndex].submissionTSEntry.frameId == pNvencSession->lastProcessedFrameId)
278 {
279 currIndex = (pNvencSession->lastProcessedIndex + 1) % NVENC_SESSION_INFO_REGION_1_MAX_ENTRIES_COUNT_V1;
280 }
281 else
282 {
283 NvU32 oldestIndex = NVENC_SESSION_INFO_REGION_1_MAX_ENTRIES_COUNT_V1 - 1;
284 NvU32 minFrameId = pRegion1[oldestIndex].submissionTSEntry.frameId;
285
286 while (oldestIndex > 0)
287 {
288 oldestIndex--;
289 if (minFrameId > pRegion1[oldestIndex].submissionTSEntry.frameId)
290 {
291 minFrameId = pRegion1[oldestIndex].submissionTSEntry.frameId;
292 }
293 else
294 {
295 oldestIndex++;
296 break;
297 }
298 }
299 currIndex = oldestIndex;
300 }
301
302 timeTakenToEncodeNs = 0;
303 processedFrameCount = 0;
304 latestFrameIndex = currIndex;
305 if (pNvencSession->lastProcessedFrameTS == 0)
306 {
307 pNvencSession->lastProcessedFrameTS = pLocalSessionInfoBuffer->region2.frameInfo[currIndex].startTSEntry.timestamp;
308 }
309
310 // Loop through all entries, find correct ones and use them for calculating average FPS and latency.
311 for (frameCount = 0;
312 frameCount < NVENC_SESSION_INFO_REGION_1_MAX_ENTRIES_COUNT_V1;
313 frameCount++, (currIndex = (currIndex + 1) % NVENC_SESSION_INFO_REGION_1_MAX_ENTRIES_COUNT_V1))
314 {
315 pSubmissionTSEntry = &(pLocalSessionInfoBuffer->region1.frameInfo[currIndex].submissionTSEntry);
316 pStartTSEntry = &(pLocalSessionInfoBuffer->region2.frameInfo[currIndex].startTSEntry);
317 pEndTSEntry = &(pLocalSessionInfoBuffer->region2.frameInfo[currIndex].endTSEntry);
318
319 // Validation : Check if last processed frame id is less than current frame id.
320 if (pRegion1[latestFrameIndex].submissionTSEntry.frameId > pSubmissionTSEntry->frameId)
321 {
322 break;
323 }
324
325 // Validation : Check if submission-start-end frame ids match.
326 if ((pSubmissionTSEntry->frameId != pStartTSEntry->frameId) || (pStartTSEntry->frameId != pEndTSEntry->frameId))
327 {
328 continue;
329 }
330 // Validation : Check if submission-start-end timestamps are in incrementing order.
331 if ((pSubmissionTSEntry->timestamp > pStartTSEntry->timestamp) || (pStartTSEntry->timestamp > pEndTSEntry->timestamp))
332 {
333 continue;
334 }
335
336 // Update latest processed frame index.
337 latestFrameIndex = currIndex;
338
339 // Add the difference of end timestamp and submission timestamp to total time taken.
340 timeTakenToEncodeNs += (pEndTSEntry->timestamp - pSubmissionTSEntry->timestamp);
341 processedFrameCount++;
342 }
343
344 if (processedFrameCount > 0)
345 {
346 latestFrameId = pLocalSessionInfoBuffer->region1.frameInfo[latestFrameIndex].submissionTSEntry.frameId;
347 latestFrameEndTS = pLocalSessionInfoBuffer->region2.frameInfo[latestFrameIndex].endTSEntry.timestamp;
348
349 // Calculate average latency.
350 timeTakenToEncodeNs /= processedFrameCount;
351 // averageEncodeLatency is in micro second.
352 pNvencSession->nvencSessionEntry.averageEncodeLatency =
353 ((timeTakenToEncodeNs / 1000) < 0xFFFFFFFF) ? (timeTakenToEncodeNs / 1000) : 0xFFFFFFFF;
354
355 // Calculate average FPS.
356 // Find time difference between latest processed frame end TS and last processed frame end TS in last callback.
357 // Same is done for findng processed frame count.
358 // This would provide a better average FPS value.
359 timeDiffFrameTS = latestFrameEndTS - pNvencSession->lastProcessedFrameTS;
360 if (timeDiffFrameTS > 0)
361 {
362 processedFrameCount = latestFrameId - pNvencSession->lastProcessedFrameId;
363 pNvencSession->nvencSessionEntry.averageEncodeFps = ((processedFrameCount * 1000 * 1000 * 1000) / timeDiffFrameTS);
364 }
365 else
366 {
367 pNvencSession->nvencSessionEntry.averageEncodeLatency = 0;
368 pNvencSession->nvencSessionEntry.averageEncodeFps = 0;
369 }
370
371 pNvencSession->lastProcessedIndex = latestFrameIndex;
372 pNvencSession->lastProcessedFrameId = latestFrameId;
373 pNvencSession->lastProcessedFrameTS = latestFrameEndTS;
374 }
375 else
376 {
377 pNvencSession->nvencSessionEntry.averageEncodeLatency = 0;
378 pNvencSession->nvencSessionEntry.averageEncodeFps = 0;
379 }
380
381 portMemFree(pLocalSessionInfoBuffer);
382 }
383
/*
 * Refresh the statistics of every session in pGpu->nvencSessionList.
 *
 * Each session with a non-NULL sessionPtr is run through
 * _gpuNvEncSessionProcessBuffer. On a virtual GPU with stats reporting
 * enabled, the session's resolution and freshly computed averages are then
 * sent with the NVA0BC_CTRL_CMD_NVENC_SW_SESSION_UPDATE_INFO control RPC.
 * An RPC failure is logged and does not stop processing of later sessions.
 *
 * @param pGpu  GPU whose sessions are processed.
 */
static void _gpuNvEncSessionDataProcessing(OBJGPU *pGpu)
{
    PNVENC_SESSION_LIST_ITEM pNvencSessionListItem;
    PNVENC_SESSION_LIST_ITEM pNvencSessionListItemNext;
    NVA0BC_CTRL_NVENC_SW_SESSION_UPDATE_INFO_PARAMS rpcParams = {0};

    // Loop through all sessions.
    for (pNvencSessionListItem = listHead(&(pGpu->nvencSessionList));
         pNvencSessionListItem != NULL;
         pNvencSessionListItem = pNvencSessionListItemNext)
    {
        NvencSession *pSession;

        //
        // Fresh status per session, so a failure for one session cannot carry
        // over into the next one (in case the RPC macro consults the incoming
        // value of its status argument).
        //
        NV_STATUS status = NV_OK;

        pNvencSessionListItemNext = listNext(&(pGpu->nvencSessionList), pNvencSessionListItem);

        pSession = pNvencSessionListItem->sessionPtr;
        if (pSession == NULL)
        {
            continue;
        }

        _gpuNvEncSessionProcessBuffer(pGpu, pSession);

        if (IS_VIRTUAL(pGpu) && (pGpu->encSessionStatsReportingState == NV2080_CTRL_GPU_INFO_NVENC_STATS_REPORTING_STATE_ENABLED))
        {
            rpcParams.hResolution          = pSession->nvencSessionEntry.hResolution;
            rpcParams.vResolution          = pSession->nvencSessionEntry.vResolution;
            rpcParams.averageEncodeLatency = pSession->nvencSessionEntry.averageEncodeLatency;
            rpcParams.averageEncodeFps     = pSession->nvencSessionEntry.averageEncodeFps;
            rpcParams.timestampBufferSize  = 0;

            NV_RM_RPC_CONTROL(pGpu,
                              pNvencSessionListItem->hClient,
                              pSession->handle,
                              NVA0BC_CTRL_CMD_NVENC_SW_SESSION_UPDATE_INFO,
                              &rpcParams,
                              sizeof(NVA0BC_CTRL_NVENC_SW_SESSION_UPDATE_INFO_PARAMS),
                              status);

            if (status != NV_OK)
            {
                NV_PRINTF(LEVEL_WARNING,
                          "GPU : 0x%0x, NVENC session stats update RPC failed, status=%x\n",
                          pGpu->gpuId, status);
            }
        }
    }
}
420
/*
 * Work item body queued by _gpuNvEncSessionDataProcessingCallback.
 *
 * Resolves the GPU from its instance number, processes all of its NVENC
 * sessions and then clears the "work item pending" flag so the next timer
 * tick may queue a new work item. If the instance does not resolve to a GPU,
 * only an error is logged.
 *
 * @param gpuInstance  GPU instance number to look up.
 * @param pArgs        Unused.
 */
static void _gpuNvEncSessionDataProcessingWorkItem(NvU32 gpuInstance, void *pArgs)
{
    OBJGPU *pGpu = gpumgrGetGpu(gpuInstance);

    if (pGpu != NULL)
    {
        _gpuNvEncSessionDataProcessing(pGpu);
        pGpu->bNvEncSessionDataProcessingWorkItemPending = NV_FALSE;
        return;
    }

    NV_PRINTF(LEVEL_ERROR, "NVENC Sessions GPU instance is invalid\n");
}
435
436 static void
_gpuNvEncSessionDataProcessingCallback(POBJGPU pGpu,void * data)437 _gpuNvEncSessionDataProcessingCallback(POBJGPU pGpu, void *data)
438 {
439 NV_STATUS status;
440
441 if (!pGpu->bNvEncSessionDataProcessingWorkItemPending)
442 {
443 status = osQueueWorkItemWithFlags(pGpu,
444 _gpuNvEncSessionDataProcessingWorkItem,
445 NULL,
446 OS_QUEUE_WORKITEM_FLAGS_LOCK_SEMA
447 | OS_QUEUE_WORKITEM_FLAGS_LOCK_GPU_GROUP_DEVICE_RW);
448 if (status != NV_OK)
449 {
450 NV_PRINTF(LEVEL_ERROR,
451 "NVENC session queuing async callback failed, status=%x\n",
452 status);
453
454 // Call directly to do NVENC session data processing
455 _gpuNvEncSessionDataProcessing(pGpu);
456 }
457 else
458 {
459 pGpu->bNvEncSessionDataProcessingWorkItemPending = NV_TRUE;
460 }
461 }
462 }
463