1 /*
2  * SPDX-FileCopyrightText: Copyright (c) 2012-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
3  * SPDX-License-Identifier: MIT
4  *
5  * Permission is hereby granted, free of charge, to any person obtaining a
6  * copy of this software and associated documentation files (the "Software"),
7  * to deal in the Software without restriction, including without limitation
8  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
9  * and/or sell copies of the Software, and to permit persons to whom the
10  * Software is furnished to do so, subject to the following conditions:
11  *
12  * The above copyright notice and this permission notice shall be included in
13  * all copies or substantial portions of the Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
21  * DEALINGS IN THE SOFTWARE.
22  */
23 
24 #include "core/core.h"
25 
26 #include "os/os.h"
27 #include "gpu_mgr/gpu_mgr.h"
28 #include "virtualization/hypervisor/hypervisor.h"
29 
30 #include "vgpu/rpc.h"
31 #include "gpu/nvenc/nvencsession.h"
32 #include "rmapi/rs_utils.h"
33 #include "core/locks.h"
34 
// Source of NVENC session IDs. Starts at 1 and is post-incremented each time
// nvencsessionConstruct_IMPL assigns nvencSessionEntry.sessionId.
// NOTE(review): plain (non-atomic) increment - presumably serialized by the
// locks held during resource construction; confirm against callers.
static NvU32 sessionCounter = 0x01;

// Forward declaration of the 1Hz timer callback. It is defined later in this
// file but is registered/unregistered by the construct/destruct paths.
static void _gpuNvEncSessionDataProcessingCallback(POBJGPU pGpu, void *data);
38 
39 NV_STATUS
nvencsessionConstruct_IMPL(NvencSession * pNvencSession,CALL_CONTEXT * pCallContext,RS_RES_ALLOC_PARAMS_INTERNAL * pParams)40 nvencsessionConstruct_IMPL
41 (
42     NvencSession                 *pNvencSession,
43     CALL_CONTEXT                 *pCallContext,
44     RS_RES_ALLOC_PARAMS_INTERNAL *pParams
45 )
46 {
47     NVA0BC_ALLOC_PARAMETERS *pNvA0BCAllocParams = pParams->pAllocParams;
48     NvHandle hNvencSessionHandle = pCallContext->pResourceRef->hResource;
49     PNVENC_SESSION_LIST_ITEM pNvencSessionListItem = NULL;
50     OBJGPU    *pGpu = GPU_RES_GET_GPU(pNvencSession);
51     RsClient  *pRsClient = pCallContext->pClient;
52     NvHandle  hClient = pRsClient->hClient;
53     RmClient  *pClient = dynamicCast(pRsClient, RmClient);
54     NV_STATUS status = NV_OK;
55 
56     if (listCount(&(pGpu->nvencSessionList)) >= NV2080_CTRL_GPU_NVENC_SESSION_INFO_MAX_COPYOUT_ENTRIES)
57     {
58         NV_PRINTF(LEVEL_WARNING,
59                   "Creating NVENC session above max copyout limit.\n");
60     }
61 
62     if (pClient == NULL)
63     {
64         return NV_ERR_INVALID_CLIENT;
65     }
66 
67     if (IS_VIRTUAL(pGpu))
68     {
69         NV_RM_RPC_ALLOC_OBJECT(pGpu,
70                                pParams->hClient,
71                                pParams->hParent,
72                                pParams->hResource,
73                                pParams->externalClassId,
74                                pNvA0BCAllocParams,
75                                sizeof(*pNvA0BCAllocParams),
76                                status);
77 
78         if (status != NV_OK)
79         {
80             return status;
81         }
82     }
83 
84     if (pNvA0BCAllocParams->version == NVA0BC_ALLOC_PARAMS_VER_1)
85     {
86         pNvencSession->version = NVA0BC_ALLOC_PARAMS_VER_1;
87     }
88     else
89     {
90         pNvencSession->version = NVA0BC_ALLOC_PARAMS_VER_0;
91     }
92 
93     if ((pNvencSession->version == NVA0BC_ALLOC_PARAMS_VER_1) &&
94         (!hypervisorIsVgxHyper()))
95     {
96         status = memGetByHandle(pRsClient, pNvA0BCAllocParams->hMem, &(pNvencSession->pMemory));
97         if (status != NV_OK)
98         {
99             NV_PRINTF(LEVEL_ERROR,
100                       "Unable to find mem corresponding to handle : 0x%0x.\n",
101                       pNvA0BCAllocParams->hMem);
102             pNvencSession->pMemory = NULL;
103             return status;
104         }
105 
106         status = memdescMap(pNvencSession->pMemory->pMemDesc,      // Descriptor
107                             0,                                     // Offset
108                             NVENC_SESSION_INFO_SIZE_V1,            // Length
109                             NV_TRUE,                               // Kernel
110                             NV_PROTECT_READABLE,                   // Protect
111                             &(pNvencSession->pSessionStatsBuffer), // pAddress
112                             &(pNvencSession->pPriv));              // pPriv
113         if (status != NV_OK)
114         {
115             NV_PRINTF(LEVEL_ERROR,
116                       "Error mapping memory to CPU VA space, error : 0x%0x.\n",
117                       status);
118             pNvencSession->pMemory = NULL;
119             return status;
120         }
121     }
122     else
123     {
124         pNvencSession->pMemory              = NULL;
125         pNvencSession->pSessionStatsBuffer  = NvP64_NULL;
126         pNvencSession->pPriv                = NvP64_NULL;
127     }
128 
129     pNvencSessionListItem = listAppendNew(&(pGpu->nvencSessionList));
130 
131     if (pNvencSessionListItem == NULL)
132     {
133         return NV_ERR_NO_MEMORY;
134     }
135 
136     portMemSet(pNvencSessionListItem, 0, sizeof(NVENC_SESSION_LIST_ITEM));
137 
138     /*
139      * When this class is allocated for NMOS or vGPU VM, subProcessId is 0x00.
140      * However when allocated for host vGPU NVRM, processId represents plugin
141      * and subProcessId represent NVENC session process Id within that VM.
142      */
143 
144     pNvencSession->handle                                   = hNvencSessionHandle;
145     pNvencSession->nvencSessionEntry.sessionId              = sessionCounter++;
146     pNvencSession->nvencSessionEntry.processId              = pClient->ProcID;
147     pNvencSession->nvencSessionEntry.subProcessId           = pClient->SubProcessID;
148     pNvencSession->nvencSessionEntry.codecType              = pNvA0BCAllocParams->codecType;
149     pNvencSession->nvencSessionEntry.hResolution            = pNvA0BCAllocParams->hResolution;
150     pNvencSession->nvencSessionEntry.vResolution            = pNvA0BCAllocParams->vResolution;
151     pNvencSession->nvencSessionEntry.averageEncodeFps       = 0;
152     pNvencSession->nvencSessionEntry.averageEncodeLatency   = 0;
153 
154     pNvencSessionListItem->hClient    = hClient;
155     pNvencSessionListItem->sessionPtr = pNvencSession;
156 
157     if ((pNvencSession->version  == NVA0BC_ALLOC_PARAMS_VER_1) &&
158         (!hypervisorIsVgxHyper())                              &&
159         (listCount(&(pGpu->nvencSessionList)) == 1))
160     {
161         // Register 1Hz timer callback for this GPU.
162         pGpu->bNvEncSessionDataProcessingWorkItemPending = NV_FALSE;
163         status = osSchedule1HzCallback(pGpu,
164                                        _gpuNvEncSessionDataProcessingCallback,
165                                        NULL,
166                                        NV_OS_1HZ_REPEAT);
167     }
168 
169     return NV_OK;
170 }
171 
172 void
nvencsessionDestruct_IMPL(NvencSession * pNvencSession)173 nvencsessionDestruct_IMPL
174 (
175     NvencSession *pNvencSession
176 )
177 {
178     RS_RES_FREE_PARAMS_INTERNAL *pParams;
179     PNVENC_SESSION_LIST_ITEM  pNvencSessionListItem;
180     PNVENC_SESSION_LIST_ITEM  pNvencSessionListItemNext;
181     OBJGPU        *pGpu = GPU_RES_GET_GPU(pNvencSession);
182     CALL_CONTEXT  *pCallContext;
183     RsClient      *pRsClient;
184     NvHandle        hClient;
185     NvHandle        hParent;
186     NvHandle        hNvencSessionHandle;
187     NV_STATUS status = NV_OK;
188 
189     resGetFreeParams(staticCast(pNvencSession, RsResource), &pCallContext, &pParams);
190 
191     pRsClient           = pCallContext->pClient;
192     hClient             = pRsClient->hClient;
193     hParent             = pCallContext->pResourceRef->pParentRef->hResource;
194     hNvencSessionHandle = pCallContext->pResourceRef->hResource;
195 
196     if (pNvencSession->pMemory)
197     {
198         memdescUnmap(pNvencSession->pMemory->pMemDesc,
199                      NV_TRUE,
200                      osGetCurrentProcess(),
201                      pNvencSession->pSessionStatsBuffer,
202                      pNvencSession->pPriv);
203     }
204 
205     if (IS_VIRTUAL(pGpu))
206     {
207         NV_RM_RPC_FREE(pGpu, hClient, hParent, hNvencSessionHandle, status);
208         NV_ASSERT(NV_OK == status);
209     }
210 
211     for (pNvencSessionListItem = listHead(&(pGpu->nvencSessionList));
212          pNvencSessionListItem != NULL;
213          pNvencSessionListItem = pNvencSessionListItemNext)
214     {
215         pNvencSessionListItemNext = listNext(&(pGpu->nvencSessionList), pNvencSessionListItem);
216 
217         if (pNvencSessionListItem != NULL && (pNvencSessionListItem->sessionPtr == pNvencSession))
218         {
219             listRemove(&(pGpu->nvencSessionList), pNvencSessionListItem);
220         }
221     }
222 
223     if ((pNvencSession->version  == NVA0BC_ALLOC_PARAMS_VER_1) &&
224         (!hypervisorIsVgxHyper())                              &&
225         (listCount(&(pGpu->nvencSessionList)) == 0))
226     {
227         // Stop 1Hz callback for this GPU.
228         osRemove1HzCallback(pGpu, _gpuNvEncSessionDataProcessingCallback, NULL);
229     }
230 
231     pParams->status = status;
232 }
233 
234 static void
_gpuNvEncSessionProcessBuffer(POBJGPU pGpu,NvencSession * pNvencSession)235 _gpuNvEncSessionProcessBuffer(POBJGPU pGpu, NvencSession *pNvencSession)
236 {
237     NvU32 frameCount;
238     NvU32 currIndex;
239     NvU32 latestFrameIndex;
240     NvU32 latestFrameId;
241     NvU64 latestFrameEndTS;
242     NvU64 processedFrameCount;
243     NvU64 timeTakenToEncodeNs;
244     NvS64 timeDiffFrameTS;
245     NVENC_SESSION_INFO_V1 *pSessionInfoBuffer;
246     NVENC_SESSION_INFO_V1 *pLocalSessionInfoBuffer;
247     NVENC_SESSION_INFO_ENTRY_V1 *pSubmissionTSEntry;
248     NVENC_SESSION_INFO_ENTRY_V1 *pStartTSEntry;
249     NVENC_SESSION_INFO_ENTRY_V1 *pEndTSEntry;
250     NVENC_SESSION_INFO_REGION_1_ENTRY_V1 *pRegion1;
251 
252     pSessionInfoBuffer = (NVENC_SESSION_INFO_V1 *)NvP64_VALUE(pNvencSession->pSessionStatsBuffer);
253     if (pSessionInfoBuffer == NULL)
254     {
255         NV_PRINTF(LEVEL_INFO, "GPU : 0x%0x, NvEnc session stats buffer pointer is null.\n", pGpu->gpuId);
256         return;
257     }
258 
259     pLocalSessionInfoBuffer = portMemAllocNonPaged(sizeof(NVENC_SESSION_INFO_V1));
260     if (pLocalSessionInfoBuffer == NULL)
261     {
262         NV_PRINTF(LEVEL_INFO, "GPU : 0x%0x, Failed to allocate memory for local stats buffer.\n", pGpu->gpuId);
263         return;
264     }
265 
266     // Reset local buffer.
267     portMemSet(pLocalSessionInfoBuffer, 0, sizeof(NVENC_SESSION_INFO_V1));
268 
269     // Copy all the frame data from memory allocated by UMD to local buffer.
270     portMemCopy(pLocalSessionInfoBuffer, sizeof(NVENC_SESSION_INFO_V1), pSessionInfoBuffer, sizeof(NVENC_SESSION_INFO_V1));
271 
272     pRegion1 = pLocalSessionInfoBuffer->region1.frameInfo;
273 
274     // Detect if UMD has not written all NVENC_SESSION_INFO_REGION_1_MAX_ENTRIES_COUNT_V1 entries.
275     // If yes, then pick the index next to last processed index from previous callback.
276     // If not, then we need to find lowest frame no from where we should start reading the timestamp data.
277     if (pRegion1[pNvencSession->lastProcessedIndex].submissionTSEntry.frameId == pNvencSession->lastProcessedFrameId)
278     {
279         currIndex = (pNvencSession->lastProcessedIndex + 1) % NVENC_SESSION_INFO_REGION_1_MAX_ENTRIES_COUNT_V1;
280     }
281     else
282     {
283         NvU32 oldestIndex = NVENC_SESSION_INFO_REGION_1_MAX_ENTRIES_COUNT_V1 - 1;
284         NvU32 minFrameId  = pRegion1[oldestIndex].submissionTSEntry.frameId;
285 
286         while (oldestIndex > 0)
287         {
288             oldestIndex--;
289             if (minFrameId > pRegion1[oldestIndex].submissionTSEntry.frameId)
290             {
291                 minFrameId = pRegion1[oldestIndex].submissionTSEntry.frameId;
292             }
293             else
294             {
295                 oldestIndex++;
296                 break;
297             }
298         }
299         currIndex = oldestIndex;
300     }
301 
302     timeTakenToEncodeNs  = 0;
303     processedFrameCount  = 0;
304     latestFrameIndex     = currIndex;
305     if (pNvencSession->lastProcessedFrameTS == 0)
306     {
307         pNvencSession->lastProcessedFrameTS = pLocalSessionInfoBuffer->region2.frameInfo[currIndex].startTSEntry.timestamp;
308     }
309 
310     // Loop through all entries, find correct ones and use them for calculating average FPS and latency.
311     for (frameCount = 0;
312          frameCount < NVENC_SESSION_INFO_REGION_1_MAX_ENTRIES_COUNT_V1;
313          frameCount++, (currIndex = (currIndex + 1) % NVENC_SESSION_INFO_REGION_1_MAX_ENTRIES_COUNT_V1))
314     {
315         pSubmissionTSEntry = &(pLocalSessionInfoBuffer->region1.frameInfo[currIndex].submissionTSEntry);
316         pStartTSEntry      = &(pLocalSessionInfoBuffer->region2.frameInfo[currIndex].startTSEntry);
317         pEndTSEntry        = &(pLocalSessionInfoBuffer->region2.frameInfo[currIndex].endTSEntry);
318 
319         // Validation : Check if last processed frame id is less than current frame id.
320         if (pRegion1[latestFrameIndex].submissionTSEntry.frameId > pSubmissionTSEntry->frameId)
321         {
322             break;
323         }
324 
325         // Validation : Check if submission-start-end frame ids match.
326         if ((pSubmissionTSEntry->frameId != pStartTSEntry->frameId) || (pStartTSEntry->frameId != pEndTSEntry->frameId))
327         {
328             continue;
329         }
330         // Validation : Check if submission-start-end timestamps are in incrementing order.
331         if ((pSubmissionTSEntry->timestamp > pStartTSEntry->timestamp) || (pStartTSEntry->timestamp > pEndTSEntry->timestamp))
332         {
333             continue;
334         }
335 
336         // Update latest processed frame index.
337         latestFrameIndex = currIndex;
338 
339         // Add the difference of end timestamp and submission timestamp to total time taken.
340         timeTakenToEncodeNs += (pEndTSEntry->timestamp - pSubmissionTSEntry->timestamp);
341         processedFrameCount++;
342     }
343 
344     if (processedFrameCount > 0)
345     {
346         latestFrameId    = pLocalSessionInfoBuffer->region1.frameInfo[latestFrameIndex].submissionTSEntry.frameId;
347         latestFrameEndTS = pLocalSessionInfoBuffer->region2.frameInfo[latestFrameIndex].endTSEntry.timestamp;
348 
349         // Calculate average latency.
350         timeTakenToEncodeNs /= processedFrameCount;
351         // averageEncodeLatency is in micro second.
352         pNvencSession->nvencSessionEntry.averageEncodeLatency =
353             ((timeTakenToEncodeNs / 1000) < 0xFFFFFFFF) ? (timeTakenToEncodeNs / 1000) : 0xFFFFFFFF;
354 
355         // Calculate average FPS.
356         // Find time difference between latest processed frame end TS and last processed frame end TS in last callback.
357         // Same is done for findng processed frame count.
358         // This would provide a better average FPS value.
359         timeDiffFrameTS = latestFrameEndTS - pNvencSession->lastProcessedFrameTS;
360         if (timeDiffFrameTS > 0)
361         {
362             processedFrameCount = latestFrameId - pNvencSession->lastProcessedFrameId;
363             pNvencSession->nvencSessionEntry.averageEncodeFps = ((processedFrameCount * 1000 * 1000 * 1000) / timeDiffFrameTS);
364         }
365         else
366         {
367             pNvencSession->nvencSessionEntry.averageEncodeLatency = 0;
368             pNvencSession->nvencSessionEntry.averageEncodeFps     = 0;
369         }
370 
371         pNvencSession->lastProcessedIndex   = latestFrameIndex;
372         pNvencSession->lastProcessedFrameId = latestFrameId;
373         pNvencSession->lastProcessedFrameTS = latestFrameEndTS;
374     }
375     else
376     {
377         pNvencSession->nvencSessionEntry.averageEncodeLatency = 0;
378         pNvencSession->nvencSessionEntry.averageEncodeFps     = 0;
379     }
380 
381     portMemFree(pLocalSessionInfoBuffer);
382 }
383 
/*
 * Refresh the encode statistics of every NVENC session on this GPU.
 *
 * For each list item with a session attached, the session's stats buffer is
 * processed; on a virtual GPU with stats reporting enabled the refreshed
 * resolution, latency and FPS are then sent through
 * NVA0BC_CTRL_CMD_NVENC_SW_SESSION_UPDATE_INFO.
 *
 * The RPC is best effort: a failure is logged and the remaining sessions are
 * still processed.
 *
 * @param pGpu  GPU whose nvencSessionList is walked.
 */
static void _gpuNvEncSessionDataProcessing(OBJGPU *pGpu)
{
    PNVENC_SESSION_LIST_ITEM  pNvencSessionListItem;
    PNVENC_SESSION_LIST_ITEM  pNvencSessionListItemNext;
    NVA0BC_CTRL_NVENC_SW_SESSION_UPDATE_INFO_PARAMS rpcParams = {0};

    // Loop through all sessions.
    for (pNvencSessionListItem = listHead(&(pGpu->nvencSessionList));
         pNvencSessionListItem != NULL;
         pNvencSessionListItem = pNvencSessionListItemNext)
    {
        NvencSession *pNvencSession = pNvencSessionListItem->sessionPtr;

        // Each session gets its own status, so an RPC failure for one session
        // is never carried over into the RPC issued for the next one.
        // NOTE(review): presumably the RPC macro skips the call when status
        // is already failing - confirm against its definition.
        NV_STATUS status = NV_OK;

        pNvencSessionListItemNext = listNext(&(pGpu->nvencSessionList), pNvencSessionListItem);

        if (pNvencSession == NULL)
        {
            continue;
        }

        _gpuNvEncSessionProcessBuffer(pGpu, pNvencSession);

        if (IS_VIRTUAL(pGpu) && (pGpu->encSessionStatsReportingState == NV2080_CTRL_GPU_INFO_NVENC_STATS_REPORTING_STATE_ENABLED))
        {
            rpcParams.hResolution           = pNvencSession->nvencSessionEntry.hResolution;
            rpcParams.vResolution           = pNvencSession->nvencSessionEntry.vResolution;
            rpcParams.averageEncodeLatency  = pNvencSession->nvencSessionEntry.averageEncodeLatency;
            rpcParams.averageEncodeFps      = pNvencSession->nvencSessionEntry.averageEncodeFps;
            rpcParams.timestampBufferSize   = 0;

            NV_RM_RPC_CONTROL(pGpu,
                              pNvencSessionListItem->hClient,
                              pNvencSession->handle,
                              NVA0BC_CTRL_CMD_NVENC_SW_SESSION_UPDATE_INFO,
                              &rpcParams,
                              sizeof(NVA0BC_CTRL_NVENC_SW_SESSION_UPDATE_INFO_PARAMS),
                              status);

            if (status != NV_OK)
            {
                // LEVEL_INFO: this runs periodically, keep failures low-noise.
                NV_PRINTF(LEVEL_INFO,
                          "GPU : 0x%0x, NVENC session update info RPC failed, status : 0x%x.\n",
                          pGpu->gpuId, status);
            }
        }
    }
}
420 
/*
 * Work item body queued by the 1Hz callback.
 *
 * Resolves the GPU from its instance number, runs the session statistics
 * pass and then clears the "work item pending" flag so the callback may
 * queue the next one. When the instance does not resolve to a GPU, an error
 * is logged and nothing else happens.
 *
 * @param gpuInstance  GPU instance number to look up.
 * @param pArgs        Unused.
 */
static void _gpuNvEncSessionDataProcessingWorkItem(NvU32 gpuInstance, void *pArgs)
{
    OBJGPU *pGpu = gpumgrGetGpu(gpuInstance);

    if (pGpu != NULL)
    {
        _gpuNvEncSessionDataProcessing(pGpu);

        // Allow the 1Hz callback to queue another work item.
        pGpu->bNvEncSessionDataProcessingWorkItemPending = NV_FALSE;
        return;
    }

    NV_PRINTF(LEVEL_ERROR, "NVENC Sessions GPU instance is invalid\n");
}
435 
436 static void
_gpuNvEncSessionDataProcessingCallback(POBJGPU pGpu,void * data)437 _gpuNvEncSessionDataProcessingCallback(POBJGPU pGpu, void *data)
438 {
439     NV_STATUS   status;
440 
441     if (!pGpu->bNvEncSessionDataProcessingWorkItemPending)
442     {
443         status = osQueueWorkItemWithFlags(pGpu,
444                                           _gpuNvEncSessionDataProcessingWorkItem,
445                                           NULL,
446                                           OS_QUEUE_WORKITEM_FLAGS_LOCK_SEMA
447                                           | OS_QUEUE_WORKITEM_FLAGS_LOCK_GPU_GROUP_DEVICE_RW);
448         if (status != NV_OK)
449         {
450             NV_PRINTF(LEVEL_ERROR,
451                       "NVENC session queuing async callback failed, status=%x\n",
452                       status);
453 
454             // Call directly to do NVENC session data processing
455             _gpuNvEncSessionDataProcessing(pGpu);
456         }
457         else
458         {
459             pGpu->bNvEncSessionDataProcessingWorkItemPending = NV_TRUE;
460         }
461     }
462 }
463