1 /*
2  * SPDX-FileCopyrightText: Copyright (c) 2008-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
3  * SPDX-License-Identifier: MIT
4  *
5  * Permission is hereby granted, free of charge, to any person obtaining a
6  * copy of this software and associated documentation files (the "Software"),
7  * to deal in the Software without restriction, including without limitation
8  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
9  * and/or sell copies of the Software, and to permit persons to whom the
10  * Software is furnished to do so, subject to the following conditions:
11  *
12  * The above copyright notice and this permission notice shall be included in
13  * all copies or substantial portions of the Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
21  * DEALINGS IN THE SOFTWARE.
22  */
23 
24 //******************************************************************************
25 //
26 //   Description:
27 //       This module implements event ring buffers and
28 //       the vGPU interrupt handler.
29 //
30 //******************************************************************************
31 
32 #include "vgpu/vgpu_events.h"
33 
34 #include "kernel/core/core.h"
35 #include "kernel/core/locks.h"
36 #include "kernel/core/system.h"
37 #include "kernel/gpu/gpu.h"
38 #include "kernel/gpu/mem_mgr/heap.h"
39 #include "kernel/gpu/mig_mgr/kernel_mig_manager.h"
40 #include "kernel/gpu/rc/kernel_rc.h"
41 #include "kernel/rmapi/event.h"
42 #include "kernel/gpu/nvlink/kernel_nvlink.h"
43 #include "kernel/gpu/gpu_fabric_probe.h"
44 #include "gpu/bus/kern_bus.h"
45 #include "gpu/device/device.h"
46 #include "gpu/mem_sys/kern_mem_sys.h"
47 
48 #include "Nvcm.h"
49 #include "gpu/mem_mgr/mem_desc.h"
50 #include "objtmr.h"
51 #include "os/os.h"
52 #include "vgpu/dev_vgpu.h"
53 #include "vgpu/rpc.h"
54 #include "vgpu/vgpu_util.h"
55 
56 #ifndef RM_PAGE_SIZE
57 #include "gpu/mem_mgr/virt_mem_allocator_common.h"
58 #endif
59 
60 #include "gpu/bus/kern_bus.h"
61 
62 // workitem callback for pstate change event
63 static void
64 _rmPstateEventCallback(NvU32 gpuInstance, void *pPstateParams);
65 
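//
// Accessors for the event ring indices shared with the GSP plugin: the
// plugin advances the put index in the response buffer as it writes events,
// and the guest advances the get index in the control buffer as it consumes
// them.
//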
66 static inline NvU32 _readEventBufPut(OBJGPU *pGpu, OBJVGPU *pVGpu)
67 {
68     return pVGpu->gspResponseBuf->v1.putEventBuf;
69 }
70 
71 static inline NvU32 _readEventBufGet(OBJGPU *pGpu, OBJVGPU *pVGpu)
72 {
73     return pVGpu->gspCtrlBuf->v1.getEventBuf;
74 }
75 
76 static inline void _writeEventBufGet(OBJGPU *pGpu, OBJVGPU *pVGpu, NvU32 val)
77 {
78     pVGpu->gspCtrlBuf->v1.getEventBuf = val;
79 }
80 
81 // This function is called when the first device is allocated.
// The address space used for the event ring differs by architecture:
// AD10x
//      - Memory is always allocated on FBMEM
// GH100+
//      - Memory is allocated on SYSMEM while BAR2 is in physical mode
//      - Memory is allocated on FBMEM once BAR2 switches to virtual mode
// GH180
//      - Memory is allocated on SYSMEM initially
//      - Memory is allocated on FBMEM after C2C mapping is completed
91 //
92 NV_STATUS _setupGspEventInfrastructure(OBJGPU *pGpu, OBJVGPU *pVGpu)
93 {
94     NV_STATUS status;
95     NV_ADDRESS_SPACE addressSpace = ADDR_FBMEM;
96     NvU32 memFlags = 0;
97     KernelBus *pKernelBus = GPU_GET_KERNEL_BUS(pGpu);
98 
99     if (kbusIsPhysicalBar2InitPagetableEnabled(pKernelBus))
100         memFlags = MEMDESC_FLAGS_CPU_ONLY;
101 
102     if (IsGH100orBetter(pGpu) && (!kbusIsBar2Initialized(pKernelBus)))
103         addressSpace = ADDR_SYSMEM;
104 
105     status = _allocRpcMemDesc(pGpu,
106                               RM_PAGE_SIZE,
107                               NV_MEMORY_CONTIGUOUS,
108                               addressSpace,
109                               memFlags,
110                               &pVGpu->eventRing.mem.pMemDesc,
111                               (void**)&pVGpu->eventRing.mem.pMemory,
112                               (void**)&pVGpu->eventRing.mem.pPriv);
113     if (status != NV_OK)
114     {
115         NV_PRINTF(LEVEL_ERROR, "_setupGspEventInfrastructure: GSP Event buf memory setup failed: 0x%x\n", status);
116         return status;
117     }
118 
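    // Cache the page frame number of the ring buffer, computed from its
    // first page table entry.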
119     pVGpu->eventRing.mem.pfn = memdescGetPte(pVGpu->eventRing.mem.pMemDesc, AT_GPU, 0) >> RM_PAGE_SHIFT;
120 
121     portMemSet(pVGpu->eventRing.mem.pMemory, 0, memdescGetSize(pVGpu->eventRing.mem.pMemDesc));
122 
123     return NV_OK;
124 }
125 
126 // This function is called when the device is freed.
127 void _teardownGspEventInfrastructure(OBJGPU *pGpu, OBJVGPU *pVGpu)
128 {
129     pVGpu->eventRing.mem.pfn = 0;
130 
131     _freeRpcMemDesc(pGpu,
132                     &pVGpu->eventRing.mem.pMemDesc,
133                     (void**)&pVGpu->eventRing.mem.pMemory,
134                     (void**)&pVGpu->eventRing.mem.pPriv);
135 }
136 
137 // Check if a VGPU event is pending
138 NvBool
139 vgpuGetPendingEvent(OBJGPU *pGpu, THREAD_STATE_NODE *pThreadState)
140 {
141     OBJVGPU *pVGpu = GPU_GET_VGPU(pGpu);
142 
143     if (pVGpu->bGspPlugin)
144     {
145         if (_readEventBufPut(pGpu, pVGpu) != _readEventBufGet(pGpu, pVGpu))
146         {
147             return NV_TRUE;
148         }
149     }
150 
151     return NV_FALSE;
152 }
153 
154 static void
155 vgpuRcErrorRecovery
156 (
157     OBJGPU *pGpu,
158     NvU32   chID,
159     NvU32   exceptType,
160     RM_ENGINE_TYPE rmEngineType
161 )
162 {
163     CHID_MGR                *pChidMgr = NULL;
164     KernelChannel           *pKernelChannel;
165     KernelFifo              *pKernelFifo      = GPU_GET_KERNEL_FIFO(pGpu);
166     NV_STATUS                status           = NV_OK;
167     FIFO_MMU_EXCEPTION_DATA  mmuExceptionData = {0};
168 
169     status = kfifoGetChidMgrFromType(pGpu, pKernelFifo, ENGINE_INFO_TYPE_RM_ENGINE_TYPE,
170                                      (NvU32)rmEngineType, &pChidMgr);
171     NV_ASSERT_OR_RETURN_VOID(status == NV_OK);
172 
173     pKernelChannel = kfifoChidMgrGetKernelChannel(pGpu, pKernelFifo, pChidMgr,
174                                                   chID);
175     NV_CHECK_OR_RETURN_VOID(LEVEL_ERROR, pKernelChannel != NULL);
176 
177     //
178     // In case of SRIOV, the notifier is updated before invoking the RPC for RC
179     // in case of an MMU fault. So skip notifying here if the exception type is
180     // 31 and SRIOV is enabled
181     //
182     if (exceptType != ROBUST_CHANNEL_FIFO_ERROR_MMU_ERR_FLT ||
183         (exceptType == ROBUST_CHANNEL_FIFO_ERROR_MMU_ERR_FLT &&
184          IS_VIRTUAL_WITHOUT_SRIOV(pGpu)))
185     {
186         // send the error notifier to clients
187         krcErrorSetNotifier(pGpu, GPU_GET_KERNEL_RC(pGpu),
188                             pKernelChannel,
189                             exceptType,
190                             rmEngineType,
191                             RC_NOTIFIER_SCOPE_TSG);
192     }
193 
    // Invoke the RC error callback (e.g., into KMD on Windows 7).
195     krcErrorInvokeCallback(pGpu, GPU_GET_KERNEL_RC(pGpu),
196                            pKernelChannel,
197                            &mmuExceptionData,
198                            exceptType,
199                            ROBUST_CHANNEL_ERROR_RECOVERY_LEVEL_FATAL,
200                            rmEngineType,
201                            INVALID_RCDB_RCDIAG_INDEX);
202 }
203 
204 void vgpuServiceEventGuestAllocated(OBJGPU *pGpu, OBJVGPU *pVGpu, VGPU_EVENT_BUF_ENTRY *pEventEntry)
205 {
206     NV_STATUS status = NV_OK;
207     PEVENTNOTIFICATION *ppEventNotification;
208     NvHandle hClient = pEventEntry->hClient;
209     NvHandle hObject = pEventEntry->hObject;
210     NvU32 notifyIdx  = pEventEntry->notifyIndex;
211 
212     // Find the PEVENTNOTIFICATION list of this object
213     status = CliGetEventNotificationList(hClient, hObject, NULL, &ppEventNotification);
214     if (status == NV_OK)
215     {
216         // Wake up all clients registered to listen for this event
217         notifyEvents(pGpu, *ppEventNotification,
218                      notifyIdx, 0, 0,
219                      NV_OK, NV_OS_WRITE_THEN_AWAKEN);
220     }
221 }
222 
223 void vgpuServiceEventRC(OBJGPU *pGpu, OBJVGPU *pVGpu, VGPU_EVENT_BUF_ENTRY *pEventEntry)
224 {
225     NV_STATUS status = NV_OK;
226     NvHandle hClient = pEventEntry->hClient;
227     NvHandle hObject = pEventEntry->hObject;
228     NvU32 exceptType = pEventEntry->info32;
229     NvU32 nv2080EngineID = pEventEntry->info16 & DRF_SHIFTMASK(NV_VGPU_EV_NOTIFIER_INFO16_VALUE);
230     NvU32 chID       = pEventEntry->rcChid;
231     RM_ENGINE_TYPE rmEngineType = gpuGetRmEngineType(nv2080EngineID);
232 
233     NV_PRINTF(LEVEL_ERROR,
234               "ROBUST_CHANNEL error occurred (hClient = 0x%x hFifo = 0x%x chID = %d exceptType = %d engineID = 0x%x (0x%x)) ...\n",
235               hClient, hObject, chID, exceptType, nv2080EngineID, rmEngineType);
236 
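    // Under MIG, the engine type from the event entry is local to the GPU
    // instance; translate it to the global engine type before recovery.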
237     if (IS_MIG_IN_USE(pGpu))
238     {
239         KernelMIGManager *pKernelMIGManager = GPU_GET_KERNEL_MIG_MANAGER(pGpu);
240         RsClient         *pClient           = NULL;
241         Device           *pDevice           = NULL;
242         MIG_INSTANCE_REF  ref;
243 
244         NV_ASSERT_OK_OR_ELSE(status,
245             serverGetClientUnderLock(&g_resServ, hClient, &pClient), return);
246 
247         NV_ASSERT_OK_OR_ELSE(status,
248             deviceGetByGpu(pClient, pGpu, NV_TRUE, &pDevice), return);
249 
250         NV_ASSERT_OK_OR_ELSE(status,
251             kmigmgrGetInstanceRefFromDevice(pGpu, pKernelMIGManager, pDevice, &ref),
252             return);
253 
254         NV_ASSERT_OK_OR_ELSE(status,
255             kmigmgrGetLocalToGlobalEngineType(pGpu, pKernelMIGManager, ref,
256                                               rmEngineType, &rmEngineType),
257             return);
258     }
259 
260     vgpuRcErrorRecovery(pGpu, chID, exceptType, rmEngineType);
261 }
262 
263 void vgpuServiceEventVnc(OBJGPU *pGpu, OBJVGPU *pVGpu)
264 {
265     NV_STATUS status = NV_OK;
266 
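    // Read the current VNC connection state from the shared memory page.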
267     pVGpu->bVncConnected = !!(*(NvU32 *)(pVGpu->shared_memory +
268                             (NV_VGPU_SHARED_MEMORY_POINTER_VNC / sizeof(NvU32))));
269 
270     if (pVGpu->bVncConnected)
271     {
272 
273         /* Set surface property without comparison with cached, when console vnc connects */
274         NV_RM_RPC_SET_SURFACE_PROPERTIES(pGpu, pVGpu->last_surface_info.hClient,
275                                          &(pVGpu->last_surface_info.last_surface), NV_TRUE, status);
276         if (status != NV_OK) {
277             NV_PRINTF(LEVEL_ERROR,
278                       "SET_SURFACE_PROPERTY RPC failed with error : 0x%x\n",
279                       status);
280         }
281     }
282 }
283 
284 void vgpuServiceEventPstate(OBJGPU *pGpu, OBJVGPU *pVGpu)
285 {
286     NV_STATUS status = NV_OK;
287     NvU32 *pCurrPstate;
288 
289     pCurrPstate = (pVGpu->shared_memory + (NV_VGPU_SHARED_MEMORY_POINTER_CURRENT_PSTATE / sizeof(NvU32)));
290 
    // Schedule an OS workitem to invoke the pstate change notifier.
292     status = osQueueWorkItemWithFlags(pGpu,
293                                       _rmPstateEventCallback,
294                                       (void *)pCurrPstate,
295                                       OS_QUEUE_WORKITEM_FLAGS_LOCK_GPU_GROUP_DEVICE_RW |
296                                       OS_QUEUE_WORKITEM_FLAGS_DONT_FREE_PARAMS);
297     if (status != NV_OK)
298     {
299         NV_PRINTF(LEVEL_ERROR, "Failed to schedule Pstate callback! 0x%x\n",
300                   status);
301     }
302 }
303 
304 void vgpuServiceEventEcc(OBJGPU *pGpu, OBJVGPU *pVGpu)
305 {
306     VGPU_STATIC_INFO *pVSI = GPU_GET_STATIC_INFO(pGpu);
307     NvU32 ecc_type  = * (NvU32 *)(pVGpu->shared_memory + (NV_VGPU_SHARED_MEMORY_POINTER_ECC_TYPE / sizeof(NvU32)));
308     NvU32 ecc_count = * (NvU32 *)(pVGpu->shared_memory + (NV_VGPU_SHARED_MEMORY_POINTER_ECC_ERROR_COUNT / sizeof(NvU32)));
309     NvU32 ecc_unit  = * (NvU32 *)(pVGpu->shared_memory + (NV_VGPU_SHARED_MEMORY_POINTER_ECC_UNIT / sizeof(NvU32)));
310 
311     if (ecc_unit < NV2080_CTRL_GPU_ECC_UNIT_COUNT)
312     {
313         if (ecc_type == NV2080_NOTIFIERS_ECC_SBE)
314         {
315             pVSI->eccStatus.units[ecc_unit].sbe.count              += ecc_count;
316             pVSI->eccStatus.units[ecc_unit].sbeNonResettable.count += ecc_count;
317         }
318         else
319         {
320             pVSI->eccStatus.units[ecc_unit].dbe.count              += ecc_count;
321             pVSI->eccStatus.units[ecc_unit].dbeNonResettable.count += ecc_count;
322         }
323 
324     }
325 
326     if (gpuIsGlobalPoisonFuseEnabled(pGpu))
327     {
328         pVSI->eccStatus.bFatalPoisonError  = !!(* (NvU32 *)(pVGpu->shared_memory +
329                                         (NV_VGPU_SHARED_MEMORY_POINTER_ECC_POISON_ERROR / sizeof(NvU32))));
330     }
331 
332     if (ecc_count)
333     {
334 
335         gpuNotifySubDeviceEvent(pGpu, ecc_type, NULL, 0,
336                                 (NvV32) ecc_count, (NvV16) ecc_unit);
337     }
338 }
339 
340 void vgpuServiceEventNvencReportingState(OBJGPU *pGpu, OBJVGPU *pVGpu)
341 {
342     pGpu->encSessionStatsReportingState =
343         (*(NvU32 *)(pVGpu->shared_memory + (NV_VGPU_SHARED_MEMORY_POINTER_NVENC_STATS_REPORTING_STATE / sizeof(NvU32))));
344 }
345 
346 void vgpuServiceEventInbandResponse(OBJGPU *pGpu, OBJVGPU *pVGpu)
347 {
348     NV_STATUS status = NV_OK;
349 
350     OBJRPC * pRpc = GPU_GET_RPC(pGpu);
351     NV2080_CTRL_NVLINK_INBAND_RECEIVED_DATA_PARAMS *pData = NULL;
352     NvBool more = NV_TRUE;
353 
354     if (!pVGpu->bGspPlugin)
355         return;
356 
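    // Service any pending inband response published in shared memory: the
    // payload is fetched from the plugin via RPC and dispatched to the
    // NVLink inband message handler.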
357     while (more)
358     {
359         NvU32 inband_resp_state = (*(volatile NvU32 *)(pVGpu->shared_memory +
360                     (NV_VGPU_SHARED_MEMORY_POINTER_NVLINK_INBAND_RESPONSE / sizeof(NvU32))));
361 
362         if (inband_resp_state == NV_VGPU_SHARED_MEMORY_POINTER_NVLINK_INBAND_RESPONSE_NONE)
363         {
364             break;
365         }
        if (pData == NULL)
        {
            pData = portMemAllocNonPaged(sizeof(*pData));
            // Bail out on allocation failure rather than dereferencing NULL
            // in the portMemSet() below.
            if (pData == NULL)
                break;
        }
        portMemSet(pData, 0, sizeof(*pData));
371 
372         if (FLD_TEST_DRF(_VGPU_SHARED_MEMORY_POINTER, _NVLINK_INBAND_RESPONSE, _PROBE, _PENDING, inband_resp_state))
373         {
374             status = rpcCtrlNvlinkGetInbandReceivedData_HAL(pGpu, pRpc, pData,
375                                                             NVLINK_INBAND_MSG_TYPE_GPU_PROBE_RSP, &more);
376             if (status != NV_OK)
377                 goto cleanup;
378 
379             status = knvlinkInbandMsgCallbackDispatcher(pGpu, GPU_GET_KERNEL_NVLINK(pGpu),
380                                                         sizeof(*pData), (void*)pData);
381             if (status != NV_OK)
382                 goto cleanup;
383 
384             break;
385         }
386         if (FLD_TEST_DRF(_VGPU_SHARED_MEMORY_POINTER, _NVLINK_INBAND_RESPONSE, _MC_SETUP, _PENDING, inband_resp_state))
387         {
            /* TODO: Add multicast support here. */
389             break;
390         }
391         break;
392     }
393 cleanup:
394     if (pData != NULL)
395         portMemFree(pData);
396 }
397 
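// Drain the event ring: dispatch each pending entry to its handler, then
// publish the updated get pointer back to the plugin.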
398 void vgpuServiceEvents(OBJGPU *pGpu, OBJVGPU *pVGpu)
399 {
400     VGPU_EVENT_BUF_ENTRY *pEventEntry;
401     NvU32 flags;
402 
403     pVGpu->eventRing.get = _readEventBufGet(pGpu, pVGpu);
404     pVGpu->eventRing.put = _readEventBufPut(pGpu, pVGpu);
405 
406     // process all pending events
407     while (pVGpu->eventRing.get != pVGpu->eventRing.put)
408     {
409         pEventEntry = ((VGPU_EVENT_BUF_ENTRY*) pVGpu->eventRing.mem.pMemory) + pVGpu->eventRing.get;
410 
411         flags = pEventEntry->flags;
412 
413         if (FLD_TEST_DRF(_VGPU, _EV_FLAGS, _ALLOCATED, _GUEST, flags))
414         {
415             vgpuServiceEventGuestAllocated(pGpu, pVGpu, pEventEntry);
416         }
417         else if (FLD_TEST_DRF(_VGPU, _EV_FLAGS, _ALLOCATED, _PLUGIN, flags))
418         {
            // Plugin-allocated events may or may not have guest-side equivalents.
420             switch (DRF_VAL(_VGPU, _EV_FLAGS, _TYPE, flags))
421             {
422                 case NV_VGPU_EV_FLAGS_TYPE_ROBUST_CHANNEL_ERROR:
423                     vgpuServiceEventRC(pGpu, pVGpu, pEventEntry);
424                     break;
425 
426                 case NV_VGPU_EV_FLAGS_TYPE_VNC:
427                     vgpuServiceEventVnc(pGpu, pVGpu);
428                     break;
429 
430                 case NV_VGPU_EV_FLAGS_TYPE_PSTATE:
431                     vgpuServiceEventPstate(pGpu, pVGpu);
432                     break;
433 
434                 case NV_VGPU_EV_FLAGS_TYPE_ECC:
435                     vgpuServiceEventEcc(pGpu, pVGpu);
436                     break;
437 
438                 case NV_VGPU_EV_FLAGS_TYPE_NVENC_REPORTING_STATE:
439                     vgpuServiceEventNvencReportingState(pGpu, pVGpu);
440                     break;
441 
442                 case NV_VGPU_EV_FLAGS_TYPE_INBAND_RESPONSE:
443                     vgpuServiceEventInbandResponse(pGpu, pVGpu);
444                     break;
445 
446                 default:
447                     NV_PRINTF(LEVEL_ERROR, "Unsupported vgpu event type %d\n",
448                               DRF_VAL(_VGPU, _EV_FLAGS, _TYPE, flags));
449                     break;
450             }
451         }
452 
        // Advance and publish our get pointer, then re-sample put to pick up
        // any events queued while servicing.
454         pVGpu->eventRing.get = (pVGpu->eventRing.get + 1) % VGPU_EVENT_BUF_ENTRY_COUNT;
455         _writeEventBufGet(pGpu, pVGpu, pVGpu->eventRing.get);
456         pVGpu->eventRing.put = _readEventBufPut(pGpu, pVGpu);
457     }
458 }
459 
460 void vgpuServiceGspPlugin(OBJGPU *pGpu, OBJVGPU *pVGpu)
461 {
462     vgpuServiceEvents(pGpu, pVGpu);
463 }
464 
465 // Service a VGPU event (bottom half/DPC)
466 void vgpuService(OBJGPU *pGpu)
467 {
468     OBJVGPU *pVGpu = GPU_GET_VGPU(pGpu);
469 
470     if (pVGpu->bGspPlugin)
471         vgpuServiceGspPlugin(pGpu, pVGpu);
472 }
473 
474 /*
475  *  Workitem callback for pstate change event
476  *
477  *   To run at PASSIVE_LEVEL, we queue a workitem for
478  *   gpuNotifySubDeviceEvent().
479  *   So this callback will notify all the guest rmclients
480  *   registered for NV2080_NOTIFIERS_PSTATE_CHANGE event.
481  */
482 static void
483 _rmPstateEventCallback
484 (
485     NvU32 gpuInstance,
486     void *pCurrPstate
487 )
488 {
489     OBJGPU     *pGpu = gpumgrGetGpu(gpuInstance);
490     OBJTMR     *pTmr = GPU_GET_TIMER(pGpu);
491     NvU64       startTime = 0;
492     NvU32       currPstate = *(NvU32 *)pCurrPstate;
493     Nv2080PStateChangeNotification pstateParams;
494 
495     tmrGetCurrentTime(pTmr, &startTime);
496 
497     if (FULL_GPU_SANITY_CHECK(pGpu))
498     {
499         pstateParams.timeStamp.nanoseconds[0] = NvU64_HI32(startTime);
500         pstateParams.timeStamp.nanoseconds[1] = NvU64_LO32(startTime);
501         pstateParams.NewPstate = currPstate;
502 
503         gpuNotifySubDeviceEvent(pGpu, NV2080_NOTIFIERS_PSTATE_CHANGE,
504                 &pstateParams, sizeof(pstateParams), currPstate, 0);
505     }
506     else
507     {
508         NV_PRINTF(LEVEL_ERROR,
509                   "GPU sanity check failed! gpuInstance = 0x%x.\n",
510                   gpuInstance);
511     }
512 }
513