/*
 * SPDX-FileCopyrightText: Copyright (c) 2021-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 * SPDX-License-Identifier: MIT
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 */
#include "gpu/falcon/kernel_falcon.h"
#include "gpu/sec2/kernel_sec2.h"
#include "gpu/gsp/kernel_gsp.h"

#include "gpu/fifo/kernel_fifo.h"
#include "gpu/fifo/kernel_channel.h"
#include "gpu/fifo/kernel_channel_group.h"
#include "gpu/fifo/kernel_channel_group_api.h"
#include "gpu/intr/intr.h"
#include "gpu/subdevice/subdevice.h"
#include "gpu/mem_mgr/mem_mgr.h"
#include "gpu/mem_mgr/mem_desc.h"
#include "mem_mgr/gpu_vaspace.h"
#include "mem_mgr/ctx_buf_pool.h"
#include "rmapi/rmapi.h"

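//
// Copy a falcon engine configuration into the KernelFalcon object:
// register bases, boot mode, PMC enable information, and context buffer
// attributes.
//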
void kflcnConfigureEngine_IMPL(OBJGPU *pGpu, KernelFalcon *pKernelFalcon, KernelFalconEngineConfig *pFalconConfig)
{
    pKernelFalcon->registerBase       = pFalconConfig->registerBase;
    pKernelFalcon->riscvRegisterBase  = pFalconConfig->riscvRegisterBase;
    pKernelFalcon->fbifBase           = pFalconConfig->fbifBase;
    pKernelFalcon->bBootFromHs        = pFalconConfig->bBootFromHs;
    pKernelFalcon->pmcEnableMask      = pFalconConfig->pmcEnableMask;
    pKernelFalcon->bIsPmcDeviceEngine = pFalconConfig->bIsPmcDeviceEngine;
    pKernelFalcon->physEngDesc        = pFalconConfig->physEngDesc;
    pKernelFalcon->ctxAttr            = pFalconConfig->ctxAttr;
    pKernelFalcon->ctxBufferSize      = pFalconConfig->ctxBufferSize;
    pKernelFalcon->addrSpaceList      = pFalconConfig->addrSpaceList;

    NV_PRINTF(LEVEL_INFO, "Configured falcon engine for physEngDesc 0x%x\n", pKernelFalcon->physEngDesc);
}

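//
// Return the KernelFalcon interface for a physical engine descriptor:
// the dedicated SEC2/GSP objects where they exist, otherwise the generic
// kernel falcon tracked by OBJGPU.
//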
KernelFalcon *kflcnGetKernelFalconForEngine_IMPL(OBJGPU *pGpu, ENGDESCRIPTOR physEngDesc)
{
    //
    // Check for any special objects that are instantiated as GPU children.
    // Otherwise, OBJGPU keeps track of all falcons as reported by GSP.
    //
    switch (physEngDesc)
    {
        // this list is mirrored in subdeviceCtrlCmdInternalGetConstructedFalconInfo_IMPL
        case ENG_SEC2:     return staticCast(GPU_GET_KERNEL_SEC2(pGpu), KernelFalcon);
        case ENG_GSP:      return staticCast(GPU_GET_KERNEL_GSP(pGpu), KernelFalcon);
        default:
            return staticCast(gpuGetGenericKernelFalconForEngine(pGpu, physEngDesc), KernelFalcon);
    }
}


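//
// Returns NV_TRUE if falcon context buffers for this channel must be
// allocated by this RM instance; see the vGPU note below.
//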
static NvBool _kflcnNeedToAllocContext(OBJGPU *pGpu, KernelChannel *pKernelChannel)
{
    NvU32 gfid = kchannelGetGfid(pKernelChannel);

    //
    // On vGPU, when the client-allocated ctx buffer feature is enabled, the
    // vGPU guest RM allocates all FLCN context buffers for VF channels.
    // For PF channels (IS_GFID_PF(gfid) is TRUE), however, host RM still needs
    // to allocate the FLCN buffers.
    //
    if (!gpuIsClientRmAllocatedCtxBufferEnabled(pGpu) || IS_GFID_VF(gfid))
        return NV_FALSE;

    return NV_TRUE;
}

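//
// Allocate a zero-initialized falcon engine context buffer, attach it to the
// channel's engine context, and map it into the channel's VA space unless
// that VA space is externally owned. Returns early if the channel group
// already has an engine context or if ctx buffer allocation is skipped.
//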
static NV_STATUS _kflcnAllocAndMapCtxBuffer
(
    OBJGPU *pGpu,
    KernelFalcon *pKernelFalcon,
    KernelChannel *pKernelChannel
)
{
    MEMORY_DESCRIPTOR  *pCtxMemDesc = NULL;
    CTX_BUF_POOL_INFO  *pCtxBufPool = NULL;
    KernelChannelGroup *pKernelChannelGroup = pKernelChannel->pKernelChannelGroupApi->pKernelChannelGroup;
    OBJGVASPACE        *pGVAS = dynamicCast(pKernelChannel->pVAS, OBJGVASPACE);
    NV_STATUS           status = NV_OK;
    NvU64               flags = MEMDESC_FLAGS_OWNED_BY_CURRENT_DEVICE;

    if (kchannelIsCtxBufferAllocSkipped(pKernelChannel))
        return NV_OK;

    kchangrpGetEngineContextMemDesc(pGpu, pKernelChannelGroup, &pCtxMemDesc);
    if (pCtxMemDesc != NULL)
    {
        NV_PRINTF(LEVEL_ERROR, "This channel already has a falcon engine instance on engine %d:%d\n",
                  ENGDESC_FIELD(pKernelFalcon->physEngDesc, _CLASS),
                  ENGDESC_FIELD(pKernelFalcon->physEngDesc, _INST));
        return NV_OK;
    }

    if (ctxBufPoolIsSupported(pGpu) && pKernelChannelGroup->pCtxBufPool != NULL)
    {
        flags |= MEMDESC_FLAGS_OWNED_BY_CTX_BUF_POOL;
        pCtxBufPool = pKernelChannelGroup->pCtxBufPool;
    }

    //
    // Set up an engine context and initialize it.
    //
    NV_ASSERT_OK_OR_RETURN(memdescCreate(&pCtxMemDesc, pGpu,
               pKernelFalcon->ctxBufferSize,
               FLCN_BLK_ALIGNMENT,
               NV_TRUE,
               ADDR_UNKNOWN,
               pKernelFalcon->ctxAttr,
               flags));
    NV_ASSERT_OK_OR_GOTO(status,
        memdescSetCtxBufPool(pCtxMemDesc, pCtxBufPool),
        done);
    NV_ASSERT_OK_OR_GOTO(status,
        memdescAllocList(pCtxMemDesc, memdescU32ToAddrSpaceList(pKernelFalcon->addrSpaceList)),
        done);

    NV_ASSERT_OK_OR_GOTO(status,
        memmgrMemDescMemSet(GPU_GET_MEMORY_MANAGER(pGpu), pCtxMemDesc, 0,
                            TRANSFER_FLAGS_NONE),
        done);

    NV_ASSERT_OK_OR_GOTO(status,
        kchannelSetEngineContextMemDesc(pGpu, pKernelChannel, pKernelFalcon->physEngDesc, pCtxMemDesc),
        done);

    if (!gvaspaceIsExternallyOwned(pGVAS))
    {
        NV_ASSERT_OK_OR_GOTO(status,
            kchannelMapEngineCtxBuf(pGpu, pKernelChannel, pKernelFalcon->physEngDesc),
            done);
    }

done:
    if (status != NV_OK)
    {
        memdescFree(pCtxMemDesc);
        memdescDestroy(pCtxMemDesc);
    }

    return status;
}

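//
// Promote the falcon engine context to Physical RM via
// NV2080_CTRL_CMD_GPU_PROMOTE_CTX. For externally owned VA spaces only the
// physical address of the context buffer is promoted; otherwise its virtual
// address in the channel's VA space is promoted.
//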
static NV_STATUS _kflcnPromoteContext
(
    OBJGPU *pGpu,
    KernelFalcon *pKernelFalcon,
    KernelChannel *pKernelChannel
)
{
    RM_API                *pRmApi = GPU_GET_PHYSICAL_RMAPI(pGpu);
    RsClient              *pClient = RES_GET_CLIENT(pKernelChannel);
    Subdevice             *pSubdevice;
    RM_ENGINE_TYPE         rmEngineType;
    ENGINE_CTX_DESCRIPTOR *pEngCtx;
    NV2080_CTRL_GPU_PROMOTE_CTX_PARAMS rmCtrlParams = {0};
    OBJGVASPACE           *pGVAS = dynamicCast(pKernelChannel->pVAS, OBJGVASPACE);

    NV_ASSERT_OK_OR_RETURN(subdeviceGetByGpu(pClient, pGpu, &pSubdevice));
    NV_ASSERT_OR_RETURN(gpumgrGetSubDeviceInstanceFromGpu(pGpu) == 0, NV_ERR_INVALID_STATE);

    pEngCtx = pKernelChannel->pKernelChannelGroupApi->pKernelChannelGroup->ppEngCtxDesc[0];
    NV_ASSERT_OR_RETURN(pEngCtx != NULL, NV_ERR_INVALID_ARGUMENT);

    NV_ASSERT_OK_OR_RETURN(kfifoEngineInfoXlate_HAL(pGpu, GPU_GET_KERNEL_FIFO(pGpu),
                            ENGINE_INFO_TYPE_ENG_DESC, pKernelFalcon->physEngDesc,
                            ENGINE_INFO_TYPE_RM_ENGINE_TYPE, (NvU32 *)&rmEngineType));

    rmCtrlParams.hClient     = pClient->hClient;
    rmCtrlParams.hObject     = RES_GET_HANDLE(pKernelChannel);
    rmCtrlParams.hChanClient = pClient->hClient;
    rmCtrlParams.size        = pKernelFalcon->ctxBufferSize;
    rmCtrlParams.engineType  = gpuGetNv2080EngineType(rmEngineType);
    rmCtrlParams.ChID        = pKernelChannel->ChID;

    // Promote physical address only. VA will be promoted later as part of nvgpuBindChannelResources.
    if (gvaspaceIsExternallyOwned(pGVAS))
    {
        MEMORY_DESCRIPTOR *pMemDesc = NULL;
        NvU32 physAttr = 0x0;

        NV_ASSERT_OK_OR_RETURN(kchangrpGetEngineContextMemDesc(pGpu,
                                   pKernelChannel->pKernelChannelGroupApi->pKernelChannelGroup, &pMemDesc));
        NV_ASSERT_OR_RETURN(memdescGetContiguity(pMemDesc, AT_GPU), NV_ERR_INVALID_STATE);

        switch (memdescGetAddressSpace(pMemDesc))
        {
            case ADDR_FBMEM:
                physAttr = FLD_SET_DRF(2080, _CTRL_GPU_INITIALIZE_CTX,
                           _APERTURE, _VIDMEM, physAttr);
                break;

            case ADDR_SYSMEM:
                if (memdescGetCpuCacheAttrib(pMemDesc) == NV_MEMORY_CACHED)
                {
                    physAttr = FLD_SET_DRF(2080, _CTRL_GPU_INITIALIZE_CTX,
                                _APERTURE, _COH_SYS, physAttr);
                }
                else if (memdescGetCpuCacheAttrib(pMemDesc) == NV_MEMORY_UNCACHED)
                {
                    physAttr = FLD_SET_DRF(2080, _CTRL_GPU_INITIALIZE_CTX,
                               _APERTURE, _NCOH_SYS, physAttr);
                }
                else
                {
                    return NV_ERR_INVALID_STATE;
                }
                break;

            default:
                return NV_ERR_INVALID_STATE;
        }

        physAttr = FLD_SET_DRF(2080, _CTRL_GPU_INITIALIZE_CTX, _GPU_CACHEABLE, _NO, physAttr);

        rmCtrlParams.entryCount = 1;
        rmCtrlParams.promoteEntry[0].gpuPhysAddr = memdescGetPhysAddr(pMemDesc, AT_GPU, 0);
        rmCtrlParams.promoteEntry[0].size = pMemDesc->Size;
        rmCtrlParams.promoteEntry[0].physAttr = physAttr;
        rmCtrlParams.promoteEntry[0].bufferId = 0; // unused for flcn
        rmCtrlParams.promoteEntry[0].bInitialize = NV_TRUE;
        rmCtrlParams.promoteEntry[0].bNonmapped = NV_TRUE;
    }
    else
    {
        NvU64 addr;
        NV_ASSERT_OK_OR_RETURN(vaListFindVa(&pEngCtx->vaList, pKernelChannel->pVAS, &addr));
        rmCtrlParams.virtAddress = addr;
    }

    NV_ASSERT_OK_OR_RETURN(pRmApi->Control(pRmApi, pClient->hClient, RES_GET_HANDLE(pSubdevice),
        NV2080_CTRL_CMD_GPU_PROMOTE_CTX, &rmCtrlParams, sizeof(rmCtrlParams)));

    return NV_OK;
}


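//
// Allocate, map, and promote a falcon engine context for the given channel,
// when this RM instance is responsible for allocating it.
//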
NV_STATUS kflcnAllocContext_IMPL
(
    OBJGPU        *pGpu,
    KernelFalcon  *pKernelFalcon,
    KernelChannel *pKernelChannel,
    NvU32          classNum
)
{
    NV_ASSERT_OR_RETURN(pKernelChannel != NULL, NV_ERR_INVALID_CHANNEL);

    if (!_kflcnNeedToAllocContext(pGpu, pKernelChannel))
        return NV_OK;

    NV_ASSERT_OR_RETURN(gpuIsClassSupported(pGpu, classNum), NV_ERR_INVALID_OBJECT);

    NV_ASSERT_OK_OR_RETURN(_kflcnAllocAndMapCtxBuffer(pGpu, pKernelFalcon, pKernelChannel));

    return _kflcnPromoteContext(pGpu, pKernelFalcon, pKernelChannel);
}

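//
// Unmap and free the falcon engine context buffer previously attached to the
// channel, if one was allocated.
//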
NV_STATUS kflcnFreeContext_IMPL
(
    OBJGPU        *pGpu,
    KernelFalcon  *pKernelFalcon,
    KernelChannel *pKernelChannel,
    NvU32          classNum
)
{
    MEMORY_DESCRIPTOR *pCtxMemDesc = NULL;
    NV_ASSERT_OR_RETURN(pKernelChannel != NULL, NV_ERR_INVALID_CHANNEL);

    if (!_kflcnNeedToAllocContext(pGpu, pKernelChannel))
        return NV_OK;

    if (kchannelIsCtxBufferAllocSkipped(pKernelChannel))
        return NV_OK;

    kchangrpGetEngineContextMemDesc(pGpu,
        pKernelChannel->pKernelChannelGroupApi->pKernelChannelGroup,
        &pCtxMemDesc);

    if (pCtxMemDesc == NULL)
    {
        NV_PRINTF(LEVEL_WARNING,
                  "The channel 0x%x does not have a falcon engine instance for engDesc=0x%x\n",
                  kchannelGetDebugTag(pKernelChannel), pKernelFalcon->physEngDesc);
        return NV_OK;
    }

    kchannelUnmapEngineCtxBuf(pGpu, pKernelChannel, pKernelFalcon->physEngDesc);
    kchannelSetEngineContextMemDesc(pGpu, pKernelChannel, pKernelFalcon->physEngDesc, NULL);
    memdescFree(pCtxMemDesc);
    memdescDestroy(pCtxMemDesc);

    return NV_OK;
}

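//
// Construct a GenericKernelFalcon, applying the engine configuration when
// one is provided.
//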
NV_STATUS gkflcnConstruct_IMPL
(
    GenericKernelFalcon *pGenericKernelFalcon,
    OBJGPU *pGpu,
    KernelFalconEngineConfig *pFalconConfig
)
{
    KernelFalcon *pKernelFalcon = staticCast(pGenericKernelFalcon, KernelFalcon);
    if (pFalconConfig != NULL)
    {
        kflcnConfigureEngine(pGpu, pKernelFalcon, pFalconConfig);
    }
    return NV_OK;
}

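//
// Hardware reset is not supported on GenericKernelFalcon; resets are expected
// to go through a full KernelFalcon implementation instead.
//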
NV_STATUS gkflcnResetHw_IMPL(OBJGPU *pGpu, GenericKernelFalcon *pGenKernFlcn)
{
    NV_ASSERT_FAILED("This should only be called on full KernelFalcon implementations");
    return NV_ERR_NOT_SUPPORTED;
}

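//
// Register this generic falcon as the notification (nonstall) interrupt
// service for its engine. Only the video falcons (NVDEC, OFA, NVJPEG, and
// MSENC) are registered here.
//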
void gkflcnRegisterIntrService_IMPL(OBJGPU *pGpu, GenericKernelFalcon *pGenericKernelFalcon, IntrServiceRecord pRecords[MC_ENGINE_IDX_MAX])
{
    KernelFalcon *pKernelFalcon = staticCast(pGenericKernelFalcon, KernelFalcon);
    NV_ASSERT_OR_RETURN_VOID(pKernelFalcon);

    NV_PRINTF(LEVEL_INFO, "physEngDesc 0x%x\n", pKernelFalcon->physEngDesc);

    if (!IS_NVDEC(pKernelFalcon->physEngDesc) &&
        pKernelFalcon->physEngDesc != ENG_OFA &&
        !IS_NVJPEG(pKernelFalcon->physEngDesc) &&
        !IS_MSENC(pKernelFalcon->physEngDesc))
        return;

    // Register to handle nonstall interrupts of the corresponding physical falcon in Kernel RM
    if (pKernelFalcon->physEngDesc != ENG_INVALID)
    {
        NvU32 mcIdx = MC_ENGINE_IDX_NULL;

        NV_STATUS status = kfifoEngineInfoXlate_HAL(pGpu, GPU_GET_KERNEL_FIFO(pGpu),
            ENGINE_INFO_TYPE_ENG_DESC, pKernelFalcon->physEngDesc,
            ENGINE_INFO_TYPE_MC, &mcIdx);

        NV_ASSERT_OR_RETURN_VOID(status == NV_OK);

        NV_PRINTF(LEVEL_INFO, "Registering 0x%x/0x%x to handle nonstall intr\n", pKernelFalcon->physEngDesc, mcIdx);

        NV_ASSERT(pRecords[mcIdx].pNotificationService == NULL);
        pRecords[mcIdx].bFifoWaiveNotify = NV_FALSE;
        pRecords[mcIdx].pNotificationService = staticCast(pGenericKernelFalcon, IntrService);
    }
}

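//
// Service a nonstall (notification) interrupt: translate the MC engine index
// back to an RM engine type and notify channels waiting on that engine.
//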
NV_STATUS gkflcnServiceNotificationInterrupt_IMPL(OBJGPU *pGpu, GenericKernelFalcon *pGenericKernelFalcon, IntrServiceServiceNotificationInterruptArguments *pParams)
{
    NvU32 idxMc = pParams->engineIdx;
    RM_ENGINE_TYPE rmEngineType = RM_ENGINE_TYPE_NULL;

    NV_PRINTF(LEVEL_INFO, "nonstall intr for MC 0x%x\n", idxMc);

    if (MC_ENGINE_IDX_NVDECn(0) <= idxMc &&
        idxMc < MC_ENGINE_IDX_NVDECn(RM_ENGINE_TYPE_NVDEC_SIZE))
    {
        NvU32 nvdecIdx = idxMc - MC_ENGINE_IDX_NVDECn(0);
        rmEngineType = RM_ENGINE_TYPE_NVDEC(nvdecIdx);
    }
    else if (idxMc == MC_ENGINE_IDX_OFA0)
    {
        rmEngineType = RM_ENGINE_TYPE_OFA;
    }
    else if (MC_ENGINE_IDX_NVJPEGn(0) <= idxMc &&
             idxMc < MC_ENGINE_IDX_NVJPEGn(RM_ENGINE_TYPE_NVJPEG_SIZE))
    {
        NvU32 nvjpgIdx = idxMc - MC_ENGINE_IDX_NVJPEGn(0);
        rmEngineType = RM_ENGINE_TYPE_NVJPEG(nvjpgIdx);
    }
    else if (MC_ENGINE_IDX_MSENCn(0) <= idxMc &&
             idxMc < MC_ENGINE_IDX_MSENCn(RM_ENGINE_TYPE_NVENC_SIZE))
    {
        NvU32 msencIdx = idxMc - MC_ENGINE_IDX_MSENCn(0);
        rmEngineType = RM_ENGINE_TYPE_NVENC(msencIdx);
    }

    NV_ASSERT_OR_RETURN(rmEngineType != RM_ENGINE_TYPE_NULL, NV_ERR_INVALID_STATE);

    // Wake up channels waiting on this event
    engineNonStallIntrNotify(pGpu, rmEngineType);

    return NV_OK;
}