1 /*
2  * SPDX-FileCopyrightText: Copyright (c) 2021-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
3  * SPDX-License-Identifier: MIT
4  *
5  * Permission is hereby granted, free of charge, to any person obtaining a
6  * copy of this software and associated documentation files (the "Software"),
7  * to deal in the Software without restriction, including without limitation
8  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
9  * and/or sell copies of the Software, and to permit persons to whom the
10  * Software is furnished to do so, subject to the following conditions:
11  *
12  * The above copyright notice and this permission notice shall be included in
13  * all copies or substantial portions of the Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
21  * DEALINGS IN THE SOFTWARE.
22  */
23 
24 #include "kernel/gpu/fifo/kernel_fifo.h"
25 #include "kernel/gpu/fifo/kernel_channel.h"
26 #include "kernel/gpu/fifo/kernel_channel_group.h"
27 #include "kernel/gpu/fifo/kernel_channel_group_api.h"
28 #include "kernel/gpu/fifo/kernel_sched_mgr.h"
29 #include "gpu/mem_mgr/mem_mgr.h"
30 #include "gpu/mmu/kern_gmmu.h"
31 
32 #include "nvRmReg.h"
33 
34 #include "vgpu/rpc.h"
35 #include "gpu/bus/kern_bus.h"
36 
37 #include "published/maxwell/gm107/dev_ram.h"
38 #include "published/maxwell/gm107/dev_mmu.h"
39 
40 
41 static inline NvBool
42 _isEngineInfoTypeValidForOnlyHostDriven(ENGINE_INFO_TYPE type);
43 
44 
45 /*! Construct kfifo object */
46 NV_STATUS
kfifoConstructHal_GM107(OBJGPU * pGpu,KernelFifo * pKernelFifo)47 kfifoConstructHal_GM107
48 (
49     OBJGPU     *pGpu,
50     KernelFifo *pKernelFifo
51 )
52 {
53     NV_STATUS status;
54     PREALLOCATED_USERD_INFO *pUserdInfo = &pKernelFifo->userdInfo;
55 
56     if (FLD_TEST_DRF(_REG_STR_RM, _INST_VPR, _INSTBLK, _TRUE, pGpu->instVprOverrides))
57     {
58         pKernelFifo->bInstProtectedMem = NV_TRUE;
59     }
60 
61     // Instance Memory
62     switch (DRF_VAL( _REG_STR_RM, _INST_LOC, _INSTBLK, pGpu->instLocOverrides))
63     {
64         default:
65         case NV_REG_STR_RM_INST_LOC_INSTBLK_DEFAULT:
66             if (kfifoIsMixedInstmemApertureDefAllowed(pKernelFifo))
67                 pKernelFifo->pInstAllocList  = ADDRLIST_FBMEM_PREFERRED;
68             else
69                 pKernelFifo->pInstAllocList  = ADDRLIST_FBMEM_ONLY;
70 
71             pKernelFifo->InstAttr        = NV_MEMORY_UNCACHED;
72             break;
73         case NV_REG_STR_RM_INST_LOC_INSTBLK_VID:
74             pKernelFifo->pInstAllocList  = ADDRLIST_FBMEM_ONLY;
75             pKernelFifo->InstAttr        = NV_MEMORY_UNCACHED;
76             break;
77         case NV_REG_STR_RM_INST_LOC_INSTBLK_COH:
78             pKernelFifo->pInstAllocList  = ADDRLIST_SYSMEM_ONLY;
79             pKernelFifo->InstAttr        = NV_MEMORY_CACHED;
80             break;
81         case NV_REG_STR_RM_INST_LOC_INSTBLK_NCOH:
82             pKernelFifo->pInstAllocList  = ADDRLIST_SYSMEM_ONLY;
83             pKernelFifo->InstAttr        = NV_MEMORY_UNCACHED;
84             break;
85     }
86 
87     // USERD
88     pUserdInfo->userdAperture    = ADDR_FBMEM;
89     pUserdInfo->userdAttr        = NV_MEMORY_WRITECOMBINED;
90     memdescOverrideInstLoc(DRF_VAL( _REG_STR_RM, _INST_LOC, _USERD, pGpu->instLocOverrides),
91                            "USERD",
92                            &pUserdInfo->userdAperture,
93                            &pUserdInfo->userdAttr);
94 
95     // Create child object KernelSchedMgr
96     if (kfifoIsSchedSupported(pKernelFifo))
97     {
98         pKernelFifo->pKernelSchedMgr = NULL;
99         status = objCreate(&pKernelFifo->pKernelSchedMgr, pKernelFifo, KernelSchedMgr);
100         if (status != NV_OK)
101         {
102             pKernelFifo->pKernelSchedMgr = NULL;
103             return status;
104         }
105         kschedmgrConstructPolicy(pKernelFifo->pKernelSchedMgr, pGpu);
106     }
107 
108     return NV_OK;
109 }
110 
111 /**
112  * @brief Allocate a page for dummy page directory
113  *
114  * On GV100, PDB corresponding to subcontexts that are freed
115  * will point to a dummy page directory instead of setting it to NULL
116  * Here we allocate a page for this page directory
117  */
118 static NV_STATUS
_kfifoAllocDummyPage(OBJGPU * pGpu,KernelFifo * pKernelFifo)119 _kfifoAllocDummyPage
120 (
121     OBJGPU     *pGpu,
122     KernelFifo *pKernelFifo
123 )
124 {
125     NV_STATUS status   = NV_OK;
126     NvU32     flags    = MEMDESC_FLAGS_NONE;
127     NvBool    bBcState = gpumgrGetBcEnabledStatus(pGpu);
128 
129     if (bBcState)
130     {
131         flags |= MEMDESC_FLAGS_ALLOC_PER_SUBDEVICE;
132     }
133 
134     // Using instance block attributes to allocate dummy page
135     status = memdescCreate(&pKernelFifo->pDummyPageMemDesc, pGpu,
136                            RM_PAGE_SIZE,
137                            0,
138                            NV_FALSE,
139                            ADDR_UNKNOWN,
140                            pKernelFifo->InstAttr,
141                            flags);
142     if (status != NV_OK)
143     {
144         NV_PRINTF(LEVEL_ERROR, "Could not memdescCreate for dummy page\n");
145         DBG_BREAKPOINT();
146         return status;
147     }
148 
149     memdescTagAllocList(status, NV_FB_ALLOC_RM_INTERNAL_OWNER_UNNAMED_TAG_80,
150                     pKernelFifo->pDummyPageMemDesc, pKernelFifo->pInstAllocList);
151     if (status !=  NV_OK)
152     {
153         NV_PRINTF(LEVEL_ERROR, "Could not allocate dummy page\n");
154         DBG_BREAKPOINT();
155         memdescDestroy(pKernelFifo->pDummyPageMemDesc);
156         pKernelFifo->pDummyPageMemDesc = NULL;
157     }
158 
159     return status;
160 }
161 
162 /**
163  * @brief Free the page used for dummy page directory
164  */
165 static void
_kfifoFreeDummyPage(OBJGPU * pGpu,KernelFifo * pKernelFifo)166 _kfifoFreeDummyPage
167 (
168     OBJGPU     *pGpu,
169     KernelFifo *pKernelFifo
170 )
171 {
172     // Free dummy page memdesc
173     memdescFree(pKernelFifo->pDummyPageMemDesc);
174     memdescDestroy(pKernelFifo->pDummyPageMemDesc);
175     pKernelFifo->pDummyPageMemDesc = NULL;
176 }
177 
178 NV_STATUS
kfifoStatePostLoad_GM107(OBJGPU * pGpu,KernelFifo * pKernelFifo,NvU32 flags)179 kfifoStatePostLoad_GM107
180 (
181     OBJGPU     *pGpu,
182     KernelFifo *pKernelFifo,
183     NvU32       flags
184 )
185 {
186     NV_STATUS                      status     = NV_OK;
187     const PREALLOCATED_USERD_INFO *pUserdInfo = kfifoGetPreallocatedUserdInfo(pKernelFifo);
188 
189     if (!(flags & GPU_STATE_FLAGS_PRESERVING))
190     {
191         // Prealloc USERD
192         NV_ASSERT_OK_OR_RETURN(kfifoPreAllocUserD_HAL(pGpu, pKernelFifo));
193 
194         if (gpumgrIsParentGPU(pGpu))
195         {
196             if (kfifoIsZombieSubctxWarEnabled(pKernelFifo))
197             {
198                 NvBool bBcState = gpumgrGetBcEnabledStatus(pGpu);
199                 gpumgrSetBcEnabledStatus(pGpu, NV_TRUE);
200 
201                 status = _kfifoAllocDummyPage(pGpu, pKernelFifo);
202                 if (status != NV_OK)
203                 {
204                     NV_PRINTF(LEVEL_ERROR,
205                               "Failed to allocate dummy page for zombie subcontexts\n");
206                     DBG_BREAKPOINT();
207                     gpumgrSetBcEnabledStatus(pGpu, bBcState);
208                     return status;
209                 }
210 
211                 if (IS_VIRTUAL_WITH_SRIOV(pGpu))
212                 {
213                     NV2080_CTRL_FIFO_SETUP_VF_ZOMBIE_SUBCTX_PDB_PARAMS params;
214                     MEMORY_DESCRIPTOR *pDummyPageMemDesc = kfifoGetDummyPageMemDesc(pKernelFifo);
215 
216                     portMemSet(&params, 0, sizeof(params));
217 
218                     params.base     = memdescGetPhysAddr(pDummyPageMemDesc, AT_GPU, 0);;
219                     params.size     = pDummyPageMemDesc->Size;
220                     params.addressSpace = memdescGetAddressSpace(pDummyPageMemDesc);
221                     params.cacheAttrib  = memdescGetCpuCacheAttrib(pDummyPageMemDesc);
222 
223                     NV_RM_RPC_CONTROL(pGpu,
224                                       pGpu->hDefaultClientShare,
225                                       pGpu->hDefaultClientShareSubDevice,
226                                       NV2080_CTRL_CMD_FIFO_SETUP_VF_ZOMBIE_SUBCTX_PDB,
227                                       &params,
228                                       sizeof(params),
229                                       status);
230                     if (status != NV_OK)
231                     {
232                         NV_PRINTF(LEVEL_ERROR,
233                             "RM control call to setup zombie subctx failed, status 0x%x\n", status);
234                         DBG_BREAKPOINT();
235                         return status;
236                     }
237                 }
238 
239                 gpumgrSetBcEnabledStatus(pGpu, bBcState);
240             }
241         }
242     }
243 
244     // Since we have successfully setup BAR1 USERD rsvd memory
245     // lets inform hw (only if the snoop is not disabled.)
246     kfifoSetupBar1UserdSnoop_HAL(pGpu, pKernelFifo, NV_TRUE, pUserdInfo->userdBar1MapStartOffset);
247 
248     if (IS_GSP_CLIENT(pGpu) || IS_VIRTUAL(pGpu))
249     {
250         status = kfifoTriggerPostSchedulingEnableCallback(pGpu, pKernelFifo);
251         if (status != NV_OK)
252             return status;
253     }
254 
255     return status;
256 }
257 
/*!
 * @brief Pre-unload FIFO state teardown (GM107 HAL).
 *
 * On a non-preserving unload of the parent GPU: frees the zombie-subctx
 * dummy page (if the WAR is enabled), notifies pre-scheduling-disable
 * handlers, and disables BAR1 USERD snooping under forced SLI broadcast.
 * The preallocated USERD itself is freed for any non-preserving unload.
 *
 * NOTE(review): the reentrancy pop/push around the snoop-disable call is
 * order-sensitive; code intentionally left byte-identical.
 *
 * @param[in] pGpu
 * @param[in] pKernelFifo
 * @param[in] flags   GPU state flags (GPU_STATE_FLAGS_PRESERVING honored)
 *
 * @return status of kfifoTriggerPreSchedulingDisableCallback, or NV_OK.
 */
NV_STATUS
kfifoStatePreUnload_GM107
(
    OBJGPU     *pGpu,
    KernelFifo *pKernelFifo,
    NvU32       flags
)
{
    NV_STATUS status = NV_OK;
    NvU32     sliLoopReentrancy;

    NV_PRINTF(LEVEL_INFO, "start\n");

    if (!(flags & GPU_STATE_FLAGS_PRESERVING) && gpumgrIsParentGPU(pGpu))
    {
        NvBool bBcState = NV_FALSE;

        if (kfifoIsZombieSubctxWarEnabled(pKernelFifo))
        {
            // Undo the dummy-PDB allocation made in kfifoStatePostLoad.
            _kfifoFreeDummyPage(pGpu, pKernelFifo);
        }

        // Notify the handlers that the channel will soon be disabled.
        status = kfifoTriggerPreSchedulingDisableCallback(pGpu, pKernelFifo);

        // Enable broadcast on SLI
        bBcState = gpumgrGetBcEnabledStatus(pGpu);
        gpumgrSetBcEnabledStatus(pGpu, NV_TRUE);

        // As we have forced here SLI broadcast mode, temporarily reset the reentrancy count
        sliLoopReentrancy = gpumgrSLILoopReentrancyPop(pGpu);

        // Ask host to stop snooping
        kfifoSetupBar1UserdSnoop_HAL(pGpu, pKernelFifo, NV_FALSE, 0);

        // Restore the reentrancy count
        gpumgrSLILoopReentrancyPush(pGpu, sliLoopReentrancy);

        // Restore prior broadcast state
        gpumgrSetBcEnabledStatus(pGpu, bBcState);
    }

    if (!(flags & GPU_STATE_FLAGS_PRESERVING))
    {
        // Free preallocated userd
        kfifoFreePreAllocUserD_HAL(pGpu, pKernelFifo);
    }

    return status;
}
308 
309 /**
310  * Returns the default timeslice (in us) for a channelgroup as defined by hardware.
311  */
312 NvU64
kfifoChannelGroupGetDefaultTimeslice_GM107(KernelFifo * pKernelFifo)313 kfifoChannelGroupGetDefaultTimeslice_GM107
314 (
315     KernelFifo *pKernelFifo
316 )
317 {
318     return NV_RAMRL_ENTRY_TIMESLICE_TIMEOUT_128 << NV_RAMRL_ENTRY_TIMESLICE_SCALE_3;
319 }
320 
321 /*! Get size and alignment requirements for instance memory */
322 NV_STATUS
kfifoGetInstMemInfo_GM107(KernelFifo * pKernelFifo,NvU64 * pSize,NvU64 * pAlignment,NvBool * pbInstProtectedMem,NvU32 * pInstAttr,const NV_ADDRESS_SPACE ** ppInstAllocList)323 kfifoGetInstMemInfo_GM107
324 (
325     KernelFifo  *pKernelFifo,
326     NvU64       *pSize,
327     NvU64       *pAlignment,
328     NvBool      *pbInstProtectedMem,
329     NvU32       *pInstAttr,
330     const NV_ADDRESS_SPACE **ppInstAllocList
331 )
332 {
333     NV_ASSERT_OR_RETURN(pSize != NULL, NV_ERR_INVALID_ARGUMENT);
334     NV_ASSERT_OR_RETURN(pAlignment != NULL, NV_ERR_INVALID_ARGUMENT);
335 
336     *pSize = NV_RAMIN_ALLOC_SIZE;
337     *pAlignment = 1 << NV_RAMIN_BASE_SHIFT;
338 
339     if(pbInstProtectedMem != NULL)
340         *pbInstProtectedMem = pKernelFifo->bInstProtectedMem;
341 
342     if(pInstAttr != NULL)
343         *pInstAttr = pKernelFifo->InstAttr;
344 
345     if(ppInstAllocList != NULL)
346         *ppInstAllocList = pKernelFifo->pInstAllocList;
347 
348     return NV_OK;
349 }
350 
351 /*! Gets instance block size and offset align for instance memory */
352 void
kfifoGetInstBlkSizeAlign_GM107(KernelFifo * pKernelFifo,NvU32 * pSize,NvU32 * pShift)353 kfifoGetInstBlkSizeAlign_GM107
354 (
355     KernelFifo *pKernelFifo,
356     NvU32      *pSize,
357     NvU32      *pShift
358 )
359 {
360     *pSize  = NV_RAMIN_ALLOC_SIZE;
361     *pShift = NV_RAMIN_BASE_SHIFT;
362 
363     return;
364 }
365 
/*!
 * @brief Gets the default runlist id to use for channels allocated with no engines on them.
 *
 * @param[in] pGpu
 * @param[in] pKernelFifo
 * @param[in] rmEngineType      - Engine type of the channel to retrieve default runlist id for
 *
 * @returns the runlist id of the engine (or of ENG_GR(0) when rmEngineType is
 *          invalid or untranslatable), or INVALID_RUNLIST_ID on failure.
 */
NvU32
kfifoGetDefaultRunlist_GM107
(
    OBJGPU *pGpu,
    KernelFifo *pKernelFifo,
    RM_ENGINE_TYPE rmEngineType
)
{
    NvU32 runlistId = INVALID_RUNLIST_ID;
    ENGDESCRIPTOR engDesc = ENG_GR(0);

    if (RM_ENGINE_TYPE_IS_VALID(rmEngineType))
    {
        // if translation fails, default is ENG_GR(0)
        NV_ASSERT_OK(
            kfifoEngineInfoXlate_HAL(pGpu, pKernelFifo,
                ENGINE_INFO_TYPE_RM_ENGINE_TYPE, (NvU32)rmEngineType,
                ENGINE_INFO_TYPE_ENG_DESC,       &engDesc));
    }

    // if translation fails, default is INVALID_RUNLIST_ID
    if (kfifoEngineInfoXlate_HAL(pGpu, pKernelFifo,
                                 ENGINE_INFO_TYPE_ENG_DESC,
                                 engDesc,
                                 ENGINE_INFO_TYPE_RUNLIST,
                                 &runlistId) != NV_OK)
    {
        runlistId = INVALID_RUNLIST_ID;
    }

    return runlistId;
}
405 
406 /**
407  * @brief Programs a channel's runlist id to a given value
408  *
409  * Verifies that the requested engine is valid based on the current channel's
410  * state.  Does not bind the channel to the runlist in sw or hw. @ref kfifoRunlistSetId.
411  *
412  * @param pGpu
413  * @param pKernelFifo
414  * @param[in/out] pKernelChannel
415  * @param[in] runlistId runlist ID to use
416  */
417 NV_STATUS
kfifoRunlistSetId_GM107(OBJGPU * pGpu,KernelFifo * pKernelFifo,KernelChannel * pKernelChannel,NvU32 runlistId)418 kfifoRunlistSetId_GM107
419 (
420     OBJGPU *pGpu,
421     KernelFifo *pKernelFifo,
422     KernelChannel *pKernelChannel,
423     NvU32   runlistId
424 )
425 {
426     NV_ASSERT_OR_RETURN(pKernelChannel != NULL, NV_ERR_INVALID_ARGUMENT);
427 
428     if ((runlistId != kchannelGetRunlistId(pKernelChannel)) &&
429         kchannelIsRunlistSet(pGpu, pKernelChannel))
430     {
431         NV_PRINTF(LEVEL_ERROR,
432                   "Channel has already been assigned a runlist incompatible with this "
433                   "engine (requested: 0x%x current: 0x%x).\n", runlistId,
434                   kchannelGetRunlistId(pKernelChannel));
435         return NV_ERR_INVALID_STATE;
436     }
437 
438     //
439     // For TSG channel, the RL should support TSG.
440     // We relax this requirement if the channel is TSG wrapped by RM.
441     // In that case, RM won't write the TSG header in the RL.
442     //
443     if (!kfifoRunlistIsTsgHeaderSupported_HAL(pGpu, pKernelFifo, runlistId) &&
444         (pKernelChannel->pKernelChannelGroupApi != NULL) &&
445         !pKernelChannel->pKernelChannelGroupApi->pKernelChannelGroup->bAllocatedByRm)
446     {
447         NV_PRINTF(LEVEL_ERROR, "Runlist does not support TSGs\n");
448         return NV_ERR_INVALID_STATE;
449     }
450 
451     // If you want to set runlistId of channel - first set it on TSG
452     if (pKernelChannel->pKernelChannelGroupApi != NULL)
453     {
454         // Change TSG runlist if channel is the only one
455         if (pKernelChannel->pKernelChannelGroupApi->pKernelChannelGroup->chanCount == 1 ||
456             !pKernelChannel->pKernelChannelGroupApi->pKernelChannelGroup->bRunlistAssigned)
457         {
458             pKernelChannel->pKernelChannelGroupApi->pKernelChannelGroup->runlistId = runlistId;
459             pKernelChannel->pKernelChannelGroupApi->pKernelChannelGroup->bRunlistAssigned = NV_TRUE;
460         }
461         else
462         {
463             NV_ASSERT_OR_RETURN(pKernelChannel->pKernelChannelGroupApi->pKernelChannelGroup->runlistId ==
464                                     runlistId,
465                                 NV_ERR_INVALID_STATE);
466         }
467     }
468 
469     kchannelSetRunlistId(pKernelChannel, runlistId);
470     kchannelSetRunlistSet(pGpu, pKernelChannel, NV_TRUE);
471     return NV_OK;
472 }
473 
474 /**
475  * @brief Programs a channel's runlist id given the engine tag
476  *
477  * Verifies that the requested engine is valid based on the current channel's
478  * state.  Does not bind the channel to the runlist in sw or hw. @ref kfifoRunlistSetIdByEngine.
479  *
480  * @param pGpu
481  * @param pKernelFifo
482  * @param[in/out] pKernelChannel
483  * @param[in] engDesc
484  */
485 NV_STATUS
kfifoRunlistSetIdByEngine_GM107(OBJGPU * pGpu,KernelFifo * pKernelFifo,KernelChannel * pKernelChannel,NvU32 engDesc)486 kfifoRunlistSetIdByEngine_GM107
487 (
488     OBJGPU *pGpu,
489     KernelFifo *pKernelFifo,
490     KernelChannel *pKernelChannel,
491     NvU32   engDesc
492 )
493 {
494     NvU32 runlistId;
495     NV_STATUS status;
496     NvU32 subctxType = 0;
497 
498     NV_ASSERT_OR_RETURN(pKernelChannel != NULL, NV_ERR_INVALID_ARGUMENT);
499 
500     kfifoGetSubctxType_HAL(pGpu, pKernelFifo, pKernelChannel, &subctxType);
501 
502     if (!kfifoValidateEngineAndRunqueue_HAL(pGpu, pKernelFifo, engDesc, kchannelGetRunqueue(pKernelChannel)))
503         return NV_ERR_INVALID_ARGUMENT;
504 
505     if (!kfifoValidateEngineAndSubctxType_HAL(pGpu, pKernelFifo, engDesc, subctxType))
506         return NV_ERR_INVALID_ARGUMENT;
507 
508     //
509     // SW objects can go on any runlist so we defer committing of runlist ID to
510     // scheduling or another object's allocation.
511     //
512     if ((engDesc == ENG_SW) || (engDesc == ENG_BUS))
513         return NV_OK;
514 
515     NV_ASSERT_OK_OR_RETURN(kfifoEngineInfoXlate_HAL(pGpu, pKernelFifo, ENGINE_INFO_TYPE_ENG_DESC,
516                 engDesc, ENGINE_INFO_TYPE_RUNLIST, &runlistId));
517 
518     status = kfifoRunlistSetId_HAL(pGpu, pKernelFifo, pKernelChannel, runlistId);
519     if (status != NV_OK)
520     {
521         NV_PRINTF(LEVEL_ERROR, "Unable to program runlist for %s\n",
522                   kfifoGetEngineName_HAL(pKernelFifo, ENGINE_INFO_TYPE_ENG_DESC, engDesc));
523     }
524 
525     return status;
526 }
527 
/*!
 * @brief Return the memdesc backing a channel's FIFO context region.
 *
 * @param[in]  pGpu
 * @param[in]  pKernelFifo
 * @param[in]  pKernelChannel
 * @param[in]  engineState   FIFO_CTX_INST_BLOCK or FIFO_CTX_RAMFC
 * @param[out] ppMemDesc     receives the memdesc pointer (not a copy)
 *
 * @return NV_OK on success (or, on non-SRIOV virtual, without filling
 *         ppMemDesc); NV_ERR_INVALID_* on bad state/arguments.
 */
NV_STATUS
kfifoChannelGetFifoContextMemDesc_GM107
(
    OBJGPU             *pGpu,
    KernelFifo         *pKernelFifo,
    KernelChannel      *pKernelChannel,
    FIFO_CTX            engineState,
    MEMORY_DESCRIPTOR **ppMemDesc
)
{
    FIFO_INSTANCE_BLOCK *pInstanceBlock;

    /* UVM calls nvGpuOpsGetChannelInstanceMemInfo
     * which calls current function to fetch FIFO_CTX_INST_BLOCK */
    /* Currently, UVM supported on SRIOV vGPUs only. */
    if (IS_VIRTUAL_WITHOUT_SRIOV(pGpu))
        return NV_OK;

    NV_ASSERT_OR_RETURN(!gpumgrGetBcEnabledStatus(pGpu), NV_ERR_INVALID_STATE);
    NV_ASSERT_OR_RETURN(pKernelChannel != NULL, NV_ERR_INVALID_CHANNEL);

    // Per-subdevice FIFO data; NULL until the instance block is set up.
    pInstanceBlock = pKernelChannel->pFifoHalData[gpumgrGetSubDeviceInstanceFromGpu(pGpu)];
    if (pInstanceBlock == NULL)
        return NV_ERR_INVALID_STATE;

    switch (engineState)
    {
        case FIFO_CTX_INST_BLOCK:
            *ppMemDesc = pInstanceBlock->pInstanceBlockDesc;
            break;

        case FIFO_CTX_RAMFC:
            *ppMemDesc = pInstanceBlock->pRamfcDesc;
            break;

        default:
            NV_PRINTF(LEVEL_ERROR,
                      "bad engineState 0x%x on engine 0x%x\n",
                      engineState, ENG_FIFO);
            DBG_BREAKPOINT();
            return NV_ERR_INVALID_ARGUMENT;
    }

    NV_ASSERT(!memdescHasSubDeviceMemDescs(*ppMemDesc));

    NV_PRINTF(LEVEL_INFO,
              "Channel %d engine 0x%x engineState 0x%x *ppMemDesc %p\n",
              kchannelGetDebugTag(pKernelChannel), ENG_FIFO, engineState, *ppMemDesc);

    return NV_OK;
}
579 
/**
 * @brief lookup the kernelchannel data associated with a given instance address/target
 *
 * Builds a temporary memdesc describing the supplied instance block, then
 * walks every channel and compares physical address + HW PTE aperture (and
 * GFID) against each channel's instance block descriptor.
 *
 * @param[in] pGpu               OBJGPU pointer
 * @param[in] pKernelFifo        KernelFifo pointer
 * @param[in] pInst              INST_BLOCK_DESC pointer
 * @param[out] ppKernelChannel   KernelChannel ptr; set on match, else NULL
 *
 * @return NV_OK on match, NV_ERR_INVALID_CHANNEL if no channel matches,
 *         NV_ERR_INVALID_ADDRESS/ARGUMENT/STATE on bad input.
 */
NV_STATUS
kfifoConvertInstToKernelChannel_GM107
(
    OBJGPU           *pGpu,
    KernelFifo       *pKernelFifo,
    INST_BLOCK_DESC  *pInst,
    KernelChannel   **ppKernelChannel
)
{
    MemoryManager       *pMemoryManager   = GPU_GET_MEMORY_MANAGER(pGpu);
    KernelChannel       *pKernelChannel   = NULL;
    FIFO_INSTANCE_BLOCK *pInstanceBlock;
    MEMORY_DESCRIPTOR    instMemDesc;      // stack-local; destroyed on every exit path below
    NV_ADDRESS_SPACE     instAperture;
    CHANNEL_ITERATOR     chanIt;

    NV_ASSERT_OR_RETURN(pInst != NULL, NV_ERR_INVALID_ARGUMENT);
    NV_ASSERT_OR_RETURN(ppKernelChannel != NULL, NV_ERR_INVALID_ARGUMENT);
    NV_ASSERT_OR_RETURN(!gpumgrGetBcEnabledStatus(pGpu), NV_ERR_INVALID_STATE);

    *ppKernelChannel = NULL;

    // Map the HW aperture encoding onto an RM address space.
    switch (pInst->aperture)
    {
        case INST_BLOCK_APERTURE_SYSTEM_COHERENT_MEMORY:
        case INST_BLOCK_APERTURE_SYSTEM_NON_COHERENT_MEMORY:
            instAperture = ADDR_SYSMEM;
            break;
        case INST_BLOCK_APERTURE_VIDEO_MEMORY:
            instAperture = ADDR_FBMEM;
            break;
        default:
            NV_PRINTF(LEVEL_ERROR, "unknown inst target 0x%x\n", pInst->aperture);
            DBG_BREAKPOINT();
            return NV_ERR_INVALID_ADDRESS;
    }

    //
    // The MMU_PTE version of aperture is what the HW should always
    // report for an instance block. Compare the SW defines against
    // these values here.
    //
    VERIFY_INST_BLOCK_APERTURE(NV_MMU_PTE_APERTURE_VIDEO_MEMORY,
                               NV_MMU_PTE_APERTURE_SYSTEM_COHERENT_MEMORY,
                               NV_MMU_PTE_APERTURE_SYSTEM_NON_COHERENT_MEMORY);

    // Wrap the raw instance address in a temporary descriptor so the
    // comparison below can use the common memdesc helpers.
    memdescCreateExisting(&instMemDesc, pGpu, NV_RAMIN_ALLOC_SIZE,
                          instAperture, NV_MEMORY_UNCACHED,
                          MEMDESC_FLAGS_OWNED_BY_CURRENT_DEVICE);

    memdescDescribe(&instMemDesc, instAperture, pInst->address, NV_RAMIN_ALLOC_SIZE);

    kfifoGetChannelIterator(pGpu, pKernelFifo, &chanIt, INVALID_RUNLIST_ID);
    while (kfifoGetNextKernelChannel(pGpu, pKernelFifo, &chanIt, &pKernelChannel) == NV_OK)
    {
        NV_ASSERT_OR_ELSE(pKernelChannel != NULL, continue);

        pInstanceBlock = pKernelChannel->pFifoHalData[gpumgrGetSubDeviceInstanceFromGpu(pGpu)];

        // Match on GFID plus (aperture, physical address) of the channel's
        // instance block versus the caller-supplied instance block.
        if (pInstanceBlock != NULL &&
            pInstanceBlock->pInstanceBlockDesc != NULL &&
            kchannelGetGfid(pKernelChannel) == pInst->gfid &&
            memmgrComparePhysicalAddresses_HAL(pGpu, pMemoryManager,
                kgmmuGetHwPteApertureFromMemdesc(GPU_GET_KERNEL_GMMU(pGpu),
                    pInstanceBlock->pInstanceBlockDesc),
                memdescGetPhysAddr(pInstanceBlock->pInstanceBlockDesc,
                                   AT_GPU, 0),
                kgmmuGetHwPteApertureFromMemdesc(GPU_GET_KERNEL_GMMU(pGpu),
                                                &instMemDesc),
                memdescGetPhysAddr(&instMemDesc, AT_GPU, 0)))
        {
                *ppKernelChannel = pKernelChannel;
                memdescDestroy(&instMemDesc);
                return NV_OK;
        }
    }

    NV_PRINTF(LEVEL_INFO,
              "No channel found for instance 0x%016llx (target 0x%x)\n",
              memdescGetPhysAddr(&instMemDesc, AT_GPU, 0),
              kgmmuGetHwPteApertureFromMemdesc(GPU_GET_KERNEL_GMMU(pGpu), &instMemDesc));
    memdescDestroy(&instMemDesc);

    return NV_ERR_INVALID_CHANNEL;
}
673 
674 static inline NvBool
_isEngineInfoTypeValidForOnlyHostDriven(ENGINE_INFO_TYPE type)675 _isEngineInfoTypeValidForOnlyHostDriven(ENGINE_INFO_TYPE type)
676 {
677     switch (type)
678     {
679         case ENGINE_INFO_TYPE_RUNLIST:
680         case ENGINE_INFO_TYPE_RUNLIST_PRI_BASE:
681         case ENGINE_INFO_TYPE_RUNLIST_ENGINE_ID:
682         case ENGINE_INFO_TYPE_PBDMA_ID:
683         case ENGINE_INFO_TYPE_CHRAM_PRI_BASE:
684         case ENGINE_INFO_TYPE_FIFO_TAG:
685             return NV_TRUE;
686         case ENGINE_INFO_TYPE_ENG_DESC:
687         case ENGINE_INFO_TYPE_RM_ENGINE_TYPE:
688         case ENGINE_INFO_TYPE_MMU_FAULT_ID:
689         case ENGINE_INFO_TYPE_RC_MASK:
690         case ENGINE_INFO_TYPE_RESET:
691         case ENGINE_INFO_TYPE_INTR:
692         case ENGINE_INFO_TYPE_MC:
693         case ENGINE_INFO_TYPE_DEV_TYPE_ENUM:
694         case ENGINE_INFO_TYPE_INSTANCE_ID:
695         case ENGINE_INFO_TYPE_IS_HOST_DRIVEN_ENGINE:
696             // The bool itself is valid for non-host-driven engines too.
697         case ENGINE_INFO_TYPE_INVALID:
698             return NV_FALSE;
699         default:
700             // Ensure that this function covers every value in ENGINE_INFO_TYPE
701             NV_ASSERT(0 && "check all ENGINE_INFO_TYPE are classified as host-driven or not");
702             return NV_FALSE;
703     }
704 }
705 
706 
707 NV_STATUS
kfifoEngineInfoXlate_GM107(OBJGPU * pGpu,KernelFifo * pKernelFifo,ENGINE_INFO_TYPE inType,NvU32 inVal,ENGINE_INFO_TYPE outType,NvU32 * pOutVal)708 kfifoEngineInfoXlate_GM107
709 (
710     OBJGPU           *pGpu,
711     KernelFifo       *pKernelFifo,
712     ENGINE_INFO_TYPE  inType,
713     NvU32             inVal,
714     ENGINE_INFO_TYPE  outType,
715     NvU32            *pOutVal
716 )
717 {
718     const ENGINE_INFO *pEngineInfo       = kfifoGetEngineInfo(pKernelFifo);
719     FIFO_ENGINE_LIST  *pFoundInputEngine = NULL;
720 
721     NV_ASSERT_OR_RETURN(pOutVal != NULL, NV_ERR_INVALID_ARGUMENT);
722 
723     // PBDMA_ID can only be inType
724     NV_ASSERT_OR_RETURN(outType != ENGINE_INFO_TYPE_PBDMA_ID,
725                         NV_ERR_INVALID_ARGUMENT);
726 
727     if (pEngineInfo == NULL)
728     {
729         NV_ASSERT_OK_OR_RETURN(kfifoConstructEngineList_HAL(pGpu, pKernelFifo));
730         pEngineInfo = kfifoGetEngineInfo(pKernelFifo);
731     }
732     NV_ASSERT_OR_RETURN(pEngineInfo != NULL, NV_ERR_INVALID_STATE);
733 
734     if (inType == ENGINE_INFO_TYPE_INVALID)
735     {
736         NV_ASSERT_OR_RETURN(inVal < pEngineInfo->engineInfoListSize,
737                             NV_ERR_INVALID_ARGUMENT);
738         pFoundInputEngine = &pEngineInfo->engineInfoList[inVal];
739     }
740     else
741     {
742         NvU32 i;
743         for (i = 0;
744              (i < pEngineInfo->engineInfoListSize) &&
745              (pFoundInputEngine == NULL);
746              ++i)
747         {
748             FIFO_ENGINE_LIST *pThisEngine = &pEngineInfo->engineInfoList[i];
749 
750             if (inType == ENGINE_INFO_TYPE_PBDMA_ID)
751             {
752                 NvU32 j;
753                 for (j = 0; j < pThisEngine->numPbdmas; ++j)
754                 {
755                     if (pThisEngine->pbdmaIds[j] == inVal)
756                     {
757                         pFoundInputEngine = pThisEngine;
758                         break;
759                     }
760                 }
761             }
762             else if (pThisEngine->engineData[inType] == inVal)
763             {
764                 pFoundInputEngine = pThisEngine;
765             }
766         }
767     }
768 
769     if (pFoundInputEngine == NULL)
770     {
771         return NV_ERR_OBJECT_NOT_FOUND;
772     }
773 
774     if (_isEngineInfoTypeValidForOnlyHostDriven(outType) &&
775         !pFoundInputEngine->engineData[ENGINE_INFO_TYPE_IS_HOST_DRIVEN_ENGINE])
776     {
777         //
778         // Bug 3748452 TODO
779         // Bug 3772199 TODO
780         //
781         // We can't easily just return an error here because hundreds of
782         // callsites would fail their asserts. The above two bugs track fixing
783         // all callsites after which, we can uncomment this.
784         //
785         // return NV_ERR_OBJECT_NOT_FOUND;
786         //
787         NV_PRINTF(LEVEL_ERROR,
788             "Asked for host-specific type(0x%x) for non-host engine type(0x%x),val(0x%08x)\n",
789             outType, inType, inVal);
790     }
791 
792     *pOutVal = pFoundInputEngine->engineData[outType];
793     return NV_OK;
794 }
795 
796 /**
797  * @brief Get the local maximum number of subctx allowed in this TSG
798  */
799 NvU32
kfifoChannelGroupGetLocalMaxSubcontext_GM107(OBJGPU * pGpu,KernelFifo * pKernelFifo,KernelChannelGroup * pKernelChannelGroup,NvBool bLegacyMode)800 kfifoChannelGroupGetLocalMaxSubcontext_GM107
801 (
802     OBJGPU             *pGpu,
803     KernelFifo         *pKernelFifo,
804     KernelChannelGroup *pKernelChannelGroup,
805     NvBool              bLegacyMode
806 )
807 {
808     // Pre-AMPERE, each channel group has the global maximum available
809     return kfifoGetMaxSubcontext_HAL(pGpu, pKernelFifo, bLegacyMode);
810 }
811 
812 void
kfifoSetupUserD_GM107(OBJGPU * pGpu,KernelFifo * pKernelFifo,MEMORY_DESCRIPTOR * pMemDesc)813 kfifoSetupUserD_GM107
814 (
815     OBJGPU *pGpu,
816     KernelFifo *pKernelFifo,
817     MEMORY_DESCRIPTOR *pMemDesc
818 )
819 {
820     TRANSFER_SURFACE tSurf = {.pMemDesc = pMemDesc, .offset = 0};
821 
822     NV_ASSERT_OK(memmgrMemSet(GPU_GET_MEMORY_MANAGER(pGpu), &tSurf, 0,
823         NV_RAMUSERD_CHAN_SIZE, TRANSFER_FLAGS_NONE));
824 }
825 /**
826  * @brief return number of HW engines
827  *
828  *  Can be used to loop over all engines in the system by looping from 0
829  *  through the value returned by this function and then using
830  *  kfifoEngineInfoXlate() with an input type of ENGINE_INFO_TYPE_INVALID.
831  *
832  * @param[in] pGpu          OBJGPU pointer
833  * @param[in] pKernelFifo   KernelFifo pointer
834  *
835  * @returns number of HW engines present on chip.
836  */
837 NvU32
kfifoGetNumEngines_GM107(OBJGPU * pGpu,KernelFifo * pKernelFifo)838 kfifoGetNumEngines_GM107
839 (
840     OBJGPU     *pGpu,
841     KernelFifo *pKernelFifo
842 )
843 {
844     const ENGINE_INFO *pEngineInfo = kfifoGetEngineInfo(pKernelFifo);
845 
846     if (pEngineInfo == NULL)
847     {
848         NV_ASSERT_OR_RETURN(kfifoConstructEngineList_HAL(pGpu, pKernelFifo) == NV_OK, 0);
849 
850         pEngineInfo = kfifoGetEngineInfo(pKernelFifo);
851         NV_ASSERT_OR_RETURN(pEngineInfo != NULL, 0);
852     }
853 
854     NV_ASSERT(pEngineInfo->engineInfoListSize);
855 
856     // we don't count the SW engine entry at the end of the list
857     return pEngineInfo->engineInfoListSize-1;
858 }
859 
860 /**
861  * @brief Retrieves the name of the engine corresponding to the given @ref ENGINE_INFO_TYPE
862  *
863  * @param pKernelFifo
864  * @param[in] inType
865  * @param[in] inVal
866  *
867  * @returns a string
868  */
869 const char *
kfifoGetEngineName_GM107(KernelFifo * pKernelFifo,ENGINE_INFO_TYPE inType,NvU32 inVal)870 kfifoGetEngineName_GM107
871 (
872     KernelFifo *pKernelFifo,
873     ENGINE_INFO_TYPE inType,
874     NvU32 inVal
875 )
876 {
877     const ENGINE_INFO *pEngineInfo = kfifoGetEngineInfo(pKernelFifo);
878     NvU32 i;
879 
880     if (inType == ENGINE_INFO_TYPE_INVALID)
881     {
882         NV_ASSERT_OR_RETURN (inVal < pEngineInfo->engineInfoListSize, NULL);
883         return pEngineInfo->engineInfoList[inVal].engineName;
884     }
885     for (i = 0; i < pEngineInfo->engineInfoListSize; ++i)
886     {
887         if (pEngineInfo->engineInfoList[i].engineData[inType] == inVal)
888         {
889             return pEngineInfo->engineInfoList[i].engineName;
890         }
891     }
892 
893     return "UNKNOWN";
894 }
895 
896 /**
897  * @brief Returns the maximum possible number of runlists.
898  *
899  * Returns a number which represents the limit of any runlistId indexed
900  * registers in hardware.  Does not necessarily return how many runlists are
901  * active.  In the range of 0..kfifoGetMaxNumRunlists() there may be runlists
902  * that are not used.
903  *
904  * @param pGpu
905  * @param pKernelFifo
906  */
907 NvU32
kfifoGetMaxNumRunlists_GM107(OBJGPU * pGpu,KernelFifo * pKernelFifo)908 kfifoGetMaxNumRunlists_GM107
909 (
910     OBJGPU     *pGpu,
911     KernelFifo *pKernelFifo
912 )
913 {
914     const ENGINE_INFO *pEngineInfo = kfifoGetEngineInfo(pKernelFifo);
915 
916     return pEngineInfo->maxNumRunlists;
917 }
918 
919 NV_STATUS
kfifoGetEnginePbdmaIds_GM107(OBJGPU * pGpu,KernelFifo * pKernelFifo,ENGINE_INFO_TYPE type,NvU32 val,NvU32 ** ppPbdmaIds,NvU32 * pNumPbdmas)920 kfifoGetEnginePbdmaIds_GM107
921 (
922     OBJGPU *pGpu,
923     KernelFifo *pKernelFifo,
924     ENGINE_INFO_TYPE type,
925     NvU32 val,
926     NvU32 **ppPbdmaIds,
927     NvU32 *pNumPbdmas
928 )
929 {
930     const ENGINE_INFO *pEngineInfo = kfifoGetEngineInfo(pKernelFifo);
931     NvU32 i;
932 
933     if (pEngineInfo == NULL)
934     {
935         NV_ASSERT_OK_OR_RETURN(kfifoConstructEngineList_HAL(pGpu, pKernelFifo));
936 
937         pEngineInfo = kfifoGetEngineInfo(pKernelFifo);
938         NV_ASSERT_OR_RETURN(pEngineInfo != NULL, NV_ERR_INVALID_STATE);
939     }
940 
941     if (type == ENGINE_INFO_TYPE_INVALID)
942     {
943         NV_ASSERT_OR_RETURN(val < pEngineInfo->engineInfoListSize, NV_ERR_INVALID_ARGUMENT);
944         *ppPbdmaIds = pEngineInfo->engineInfoList[val].pbdmaIds;
945         *pNumPbdmas = pEngineInfo->engineInfoList[val].numPbdmas;
946         return NV_OK;
947     }
948 
949     for (i = 0; i < pEngineInfo->engineInfoListSize; i++)
950     {
951         if (pEngineInfo->engineInfoList[i].engineData[type] == val)
952         {
953             *ppPbdmaIds = pEngineInfo->engineInfoList[i].pbdmaIds;
954             *pNumPbdmas = pEngineInfo->engineInfoList[i].numPbdmas;
955             return NV_OK;
956         }
957     }
958 
959     return NV_ERR_INVALID_ARGUMENT;
960 }
961 
962 /**
963  * @brief finds all engines on the same pbdma as the input
964  *
965  * pPartnerListParams->partnershipClassId is currently ignored.
966  *
967  * @param pGpu
968  * @param pKernelFifo
969  * @param[in/out] pPartnerListParams engineType is input, partnerList/numPartners are ouput
970  *
971  * @returns NV_OK if successful, error otherwise
972  */
973 NV_STATUS
kfifoGetEnginePartnerList_GM107(OBJGPU * pGpu,KernelFifo * pKernelFifo,NV2080_CTRL_GPU_GET_ENGINE_PARTNERLIST_PARAMS * pPartnerListParams)974 kfifoGetEnginePartnerList_GM107
975 (
976     OBJGPU *pGpu,
977     KernelFifo *pKernelFifo,
978     NV2080_CTRL_GPU_GET_ENGINE_PARTNERLIST_PARAMS *pPartnerListParams
979 )
980 {
981     const NvU32 numEngines = kfifoGetNumEngines_HAL(pGpu, pKernelFifo);
982     NvU32 i;
983     NvU32 srcRunlist;
984     NvU32 runlist;
985     NvU32 *pSrcPbdmaIds;
986     NvU32 numSrcPbdmaIds;
987     NvU32 srcPbdmaId;
988     NvU32 *pPbdmaIds;
989     NvU32 numPbdmaIds;
990     NvU32 numClasses = 0;
991     ENGDESCRIPTOR engDesc;
992     RM_ENGINE_TYPE rmEngineType = gpuGetRmEngineType(pPartnerListParams->engineType);
993 
994     if (pPartnerListParams->runqueue >= kfifoGetNumRunqueues_HAL(pGpu, pKernelFifo))
995         return NV_ERR_INVALID_ARGUMENT;
996 
997     NV_ASSERT_OK_OR_RETURN(kfifoEngineInfoXlate_HAL(pGpu, pKernelFifo,
998                                                     ENGINE_INFO_TYPE_RM_ENGINE_TYPE,
999                                                     (NvU32)rmEngineType,
1000                                                     ENGINE_INFO_TYPE_RUNLIST,
1001                                                     &srcRunlist));
1002 
1003     NV_ASSERT_OK_OR_RETURN(kfifoGetEnginePbdmaIds_HAL(pGpu, pKernelFifo,
1004                                                       ENGINE_INFO_TYPE_RM_ENGINE_TYPE,
1005                                                       (NvU32)rmEngineType,
1006                                                       &pSrcPbdmaIds,
1007                                                       &numSrcPbdmaIds));
1008 
1009     pPartnerListParams->numPartners = 0;
1010 
1011     // Get the PBDMA ID for the runqueue-th runqueue
1012     if (pPartnerListParams->runqueue >= numSrcPbdmaIds)
1013     {
1014         return NV_ERR_INVALID_ARGUMENT;
1015     }
1016     srcPbdmaId = pSrcPbdmaIds[pPartnerListParams->runqueue];
1017 
1018     //
1019     // Find all engines sharing a runlist with the input engine, add each to
1020     // the output array.
1021     //
1022     for (i = 0; i < numEngines; i++)
1023     {
1024         NV_ASSERT_OK_OR_RETURN(kfifoEngineInfoXlate_HAL(pGpu, pKernelFifo,
1025                                                         ENGINE_INFO_TYPE_INVALID, i,
1026                                                         ENGINE_INFO_TYPE_ENG_DESC, &engDesc));
1027 
1028         NV_ASSERT_OK_OR_RETURN(gpuGetClassList(pGpu, &numClasses, NULL, engDesc));
1029         if (numClasses == 0)
1030         {
1031             NV_PRINTF(LEVEL_INFO,
1032                       "EngineID %x is not part classDB, skipping\n",
1033                       engDesc);
1034             continue;
1035         }
1036 
1037         NV_ASSERT_OK_OR_RETURN(kfifoEngineInfoXlate_HAL(pGpu, pKernelFifo,
1038                                                         ENGINE_INFO_TYPE_INVALID, i,
1039                                                         ENGINE_INFO_TYPE_RUNLIST, &runlist));
1040 
1041         if (runlist == srcRunlist)
1042         {
1043             NvU32 j;
1044             RM_ENGINE_TYPE localRmEngineType;
1045 
1046             NV_ASSERT_OK_OR_RETURN(kfifoGetEnginePbdmaIds_HAL(pGpu, pKernelFifo,
1047                                                               ENGINE_INFO_TYPE_INVALID, i,
1048                                                               &pPbdmaIds, &numPbdmaIds));
1049 
1050             for (j = 0; j < numPbdmaIds; j++)
1051             {
1052                 if (pPbdmaIds[j] == srcPbdmaId)
1053                 {
1054                     NV_ASSERT_OK_OR_RETURN(kfifoEngineInfoXlate_HAL(pGpu, pKernelFifo,
1055                                                                     ENGINE_INFO_TYPE_INVALID, i,
1056                                                                     ENGINE_INFO_TYPE_RM_ENGINE_TYPE, (NvU32 *)&localRmEngineType));
1057 
1058                     // Don't include input in output list
1059                     if (localRmEngineType != rmEngineType)
1060                     {
1061                         pPartnerListParams->partnerList[pPartnerListParams->numPartners++] =
1062                             gpuGetNv2080EngineType(localRmEngineType);
1063 
1064                         if (pPartnerListParams->numPartners >= NV2080_CTRL_GPU_MAX_ENGINE_PARTNERS)
1065                             return NV_ERR_INVALID_ARGUMENT;
1066                     }
1067                 }
1068             }
1069         }
1070     }
1071 
1072     return NV_OK;
1073 }
1074 
1075 /**
1076  * @brief Check if the runlist has TSG support
1077  *
1078  * Currently, we only enable the TSG runlist for GR
1079  *
1080  *  @return NV_TRUE if TSG is supported, NV_FALSE if not
1081  */
1082 NvBool
kfifoRunlistIsTsgHeaderSupported_GM107(OBJGPU * pGpu,KernelFifo * pKernelFifo,NvU32 runlistId)1083 kfifoRunlistIsTsgHeaderSupported_GM107
1084 (
1085     OBJGPU *pGpu,
1086     KernelFifo *pKernelFifo,
1087     NvU32 runlistId
1088 )
1089 {
1090     NvU32 tmp_runlist;
1091 
1092     if (kfifoEngineInfoXlate_HAL(pGpu, pKernelFifo, ENGINE_INFO_TYPE_ENG_DESC,
1093         ENG_GR(0), ENGINE_INFO_TYPE_RUNLIST, &tmp_runlist) != NV_OK)
1094     {
1095         NV_PRINTF(LEVEL_ERROR,
1096                   "can't find runlist ID for engine ENG_GR(0)!\n");
1097         NV_ASSERT(0);
1098         return NV_FALSE;
1099     }
1100 
1101     return tmp_runlist == runlistId;
1102 }
1103 
1104 /**
1105  * @brief Get the runlist entry size
1106  *
1107  * @param pKernelFifo
1108  *
1109  * @return size in bytes
1110  */
1111 NvU32
kfifoRunlistGetEntrySize_GM107(KernelFifo * pKernelFifo)1112 kfifoRunlistGetEntrySize_GM107
1113 (
1114     KernelFifo *pKernelFifo
1115 )
1116 {
1117     return NV_RAMRL_ENTRY_SIZE;
1118 }
1119 
1120 /**
1121  * @brief Get the runlist base shift amount
1122  *
1123  * @param pKernelFifo
1124  *
1125  * @return shift amount
1126  */
1127 NvU32
kfifoRunlistGetBaseShift_GM107(KernelFifo * pKernelFifo)1128 kfifoRunlistGetBaseShift_GM107
1129 (
1130     KernelFifo *pKernelFifo
1131 )
1132 {
1133     return NV_RAMRL_BASE_SHIFT;
1134 }
1135 
1136 /**
1137  * @brief Pre-allocate BAR1 userd space
1138  *
1139  * @param   pGpu
1140  * @param   pKernelFifo
1141  *
1142  * @returns NV_STATUS
1143  */
1144 NV_STATUS
kfifoPreAllocUserD_GM107(OBJGPU * pGpu,KernelFifo * pKernelFifo)1145 kfifoPreAllocUserD_GM107
1146 (
1147     OBJGPU     *pGpu,
1148     KernelFifo *pKernelFifo
1149 )
1150 {
1151     OBJGPU     *pParentGpu             = gpumgrGetParentGPU(pGpu);
1152     KernelFifo *pParentKernelFifo      = GPU_GET_KERNEL_FIFO(pParentGpu);
1153     KernelBus  *pKernelBus             = GPU_GET_KERNEL_BUS(pGpu);
1154     NvBool      bCoherentCpuMapping    = NV_FALSE;
1155     NV_STATUS   status                 = NV_OK;
1156     NvU64       temp                   = 0;
1157     NvU32       userdSize;
1158     NvU32       userdShift;
1159     NvU32       numChannels;
1160     NvBool      bFifoFirstInit;
1161     NvU32       flags                  = MEMDESC_FLAGS_NONE;
1162     NvU32       mapFlags               = BUS_MAP_FB_FLAGS_MAP_DOWNWARDS |
1163                                          BUS_MAP_FB_FLAGS_MAP_UNICAST;
1164     NvU32       currentGpuInst         = gpumgrGetSubDeviceInstanceFromGpu(pGpu);
1165     CHID_MGR   *pChidMgr               = kfifoGetChidMgr(pGpu, pKernelFifo, 0);
1166 
1167     MemoryManager     *pMemoryManager    = GPU_GET_MEMORY_MANAGER(pGpu);
1168     KernelMIGManager  *pKernelMIGManager = GPU_GET_KERNEL_MIG_MANAGER(pGpu);
1169     PREALLOCATED_USERD_INFO *pUserdInfo  = &pParentKernelFifo->userdInfo;
1170 
1171     NV_ASSERT_OR_RETURN(!gpumgrGetBcEnabledStatus(pGpu), NV_ERR_INVALID_STATE);
1172 
1173     // We don't support RM allocated USERD for vGPU guest with SRIOV
1174     if (IS_VIRTUAL_WITH_SRIOV(pGpu))
1175     {
1176         return NV_OK;
1177     }
1178 
1179     bCoherentCpuMapping = pGpu->getProperty(pGpu, PDB_PROP_GPU_COHERENT_CPU_MAPPING);
1180 
1181     if (pUserdInfo->userdBar1CpuPtr == NULL)
1182     {
1183         bFifoFirstInit = NV_TRUE;
1184     }
1185     else
1186     {
1187         mapFlags |= BUS_MAP_FB_FLAGS_MAP_OFFSET_FIXED;
1188         bFifoFirstInit = NV_FALSE;
1189     }
1190 
1191     //
1192     // Allocate the physical memory associated with the UserD if this is
1193     // the first GPU to init fifo. This relies on the assumption that
1194     // UserD is shared physmem.
1195     //
1196     if (bFifoFirstInit)
1197     {
1198         pUserdInfo->userdBar1MapStartOffset   =  0;
1199         pUserdInfo->userdBar1MapSize          =  0;
1200 
1201         // This is a WAR for HW bug 600241
1202         if (pUserdInfo->userdAperture == ADDR_SYSMEM)
1203         {
1204             pKernelFifo->bUserdInSystemMemory = NV_TRUE;
1205         }
1206     }
1207 
1208     kfifoGetUserdSizeAlign_HAL(pKernelFifo, &userdSize, &userdShift);
1209 
1210     numChannels = kfifoChidMgrGetNumChannels(pGpu, pKernelFifo, pChidMgr);
1211 
1212     // Alloc USERD of size numChannels * sizeof( USERD ) for each gpu
1213     status = memdescCreate(&pUserdInfo->userdPhysDesc[currentGpuInst], pGpu,
1214                            userdSize * numChannels,
1215                            1ULL << userdShift,
1216                            NV_TRUE,
1217                            pUserdInfo->userdAperture,
1218                            pUserdInfo->userdAttr,
1219                            flags);
1220     if (status != NV_OK)
1221     {
1222         NV_PRINTF(LEVEL_ERROR,
1223                   "Could not memdescCreate for USERD for %x #channels\n",
1224                   numChannels);
1225         DBG_BREAKPOINT();
1226         goto fail;
1227     }
1228     temp = pUserdInfo->userdPhysDesc[currentGpuInst]->Size;
1229 
1230     //
1231     // For vGPU, do not allocate USERD memory in guest.
1232     // vGPU does all HW management in host, so host RM will
1233     // allocate the real USERD memory.
1234     //
1235     if (IS_VIRTUAL(pGpu))
1236     {
1237         // Force page size to 4KB to match host phys access
1238         memmgrSetMemDescPageSize_HAL(pGpu, pMemoryManager,
1239                                      pUserdInfo->userdPhysDesc[currentGpuInst],
1240                                      AT_GPU, RM_ATTR_PAGE_SIZE_4KB);
1241         if (bFifoFirstInit)
1242         {
1243             pUserdInfo->userdBar1MapStartOffset = kfifoGetUserdBar1MapStartOffset_HAL(pGpu, pKernelFifo);
1244         }
1245     }
1246     else
1247     {
1248         memdescTagAlloc(status, NV_FB_ALLOC_RM_INTERNAL_OWNER_UNNAMED_TAG_81,
1249                         pUserdInfo->userdPhysDesc[currentGpuInst]);
1250         if (status != NV_OK)
1251         {
1252             NV_PRINTF(LEVEL_ERROR,
1253                       "Could not allocate USERD for %x #channels\n",
1254                       numChannels);
1255             DBG_BREAKPOINT();
1256             goto fail;
1257         }
1258 
1259         // Force page size to 4KB in broadcast to match host phys access
1260         memmgrSetMemDescPageSize_HAL(pGpu, pMemoryManager, pUserdInfo->userdPhysDesc[currentGpuInst],
1261                                      AT_GPU, RM_ATTR_PAGE_SIZE_4KB);
1262 
1263         //
1264         // If coherent link is available, just get a coherent mapping to USERD and
1265         // lie about the BAR1 offset, since we are not using BAR1
1266         // TODO: Make these bar1 offsets unicast on each gpu as well
1267         //
1268         if (bCoherentCpuMapping &&
1269             (memdescGetAddressSpace(pUserdInfo->userdPhysDesc[currentGpuInst]) == ADDR_FBMEM))
1270         {
1271 
1272             NV_PRINTF(LEVEL_INFO, "Mapping USERD with coherent link (USERD in FBMEM).\n");
1273             NV_ASSERT(pGpu->getProperty(pGpu, PDB_PROP_GPU_ATS_SUPPORTED));
1274             NV_ASSERT(pUserdInfo->userdPhysDesc[currentGpuInst]->_flags & MEMDESC_FLAGS_PHYSICALLY_CONTIGUOUS);
1275 
1276             if (bFifoFirstInit)
1277             {
1278                 pUserdInfo->userdBar1MapStartOffset =  pUserdInfo->userdPhysDesc[currentGpuInst]->_pteArray[0] +
1279                                                        pUserdInfo->userdPhysDesc[currentGpuInst]->PteAdjust;
1280             }
1281         }
1282         //
1283         // get sysmem mapping for USERD if USERD is in sysmem and reflected BAR access is not allowed
1284         //
1285         else if ((bCoherentCpuMapping &&
1286                  memdescGetAddressSpace(pUserdInfo->userdPhysDesc[currentGpuInst]) == ADDR_SYSMEM &&
1287                  !kbusIsReflectedMappingAccessAllowed(pKernelBus)) ||
1288                  pGpu->getProperty(pGpu, PDB_PROP_GPU_BAR1_BAR2_DISABLED))
1289         {
1290             NV_PRINTF(LEVEL_INFO, "Mapping USERD with coherent link (USERD in SYSMEM).\n");
1291 
1292             if (bFifoFirstInit)
1293             {
1294                 pUserdInfo->userdBar1MapStartOffset =
1295                         memdescGetPhysAddr(pUserdInfo->userdPhysDesc[currentGpuInst], AT_CPU, 0);
1296             }
1297         }
1298         else
1299         {
1300             // vGpu may boot with partitioning enabled but that's not true for host RM
1301             if ((pKernelMIGManager != NULL) && kmigmgrIsMIGMemPartitioningEnabled(pGpu, pKernelMIGManager))
1302             {
1303                 status = NV_ERR_INVALID_STATE;
1304                 NV_PRINTF(LEVEL_ERROR, "Pre-allocated USERD is not supported with MIG\n");
1305                 DBG_BREAKPOINT();
1306                 goto fail;
1307             }
1308             // Now BAR1 map it
1309             status = kbusMapFbAperture_HAL(pGpu, pKernelBus, pUserdInfo->userdPhysDesc[currentGpuInst], 0,
1310                                            &pUserdInfo->userdBar1MapStartOffset,
1311                                            &temp, mapFlags | BUS_MAP_FB_FLAGS_PRE_INIT, NULL);
1312 
1313             if (status != NV_OK)
1314             {
1315                 NV_PRINTF(LEVEL_ERROR, "Could not map USERD to BAR1\n");
1316                 DBG_BREAKPOINT();
1317                 goto fail;
1318             }
1319 
1320             // Add current GPU to list of GPUs referencing pFifo userD bar1
1321             pUserdInfo->userdBar1RefMask |= NVBIT(pGpu->gpuInstance);
1322         }
1323     }
1324 
1325     if (bFifoFirstInit)
1326     {
1327         pUserdInfo->userdBar1MapSize = NvU64_LO32(temp);
1328 
1329         if (bCoherentCpuMapping &&
1330             (memdescGetAddressSpace(pUserdInfo->userdPhysDesc[currentGpuInst]) == ADDR_FBMEM))
1331         {
1332             pUserdInfo->userdBar1CpuPtr = kbusMapCoherentCpuMapping_HAL(pGpu, pKernelBus,
1333                                              pUserdInfo->userdPhysDesc[currentGpuInst]);
1334             status = pUserdInfo->userdBar1CpuPtr == NULL ? NV_ERR_GENERIC : NV_OK;
1335         }
1336         else if ((bCoherentCpuMapping &&
1337                  memdescGetAddressSpace(pUserdInfo->userdPhysDesc[currentGpuInst]) == ADDR_SYSMEM &&
1338                  !kbusIsReflectedMappingAccessAllowed(pKernelBus)) ||
1339                  pGpu->getProperty(pGpu, PDB_PROP_GPU_BAR1_BAR2_DISABLED))
1340         {
1341             status = osMapPciMemoryKernelOld(pGpu,
1342                                              pUserdInfo->userdBar1MapStartOffset,
1343                                              pUserdInfo->userdBar1MapSize,
1344                                              NV_PROTECT_READ_WRITE,
1345                                              (void**)&pUserdInfo->userdBar1CpuPtr,
1346                                              NV_MEMORY_UNCACHED);
1347         }
1348         else
1349         {
1350             // Cpu map the BAR1 snoop range
1351             status = osMapPciMemoryKernelOld(pGpu, gpumgrGetGpuPhysFbAddr(pGpu) +
1352                                              pUserdInfo->userdBar1MapStartOffset,
1353                                              pUserdInfo->userdBar1MapSize,
1354                                              NV_PROTECT_READ_WRITE,
1355                                              (void**)&pUserdInfo->userdBar1CpuPtr,
1356                                              NV_MEMORY_UNCACHED);
1357         }
1358 
1359         if ((pUserdInfo->userdBar1CpuPtr == NULL) && (status != NV_OK))
1360         {
1361             NV_PRINTF(LEVEL_ERROR, "Could not cpu map BAR1 snoop range\n");
1362             DBG_BREAKPOINT();
1363             goto fail;
1364         }
1365     }
1366 
1367     NV_PRINTF(LEVEL_INFO,
1368               "USERD Preallocated phys @ 0x%llx bar1 offset @ 0x%llx of size 0x%x\n",
1369               memdescGetPhysAddr(pUserdInfo->userdPhysDesc[currentGpuInst], AT_GPU, 0),
1370               pUserdInfo->userdBar1MapStartOffset,
1371               pUserdInfo->userdBar1MapSize);
1372 
1373     return status;
1374 
1375 fail:
1376     kfifoFreePreAllocUserD_HAL(pGpu, pKernelFifo);
1377 
1378     return status;
1379 }
1380 
1381 /**
1382  * @brief Free the pre-allocated BAR1 userd space
1383  *
1384  * @param   pGpu
1385  * @param   pKernelFifo
1386  *
1387  * @returns NV_STATUS
1388  */
1389 void
kfifoFreePreAllocUserD_GM107(OBJGPU * pGpu,KernelFifo * pKernelFifo)1390 kfifoFreePreAllocUserD_GM107
1391 (
1392     OBJGPU     *pGpu,
1393     KernelFifo *pKernelFifo
1394 )
1395 {
1396     OBJGPU            *pParentGpu           = gpumgrGetParentGPU(pGpu);
1397     KernelBus         *pKernelBus           = GPU_GET_KERNEL_BUS(pGpu);
1398     NvU32              currentGpuInst       = gpumgrGetSubDeviceInstanceFromGpu(pGpu);
1399     KernelFifo        *pParentKernelFifo    = GPU_GET_KERNEL_FIFO(pParentGpu);
1400     PREALLOCATED_USERD_INFO *pUserdInfo     = &pParentKernelFifo->userdInfo;
1401     NvBool             bCoherentCpuMapping  = pGpu->getProperty(pGpu, PDB_PROP_GPU_COHERENT_CPU_MAPPING) &&
1402         (memdescGetAddressSpace(pUserdInfo->userdPhysDesc[currentGpuInst]) == ADDR_FBMEM);
1403 
1404     // We don't support RM allocated USERD for vGPU guest with SRIOV
1405     if (IS_VIRTUAL_WITH_SRIOV(pGpu))
1406     {
1407         return;
1408     }
1409 
1410     if (gpumgrGetBcEnabledStatus(pGpu))
1411     {
1412         DBG_BREAKPOINT();
1413     }
1414 
1415     if (bCoherentCpuMapping)
1416     {
1417         NV_PRINTF(LEVEL_INFO, "Unmapping USERD from NVLINK.\n");
1418         NV_ASSERT(pGpu->getProperty(pGpu, PDB_PROP_GPU_ATS_SUPPORTED));
1419     }
1420 
1421     if (pUserdInfo->userdBar1CpuPtr)
1422     {
1423         if (bCoherentCpuMapping)
1424         {
1425             kbusUnmapCoherentCpuMapping_HAL(pGpu, pKernelBus,
1426                 pUserdInfo->userdPhysDesc[currentGpuInst]);
1427         }
1428         else
1429         {
1430             osUnmapPciMemoryKernelOld(pGpu, pUserdInfo->userdBar1CpuPtr);
1431         }
1432 
1433         pUserdInfo->userdBar1CpuPtr = NULL;
1434     }
1435 
1436     if (pUserdInfo->userdBar1MapSize)
1437     {
1438         if ((!IS_VIRTUAL(pGpu)) && (!bCoherentCpuMapping))
1439         {
1440             if ((pUserdInfo->userdBar1RefMask & NVBIT(pGpu->gpuInstance)) != 0)
1441             {
1442                 //
1443                 // Unmap in UC for each GPU with a pKernelFifo userd
1444                 // reference mapped through bar1
1445                 //
1446                 kbusUnmapFbAperture_HAL(pGpu, pKernelBus,
1447                                         pUserdInfo->userdPhysDesc[currentGpuInst],
1448                                         pUserdInfo->userdBar1MapStartOffset,
1449                                         pUserdInfo->userdBar1MapSize,
1450                                         BUS_MAP_FB_FLAGS_MAP_UNICAST | BUS_MAP_FB_FLAGS_PRE_INIT);
1451                 pUserdInfo->userdBar1RefMask &= (~NVBIT(pGpu->gpuInstance));
1452             }
1453 
1454         }
1455     }
1456 
1457     // Unallocated memdescFrees are allowed.
1458     memdescFree(pUserdInfo->userdPhysDesc[currentGpuInst]);
1459     memdescDestroy(pUserdInfo->userdPhysDesc[currentGpuInst]);
1460     pUserdInfo->userdPhysDesc[currentGpuInst] = NULL;
1461     NV_PRINTF(LEVEL_INFO, "Freeing preallocated USERD phys and bar1 range\n");
1462 }
1463 
1464 //
1465 // Returns the BAR1 offset and size of the entire USERD mapping.
1466 //
1467 NV_STATUS
kfifoGetUserdBar1MapInfo_GM107(OBJGPU * pGpu,KernelFifo * pKernelFifo,NvU64 * pBar1MapOffset,NvU32 * pBar1MapSize)1468 kfifoGetUserdBar1MapInfo_GM107
1469 (
1470     OBJGPU     *pGpu,
1471     KernelFifo *pKernelFifo,
1472     NvU64      *pBar1MapOffset,
1473     NvU32      *pBar1MapSize
1474 )
1475 {
1476     const PREALLOCATED_USERD_INFO *pUserdInfo = kfifoGetPreallocatedUserdInfo(pKernelFifo);
1477 
1478     // We don't support RM allocated USERD in vGPU guest with SRIOV
1479     if (IS_VIRTUAL_WITH_SRIOV(pGpu))
1480     {
1481         *pBar1MapOffset = 0;
1482         *pBar1MapSize   = 0;
1483 
1484         return NV_OK;
1485     }
1486 
1487     if (pUserdInfo->userdBar1MapSize == 0 )
1488     {
1489         NV_PRINTF(LEVEL_ERROR, "BAR1 map of USERD has not been setup yet\n");
1490         NV_ASSERT( 0 );
1491         return NV_ERR_GENERIC;
1492     }
1493 
1494     *pBar1MapOffset = pUserdInfo->userdBar1MapStartOffset;
1495     *pBar1MapSize   = pUserdInfo->userdBar1MapSize;
1496 
1497     return NV_OK;
1498 }
1499 
1500 /**
1501  * @brief Determines the aperture and attribute of memory where userd is located.
1502  *
1503  * @param pKernelFifo[in]
1504  * @param pUserdAperture[out]
1505  * @param pUserdAttribute[out]
1506  *
1507  * @returns NV_STATUS
1508  */
1509 NV_STATUS
kfifoGetUserdLocation_GM107(KernelFifo * pKernelFifo,NvU32 * pUserdAperture,NvU32 * pUserdAttribute)1510 kfifoGetUserdLocation_GM107
1511 (
1512     KernelFifo *pKernelFifo,
1513     NvU32 *pUserdAperture,
1514     NvU32 *pUserdAttribute
1515 )
1516 {
1517     const PREALLOCATED_USERD_INFO *pUserdInfo = kfifoGetPreallocatedUserdInfo(pKernelFifo);
1518 
1519     NV_ASSERT_OR_RETURN(pUserdAperture != NULL && pUserdAttribute != NULL,
1520                         NV_ERR_INVALID_POINTER);
1521 
1522     *pUserdAperture = pUserdInfo->userdAperture;
1523     *pUserdAttribute = pUserdInfo->userdAttr;
1524 
1525     return NV_OK;
1526 }
1527 
1528 /**
1529  * @brief Returns size/address shift for USERD's BAR1 mapping
1530  *
1531  * @param pKernelFifo
1532  * @param[out] pSize populated with USERD size if non-null
1533  * @param[out] pAddrShift populated with USERD address shift if non-null
1534  */
1535 void
kfifoGetUserdSizeAlign_GM107(KernelFifo * pKernelFifo,NvU32 * pSize,NvU32 * pAddrShift)1536 kfifoGetUserdSizeAlign_GM107
1537 (
1538     KernelFifo *pKernelFifo,
1539     NvU32 *pSize,
1540     NvU32 *pAddrShift
1541 )
1542 {
1543     if (pSize != NULL)
1544         *pSize = 1<<NV_RAMUSERD_BASE_SHIFT;
1545     if (pAddrShift != NULL)
1546         *pAddrShift = NV_RAMUSERD_BASE_SHIFT;
1547 }
1548 
1549 /**
1550  * @brief Determines if an engine is a host engine and if so, if it is present.
1551  *
1552  * @param pGpu
1553  * @param pKernelFifo
1554  * @param[in] engDesc
1555  * @param[out]  pPresent NV_TRUE if the engine is present, NV_FALSE if not.
1556  *
1557  * @return OK if host could determine the engine's presence.  ERROR otherwise
1558  */
1559 NV_STATUS
kfifoCheckEngine_GM107(OBJGPU * pGpu,KernelFifo * pKernelFifo,NvU32 engDesc,NvBool * pPresent)1560 kfifoCheckEngine_GM107
1561 (
1562     OBJGPU     *pGpu,
1563     KernelFifo *pKernelFifo,
1564     NvU32       engDesc,
1565     NvBool     *pPresent
1566 )
1567 {
1568     NvU32 bEschedDriven = NV_FALSE;
1569     NV_STATUS status;
1570 
1571     status = kfifoEngineInfoXlate_HAL(pGpu, pKernelFifo,
1572         ENGINE_INFO_TYPE_ENG_DESC,              engDesc,
1573         ENGINE_INFO_TYPE_IS_HOST_DRIVEN_ENGINE, &bEschedDriven);
1574 
1575     *pPresent = (status == NV_OK) && bEschedDriven;
1576 
1577     return NV_OK;
1578 }
1579