1 /*
2  * SPDX-FileCopyrightText: Copyright (c) 2021-2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
3  * SPDX-License-Identifier: MIT
4  *
5  * Permission is hereby granted, free of charge, to any person obtaining a
6  * copy of this software and associated documentation files (the "Software"),
7  * to deal in the Software without restriction, including without limitation
8  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
9  * and/or sell copies of the Software, and to permit persons to whom the
10  * Software is furnished to do so, subject to the following conditions:
11  *
12  * The above copyright notice and this permission notice shall be included in
13  * all copies or substantial portions of the Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
21  * DEALINGS IN THE SOFTWARE.
22  */
23 
24 #include "kernel/gpu/fifo/kernel_fifo.h"
25 #include "kernel/gpu/fifo/kernel_channel.h"
26 #include "kernel/gpu/fifo/kernel_channel_group.h"
27 #include "kernel/gpu/fifo/kernel_channel_group_api.h"
28 #include "kernel/gpu/fifo/kernel_sched_mgr.h"
29 #include "gpu/mem_mgr/mem_mgr.h"
30 #include "gpu/mmu/kern_gmmu.h"
31 
32 #include "nvRmReg.h"
33 
34 #include "vgpu/rpc.h"
35 #include "gpu/bus/kern_bus.h"
36 
37 #include "published/maxwell/gm107/dev_ram.h"
38 #include "published/maxwell/gm107/dev_mmu.h"
39 
40 
41 static inline NvBool
42 _isEngineInfoTypeValidForOnlyHostDriven(ENGINE_INFO_TYPE type);
43 
44 
45 /*! Construct kfifo object */
46 NV_STATUS
47 kfifoConstructHal_GM107
48 (
49     OBJGPU     *pGpu,
50     KernelFifo *pKernelFifo
51 )
52 {
53     NV_STATUS status;
54     PREALLOCATED_USERD_INFO *pUserdInfo = &pKernelFifo->userdInfo;
55 
56     if (FLD_TEST_DRF(_REG_STR_RM, _INST_VPR, _INSTBLK, _TRUE, pGpu->instVprOverrides))
57     {
58         pKernelFifo->bInstProtectedMem = NV_TRUE;
59     }
60 
61     // Instance Memory
62     switch (DRF_VAL( _REG_STR_RM, _INST_LOC, _INSTBLK, pGpu->instLocOverrides))
63     {
64         default:
65         case NV_REG_STR_RM_INST_LOC_INSTBLK_DEFAULT:
66             if (kfifoIsMixedInstmemApertureDefAllowed(pKernelFifo))
67                 pKernelFifo->pInstAllocList  = ADDRLIST_FBMEM_PREFERRED;
68             else
69                 pKernelFifo->pInstAllocList  = ADDRLIST_FBMEM_ONLY;
70 
71             pKernelFifo->InstAttr        = NV_MEMORY_UNCACHED;
72             break;
73         case NV_REG_STR_RM_INST_LOC_INSTBLK_VID:
74             pKernelFifo->pInstAllocList  = ADDRLIST_FBMEM_ONLY;
75             pKernelFifo->InstAttr        = NV_MEMORY_UNCACHED;
76             break;
77         case NV_REG_STR_RM_INST_LOC_INSTBLK_COH:
78             pKernelFifo->pInstAllocList  = ADDRLIST_SYSMEM_ONLY;
79             pKernelFifo->InstAttr        = NV_MEMORY_CACHED;
80             break;
81         case NV_REG_STR_RM_INST_LOC_INSTBLK_NCOH:
82             pKernelFifo->pInstAllocList  = ADDRLIST_SYSMEM_ONLY;
83             pKernelFifo->InstAttr        = NV_MEMORY_UNCACHED;
84             break;
85     }
86 
87     // USERD
88     pUserdInfo->userdAperture    = ADDR_FBMEM;
89     pUserdInfo->userdAttr        = NV_MEMORY_WRITECOMBINED;
90     memdescOverrideInstLoc(DRF_VAL( _REG_STR_RM, _INST_LOC, _USERD, pGpu->instLocOverrides),
91                            "USERD",
92                            &pUserdInfo->userdAperture,
93                            &pUserdInfo->userdAttr);
94 
95     // Create child object KernelSchedMgr
96     if (kfifoIsSchedSupported(pKernelFifo))
97     {
98         pKernelFifo->pKernelSchedMgr = NULL;
99         status = objCreate(&pKernelFifo->pKernelSchedMgr, pKernelFifo, KernelSchedMgr);
100         if (status != NV_OK)
101         {
102             pKernelFifo->pKernelSchedMgr = NULL;
103             return status;
104         }
105         kschedmgrConstructPolicy(pKernelFifo->pKernelSchedMgr, pGpu);
106     }
107 
108     return NV_OK;
109 }
110 
111 /**
112  * @brief Allocate a page for dummy page directory
113  *
 * On GV100, the PDB of a freed subcontext is pointed at a dummy page
 * directory instead of being set to NULL.
 * This function allocates the page backing that dummy page directory.
117  */
118 static NV_STATUS
119 _kfifoAllocDummyPage
120 (
121     OBJGPU     *pGpu,
122     KernelFifo *pKernelFifo
123 )
124 {
125     NV_STATUS status   = NV_OK;
126     NvU32     flags    = MEMDESC_FLAGS_NONE;
127     NvBool    bBcState = gpumgrGetBcEnabledStatus(pGpu);
128 
129     if (bBcState)
130     {
131         flags |= MEMDESC_FLAGS_ALLOC_PER_SUBDEVICE;
132     }
133 
134     // Using instance block attributes to allocate dummy page
135     status = memdescCreate(&pKernelFifo->pDummyPageMemDesc, pGpu,
136                            RM_PAGE_SIZE,
137                            0,
138                            NV_FALSE,
139                            ADDR_UNKNOWN,
140                            pKernelFifo->InstAttr,
141                            flags);
142     if (status != NV_OK)
143     {
144         NV_PRINTF(LEVEL_ERROR, "Could not memdescCreate for dummy page\n");
145         DBG_BREAKPOINT();
146         return status;
147     }
148 
149     status = memdescAllocList(pKernelFifo->pDummyPageMemDesc, pKernelFifo->pInstAllocList);
    if (status != NV_OK)
151     {
152         NV_PRINTF(LEVEL_ERROR, "Could not allocate dummy page\n");
153         DBG_BREAKPOINT();
154         memdescDestroy(pKernelFifo->pDummyPageMemDesc);
155         pKernelFifo->pDummyPageMemDesc = NULL;
156     }
157 
158     return status;
159 }
160 
161 /**
162  * @brief Free the page used for dummy page directory
163  */
164 static void
165 _kfifoFreeDummyPage
166 (
167     OBJGPU     *pGpu,
168     KernelFifo *pKernelFifo
169 )
170 {
171     // Free dummy page memdesc
172     memdescFree(pKernelFifo->pDummyPageMemDesc);
173     memdescDestroy(pKernelFifo->pDummyPageMemDesc);
174     pKernelFifo->pDummyPageMemDesc = NULL;
175 }
176 
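/**
 * @brief Post-load state handling for KernelFifo
 *
 * Unless the load is state-preserving, preallocates USERD and, when the
 * zombie subcontext WAR is enabled on the parent GPU, allocates the dummy
 * page directory (informing the host over RPC on SRIOV guests). Finally
 * enables BAR1 USERD snooping and, for GSP clients and vGPU, triggers the
 * post-scheduling-enable callbacks.
 */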
177 NV_STATUS
178 kfifoStatePostLoad_GM107
179 (
180     OBJGPU     *pGpu,
181     KernelFifo *pKernelFifo,
182     NvU32       flags
183 )
184 {
185     NV_STATUS                      status     = NV_OK;
186     const PREALLOCATED_USERD_INFO *pUserdInfo = kfifoGetPreallocatedUserdInfo(pKernelFifo);
187 
188     if (!(flags & GPU_STATE_FLAGS_PRESERVING))
189     {
190         // Prealloc USERD
191         NV_ASSERT_OK_OR_RETURN(kfifoPreAllocUserD_HAL(pGpu, pKernelFifo));
192 
193         if (gpumgrIsParentGPU(pGpu))
194         {
195             if (kfifoIsZombieSubctxWarEnabled(pKernelFifo))
196             {
197                 NvBool bBcState = gpumgrGetBcEnabledStatus(pGpu);
198                 gpumgrSetBcEnabledStatus(pGpu, NV_TRUE);
199 
200                 status = _kfifoAllocDummyPage(pGpu, pKernelFifo);
201                 if (status != NV_OK)
202                 {
203                     NV_PRINTF(LEVEL_ERROR,
204                               "Failed to allocate dummy page for zombie subcontexts\n");
205                     DBG_BREAKPOINT();
206                     gpumgrSetBcEnabledStatus(pGpu, bBcState);
207                     return status;
208                 }
209 
210                 if (IS_VIRTUAL_WITH_SRIOV(pGpu))
211                 {
212                     NV2080_CTRL_FIFO_SETUP_VF_ZOMBIE_SUBCTX_PDB_PARAMS params;
213                     MEMORY_DESCRIPTOR *pDummyPageMemDesc = kfifoGetDummyPageMemDesc(pKernelFifo);
214 
215                     portMemSet(&params, 0, sizeof(params));
216 
                    params.base     = memdescGetPhysAddr(pDummyPageMemDesc, AT_GPU, 0);
218                     params.size     = pDummyPageMemDesc->Size;
219                     params.addressSpace = memdescGetAddressSpace(pDummyPageMemDesc);
220                     params.cacheAttrib  = memdescGetCpuCacheAttrib(pDummyPageMemDesc);
221 
222                     NV_RM_RPC_CONTROL(pGpu,
223                                       pGpu->hDefaultClientShare,
224                                       pGpu->hDefaultClientShareSubDevice,
225                                       NV2080_CTRL_CMD_FIFO_SETUP_VF_ZOMBIE_SUBCTX_PDB,
226                                       &params,
227                                       sizeof(params),
228                                       status);
229                     if (status != NV_OK)
230                     {
231                         NV_PRINTF(LEVEL_ERROR,
232                             "RM control call to setup zombie subctx failed, status 0x%x\n", status);
233                         DBG_BREAKPOINT();
234                         return status;
235                     }
236                 }
237 
238                 gpumgrSetBcEnabledStatus(pGpu, bBcState);
239             }
240         }
241     }
242 
    // Now that BAR1 USERD reserved memory has been set up successfully,
    // inform HW (only if snooping is not disabled).
245     kfifoSetupBar1UserdSnoop_HAL(pGpu, pKernelFifo, NV_TRUE, pUserdInfo->userdBar1MapStartOffset);
246 
247     if (IS_GSP_CLIENT(pGpu) || IS_VIRTUAL(pGpu))
248     {
249         status = kfifoTriggerPostSchedulingEnableCallback(pGpu, pKernelFifo);
250         if (status != NV_OK)
251             return status;
252     }
253 
254     return status;
255 }
256 
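/**
 * @brief Pre-unload state handling for KernelFifo
 *
 * On the parent GPU, unless the unload is state-preserving, frees the dummy
 * page directory (if the zombie subcontext WAR is enabled), notifies the
 * pre-scheduling-disable callbacks and disables BAR1 USERD snooping under
 * forced SLI broadcast; also frees the preallocated USERD.
 */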
257 NV_STATUS
258 kfifoStatePreUnload_GM107
259 (
260     OBJGPU     *pGpu,
261     KernelFifo *pKernelFifo,
262     NvU32       flags
263 )
264 {
265     NV_STATUS status = NV_OK;
266     NvU32     sliLoopReentrancy;
267 
268     NV_PRINTF(LEVEL_INFO, "start\n");
269 
270     if (!(flags & GPU_STATE_FLAGS_PRESERVING) && gpumgrIsParentGPU(pGpu))
271     {
272         NvBool bBcState = NV_FALSE;
273 
274         if (kfifoIsZombieSubctxWarEnabled(pKernelFifo))
275         {
276             _kfifoFreeDummyPage(pGpu, pKernelFifo);
277         }
278 
279         // Notify the handlers that the channel will soon be disabled.
280         status = kfifoTriggerPreSchedulingDisableCallback(pGpu, pKernelFifo);
281 
282         // Enable broadcast on SLI
283         bBcState = gpumgrGetBcEnabledStatus(pGpu);
284         gpumgrSetBcEnabledStatus(pGpu, NV_TRUE);
285 
        // Since we have forced SLI broadcast mode here, temporarily reset the reentrancy count
287         sliLoopReentrancy = gpumgrSLILoopReentrancyPop(pGpu);
288 
289         // Ask host to stop snooping
290         kfifoSetupBar1UserdSnoop_HAL(pGpu, pKernelFifo, NV_FALSE, 0);
291 
292         // Restore the reentrancy count
293         gpumgrSLILoopReentrancyPush(pGpu, sliLoopReentrancy);
294 
295         // Restore prior broadcast state
296         gpumgrSetBcEnabledStatus(pGpu, bBcState);
297     }
298 
299     if (!(flags & GPU_STATE_FLAGS_PRESERVING))
300     {
301         // Free preallocated userd
302         kfifoFreePreAllocUserD_HAL(pGpu, pKernelFifo);
303     }
304 
305     return status;
306 }
307 
308 /**
 * Returns the default timeslice (in us) for a channel group as defined by hardware.
310  */
311 NvU64
312 kfifoChannelGroupGetDefaultTimeslice_GM107
313 (
314     KernelFifo *pKernelFifo
315 )
316 {
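    //
    // Default timeslice encoding: the TIMEOUT value shifted by the SCALE
    // value. Assuming the dev_ram defines TIMEOUT_128 == 128 and
    // SCALE_3 == 3, this yields 128 << 3 = 1024us.
    //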
317     return NV_RAMRL_ENTRY_TIMESLICE_TIMEOUT_128 << NV_RAMRL_ENTRY_TIMESLICE_SCALE_3;
318 }
319 
320 /*! Get size and alignment requirements for instance memory */
321 NV_STATUS
322 kfifoGetInstMemInfo_GM107
323 (
324     KernelFifo  *pKernelFifo,
325     NvU64       *pSize,
326     NvU64       *pAlignment,
327     NvBool      *pbInstProtectedMem,
328     NvU32       *pInstAttr,
329     const NV_ADDRESS_SPACE **ppInstAllocList
330 )
331 {
332     NV_ASSERT_OR_RETURN(pSize != NULL, NV_ERR_INVALID_ARGUMENT);
333     NV_ASSERT_OR_RETURN(pAlignment != NULL, NV_ERR_INVALID_ARGUMENT);
334 
335     *pSize = NV_RAMIN_ALLOC_SIZE;
336     *pAlignment = 1 << NV_RAMIN_BASE_SHIFT;
337 
    if (pbInstProtectedMem != NULL)
        *pbInstProtectedMem = pKernelFifo->bInstProtectedMem;

    if (pInstAttr != NULL)
        *pInstAttr = pKernelFifo->InstAttr;

    if (ppInstAllocList != NULL)
        *ppInstAllocList = pKernelFifo->pInstAllocList;
346 
347     return NV_OK;
348 }
349 
350 /*! Gets instance block size and offset align for instance memory */
351 void
352 kfifoGetInstBlkSizeAlign_GM107
353 (
354     KernelFifo *pKernelFifo,
355     NvU32      *pSize,
356     NvU32      *pShift
357 )
358 {
359     *pSize  = NV_RAMIN_ALLOC_SIZE;
360     *pShift = NV_RAMIN_BASE_SHIFT;
361 
362     return;
363 }
364 
365 /*!
366  * @brief Gets the default runlist id to use for channels allocated with no engines on them.
367  *
368  * @param[in] pGpu
369  * @param[in] pKernelFifo
370  * @param[in] rmEngineType      - Engine type of the channel to retrieve default runlist id for
371  */
372 NvU32
373 kfifoGetDefaultRunlist_GM107
374 (
375     OBJGPU *pGpu,
376     KernelFifo *pKernelFifo,
377     RM_ENGINE_TYPE rmEngineType
378 )
379 {
380     NvU32 runlistId = INVALID_RUNLIST_ID;
381     ENGDESCRIPTOR engDesc = ENG_GR(0);
382 
383     if (RM_ENGINE_TYPE_IS_VALID(rmEngineType))
384     {
        // if translation fails, default is ENG_GR(0)
386         NV_ASSERT_OK(
387             kfifoEngineInfoXlate_HAL(pGpu, pKernelFifo,
388                 ENGINE_INFO_TYPE_RM_ENGINE_TYPE, (NvU32)rmEngineType,
389                 ENGINE_INFO_TYPE_ENG_DESC,       &engDesc));
390     }
391 
392     // if translation fails, default is INVALID_RUNLIST_ID
393     if (kfifoEngineInfoXlate_HAL(pGpu, pKernelFifo,
394                                  ENGINE_INFO_TYPE_ENG_DESC,
395                                  engDesc,
396                                  ENGINE_INFO_TYPE_RUNLIST,
397                                  &runlistId) != NV_OK)
398     {
399         runlistId = INVALID_RUNLIST_ID;
400     }
401 
402     return runlistId;
403 }
404 
405 /**
406  * @brief Programs a channel's runlist id to a given value
407  *
408  * Verifies that the requested engine is valid based on the current channel's
409  * state.  Does not bind the channel to the runlist in sw or hw. @ref kfifoRunlistSetId.
410  *
411  * @param pGpu
412  * @param pKernelFifo
413  * @param[in/out] pKernelChannel
414  * @param[in] runlistId runlist ID to use
415  */
416 NV_STATUS
417 kfifoRunlistSetId_GM107
418 (
419     OBJGPU *pGpu,
420     KernelFifo *pKernelFifo,
421     KernelChannel *pKernelChannel,
422     NvU32   runlistId
423 )
424 {
425     NV_ASSERT_OR_RETURN(pKernelChannel != NULL, NV_ERR_INVALID_ARGUMENT);
426 
427     if ((runlistId != kchannelGetRunlistId(pKernelChannel)) &&
428         kchannelIsRunlistSet(pGpu, pKernelChannel))
429     {
430         NV_PRINTF(LEVEL_ERROR,
431                   "Channel has already been assigned a runlist incompatible with this "
432                   "engine (requested: 0x%x current: 0x%x).\n", runlistId,
433                   kchannelGetRunlistId(pKernelChannel));
434         return NV_ERR_INVALID_STATE;
435     }
436 
437     //
438     // For TSG channel, the RL should support TSG.
439     // We relax this requirement if the channel is TSG wrapped by RM.
440     // In that case, RM won't write the TSG header in the RL.
441     //
442     if (!kfifoRunlistIsTsgHeaderSupported_HAL(pGpu, pKernelFifo, runlistId) &&
443         (pKernelChannel->pKernelChannelGroupApi != NULL) &&
444         !pKernelChannel->pKernelChannelGroupApi->pKernelChannelGroup->bAllocatedByRm)
445     {
446         NV_PRINTF(LEVEL_ERROR, "Runlist does not support TSGs\n");
447         return NV_ERR_INVALID_STATE;
448     }
449 
    // To set a channel's runlist ID, first set it on the TSG
451     if (pKernelChannel->pKernelChannelGroupApi != NULL)
452     {
453         // Change TSG runlist if channel is the only one
454         if (pKernelChannel->pKernelChannelGroupApi->pKernelChannelGroup->chanCount == 1 ||
455             !pKernelChannel->pKernelChannelGroupApi->pKernelChannelGroup->bRunlistAssigned)
456         {
457             pKernelChannel->pKernelChannelGroupApi->pKernelChannelGroup->runlistId = runlistId;
458             pKernelChannel->pKernelChannelGroupApi->pKernelChannelGroup->bRunlistAssigned = NV_TRUE;
459         }
460         else
461         {
462             NV_ASSERT_OR_RETURN(pKernelChannel->pKernelChannelGroupApi->pKernelChannelGroup->runlistId ==
463                                     runlistId,
464                                 NV_ERR_INVALID_STATE);
465         }
466     }
467 
468     kchannelSetRunlistId(pKernelChannel, runlistId);
469     kchannelSetRunlistSet(pGpu, pKernelChannel, NV_TRUE);
470     return NV_OK;
471 }
472 
473 /**
474  * @brief Programs a channel's runlist id given the engine tag
475  *
476  * Verifies that the requested engine is valid based on the current channel's
477  * state.  Does not bind the channel to the runlist in sw or hw. @ref kfifoRunlistSetIdByEngine.
478  *
479  * @param pGpu
480  * @param pKernelFifo
481  * @param[in/out] pKernelChannel
482  * @param[in] engDesc
483  */
484 NV_STATUS
485 kfifoRunlistSetIdByEngine_GM107
486 (
487     OBJGPU *pGpu,
488     KernelFifo *pKernelFifo,
489     KernelChannel *pKernelChannel,
490     NvU32   engDesc
491 )
492 {
493     NvU32 runlistId;
494     NV_STATUS status;
495     NvU32 subctxType = 0;
496 
497     NV_ASSERT_OR_RETURN(pKernelChannel != NULL, NV_ERR_INVALID_ARGUMENT);
498 
499     kfifoGetSubctxType_HAL(pGpu, pKernelFifo, pKernelChannel, &subctxType);
500 
501     if (!kfifoValidateEngineAndRunqueue_HAL(pGpu, pKernelFifo, engDesc, kchannelGetRunqueue(pKernelChannel)))
502         return NV_ERR_INVALID_ARGUMENT;
503 
504     if (!kfifoValidateEngineAndSubctxType_HAL(pGpu, pKernelFifo, engDesc, subctxType))
505         return NV_ERR_INVALID_ARGUMENT;
506 
507     //
508     // SW objects can go on any runlist so we defer committing of runlist ID to
509     // scheduling or another object's allocation.
510     //
511     if ((engDesc == ENG_SW) || (engDesc == ENG_BUS))
512         return NV_OK;
513 
514     NV_ASSERT_OK_OR_RETURN(kfifoEngineInfoXlate_HAL(pGpu, pKernelFifo, ENGINE_INFO_TYPE_ENG_DESC,
515                 engDesc, ENGINE_INFO_TYPE_RUNLIST, &runlistId));
516 
517     status = kfifoRunlistSetId_HAL(pGpu, pKernelFifo, pKernelChannel, runlistId);
518     if (status != NV_OK)
519     {
520         NV_PRINTF(LEVEL_ERROR, "Unable to program runlist for %s\n",
521                   kfifoGetEngineName_HAL(pKernelFifo, ENGINE_INFO_TYPE_ENG_DESC, engDesc));
522     }
523 
524     return status;
525 }
526 
527 NV_STATUS
528 kfifoChannelGetFifoContextMemDesc_GM107
529 (
530     OBJGPU             *pGpu,
531     KernelFifo         *pKernelFifo,
532     KernelChannel      *pKernelChannel,
533     FIFO_CTX            engineState,
534     MEMORY_DESCRIPTOR **ppMemDesc
535 )
536 {
537     FIFO_INSTANCE_BLOCK *pInstanceBlock;
538 
    /* UVM calls nvGpuOpsGetChannelInstanceMemInfo, which calls this
     * function to fetch FIFO_CTX_INST_BLOCK. */
    /* Currently, UVM is supported on SRIOV vGPUs only. */
542     if (IS_VIRTUAL_WITHOUT_SRIOV(pGpu))
543         return NV_OK;
544 
545     NV_ASSERT_OR_RETURN(!gpumgrGetBcEnabledStatus(pGpu), NV_ERR_INVALID_STATE);
546     NV_ASSERT_OR_RETURN(pKernelChannel != NULL, NV_ERR_INVALID_CHANNEL);
547 
548     pInstanceBlock = pKernelChannel->pFifoHalData[gpumgrGetSubDeviceInstanceFromGpu(pGpu)];
549     if (pInstanceBlock == NULL)
550         return NV_ERR_INVALID_STATE;
551 
552     switch (engineState)
553     {
554         case FIFO_CTX_INST_BLOCK:
555             *ppMemDesc = pInstanceBlock->pInstanceBlockDesc;
556             break;
557 
558         case FIFO_CTX_RAMFC:
559             *ppMemDesc = pInstanceBlock->pRamfcDesc;
560             break;
561 
562         default:
563             NV_PRINTF(LEVEL_ERROR,
564                       "bad engineState 0x%x on engine 0x%x\n",
565                       engineState, ENG_FIFO);
566             DBG_BREAKPOINT();
567             return NV_ERR_INVALID_ARGUMENT;
568     }
569 
570     NV_ASSERT(!memdescHasSubDeviceMemDescs(*ppMemDesc));
571 
572     NV_PRINTF(LEVEL_INFO,
573               "Channel %d engine 0x%x engineState 0x%x *ppMemDesc %p\n",
574               kchannelGetDebugTag(pKernelChannel), ENG_FIFO, engineState, *ppMemDesc);
575 
576     return NV_OK;
577 }
578 
579 /**
 * @brief Look up the KernelChannel data associated with a given instance address/target
581  *
582  * @param[in] pGpu               OBJGPU pointer
583  * @param[in] pKernelFifo        KernelFifo pointer
584  * @param[in] pInst              INST_BLOCK_DESC pointer
585  * @param[out] ppKernelChannel   KernelChannel ptr
586  */
587 NV_STATUS
588 kfifoConvertInstToKernelChannel_GM107
589 (
590     OBJGPU           *pGpu,
591     KernelFifo       *pKernelFifo,
592     INST_BLOCK_DESC  *pInst,
593     KernelChannel   **ppKernelChannel
594 )
595 {
596     MemoryManager       *pMemoryManager   = GPU_GET_MEMORY_MANAGER(pGpu);
597     KernelChannel       *pKernelChannel   = NULL;
598     FIFO_INSTANCE_BLOCK *pInstanceBlock;
599     MEMORY_DESCRIPTOR    instMemDesc;
600     NV_ADDRESS_SPACE     instAperture;
601     CHANNEL_ITERATOR     chanIt;
602 
603     NV_ASSERT_OR_RETURN(pInst != NULL, NV_ERR_INVALID_ARGUMENT);
604     NV_ASSERT_OR_RETURN(ppKernelChannel != NULL, NV_ERR_INVALID_ARGUMENT);
605     NV_ASSERT_OR_RETURN(!gpumgrGetBcEnabledStatus(pGpu), NV_ERR_INVALID_STATE);
606 
607     *ppKernelChannel = NULL;
608 
609     switch (pInst->aperture)
610     {
611         case INST_BLOCK_APERTURE_SYSTEM_COHERENT_MEMORY:
612         case INST_BLOCK_APERTURE_SYSTEM_NON_COHERENT_MEMORY:
613             instAperture = ADDR_SYSMEM;
614             break;
615         case INST_BLOCK_APERTURE_VIDEO_MEMORY:
616             instAperture = ADDR_FBMEM;
617             break;
618         default:
619             NV_PRINTF(LEVEL_ERROR, "unknown inst target 0x%x\n", pInst->aperture);
620             DBG_BREAKPOINT();
621             return NV_ERR_INVALID_ADDRESS;
622     }
623 
624     //
625     // The MMU_PTE version of aperture is what the HW should always
626     // report for an instance block. Compare the SW defines against
627     // these values here.
628     //
629     VERIFY_INST_BLOCK_APERTURE(NV_MMU_PTE_APERTURE_VIDEO_MEMORY,
630                                NV_MMU_PTE_APERTURE_SYSTEM_COHERENT_MEMORY,
631                                NV_MMU_PTE_APERTURE_SYSTEM_NON_COHERENT_MEMORY);
632 
633     memdescCreateExisting(&instMemDesc, pGpu, NV_RAMIN_ALLOC_SIZE,
634                           instAperture, NV_MEMORY_UNCACHED,
635                           MEMDESC_FLAGS_OWNED_BY_CURRENT_DEVICE);
636 
637     memdescDescribe(&instMemDesc, instAperture, pInst->address, NV_RAMIN_ALLOC_SIZE);
638 
639     kfifoGetChannelIterator(pGpu, pKernelFifo, &chanIt);
640     while (kfifoGetNextKernelChannel(pGpu, pKernelFifo, &chanIt, &pKernelChannel) == NV_OK)
641     {
642         NV_ASSERT_OR_ELSE(pKernelChannel != NULL, continue);
643 
644         pInstanceBlock = pKernelChannel->pFifoHalData[gpumgrGetSubDeviceInstanceFromGpu(pGpu)];
645 
646         if (pInstanceBlock != NULL &&
647             pInstanceBlock->pInstanceBlockDesc != NULL &&
648             kchannelGetGfid(pKernelChannel) == pInst->gfid &&
649             memmgrComparePhysicalAddresses_HAL(pGpu, pMemoryManager,
650                 kgmmuGetHwPteApertureFromMemdesc(GPU_GET_KERNEL_GMMU(pGpu),
651                     pInstanceBlock->pInstanceBlockDesc),
652                 memdescGetPhysAddr(pInstanceBlock->pInstanceBlockDesc,
653                                    AT_GPU, 0),
654                 kgmmuGetHwPteApertureFromMemdesc(GPU_GET_KERNEL_GMMU(pGpu),
655                                                 &instMemDesc),
656                 memdescGetPhysAddr(&instMemDesc, AT_GPU, 0)))
657         {
658                 *ppKernelChannel = pKernelChannel;
659                 memdescDestroy(&instMemDesc);
660                 return NV_OK;
661         }
662     }
663 
664     NV_PRINTF(LEVEL_INFO,
665               "No channel found for instance 0x%016llx (target 0x%x)\n",
666               memdescGetPhysAddr(&instMemDesc, AT_GPU, 0),
667               kgmmuGetHwPteApertureFromMemdesc(GPU_GET_KERNEL_GMMU(pGpu), &instMemDesc));
668     memdescDestroy(&instMemDesc);
669 
670     return NV_ERR_INVALID_CHANNEL;
671 }
672 
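/**
 * @brief Returns NV_TRUE if the given ENGINE_INFO_TYPE is only meaningful
 *        for host (ESCHED)-driven engines, NV_FALSE otherwise.
 */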
673 static inline NvBool
674 _isEngineInfoTypeValidForOnlyHostDriven(ENGINE_INFO_TYPE type)
675 {
676     switch (type)
677     {
678         case ENGINE_INFO_TYPE_RUNLIST:
679         case ENGINE_INFO_TYPE_RUNLIST_PRI_BASE:
680         case ENGINE_INFO_TYPE_RUNLIST_ENGINE_ID:
681         case ENGINE_INFO_TYPE_PBDMA_ID:
682         case ENGINE_INFO_TYPE_CHRAM_PRI_BASE:
683         case ENGINE_INFO_TYPE_FIFO_TAG:
684             return NV_TRUE;
685         case ENGINE_INFO_TYPE_ENG_DESC:
686         case ENGINE_INFO_TYPE_RM_ENGINE_TYPE:
687         case ENGINE_INFO_TYPE_MMU_FAULT_ID:
688         case ENGINE_INFO_TYPE_RC_MASK:
689         case ENGINE_INFO_TYPE_RESET:
690         case ENGINE_INFO_TYPE_INTR:
691         case ENGINE_INFO_TYPE_MC:
692         case ENGINE_INFO_TYPE_DEV_TYPE_ENUM:
693         case ENGINE_INFO_TYPE_INSTANCE_ID:
694         case ENGINE_INFO_TYPE_IS_HOST_DRIVEN_ENGINE:
695             // The bool itself is valid for non-host-driven engines too.
696         case ENGINE_INFO_TYPE_INVALID:
697             return NV_FALSE;
698         default:
699             // Ensure that this function covers every value in ENGINE_INFO_TYPE
700             NV_ASSERT(0 && "check all ENGINE_INFO_TYPE are classified as host-driven or not");
701             return NV_FALSE;
702     }
703 }
704 
705 
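/**
 * @brief Translate one engine info type to another
 *
 * Finds the engine whose @p inType field matches @p inVal (or the engine at
 * list index @p inVal when @p inType is ENGINE_INFO_TYPE_INVALID) and returns
 * its @p outType value. ENGINE_INFO_TYPE_PBDMA_ID is only accepted as an
 * input type.
 *
 * @returns NV_OK on success, NV_ERR_OBJECT_NOT_FOUND if no engine matches
 */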
706 NV_STATUS
707 kfifoEngineInfoXlate_GM107
708 (
709     OBJGPU           *pGpu,
710     KernelFifo       *pKernelFifo,
711     ENGINE_INFO_TYPE  inType,
712     NvU32             inVal,
713     ENGINE_INFO_TYPE  outType,
714     NvU32            *pOutVal
715 )
716 {
717     const ENGINE_INFO *pEngineInfo       = kfifoGetEngineInfo(pKernelFifo);
718     FIFO_ENGINE_LIST  *pFoundInputEngine = NULL;
719 
720     NV_ASSERT_OR_RETURN(pOutVal != NULL, NV_ERR_INVALID_ARGUMENT);
721 
722     // PBDMA_ID can only be inType
723     NV_ASSERT_OR_RETURN(outType != ENGINE_INFO_TYPE_PBDMA_ID,
724                         NV_ERR_INVALID_ARGUMENT);
725 
726     if (pEngineInfo == NULL)
727     {
728         NV_ASSERT_OK_OR_RETURN(kfifoConstructEngineList_HAL(pGpu, pKernelFifo));
729         pEngineInfo = kfifoGetEngineInfo(pKernelFifo);
730     }
731     NV_ASSERT_OR_RETURN(pEngineInfo != NULL, NV_ERR_INVALID_STATE);
732 
733     if (inType == ENGINE_INFO_TYPE_INVALID)
734     {
735         NV_ASSERT_OR_RETURN(inVal < pEngineInfo->engineInfoListSize,
736                             NV_ERR_INVALID_ARGUMENT);
737         pFoundInputEngine = &pEngineInfo->engineInfoList[inVal];
738     }
739     else
740     {
741         NvU32 i;
742         for (i = 0;
743              (i < pEngineInfo->engineInfoListSize) &&
744              (pFoundInputEngine == NULL);
745              ++i)
746         {
747             FIFO_ENGINE_LIST *pThisEngine = &pEngineInfo->engineInfoList[i];
748 
749             if (inType == ENGINE_INFO_TYPE_PBDMA_ID)
750             {
751                 NvU32 j;
752                 for (j = 0; j < pThisEngine->numPbdmas; ++j)
753                 {
754                     if (pThisEngine->pbdmaIds[j] == inVal)
755                     {
756                         pFoundInputEngine = pThisEngine;
757                         break;
758                     }
759                 }
760             }
761             else if (pThisEngine->engineData[inType] == inVal)
762             {
763                 pFoundInputEngine = pThisEngine;
764             }
765         }
766     }
767 
768     if (pFoundInputEngine == NULL)
769     {
770         return NV_ERR_OBJECT_NOT_FOUND;
771     }
772 
773     if (_isEngineInfoTypeValidForOnlyHostDriven(outType) &&
774         !pFoundInputEngine->engineData[ENGINE_INFO_TYPE_IS_HOST_DRIVEN_ENGINE])
775     {
776         //
777         // Bug 3748452 TODO
778         // Bug 3772199 TODO
779         //
780         // We can't easily just return an error here because hundreds of
781         // callsites would fail their asserts. The above two bugs track fixing
782         // all callsites after which, we can uncomment this.
783         //
784         // return NV_ERR_OBJECT_NOT_FOUND;
785         //
786         NV_PRINTF(LEVEL_ERROR,
787             "Asked for host-specific type(0x%x) for non-host engine type(0x%x),val(0x%08x)\n",
788             outType, inType, inVal);
789     }
790 
791     *pOutVal = pFoundInputEngine->engineData[outType];
792     return NV_OK;
793 }
794 
795 /**
796  * @brief Get the local maximum number of subctx allowed in this TSG
797  */
798 NvU32
799 kfifoChannelGroupGetLocalMaxSubcontext_GM107
800 (
801     OBJGPU             *pGpu,
802     KernelFifo         *pKernelFifo,
803     KernelChannelGroup *pKernelChannelGroup,
804     NvBool              bLegacyMode
805 )
806 {
807     // Pre-AMPERE, each channel group has the global maximum available
808     return kfifoGetMaxSubcontext_HAL(pGpu, pKernelFifo, bLegacyMode);
809 }
810 
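/**
 * @brief Initialize (zero-fill) a channel's USERD region
 *
 * @param[in] pGpu
 * @param[in] pKernelFifo
 * @param[in] pMemDesc  memory descriptor backing the USERD to clear
 */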
811 void
812 kfifoSetupUserD_GM107
813 (
814     OBJGPU *pGpu,
815     KernelFifo *pKernelFifo,
816     MEMORY_DESCRIPTOR *pMemDesc
817 )
818 {
819     TRANSFER_SURFACE tSurf = {.pMemDesc = pMemDesc, .offset = 0};
820 
821     NV_ASSERT_OK(memmgrMemSet(GPU_GET_MEMORY_MANAGER(pGpu), &tSurf, 0,
822         NV_RAMUSERD_CHAN_SIZE, TRANSFER_FLAGS_NONE));
823 }

/**
825  * @brief return number of HW engines
826  *
827  *  Can be used to loop over all engines in the system by looping from 0
828  *  through the value returned by this function and then using
829  *  kfifoEngineInfoXlate() with an input type of ENGINE_INFO_TYPE_INVALID.
830  *
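 *  For illustration only, such a loop might look like the following sketch
 *  (mirroring the HAL call patterns used elsewhere in this file):
 *
 *  @code
 *      NvU32 numEngines = kfifoGetNumEngines_HAL(pGpu, pKernelFifo);
 *      NvU32 i;
 *      for (i = 0; i < numEngines; i++)
 *      {
 *          NvU32 runlistId;
 *          NV_ASSERT_OK(kfifoEngineInfoXlate_HAL(pGpu, pKernelFifo,
 *              ENGINE_INFO_TYPE_INVALID, i,
 *              ENGINE_INFO_TYPE_RUNLIST, &runlistId));
 *      }
 *  @endcode
 *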
831  * @param[in] pGpu          OBJGPU pointer
832  * @param[in] pKernelFifo   KernelFifo pointer
833  *
834  * @returns number of HW engines present on chip.
835  */
836 NvU32
837 kfifoGetNumEngines_GM107
838 (
839     OBJGPU     *pGpu,
840     KernelFifo *pKernelFifo
841 )
842 {
843     const ENGINE_INFO *pEngineInfo = kfifoGetEngineInfo(pKernelFifo);
844 
845     if (pEngineInfo == NULL)
846     {
847         NV_ASSERT_OR_RETURN(kfifoConstructEngineList_HAL(pGpu, pKernelFifo) == NV_OK, 0);
848 
849         pEngineInfo = kfifoGetEngineInfo(pKernelFifo);
850         NV_ASSERT_OR_RETURN(pEngineInfo != NULL, 0);
851     }
852 
853     NV_ASSERT(pEngineInfo->engineInfoListSize);
854 
855     // we don't count the SW engine entry at the end of the list
856     return pEngineInfo->engineInfoListSize-1;
857 }
858 
859 /**
860  * @brief Retrieves the name of the engine corresponding to the given @ref ENGINE_INFO_TYPE
861  *
862  * @param pKernelFifo
863  * @param[in] inType
864  * @param[in] inVal
865  *
866  * @returns a string
867  */
868 const char *
869 kfifoGetEngineName_GM107
870 (
871     KernelFifo *pKernelFifo,
872     ENGINE_INFO_TYPE inType,
873     NvU32 inVal
874 )
875 {
876     const ENGINE_INFO *pEngineInfo = kfifoGetEngineInfo(pKernelFifo);
877     NvU32 i;
878 
879     if (inType == ENGINE_INFO_TYPE_INVALID)
880     {
        NV_ASSERT_OR_RETURN(inVal < pEngineInfo->engineInfoListSize, NULL);
882         return pEngineInfo->engineInfoList[inVal].engineName;
883     }
884     for (i = 0; i < pEngineInfo->engineInfoListSize; ++i)
885     {
886         if (pEngineInfo->engineInfoList[i].engineData[inType] == inVal)
887         {
888             return pEngineInfo->engineInfoList[i].engineName;
889         }
890     }
891 
892     return "UNKNOWN";
893 }
894 
895 /**
896  * @brief Returns the maximum possible number of runlists.
897  *
898  * Returns a number which represents the limit of any runlistId indexed
899  * registers in hardware.  Does not necessarily return how many runlists are
900  * active.  In the range of 0..kfifoGetMaxNumRunlists() there may be runlists
901  * that are not used.
902  *
903  * @param pGpu
904  * @param pKernelFifo
905  */
906 NvU32
907 kfifoGetMaxNumRunlists_GM107
908 (
909     OBJGPU     *pGpu,
910     KernelFifo *pKernelFifo
911 )
912 {
913     const ENGINE_INFO *pEngineInfo = kfifoGetEngineInfo(pKernelFifo);
914 
915     return pEngineInfo->maxNumRunlists;
916 }
917 
918 NV_STATUS
919 kfifoGetEnginePbdmaIds_GM107
920 (
921     OBJGPU *pGpu,
922     KernelFifo *pKernelFifo,
923     ENGINE_INFO_TYPE type,
924     NvU32 val,
925     NvU32 **ppPbdmaIds,
926     NvU32 *pNumPbdmas
927 )
928 {
929     const ENGINE_INFO *pEngineInfo = kfifoGetEngineInfo(pKernelFifo);
930     NvU32 i;
931 
932     if (pEngineInfo == NULL)
933     {
934         NV_ASSERT_OK_OR_RETURN(kfifoConstructEngineList_HAL(pGpu, pKernelFifo));
935 
936         pEngineInfo = kfifoGetEngineInfo(pKernelFifo);
937         NV_ASSERT_OR_RETURN(pEngineInfo != NULL, NV_ERR_INVALID_STATE);
938     }
939 
940     if (type == ENGINE_INFO_TYPE_INVALID)
941     {
942         NV_ASSERT_OR_RETURN(val < pEngineInfo->engineInfoListSize, NV_ERR_INVALID_ARGUMENT);
943         *ppPbdmaIds = pEngineInfo->engineInfoList[val].pbdmaIds;
944         *pNumPbdmas = pEngineInfo->engineInfoList[val].numPbdmas;
945         return NV_OK;
946     }
947 
948     for (i = 0; i < pEngineInfo->engineInfoListSize; i++)
949     {
950         if (pEngineInfo->engineInfoList[i].engineData[type] == val)
951         {
952             *ppPbdmaIds = pEngineInfo->engineInfoList[i].pbdmaIds;
953             *pNumPbdmas = pEngineInfo->engineInfoList[i].numPbdmas;
954             return NV_OK;
955         }
956     }
957 
958     return NV_ERR_INVALID_ARGUMENT;
959 }
960 
961 /**
962  * @brief finds all engines on the same pbdma as the input
963  *
964  * pPartnerListParams->partnershipClassId is currently ignored.
965  *
966  * @param pGpu
967  * @param pKernelFifo
 * @param[in/out] pPartnerListParams engineType is input, partnerList/numPartners are output
969  *
970  * @returns NV_OK if successful, error otherwise
971  */
972 NV_STATUS
973 kfifoGetEnginePartnerList_GM107
974 (
975     OBJGPU *pGpu,
976     KernelFifo *pKernelFifo,
977     NV2080_CTRL_GPU_GET_ENGINE_PARTNERLIST_PARAMS *pPartnerListParams
978 )
979 {
980     const NvU32 numEngines = kfifoGetNumEngines_HAL(pGpu, pKernelFifo);
981     NvU32 i;
982     NvU32 srcRunlist;
983     NvU32 runlist;
984     NvU32 *pSrcPbdmaIds;
985     NvU32 numSrcPbdmaIds;
986     NvU32 srcPbdmaId;
987     NvU32 *pPbdmaIds;
988     NvU32 numPbdmaIds;
989     NvU32 numClasses = 0;
990     ENGDESCRIPTOR engDesc;
991     RM_ENGINE_TYPE rmEngineType = gpuGetRmEngineType(pPartnerListParams->engineType);
992 
993     if (pPartnerListParams->runqueue >= kfifoGetNumRunqueues_HAL(pGpu, pKernelFifo))
994         return NV_ERR_INVALID_ARGUMENT;
995 
996     NV_ASSERT_OK_OR_RETURN(kfifoEngineInfoXlate_HAL(pGpu, pKernelFifo,
997                                                     ENGINE_INFO_TYPE_RM_ENGINE_TYPE,
998                                                     (NvU32)rmEngineType,
999                                                     ENGINE_INFO_TYPE_RUNLIST,
1000                                                     &srcRunlist));
1001 
1002     NV_ASSERT_OK_OR_RETURN(kfifoGetEnginePbdmaIds_HAL(pGpu, pKernelFifo,
1003                                                       ENGINE_INFO_TYPE_RM_ENGINE_TYPE,
1004                                                       (NvU32)rmEngineType,
1005                                                       &pSrcPbdmaIds,
1006                                                       &numSrcPbdmaIds));
1007 
1008     pPartnerListParams->numPartners = 0;
1009 
1010     // Get the PBDMA ID for the runqueue-th runqueue
1011     if (pPartnerListParams->runqueue >= numSrcPbdmaIds)
1012     {
1013         return NV_ERR_INVALID_ARGUMENT;
1014     }
1015     srcPbdmaId = pSrcPbdmaIds[pPartnerListParams->runqueue];
1016 
1017     //
1018     // Find all engines sharing a runlist with the input engine, add each to
1019     // the output array.
1020     //
1021     for (i = 0; i < numEngines; i++)
1022     {
1023         NV_ASSERT_OK_OR_RETURN(kfifoEngineInfoXlate_HAL(pGpu, pKernelFifo,
1024                                                         ENGINE_INFO_TYPE_INVALID, i,
1025                                                         ENGINE_INFO_TYPE_ENG_DESC, &engDesc));
1026 
1027         NV_ASSERT_OK_OR_RETURN(gpuGetClassList(pGpu, &numClasses, NULL, engDesc));
1028         if (numClasses == 0)
1029         {
1030             NV_PRINTF(LEVEL_INFO,
                      "EngineID %x is not part of classDB, skipping\n",
1032                       engDesc);
1033             continue;
1034         }
1035 
1036         NV_ASSERT_OK_OR_RETURN(kfifoEngineInfoXlate_HAL(pGpu, pKernelFifo,
1037                                                         ENGINE_INFO_TYPE_INVALID, i,
1038                                                         ENGINE_INFO_TYPE_RUNLIST, &runlist));
1039 
1040         if (runlist == srcRunlist)
1041         {
1042             NvU32 j;
1043             RM_ENGINE_TYPE localRmEngineType;
1044 
1045             NV_ASSERT_OK_OR_RETURN(kfifoGetEnginePbdmaIds_HAL(pGpu, pKernelFifo,
1046                                                               ENGINE_INFO_TYPE_INVALID, i,
1047                                                               &pPbdmaIds, &numPbdmaIds));
1048 
1049             for (j = 0; j < numPbdmaIds; j++)
1050             {
1051                 if (pPbdmaIds[j] == srcPbdmaId)
1052                 {
1053                     NV_ASSERT_OK_OR_RETURN(kfifoEngineInfoXlate_HAL(pGpu, pKernelFifo,
1054                                                                     ENGINE_INFO_TYPE_INVALID, i,
1055                                                                     ENGINE_INFO_TYPE_RM_ENGINE_TYPE, (NvU32 *)&localRmEngineType));
1056 
1057                     // Don't include input in output list
1058                     if (localRmEngineType != rmEngineType)
1059                     {
1060                         pPartnerListParams->partnerList[pPartnerListParams->numPartners++] =
1061                             gpuGetNv2080EngineType(localRmEngineType);
1062 
1063                         if (pPartnerListParams->numPartners >= NV2080_CTRL_GPU_MAX_ENGINE_PARTNERS)
1064                             return NV_ERR_INVALID_ARGUMENT;
1065                     }
1066                 }
1067             }
1068         }
1069     }
1070 
1071     return NV_OK;
1072 }
1073 
1074 /**
1075  * @brief Check if the runlist has TSG support
1076  *
1077  * Currently, we only enable the TSG runlist for GR
1078  *
1079  *  @return NV_TRUE if TSG is supported, NV_FALSE if not
1080  */
1081 NvBool
1082 kfifoRunlistIsTsgHeaderSupported_GM107
1083 (
1084     OBJGPU *pGpu,
1085     KernelFifo *pKernelFifo,
1086     NvU32 runlistId
1087 )
1088 {
1089     NvU32 tmp_runlist;
1090 
1091     if (kfifoEngineInfoXlate_HAL(pGpu, pKernelFifo, ENGINE_INFO_TYPE_ENG_DESC,
1092         ENG_GR(0), ENGINE_INFO_TYPE_RUNLIST, &tmp_runlist) != NV_OK)
1093     {
1094         NV_PRINTF(LEVEL_ERROR,
1095                   "can't find runlist ID for engine ENG_GR(0)!\n");
1096         NV_ASSERT(0);
1097         return NV_FALSE;
1098     }
1099 
1100     return tmp_runlist == runlistId;
1101 }
1102 
1103 /**
1104  * @brief Get the runlist entry size
1105  *
1106  * @param pKernelFifo
1107  *
1108  * @return size in bytes
1109  */
1110 NvU32
1111 kfifoRunlistGetEntrySize_GM107
1112 (
1113     KernelFifo *pKernelFifo
1114 )
1115 {
1116     return NV_RAMRL_ENTRY_SIZE;
1117 }
1118 
1119 /**
1120  * @brief Get the runlist base shift amount
1121  *
1122  * @param pKernelFifo
1123  *
1124  * @return shift amount
1125  */
1126 NvU32
1127 kfifoRunlistGetBaseShift_GM107
1128 (
1129     KernelFifo *pKernelFifo
1130 )
1131 {
1132     return NV_RAMRL_BASE_SHIFT;
1133 }
1134 
1135 /**
1136  * @brief Pre-allocate BAR1 userd space
1137  *
1138  * @param   pGpu
1139  * @param   pKernelFifo
1140  *
1141  * @returns NV_STATUS
1142  */
1143 NV_STATUS
1144 kfifoPreAllocUserD_GM107
1145 (
1146     OBJGPU     *pGpu,
1147     KernelFifo *pKernelFifo
1148 )
1149 {
1150     OBJGPU     *pParentGpu             = gpumgrGetParentGPU(pGpu);
1151     KernelFifo *pParentKernelFifo      = GPU_GET_KERNEL_FIFO(pParentGpu);
1152     KernelBus  *pKernelBus             = GPU_GET_KERNEL_BUS(pGpu);
1153     NvBool      bCoherentCpuMapping    = NV_FALSE;
1154     NV_STATUS   status                 = NV_OK;
1155     NvU64       temp                   = 0;
1156     NvU32       userdSize;
1157     NvU32       userdShift;
1158     NvU32       numChannels;
1159     NvBool      bFifoFirstInit;
1160     NvU32       flags                  = MEMDESC_FLAGS_NONE;
1161     NvU32       mapFlags               = BUS_MAP_FB_FLAGS_MAP_DOWNWARDS |
1162                                          BUS_MAP_FB_FLAGS_MAP_UNICAST;
1163     NvU32       currentGpuInst         = gpumgrGetSubDeviceInstanceFromGpu(pGpu);
1164     CHID_MGR   *pChidMgr               = kfifoGetChidMgr(pGpu, pKernelFifo, 0);
1165 
1166     MemoryManager     *pMemoryManager    = GPU_GET_MEMORY_MANAGER(pGpu);
1167     KernelMIGManager  *pKernelMIGManager = GPU_GET_KERNEL_MIG_MANAGER(pGpu);
1168     PREALLOCATED_USERD_INFO *pUserdInfo  = &pParentKernelFifo->userdInfo;
1169 
1170     NV_ASSERT_OR_RETURN(!gpumgrGetBcEnabledStatus(pGpu), NV_ERR_INVALID_STATE);
1171 
1172     // We don't support RM allocated USERD for vGPU guest with SRIOV
1173     if (IS_VIRTUAL_WITH_SRIOV(pGpu))
1174     {
1175         return NV_OK;
1176     }
1177 
1178     bCoherentCpuMapping = pGpu->getProperty(pGpu, PDB_PROP_GPU_COHERENT_CPU_MAPPING);
1179 
1180     if (pUserdInfo->userdBar1CpuPtr == NULL)
1181     {
1182         bFifoFirstInit = NV_TRUE;
1183     }
1184     else
1185     {
1186         mapFlags |= BUS_MAP_FB_FLAGS_MAP_OFFSET_FIXED;
1187         bFifoFirstInit = NV_FALSE;
1188     }
1189 
1190     //
1191     // Allocate the physical memory associated with the UserD if this is
1192     // the first GPU to init fifo. This relies on the assumption that
1193     // UserD is shared physmem.
1194     //
1195     if (bFifoFirstInit)
1196     {
1197         pUserdInfo->userdBar1MapStartOffset   =  0;
1198         pUserdInfo->userdBar1MapSize          =  0;
1199 
1200         // This is a WAR for HW bug 600241
1201         if (pUserdInfo->userdAperture == ADDR_SYSMEM)
1202         {
1203             pKernelFifo->bUserdInSystemMemory = NV_TRUE;
1204         }
1205     }
1206 
1207     kfifoGetUserdSizeAlign_HAL(pKernelFifo, &userdSize, &userdShift);
1208 
1209     numChannels = kfifoChidMgrGetNumChannels(pGpu, pKernelFifo, pChidMgr);
1210 
1211     // Alloc USERD of size numChannels * sizeof( USERD ) for each gpu
1212     status = memdescCreate(&pUserdInfo->userdPhysDesc[currentGpuInst], pGpu,
1213                            userdSize * numChannels,
1214                            1ULL << userdShift,
1215                            NV_TRUE,
1216                            pUserdInfo->userdAperture,
1217                            pUserdInfo->userdAttr,
1218                            flags);
1219     if (status != NV_OK)
1220     {
1221         NV_PRINTF(LEVEL_ERROR,
1222                   "Could not memdescCreate for USERD for %x #channels\n",
1223                   numChannels);
1224         DBG_BREAKPOINT();
1225         goto fail;
1226     }
1227     temp = pUserdInfo->userdPhysDesc[currentGpuInst]->Size;
1228 
1229     //
1230     // For vGPU, do not allocate USERD memory in guest.
1231     // vGPU does all HW management in host, so host RM will
1232     // allocate the real USERD memory.
1233     //
1234     if (IS_VIRTUAL(pGpu))
1235     {
1236         // Force page size to 4KB to match host phys access
1237         memmgrSetMemDescPageSize_HAL(pGpu, pMemoryManager,
1238                                      pUserdInfo->userdPhysDesc[currentGpuInst],
1239                                      AT_GPU, RM_ATTR_PAGE_SIZE_4KB);
1240         if (bFifoFirstInit)
1241         {
1242             pUserdInfo->userdBar1MapStartOffset = kfifoGetUserdBar1MapStartOffset_HAL(pGpu, pKernelFifo);
1243         }
1244     }
1245     else
1246     {
1247         status = memdescAlloc(pUserdInfo->userdPhysDesc[currentGpuInst]);
1248         if (status != NV_OK)
1249         {
1250             NV_PRINTF(LEVEL_ERROR,
1251                       "Could not allocate USERD for %x #channels\n",
1252                       numChannels);
1253             DBG_BREAKPOINT();
1254             goto fail;
1255         }
1256 
1257         // Force page size to 4KB in broadcast to match host phys access
1258         memmgrSetMemDescPageSize_HAL(pGpu, pMemoryManager, pUserdInfo->userdPhysDesc[currentGpuInst],
1259                                      AT_GPU, RM_ATTR_PAGE_SIZE_4KB);
1260 
1261         //
1262         // If coherent link is available, just get a coherent mapping to USERD and
1263         // lie about the BAR1 offset, since we are not using BAR1
1264         // TODO: Make these bar1 offsets unicast on each gpu as well
1265         //
1266         if (bCoherentCpuMapping &&
1267             (memdescGetAddressSpace(pUserdInfo->userdPhysDesc[currentGpuInst]) == ADDR_FBMEM))
1268         {
1269 
1270             NV_PRINTF(LEVEL_INFO, "Mapping USERD with coherent link.\n");
1271             NV_ASSERT(pGpu->getProperty(pGpu, PDB_PROP_GPU_ATS_SUPPORTED));
1272             NV_ASSERT(pUserdInfo->userdPhysDesc[currentGpuInst]->_flags & MEMDESC_FLAGS_PHYSICALLY_CONTIGUOUS);
1273 
1274             if (bFifoFirstInit)
1275             {
1276                 pUserdInfo->userdBar1MapStartOffset =  pUserdInfo->userdPhysDesc[currentGpuInst]->_pteArray[0] +
1277                                                        pUserdInfo->userdPhysDesc[currentGpuInst]->PteAdjust;
1278             }
1279         }
1280         else
1281         {
1282             // vGpu may boot with partitioning enabled but that's not true for host RM
1283             if ((pKernelMIGManager != NULL) && kmigmgrIsMIGMemPartitioningEnabled(pGpu, pKernelMIGManager))
1284             {
1285                 status = NV_ERR_INVALID_STATE;
1286                 NV_PRINTF(LEVEL_ERROR, "Pre-allocated USERD is not supported with MIG\n");
1287                 DBG_BREAKPOINT();
1288                 goto fail;
1289             }
1290             // Now BAR1 map it
1291             status = kbusMapFbAperture_HAL(pGpu, pKernelBus, pUserdInfo->userdPhysDesc[currentGpuInst], 0,
1292                                            &pUserdInfo->userdBar1MapStartOffset,
1293                                            &temp, mapFlags | BUS_MAP_FB_FLAGS_PRE_INIT, NV01_NULL_OBJECT);
1294         }
1295 
1296         if (status != NV_OK)
1297         {
1298             NV_PRINTF(LEVEL_ERROR, "Could not map USERD to BAR1\n");
1299             DBG_BREAKPOINT();
1300             goto fail;
1301         }
1302 
1303         // Add current GPU to list of GPUs referencing pFifo userD bar1
1304         pUserdInfo->userdBar1RefMask |= NVBIT(pGpu->gpuInstance);
1305     }
1306 
1307     if (bFifoFirstInit)
1308     {
1309         pUserdInfo->userdBar1MapSize = NvU64_LO32(temp);
1310 
1311         if (bCoherentCpuMapping &&
1312             (memdescGetAddressSpace(pUserdInfo->userdPhysDesc[currentGpuInst]) == ADDR_FBMEM))
1313         {
1314             pUserdInfo->userdBar1CpuPtr = kbusMapCoherentCpuMapping_HAL(pGpu, pKernelBus,
1315                                              pUserdInfo->userdPhysDesc[currentGpuInst]);
1316             status = pUserdInfo->userdBar1CpuPtr == NULL ? NV_ERR_GENERIC : NV_OK;
1317         }
1318         else
1319         {
1320             // Cpu map the BAR1 snoop range
1321             status = osMapPciMemoryKernelOld(pGpu, gpumgrGetGpuPhysFbAddr(pGpu) +
1322                                              pUserdInfo->userdBar1MapStartOffset,
1323                                              pUserdInfo->userdBar1MapSize,
1324                                              NV_PROTECT_READ_WRITE,
1325                                              (void**)&pUserdInfo->userdBar1CpuPtr,
1326                                              NV_MEMORY_UNCACHED);
1327         }
1328 
1329         if ((pUserdInfo->userdBar1CpuPtr == NULL) && (status != NV_OK))
1330         {
1331             NV_PRINTF(LEVEL_ERROR, "Could not cpu map BAR1 snoop range\n");
1332             DBG_BREAKPOINT();
1333             goto fail;
1334         }
1335     }
1336 
1337     NV_PRINTF(LEVEL_INFO,
1338               "USERD Preallocated phys @ 0x%llx bar1 offset @ 0x%llx of size 0x%x\n",
1339               memdescGetPhysAddr(pUserdInfo->userdPhysDesc[currentGpuInst], AT_GPU, 0),
1340               pUserdInfo->userdBar1MapStartOffset,
1341               pUserdInfo->userdBar1MapSize);
1342 
1343     return status;
1344 
1345 fail:
1346     kfifoFreePreAllocUserD_HAL(pGpu, pKernelFifo);
1347 
1348     return status;
1349 }
1350 
1351 /**
1352  * @brief Free the pre-allocated BAR1 userd space
1353  *
1354  * @param   pGpu
1355  * @param   pKernelFifo
1358  */
1359 void
1360 kfifoFreePreAllocUserD_GM107
1361 (
1362     OBJGPU     *pGpu,
1363     KernelFifo *pKernelFifo
1364 )
1365 {
1366     OBJGPU            *pParentGpu           = gpumgrGetParentGPU(pGpu);
1367     KernelBus         *pKernelBus           = GPU_GET_KERNEL_BUS(pGpu);
1368     NvU32              currentGpuInst       = gpumgrGetSubDeviceInstanceFromGpu(pGpu);
1369     KernelFifo        *pParentKernelFifo    = GPU_GET_KERNEL_FIFO(pParentGpu);
1370     PREALLOCATED_USERD_INFO *pUserdInfo     = &pParentKernelFifo->userdInfo;
1371     NvBool             bCoherentCpuMapping  = pGpu->getProperty(pGpu, PDB_PROP_GPU_COHERENT_CPU_MAPPING) &&
1372         (memdescGetAddressSpace(pUserdInfo->userdPhysDesc[currentGpuInst]) == ADDR_FBMEM);
1373 
1374     // We don't support RM allocated USERD for vGPU guest with SRIOV
1375     if (IS_VIRTUAL_WITH_SRIOV(pGpu))
1376     {
1377         return;
1378     }
1379 
1380     if (gpumgrGetBcEnabledStatus(pGpu))
1381     {
1382         DBG_BREAKPOINT();
1383     }
1384 
1385     if (bCoherentCpuMapping)
1386     {
1387         NV_PRINTF(LEVEL_INFO, "Unmapping USERD from NVLINK.\n");
1388         NV_ASSERT(pGpu->getProperty(pGpu, PDB_PROP_GPU_ATS_SUPPORTED));
1389     }
1390 
1391     if (pUserdInfo->userdBar1CpuPtr)
1392     {
1393         if (bCoherentCpuMapping)
1394         {
1395             kbusUnmapCoherentCpuMapping_HAL(pGpu, pKernelBus,
1396                 pUserdInfo->userdPhysDesc[currentGpuInst]);
1397         }
1398         else
1399         {
1400             osUnmapPciMemoryKernelOld(pGpu, pUserdInfo->userdBar1CpuPtr);
1401         }
1402 
1403         pUserdInfo->userdBar1CpuPtr = NULL;
1404     }
1405 
1406     if (pUserdInfo->userdBar1MapSize)
1407     {
1408         if ((!IS_VIRTUAL(pGpu)) && (!bCoherentCpuMapping))
1409         {
1410             if ((pUserdInfo->userdBar1RefMask & NVBIT(pGpu->gpuInstance)) != 0)
1411             {
1412                 //
1413                 // Unmap in UC for each GPU with a pKernelFifo userd
1414                 // reference mapped through bar1
1415                 //
1416                 kbusUnmapFbAperture_HAL(pGpu, pKernelBus,
1417                                         pUserdInfo->userdPhysDesc[currentGpuInst],
1418                                         pUserdInfo->userdBar1MapStartOffset,
1419                                         pUserdInfo->userdBar1MapSize,
1420                                         BUS_MAP_FB_FLAGS_MAP_UNICAST | BUS_MAP_FB_FLAGS_PRE_INIT);
1421                 pUserdInfo->userdBar1RefMask &= (~NVBIT(pGpu->gpuInstance));
1422             }
1423 
1424         }
1425     }
1426 
1427     // Unallocated memdescFrees are allowed.
1428     memdescFree(pUserdInfo->userdPhysDesc[currentGpuInst]);
1429     memdescDestroy(pUserdInfo->userdPhysDesc[currentGpuInst]);
1430     pUserdInfo->userdPhysDesc[currentGpuInst] = NULL;
1431     NV_PRINTF(LEVEL_INFO, "Freeing preallocated USERD phys and bar1 range\n");
1432 }
1433 
1434 //
1435 // Returns the BAR1 offset and size of the entire USERD mapping.
1436 //
1437 NV_STATUS
1438 kfifoGetUserdBar1MapInfo_GM107
1439 (
1440     OBJGPU     *pGpu,
1441     KernelFifo *pKernelFifo,
1442     NvU64      *pBar1MapOffset,
1443     NvU32      *pBar1MapSize
1444 )
1445 {
1446     const PREALLOCATED_USERD_INFO *pUserdInfo = kfifoGetPreallocatedUserdInfo(pKernelFifo);
1447 
1448     // We don't support RM allocated USERD in vGPU guest with SRIOV
1449     if (IS_VIRTUAL_WITH_SRIOV(pGpu))
1450     {
1451         *pBar1MapOffset = 0;
1452         *pBar1MapSize   = 0;
1453 
1454         return NV_OK;
1455     }
1456 
    if (pUserdInfo->userdBar1MapSize == 0)
    {
        NV_PRINTF(LEVEL_ERROR, "BAR1 map of USERD has not been set up yet\n");
        NV_ASSERT(0);
        return NV_ERR_GENERIC;
1462     }
1463 
1464     *pBar1MapOffset = pUserdInfo->userdBar1MapStartOffset;
1465     *pBar1MapSize   = pUserdInfo->userdBar1MapSize;
1466 
1467     return NV_OK;
1468 }
1469 
1470 /**
1471  * @brief Determines the aperture and attribute of memory where userd is located.
1472  *
1473  * @param pKernelFifo[in]
1474  * @param pUserdAperture[out]
1475  * @param pUserdAttribute[out]
1476  *
1477  * @returns NV_STATUS
1478  */
1479 NV_STATUS
1480 kfifoGetUserdLocation_GM107
1481 (
1482     KernelFifo *pKernelFifo,
1483     NvU32 *pUserdAperture,
1484     NvU32 *pUserdAttribute
1485 )
1486 {
1487     const PREALLOCATED_USERD_INFO *pUserdInfo = kfifoGetPreallocatedUserdInfo(pKernelFifo);
1488 
1489     NV_ASSERT_OR_RETURN(pUserdAperture != NULL && pUserdAttribute != NULL,
1490                         NV_ERR_INVALID_POINTER);
1491 
1492     *pUserdAperture = pUserdInfo->userdAperture;
1493     *pUserdAttribute = pUserdInfo->userdAttr;
1494 
1495     return NV_OK;
1496 }
1497 
1498 /**
1499  * @brief Returns size/address shift for USERD's BAR1 mapping
1500  *
1501  * @param pKernelFifo
1502  * @param[out] pSize populated with USERD size if non-null
1503  * @param[out] pAddrShift populated with USERD address shift if non-null
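 *
 * For illustration, callers typically combine both outputs as in
 * kfifoPreAllocUserD_GM107 (sketch only):
 *
 * @code
 *     NvU32 userdSize, userdShift;
 *     kfifoGetUserdSizeAlign_HAL(pKernelFifo, &userdSize, &userdShift);
 *     // total USERD size: userdSize * numChannels,
 *     // alignment: 1ULL << userdShift
 * @endcode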
1504  */
1505 void
1506 kfifoGetUserdSizeAlign_GM107
1507 (
1508     KernelFifo *pKernelFifo,
1509     NvU32 *pSize,
1510     NvU32 *pAddrShift
1511 )
1512 {
1513     if (pSize != NULL)
1514         *pSize = 1<<NV_RAMUSERD_BASE_SHIFT;
1515     if (pAddrShift != NULL)
1516         *pAddrShift = NV_RAMUSERD_BASE_SHIFT;
1517 }
1518 
1519 /**
1520  * @brief Determines if an engine is a host engine and if so, if it is present.
1521  *
1522  * @param pGpu
1523  * @param pKernelFifo
1524  * @param[in] engDesc
1525  * @param[out]  pPresent NV_TRUE if the engine is present, NV_FALSE if not.
1526  *
 * @return NV_OK if host could determine the engine's presence, error otherwise
1528  */
1529 NV_STATUS
1530 kfifoCheckEngine_GM107
1531 (
1532     OBJGPU     *pGpu,
1533     KernelFifo *pKernelFifo,
1534     NvU32       engDesc,
1535     NvBool     *pPresent
1536 )
1537 {
1538     NvU32 bEschedDriven = NV_FALSE;
1539     NV_STATUS status;
1540 
1541     status = kfifoEngineInfoXlate_HAL(pGpu, pKernelFifo,
1542         ENGINE_INFO_TYPE_ENG_DESC,              engDesc,
1543         ENGINE_INFO_TYPE_IS_HOST_DRIVEN_ENGINE, &bEschedDriven);
1544 
1545     *pPresent = (status == NV_OK) && bEschedDriven;
1546 
1547     return NV_OK;
1548 }
1549