/*
 * SPDX-FileCopyrightText: Copyright (c) 2021-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 * SPDX-License-Identifier: MIT
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 */

#include "kernel/gpu/fifo/kernel_fifo.h"
#include "kernel/gpu/fifo/kernel_channel.h"
#include "kernel/gpu/fifo/kernel_channel_group.h"
#include "kernel/gpu/fifo/kernel_channel_group_api.h"
#include "kernel/gpu/fifo/kernel_sched_mgr.h"
#include "gpu/mem_mgr/mem_mgr.h"
#include "gpu/mmu/kern_gmmu.h"

#include "nvRmReg.h"

#include "vgpu/rpc.h"
#include "gpu/bus/kern_bus.h"

#include "published/maxwell/gm107/dev_ram.h"
#include "published/maxwell/gm107/dev_mmu.h"


static inline NvBool
_isEngineInfoTypeValidForOnlyHostDriven(ENGINE_INFO_TYPE type);


/*! Construct kfifo object */
NV_STATUS
kfifoConstructHal_GM107
(
    OBJGPU     *pGpu,
    KernelFifo *pKernelFifo
)
{
    NV_STATUS status;
    PREALLOCATED_USERD_INFO *pUserdInfo = &pKernelFifo->userdInfo;

    if (FLD_TEST_DRF(_REG_STR_RM, _INST_VPR, _INSTBLK, _TRUE, pGpu->instVprOverrides))
    {
        pKernelFifo->bInstProtectedMem = NV_TRUE;
    }

    // Instance Memory
    switch (DRF_VAL( _REG_STR_RM, _INST_LOC, _INSTBLK, pGpu->instLocOverrides))
    {
        default:
        case NV_REG_STR_RM_INST_LOC_INSTBLK_DEFAULT:
            if (kfifoIsMixedInstmemApertureDefAllowed(pKernelFifo))
                pKernelFifo->pInstAllocList = ADDRLIST_FBMEM_PREFERRED;
            else
                pKernelFifo->pInstAllocList = ADDRLIST_FBMEM_ONLY;

            pKernelFifo->InstAttr = NV_MEMORY_UNCACHED;
            break;
        case NV_REG_STR_RM_INST_LOC_INSTBLK_VID:
            pKernelFifo->pInstAllocList = ADDRLIST_FBMEM_ONLY;
            pKernelFifo->InstAttr = NV_MEMORY_UNCACHED;
            break;
        case NV_REG_STR_RM_INST_LOC_INSTBLK_COH:
            pKernelFifo->pInstAllocList = ADDRLIST_SYSMEM_ONLY;
            pKernelFifo->InstAttr = NV_MEMORY_CACHED;
            break;
        case NV_REG_STR_RM_INST_LOC_INSTBLK_NCOH:
            pKernelFifo->pInstAllocList = ADDRLIST_SYSMEM_ONLY;
            pKernelFifo->InstAttr = NV_MEMORY_UNCACHED;
            break;
    }

    // USERD
    pUserdInfo->userdAperture = ADDR_FBMEM;
    pUserdInfo->userdAttr     = NV_MEMORY_WRITECOMBINED;
    memdescOverrideInstLoc(DRF_VAL( _REG_STR_RM, _INST_LOC, _USERD, pGpu->instLocOverrides),
                           "USERD",
                           &pUserdInfo->userdAperture,
                           &pUserdInfo->userdAttr);

    // Create child object KernelSchedMgr
    if (kfifoIsSchedSupported(pKernelFifo))
    {
        pKernelFifo->pKernelSchedMgr = NULL;
        status = objCreate(&pKernelFifo->pKernelSchedMgr, pKernelFifo, KernelSchedMgr);
        if (status != NV_OK)
        {
            pKernelFifo->pKernelSchedMgr = NULL;
            return status;
        }
        kschedmgrConstructPolicy(pKernelFifo->pKernelSchedMgr, pGpu);
    }

    return NV_OK;
}

/**
 * @brief Allocate a page for the dummy page directory
 *
 * On GV100, the PDB of a freed subcontext is pointed at a dummy page
 * directory instead of being set to NULL. Here we allocate the page
 * backing that dummy page directory.
 */
static NV_STATUS
_kfifoAllocDummyPage
(
    OBJGPU     *pGpu,
    KernelFifo *pKernelFifo
)
{
    NV_STATUS status   = NV_OK;
    NvU32     flags    = MEMDESC_FLAGS_NONE;
    NvBool    bBcState = gpumgrGetBcEnabledStatus(pGpu);

    if (bBcState)
    {
        flags |= MEMDESC_FLAGS_ALLOC_PER_SUBDEVICE;
    }

    // Using instance block attributes to allocate dummy page
    status = memdescCreate(&pKernelFifo->pDummyPageMemDesc, pGpu,
                           RM_PAGE_SIZE,
                           0,
                           NV_FALSE,
                           ADDR_UNKNOWN,
                           pKernelFifo->InstAttr,
                           flags);
    if (status != NV_OK)
    {
        NV_PRINTF(LEVEL_ERROR, "Could not memdescCreate for dummy page\n");
        DBG_BREAKPOINT();
        return status;
    }

    memdescTagAllocList(status, NV_FB_ALLOC_RM_INTERNAL_OWNER_UNNAMED_TAG_80,
                        pKernelFifo->pDummyPageMemDesc, pKernelFifo->pInstAllocList);
    if (status != NV_OK)
    {
        NV_PRINTF(LEVEL_ERROR, "Could not allocate dummy page\n");
        DBG_BREAKPOINT();
        memdescDestroy(pKernelFifo->pDummyPageMemDesc);
        pKernelFifo->pDummyPageMemDesc = NULL;
    }

    return status;
}

/**
 * @brief Free the page used for the dummy page directory
 */
static void
_kfifoFreeDummyPage
(
    OBJGPU     *pGpu,
    KernelFifo *pKernelFifo
)
{
    // Free dummy page memdesc
    memdescFree(pKernelFifo->pDummyPageMemDesc);
    memdescDestroy(pKernelFifo->pDummyPageMemDesc);
    pKernelFifo->pDummyPageMemDesc = NULL;
}

NV_STATUS
kfifoStatePostLoad_GM107
(
    OBJGPU     *pGpu,
    KernelFifo *pKernelFifo,
    NvU32       flags
)
{
    NV_STATUS status = NV_OK;
    const PREALLOCATED_USERD_INFO *pUserdInfo = kfifoGetPreallocatedUserdInfo(pKernelFifo);

    if (!(flags & GPU_STATE_FLAGS_PRESERVING))
    {
        // Prealloc USERD
        NV_ASSERT_OK_OR_RETURN(kfifoPreAllocUserD_HAL(pGpu, pKernelFifo));

        if (gpumgrIsParentGPU(pGpu))
        {
            if (kfifoIsZombieSubctxWarEnabled(pKernelFifo))
            {
                NvBool bBcState = gpumgrGetBcEnabledStatus(pGpu);
                gpumgrSetBcEnabledStatus(pGpu, NV_TRUE);

                status = _kfifoAllocDummyPage(pGpu, pKernelFifo);
                if (status != NV_OK)
                {
                    NV_PRINTF(LEVEL_ERROR,
                              "Failed to allocate dummy page for zombie subcontexts\n");
                    DBG_BREAKPOINT();
                    gpumgrSetBcEnabledStatus(pGpu, bBcState);
                    return status;
                }

                if (IS_VIRTUAL_WITH_SRIOV(pGpu))
                {
                    NV2080_CTRL_FIFO_SETUP_VF_ZOMBIE_SUBCTX_PDB_PARAMS params;
                    MEMORY_DESCRIPTOR *pDummyPageMemDesc = kfifoGetDummyPageMemDesc(pKernelFifo);

                    portMemSet(&params, 0, sizeof(params));

                    params.base = memdescGetPhysAddr(pDummyPageMemDesc, AT_GPU, 0);
                    params.size = pDummyPageMemDesc->Size;
                    params.addressSpace = memdescGetAddressSpace(pDummyPageMemDesc);
                    params.cacheAttrib = memdescGetCpuCacheAttrib(pDummyPageMemDesc);

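                    //
                    // Hand the dummy page's location to the host RM so it can
                    // point this VF's freed (zombie) subcontext PDBs at it
                    // (descriptive note added here; the control call below is
                    // unchanged).
                    //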
                    NV_RM_RPC_CONTROL(pGpu,
                                      pGpu->hDefaultClientShare,
                                      pGpu->hDefaultClientShareSubDevice,
                                      NV2080_CTRL_CMD_FIFO_SETUP_VF_ZOMBIE_SUBCTX_PDB,
                                      &params,
                                      sizeof(params),
                                      status);
                    if (status != NV_OK)
                    {
                        NV_PRINTF(LEVEL_ERROR,
                                  "RM control call to setup zombie subctx failed, status 0x%x\n", status);
                        DBG_BREAKPOINT();
                        return status;
                    }
                }

                gpumgrSetBcEnabledStatus(pGpu, bBcState);
            }
        }
    }

    // Since we have successfully set up the BAR1 USERD reserved memory,
    // let's inform HW (only if snooping is not disabled).
    kfifoSetupBar1UserdSnoop_HAL(pGpu, pKernelFifo, NV_TRUE, pUserdInfo->userdBar1MapStartOffset);

    if (IS_GSP_CLIENT(pGpu) || IS_VIRTUAL(pGpu))
    {
        status = kfifoTriggerPostSchedulingEnableCallback(pGpu, pKernelFifo);
        if (status != NV_OK)
            return status;
    }

    return status;
}

NV_STATUS
kfifoStatePreUnload_GM107
(
    OBJGPU     *pGpu,
    KernelFifo *pKernelFifo,
    NvU32       flags
)
{
    NV_STATUS status = NV_OK;
    NvU32     sliLoopReentrancy;

    NV_PRINTF(LEVEL_INFO, "start\n");

    if (!(flags & GPU_STATE_FLAGS_PRESERVING) && gpumgrIsParentGPU(pGpu))
    {
        NvBool bBcState = NV_FALSE;

        if (kfifoIsZombieSubctxWarEnabled(pKernelFifo))
        {
            _kfifoFreeDummyPage(pGpu, pKernelFifo);
        }

        // Notify the handlers that the channel will soon be disabled.
        status = kfifoTriggerPreSchedulingDisableCallback(pGpu, pKernelFifo);

        // Enable broadcast on SLI
        bBcState = gpumgrGetBcEnabledStatus(pGpu);
        gpumgrSetBcEnabledStatus(pGpu, NV_TRUE);

        // As we have forced SLI broadcast mode here, temporarily reset the reentrancy count
        sliLoopReentrancy = gpumgrSLILoopReentrancyPop(pGpu);

        // Ask host to stop snooping
        kfifoSetupBar1UserdSnoop_HAL(pGpu, pKernelFifo, NV_FALSE, 0);

        // Restore the reentrancy count
        gpumgrSLILoopReentrancyPush(pGpu, sliLoopReentrancy);

        // Restore prior broadcast state
        gpumgrSetBcEnabledStatus(pGpu, bBcState);
    }

    if (!(flags & GPU_STATE_FLAGS_PRESERVING))
    {
        // Free preallocated userd
        kfifoFreePreAllocUserD_HAL(pGpu, pKernelFifo);
    }

    return status;
}

/**
 * Returns the default timeslice (in us) for a channelgroup as defined by hardware.
 */
NvU64
kfifoChannelGroupGetDefaultTimeslice_GM107
(
    KernelFifo *pKernelFifo
)
{
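    //
    // Sketch of the arithmetic, assuming the usual dev_ram.h encodings
    // (TIMEOUT_128 == 128, SCALE_3 == 3): the default works out to
    // 128 << 3 = 1024 us.
    //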
    return NV_RAMRL_ENTRY_TIMESLICE_TIMEOUT_128 << NV_RAMRL_ENTRY_TIMESLICE_SCALE_3;
}

/*! Get size and alignment requirements for instance memory */
NV_STATUS
kfifoGetInstMemInfo_GM107
(
    KernelFifo *pKernelFifo,
    NvU64      *pSize,
    NvU64      *pAlignment,
    NvBool     *pbInstProtectedMem,
    NvU32      *pInstAttr,
    const NV_ADDRESS_SPACE **ppInstAllocList
)
{
    NV_ASSERT_OR_RETURN(pSize != NULL, NV_ERR_INVALID_ARGUMENT);
    NV_ASSERT_OR_RETURN(pAlignment != NULL, NV_ERR_INVALID_ARGUMENT);

    *pSize = NV_RAMIN_ALLOC_SIZE;
    *pAlignment = 1 << NV_RAMIN_BASE_SHIFT;

    if (pbInstProtectedMem != NULL)
        *pbInstProtectedMem = pKernelFifo->bInstProtectedMem;

    if (pInstAttr != NULL)
        *pInstAttr = pKernelFifo->InstAttr;

    if (ppInstAllocList != NULL)
        *ppInstAllocList = pKernelFifo->pInstAllocList;

    return NV_OK;
}

/*! Gets instance block size and offset align for instance memory */
void
kfifoGetInstBlkSizeAlign_GM107
(
    KernelFifo *pKernelFifo,
    NvU32      *pSize,
    NvU32      *pShift
)
{
    *pSize  = NV_RAMIN_ALLOC_SIZE;
    *pShift = NV_RAMIN_BASE_SHIFT;

    return;
}

/*!
 * @brief Gets the default runlist id to use for channels allocated with no engines on them.
 *
 * @param[in] pGpu
 * @param[in] pKernelFifo
 * @param[in] rmEngineType - Engine type of the channel to retrieve default runlist id for
 */
NvU32
kfifoGetDefaultRunlist_GM107
(
    OBJGPU         *pGpu,
    KernelFifo     *pKernelFifo,
    RM_ENGINE_TYPE  rmEngineType
)
{
    NvU32 runlistId = INVALID_RUNLIST_ID;
    ENGDESCRIPTOR engDesc = ENG_GR(0);

    if (RM_ENGINE_TYPE_IS_VALID(rmEngineType))
    {
        // if translation fails, default is ENG_GR(0)
        NV_ASSERT_OK(
            kfifoEngineInfoXlate_HAL(pGpu, pKernelFifo,
                                     ENGINE_INFO_TYPE_RM_ENGINE_TYPE, (NvU32)rmEngineType,
                                     ENGINE_INFO_TYPE_ENG_DESC, &engDesc));
    }

    // if translation fails, default is INVALID_RUNLIST_ID
    if (kfifoEngineInfoXlate_HAL(pGpu, pKernelFifo,
                                 ENGINE_INFO_TYPE_ENG_DESC,
                                 engDesc,
                                 ENGINE_INFO_TYPE_RUNLIST,
                                 &runlistId) != NV_OK)
    {
        runlistId = INVALID_RUNLIST_ID;
    }

    return runlistId;
}

/**
 * @brief Programs a channel's runlist id to a given value
 *
 * Verifies that the requested engine is valid based on the current channel's
 * state. Does not bind the channel to the runlist in SW or HW. @ref kfifoRunlistSetId.
 *
 * @param pGpu
 * @param pKernelFifo
 * @param[in/out] pKernelChannel
 * @param[in] runlistId runlist ID to use
 */
NV_STATUS
kfifoRunlistSetId_GM107
(
    OBJGPU        *pGpu,
    KernelFifo    *pKernelFifo,
    KernelChannel *pKernelChannel,
    NvU32          runlistId
)
{
    NV_ASSERT_OR_RETURN(pKernelChannel != NULL, NV_ERR_INVALID_ARGUMENT);

    if ((runlistId != kchannelGetRunlistId(pKernelChannel)) &&
        kchannelIsRunlistSet(pGpu, pKernelChannel))
    {
        NV_PRINTF(LEVEL_ERROR,
                  "Channel has already been assigned a runlist incompatible with this "
                  "engine (requested: 0x%x current: 0x%x).\n", runlistId,
                  kchannelGetRunlistId(pKernelChannel));
        return NV_ERR_INVALID_STATE;
    }

    //
    // For a TSG channel, the runlist should support TSGs.
    // We relax this requirement if the channel is a TSG wrapped by RM;
    // in that case, RM won't write the TSG header into the runlist.
    //
    if (!kfifoRunlistIsTsgHeaderSupported_HAL(pGpu, pKernelFifo, runlistId) &&
        (pKernelChannel->pKernelChannelGroupApi != NULL) &&
        !pKernelChannel->pKernelChannelGroupApi->pKernelChannelGroup->bAllocatedByRm)
    {
        NV_PRINTF(LEVEL_ERROR, "Runlist does not support TSGs\n");
        return NV_ERR_INVALID_STATE;
    }

    // To set a channel's runlistId, first set it on the TSG
    if (pKernelChannel->pKernelChannelGroupApi != NULL)
    {
        // Change TSG runlist if channel is the only one
        if (pKernelChannel->pKernelChannelGroupApi->pKernelChannelGroup->chanCount == 1 ||
            !pKernelChannel->pKernelChannelGroupApi->pKernelChannelGroup->bRunlistAssigned)
        {
            pKernelChannel->pKernelChannelGroupApi->pKernelChannelGroup->runlistId = runlistId;
            pKernelChannel->pKernelChannelGroupApi->pKernelChannelGroup->bRunlistAssigned = NV_TRUE;
        }
        else
        {
            NV_ASSERT_OR_RETURN(pKernelChannel->pKernelChannelGroupApi->pKernelChannelGroup->runlistId ==
                                    runlistId,
                                NV_ERR_INVALID_STATE);
        }
    }

    kchannelSetRunlistId(pKernelChannel, runlistId);
    kchannelSetRunlistSet(pGpu, pKernelChannel, NV_TRUE);
    return NV_OK;
}

/**
 * @brief Programs a channel's runlist id given the engine tag
 *
 * Verifies that the requested engine is valid based on the current channel's
 * state. Does not bind the channel to the runlist in SW or HW. @ref kfifoRunlistSetIdByEngine.
 *
 * @param pGpu
 * @param pKernelFifo
 * @param[in/out] pKernelChannel
 * @param[in] engDesc
 */
NV_STATUS
kfifoRunlistSetIdByEngine_GM107
(
    OBJGPU        *pGpu,
    KernelFifo    *pKernelFifo,
    KernelChannel *pKernelChannel,
    NvU32          engDesc
)
{
    NvU32 runlistId;
    NV_STATUS status;
    NvU32 subctxType = 0;

    NV_ASSERT_OR_RETURN(pKernelChannel != NULL, NV_ERR_INVALID_ARGUMENT);

    kfifoGetSubctxType_HAL(pGpu, pKernelFifo, pKernelChannel, &subctxType);

    if (!kfifoValidateEngineAndRunqueue_HAL(pGpu, pKernelFifo, engDesc, kchannelGetRunqueue(pKernelChannel)))
        return NV_ERR_INVALID_ARGUMENT;

    if (!kfifoValidateEngineAndSubctxType_HAL(pGpu, pKernelFifo, engDesc, subctxType))
        return NV_ERR_INVALID_ARGUMENT;

    //
    // SW objects can go on any runlist so we defer committing of runlist ID to
    // scheduling or another object's allocation.
    //
    if ((engDesc == ENG_SW) || (engDesc == ENG_BUS))
        return NV_OK;

    NV_ASSERT_OK_OR_RETURN(kfifoEngineInfoXlate_HAL(pGpu, pKernelFifo, ENGINE_INFO_TYPE_ENG_DESC,
                                                    engDesc, ENGINE_INFO_TYPE_RUNLIST, &runlistId));

    status = kfifoRunlistSetId_HAL(pGpu, pKernelFifo, pKernelChannel, runlistId);
    if (status != NV_OK)
    {
        NV_PRINTF(LEVEL_ERROR, "Unable to program runlist for %s\n",
                  kfifoGetEngineName_HAL(pKernelFifo, ENGINE_INFO_TYPE_ENG_DESC, engDesc));
    }

    return status;
}

NV_STATUS
kfifoChannelGetFifoContextMemDesc_GM107
(
    OBJGPU             *pGpu,
    KernelFifo         *pKernelFifo,
    KernelChannel      *pKernelChannel,
    FIFO_CTX            engineState,
    MEMORY_DESCRIPTOR **ppMemDesc
)
{
    FIFO_INSTANCE_BLOCK *pInstanceBlock;

    /*
     * UVM calls nvGpuOpsGetChannelInstanceMemInfo, which calls this function
     * to fetch FIFO_CTX_INST_BLOCK. Currently, UVM is supported on SRIOV
     * vGPUs only.
     */
    if (IS_VIRTUAL_WITHOUT_SRIOV(pGpu))
        return NV_OK;

    NV_ASSERT_OR_RETURN(!gpumgrGetBcEnabledStatus(pGpu), NV_ERR_INVALID_STATE);
    NV_ASSERT_OR_RETURN(pKernelChannel != NULL, NV_ERR_INVALID_CHANNEL);

    pInstanceBlock = pKernelChannel->pFifoHalData[gpumgrGetSubDeviceInstanceFromGpu(pGpu)];
    if (pInstanceBlock == NULL)
        return NV_ERR_INVALID_STATE;

    switch (engineState)
    {
        case FIFO_CTX_INST_BLOCK:
            *ppMemDesc = pInstanceBlock->pInstanceBlockDesc;
            break;

        case FIFO_CTX_RAMFC:
            *ppMemDesc = pInstanceBlock->pRamfcDesc;
            break;

        default:
            NV_PRINTF(LEVEL_ERROR,
                      "bad engineState 0x%x on engine 0x%x\n",
                      engineState, ENG_FIFO);
            DBG_BREAKPOINT();
            return NV_ERR_INVALID_ARGUMENT;
    }

    NV_ASSERT(!memdescHasSubDeviceMemDescs(*ppMemDesc));

    NV_PRINTF(LEVEL_INFO,
              "Channel %d engine 0x%x engineState 0x%x *ppMemDesc %p\n",
              kchannelGetDebugTag(pKernelChannel), ENG_FIFO, engineState, *ppMemDesc);

    return NV_OK;
}

/**
 * @brief Look up the KernelChannel associated with a given instance address/target
 *
 * @param[in]  pGpu            OBJGPU pointer
 * @param[in]  pKernelFifo     KernelFifo pointer
 * @param[in]  pInst           INST_BLOCK_DESC pointer
 * @param[out] ppKernelChannel KernelChannel pointer
 */
NV_STATUS
kfifoConvertInstToKernelChannel_GM107
(
    OBJGPU           *pGpu,
    KernelFifo       *pKernelFifo,
    INST_BLOCK_DESC  *pInst,
    KernelChannel   **ppKernelChannel
)
{
    MemoryManager       *pMemoryManager = GPU_GET_MEMORY_MANAGER(pGpu);
    KernelChannel       *pKernelChannel = NULL;
    FIFO_INSTANCE_BLOCK *pInstanceBlock;
    MEMORY_DESCRIPTOR    instMemDesc;
    NV_ADDRESS_SPACE     instAperture;
    CHANNEL_ITERATOR     chanIt;

    NV_ASSERT_OR_RETURN(pInst != NULL, NV_ERR_INVALID_ARGUMENT);
    NV_ASSERT_OR_RETURN(ppKernelChannel != NULL, NV_ERR_INVALID_ARGUMENT);
    NV_ASSERT_OR_RETURN(!gpumgrGetBcEnabledStatus(pGpu), NV_ERR_INVALID_STATE);

    *ppKernelChannel = NULL;

    switch (pInst->aperture)
    {
        case INST_BLOCK_APERTURE_SYSTEM_COHERENT_MEMORY:
        case INST_BLOCK_APERTURE_SYSTEM_NON_COHERENT_MEMORY:
            instAperture = ADDR_SYSMEM;
            break;
        case INST_BLOCK_APERTURE_VIDEO_MEMORY:
            instAperture = ADDR_FBMEM;
            break;
        default:
            NV_PRINTF(LEVEL_ERROR, "unknown inst target 0x%x\n", pInst->aperture);
            DBG_BREAKPOINT();
            return NV_ERR_INVALID_ADDRESS;
    }

    //
    // The MMU_PTE version of aperture is what the HW should always
    // report for an instance block. Compare the SW defines against
    // these values here.
    //
    VERIFY_INST_BLOCK_APERTURE(NV_MMU_PTE_APERTURE_VIDEO_MEMORY,
                               NV_MMU_PTE_APERTURE_SYSTEM_COHERENT_MEMORY,
                               NV_MMU_PTE_APERTURE_SYSTEM_NON_COHERENT_MEMORY);

    memdescCreateExisting(&instMemDesc, pGpu, NV_RAMIN_ALLOC_SIZE,
                          instAperture, NV_MEMORY_UNCACHED,
                          MEMDESC_FLAGS_OWNED_BY_CURRENT_DEVICE);

    memdescDescribe(&instMemDesc, instAperture, pInst->address, NV_RAMIN_ALLOC_SIZE);

    kfifoGetChannelIterator(pGpu, pKernelFifo, &chanIt, INVALID_RUNLIST_ID);
    while (kfifoGetNextKernelChannel(pGpu, pKernelFifo, &chanIt, &pKernelChannel) == NV_OK)
    {
        NV_ASSERT_OR_ELSE(pKernelChannel != NULL, continue);

        pInstanceBlock = pKernelChannel->pFifoHalData[gpumgrGetSubDeviceInstanceFromGpu(pGpu)];

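        //
        // Match by GFID plus the instance block's physical address and HW PTE
        // aperture encoding (descriptive note added here; the comparison below
        // is unchanged).
        //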
        if (pInstanceBlock != NULL &&
            pInstanceBlock->pInstanceBlockDesc != NULL &&
            kchannelGetGfid(pKernelChannel) == pInst->gfid &&
            memmgrComparePhysicalAddresses_HAL(pGpu, pMemoryManager,
                kgmmuGetHwPteApertureFromMemdesc(GPU_GET_KERNEL_GMMU(pGpu),
                                                 pInstanceBlock->pInstanceBlockDesc),
                memdescGetPhysAddr(pInstanceBlock->pInstanceBlockDesc,
                                   AT_GPU, 0),
                kgmmuGetHwPteApertureFromMemdesc(GPU_GET_KERNEL_GMMU(pGpu),
                                                 &instMemDesc),
                memdescGetPhysAddr(&instMemDesc, AT_GPU, 0)))
        {
            *ppKernelChannel = pKernelChannel;
            memdescDestroy(&instMemDesc);
            return NV_OK;
        }
    }

    NV_PRINTF(LEVEL_INFO,
              "No channel found for instance 0x%016llx (target 0x%x)\n",
              memdescGetPhysAddr(&instMemDesc, AT_GPU, 0),
              kgmmuGetHwPteApertureFromMemdesc(GPU_GET_KERNEL_GMMU(pGpu), &instMemDesc));
    memdescDestroy(&instMemDesc);

    return NV_ERR_INVALID_CHANNEL;
}

static inline NvBool
_isEngineInfoTypeValidForOnlyHostDriven(ENGINE_INFO_TYPE type)
{
    switch (type)
    {
        case ENGINE_INFO_TYPE_RUNLIST:
        case ENGINE_INFO_TYPE_RUNLIST_PRI_BASE:
        case ENGINE_INFO_TYPE_RUNLIST_ENGINE_ID:
        case ENGINE_INFO_TYPE_PBDMA_ID:
        case ENGINE_INFO_TYPE_CHRAM_PRI_BASE:
        case ENGINE_INFO_TYPE_FIFO_TAG:
            return NV_TRUE;
        case ENGINE_INFO_TYPE_ENG_DESC:
        case ENGINE_INFO_TYPE_RM_ENGINE_TYPE:
        case ENGINE_INFO_TYPE_MMU_FAULT_ID:
        case ENGINE_INFO_TYPE_RC_MASK:
        case ENGINE_INFO_TYPE_RESET:
        case ENGINE_INFO_TYPE_INTR:
        case ENGINE_INFO_TYPE_MC:
        case ENGINE_INFO_TYPE_DEV_TYPE_ENUM:
        case ENGINE_INFO_TYPE_INSTANCE_ID:
        case ENGINE_INFO_TYPE_IS_HOST_DRIVEN_ENGINE:
            // The bool itself is valid for non-host-driven engines too.
        case ENGINE_INFO_TYPE_INVALID:
            return NV_FALSE;
        default:
            // Ensure that this function covers every value in ENGINE_INFO_TYPE
            NV_ASSERT(0 && "check all ENGINE_INFO_TYPE are classified as host-driven or not");
            return NV_FALSE;
    }
}

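/**
 * @brief Translate one engine info type to another (e.g. ENG_DESC to RUNLIST).
 *
 * Illustrative sketch of a typical call, mirroring usage elsewhere in this
 * file (nothing here is new API; ENG_GR(0) is just an example input):
 *
 * @code
 * NvU32 runlistId;
 * NV_ASSERT_OK_OR_RETURN(
 *     kfifoEngineInfoXlate_HAL(pGpu, pKernelFifo,
 *                              ENGINE_INFO_TYPE_ENG_DESC, ENG_GR(0),
 *                              ENGINE_INFO_TYPE_RUNLIST, &runlistId));
 * @endcode
 */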
NV_STATUS
kfifoEngineInfoXlate_GM107
(
    OBJGPU           *pGpu,
    KernelFifo       *pKernelFifo,
    ENGINE_INFO_TYPE  inType,
    NvU32             inVal,
    ENGINE_INFO_TYPE  outType,
    NvU32            *pOutVal
)
{
    const ENGINE_INFO *pEngineInfo = kfifoGetEngineInfo(pKernelFifo);
    FIFO_ENGINE_LIST *pFoundInputEngine = NULL;

    NV_ASSERT_OR_RETURN(pOutVal != NULL, NV_ERR_INVALID_ARGUMENT);

    // PBDMA_ID can only be inType
    NV_ASSERT_OR_RETURN(outType != ENGINE_INFO_TYPE_PBDMA_ID,
                        NV_ERR_INVALID_ARGUMENT);

    if (pEngineInfo == NULL)
    {
        NV_ASSERT_OK_OR_RETURN(kfifoConstructEngineList_HAL(pGpu, pKernelFifo));
        pEngineInfo = kfifoGetEngineInfo(pKernelFifo);
    }
    NV_ASSERT_OR_RETURN(pEngineInfo != NULL, NV_ERR_INVALID_STATE);

    if (inType == ENGINE_INFO_TYPE_INVALID)
    {
        NV_ASSERT_OR_RETURN(inVal < pEngineInfo->engineInfoListSize,
                            NV_ERR_INVALID_ARGUMENT);
        pFoundInputEngine = &pEngineInfo->engineInfoList[inVal];
    }
    else
    {
        NvU32 i;
        for (i = 0;
             (i < pEngineInfo->engineInfoListSize) &&
             (pFoundInputEngine == NULL);
             ++i)
        {
            FIFO_ENGINE_LIST *pThisEngine = &pEngineInfo->engineInfoList[i];

            if (inType == ENGINE_INFO_TYPE_PBDMA_ID)
            {
                NvU32 j;
                for (j = 0; j < pThisEngine->numPbdmas; ++j)
                {
                    if (pThisEngine->pbdmaIds[j] == inVal)
                    {
                        pFoundInputEngine = pThisEngine;
                        break;
                    }
                }
            }
            else if (pThisEngine->engineData[inType] == inVal)
            {
                pFoundInputEngine = pThisEngine;
            }
        }
    }

    if (pFoundInputEngine == NULL)
    {
        return NV_ERR_OBJECT_NOT_FOUND;
    }

    if (_isEngineInfoTypeValidForOnlyHostDriven(outType) &&
        !pFoundInputEngine->engineData[ENGINE_INFO_TYPE_IS_HOST_DRIVEN_ENGINE])
    {
        //
        // Bug 3748452 TODO
        // Bug 3772199 TODO
        //
        // We can't easily just return an error here because hundreds of
        // callsites would fail their asserts. The above two bugs track fixing
        // all callsites, after which we can uncomment this:
        //
        // return NV_ERR_OBJECT_NOT_FOUND;
        //
        NV_PRINTF(LEVEL_ERROR,
                  "Asked for host-specific type(0x%x) for non-host engine type(0x%x),val(0x%08x)\n",
                  outType, inType, inVal);
    }

    *pOutVal = pFoundInputEngine->engineData[outType];
    return NV_OK;
}

/**
 * @brief Get the local maximum number of subctx allowed in this TSG
 */
NvU32
kfifoChannelGroupGetLocalMaxSubcontext_GM107
(
    OBJGPU             *pGpu,
    KernelFifo         *pKernelFifo,
    KernelChannelGroup *pKernelChannelGroup,
    NvBool              bLegacyMode
)
{
    // Pre-AMPERE, each channel group has the global maximum available
    return kfifoGetMaxSubcontext_HAL(pGpu, pKernelFifo, bLegacyMode);
}

void
kfifoSetupUserD_GM107
(
    OBJGPU            *pGpu,
    KernelFifo        *pKernelFifo,
    MEMORY_DESCRIPTOR *pMemDesc
)
{
    TRANSFER_SURFACE tSurf = {.pMemDesc = pMemDesc, .offset = 0};

    NV_ASSERT_OK(memmgrMemSet(GPU_GET_MEMORY_MANAGER(pGpu), &tSurf, 0,
                              NV_RAMUSERD_CHAN_SIZE, TRANSFER_FLAGS_NONE));
}
/**
 * @brief Return the number of HW engines
 *
 * Can be used to loop over all engines in the system by looping from 0
 * through the value returned by this function and then using
 * kfifoEngineInfoXlate() with an input type of ENGINE_INFO_TYPE_INVALID.
 *
 * @param[in] pGpu        OBJGPU pointer
 * @param[in] pKernelFifo KernelFifo pointer
 *
 * @returns the number of HW engines present on the chip.
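 *
 * Illustrative sketch of such a loop (mirrors usage later in this file;
 * assumes a valid pGpu/pKernelFifo pair is in hand):
 * @code
 * NvU32 i;
 * for (i = 0; i < kfifoGetNumEngines_HAL(pGpu, pKernelFifo); i++)
 * {
 *     NvU32 engDesc;
 *     NV_ASSERT_OK(kfifoEngineInfoXlate_HAL(pGpu, pKernelFifo,
 *                                           ENGINE_INFO_TYPE_INVALID, i,
 *                                           ENGINE_INFO_TYPE_ENG_DESC, &engDesc));
 *     // ... use engDesc ...
 * }
 * @endcode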
 */
NvU32
kfifoGetNumEngines_GM107
(
    OBJGPU     *pGpu,
    KernelFifo *pKernelFifo
)
{
    const ENGINE_INFO *pEngineInfo = kfifoGetEngineInfo(pKernelFifo);

    if (pEngineInfo == NULL)
    {
        NV_ASSERT_OR_RETURN(kfifoConstructEngineList_HAL(pGpu, pKernelFifo) == NV_OK, 0);

        pEngineInfo = kfifoGetEngineInfo(pKernelFifo);
        NV_ASSERT_OR_RETURN(pEngineInfo != NULL, 0);
    }

    NV_ASSERT(pEngineInfo->engineInfoListSize);

    // we don't count the SW engine entry at the end of the list
    return pEngineInfo->engineInfoListSize - 1;
}

/**
 * @brief Retrieves the name of the engine corresponding to the given @ref ENGINE_INFO_TYPE
 *
 * @param pKernelFifo
 * @param[in] inType
 * @param[in] inVal
 *
 * @returns a string
 */
const char *
kfifoGetEngineName_GM107
(
    KernelFifo       *pKernelFifo,
    ENGINE_INFO_TYPE  inType,
    NvU32             inVal
)
{
    const ENGINE_INFO *pEngineInfo = kfifoGetEngineInfo(pKernelFifo);
    NvU32 i;

    if (inType == ENGINE_INFO_TYPE_INVALID)
    {
        NV_ASSERT_OR_RETURN(inVal < pEngineInfo->engineInfoListSize, NULL);
        return pEngineInfo->engineInfoList[inVal].engineName;
    }
    for (i = 0; i < pEngineInfo->engineInfoListSize; ++i)
    {
        if (pEngineInfo->engineInfoList[i].engineData[inType] == inVal)
        {
            return pEngineInfo->engineInfoList[i].engineName;
        }
    }

    return "UNKNOWN";
}

/**
 * @brief Returns the maximum possible number of runlists.
 *
 * Returns a number which represents the limit of any runlistId-indexed
 * registers in hardware. Does not necessarily return how many runlists are
 * active. In the range of 0..kfifoGetMaxNumRunlists() there may be runlists
 * that are not used.
 *
 * @param pGpu
 * @param pKernelFifo
 */
NvU32
kfifoGetMaxNumRunlists_GM107
(
    OBJGPU     *pGpu,
    KernelFifo *pKernelFifo
)
{
    const ENGINE_INFO *pEngineInfo = kfifoGetEngineInfo(pKernelFifo);

    return pEngineInfo->maxNumRunlists;
}

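/**
 * @brief Return the PBDMA IDs serving a given engine.
 *
 * Illustrative sketch of a typical call, mirroring the partner-list code
 * below (the RM engine type argument is just an example input):
 *
 * @code
 * NvU32 *pPbdmaIds;
 * NvU32  numPbdmas;
 * NV_ASSERT_OK_OR_RETURN(kfifoGetEnginePbdmaIds_HAL(pGpu, pKernelFifo,
 *                                                   ENGINE_INFO_TYPE_RM_ENGINE_TYPE,
 *                                                   (NvU32)rmEngineType,
 *                                                   &pPbdmaIds, &numPbdmas));
 * @endcode
 */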
NV_STATUS
kfifoGetEnginePbdmaIds_GM107
(
    OBJGPU           *pGpu,
    KernelFifo       *pKernelFifo,
    ENGINE_INFO_TYPE  type,
    NvU32             val,
    NvU32           **ppPbdmaIds,
    NvU32            *pNumPbdmas
)
{
    const ENGINE_INFO *pEngineInfo = kfifoGetEngineInfo(pKernelFifo);
    NvU32 i;

    if (pEngineInfo == NULL)
    {
        NV_ASSERT_OK_OR_RETURN(kfifoConstructEngineList_HAL(pGpu, pKernelFifo));

        pEngineInfo = kfifoGetEngineInfo(pKernelFifo);
        NV_ASSERT_OR_RETURN(pEngineInfo != NULL, NV_ERR_INVALID_STATE);
    }

    if (type == ENGINE_INFO_TYPE_INVALID)
    {
        NV_ASSERT_OR_RETURN(val < pEngineInfo->engineInfoListSize, NV_ERR_INVALID_ARGUMENT);
        *ppPbdmaIds = pEngineInfo->engineInfoList[val].pbdmaIds;
        *pNumPbdmas = pEngineInfo->engineInfoList[val].numPbdmas;
        return NV_OK;
    }

    for (i = 0; i < pEngineInfo->engineInfoListSize; i++)
    {
        if (pEngineInfo->engineInfoList[i].engineData[type] == val)
        {
            *ppPbdmaIds = pEngineInfo->engineInfoList[i].pbdmaIds;
            *pNumPbdmas = pEngineInfo->engineInfoList[i].numPbdmas;
            return NV_OK;
        }
    }

    return NV_ERR_INVALID_ARGUMENT;
}

/**
 * @brief finds all engines on the same pbdma as the input
 *
 * pPartnerListParams->partnershipClassId is currently ignored.
 *
 * @param pGpu
 * @param pKernelFifo
 * @param[in/out] pPartnerListParams engineType is input; partnerList/numPartners are output
 *
 * @returns NV_OK if successful, error otherwise
 */
NV_STATUS
kfifoGetEnginePartnerList_GM107
(
    OBJGPU                                        *pGpu,
    KernelFifo                                    *pKernelFifo,
    NV2080_CTRL_GPU_GET_ENGINE_PARTNERLIST_PARAMS *pPartnerListParams
)
{
    const NvU32 numEngines = kfifoGetNumEngines_HAL(pGpu, pKernelFifo);
    NvU32 i;
    NvU32 srcRunlist;
    NvU32 runlist;
    NvU32 *pSrcPbdmaIds;
    NvU32 numSrcPbdmaIds;
    NvU32 srcPbdmaId;
    NvU32 *pPbdmaIds;
    NvU32 numPbdmaIds;
    NvU32 numClasses = 0;
    ENGDESCRIPTOR engDesc;
    RM_ENGINE_TYPE rmEngineType = gpuGetRmEngineType(pPartnerListParams->engineType);

    if (pPartnerListParams->runqueue >= kfifoGetNumRunqueues_HAL(pGpu, pKernelFifo))
        return NV_ERR_INVALID_ARGUMENT;

    NV_ASSERT_OK_OR_RETURN(kfifoEngineInfoXlate_HAL(pGpu, pKernelFifo,
                                                    ENGINE_INFO_TYPE_RM_ENGINE_TYPE,
                                                    (NvU32)rmEngineType,
                                                    ENGINE_INFO_TYPE_RUNLIST,
                                                    &srcRunlist));

    NV_ASSERT_OK_OR_RETURN(kfifoGetEnginePbdmaIds_HAL(pGpu, pKernelFifo,
                                                      ENGINE_INFO_TYPE_RM_ENGINE_TYPE,
                                                      (NvU32)rmEngineType,
                                                      &pSrcPbdmaIds,
                                                      &numSrcPbdmaIds));

    pPartnerListParams->numPartners = 0;

    // Get the PBDMA ID for the runqueue-th runqueue
    if (pPartnerListParams->runqueue >= numSrcPbdmaIds)
    {
        return NV_ERR_INVALID_ARGUMENT;
    }
    srcPbdmaId = pSrcPbdmaIds[pPartnerListParams->runqueue];

    //
    // Find all engines sharing a runlist with the input engine, add each to
    // the output array.
    //
    for (i = 0; i < numEngines; i++)
    {
        NV_ASSERT_OK_OR_RETURN(kfifoEngineInfoXlate_HAL(pGpu, pKernelFifo,
                                                        ENGINE_INFO_TYPE_INVALID, i,
                                                        ENGINE_INFO_TYPE_ENG_DESC, &engDesc));

        NV_ASSERT_OK_OR_RETURN(gpuGetClassList(pGpu, &numClasses, NULL, engDesc));
        if (numClasses == 0)
        {
            NV_PRINTF(LEVEL_INFO,
1032 "EngineID %x is not part classDB, skipping\n",
                      engDesc);
            continue;
        }

        NV_ASSERT_OK_OR_RETURN(kfifoEngineInfoXlate_HAL(pGpu, pKernelFifo,
                                                        ENGINE_INFO_TYPE_INVALID, i,
                                                        ENGINE_INFO_TYPE_RUNLIST, &runlist));

        if (runlist == srcRunlist)
        {
            NvU32 j;
            RM_ENGINE_TYPE localRmEngineType;

            NV_ASSERT_OK_OR_RETURN(kfifoGetEnginePbdmaIds_HAL(pGpu, pKernelFifo,
                                                              ENGINE_INFO_TYPE_INVALID, i,
                                                              &pPbdmaIds, &numPbdmaIds));

            for (j = 0; j < numPbdmaIds; j++)
            {
                if (pPbdmaIds[j] == srcPbdmaId)
                {
                    NV_ASSERT_OK_OR_RETURN(kfifoEngineInfoXlate_HAL(pGpu, pKernelFifo,
                                                                    ENGINE_INFO_TYPE_INVALID, i,
                                                                    ENGINE_INFO_TYPE_RM_ENGINE_TYPE, (NvU32 *)&localRmEngineType));

                    // Don't include input in output list
                    if (localRmEngineType != rmEngineType)
                    {
                        pPartnerListParams->partnerList[pPartnerListParams->numPartners++] =
                            gpuGetNv2080EngineType(localRmEngineType);

                        if (pPartnerListParams->numPartners >= NV2080_CTRL_GPU_MAX_ENGINE_PARTNERS)
                            return NV_ERR_INVALID_ARGUMENT;
                    }
                }
            }
        }
    }

    return NV_OK;
}

/**
 * @brief Check if the runlist has TSG support
 *
 * Currently, we only enable the TSG runlist for GR
 *
 * @return NV_TRUE if TSG is supported, NV_FALSE if not
 */
NvBool
kfifoRunlistIsTsgHeaderSupported_GM107
(
    OBJGPU     *pGpu,
    KernelFifo *pKernelFifo,
    NvU32       runlistId
)
{
    NvU32 tmp_runlist;

    if (kfifoEngineInfoXlate_HAL(pGpu, pKernelFifo, ENGINE_INFO_TYPE_ENG_DESC,
                                 ENG_GR(0), ENGINE_INFO_TYPE_RUNLIST, &tmp_runlist) != NV_OK)
    {
        NV_PRINTF(LEVEL_ERROR,
                  "can't find runlist ID for engine ENG_GR(0)!\n");
        NV_ASSERT(0);
        return NV_FALSE;
    }

    return tmp_runlist == runlistId;
}

/**
 * @brief Get the runlist entry size
 *
 * @param pKernelFifo
 *
 * @return size in bytes
 */
NvU32
kfifoRunlistGetEntrySize_GM107
(
    KernelFifo *pKernelFifo
)
{
    return NV_RAMRL_ENTRY_SIZE;
}

/**
 * @brief Get the runlist base shift amount
 *
 * @param pKernelFifo
 *
 * @return shift amount
 */
NvU32
kfifoRunlistGetBaseShift_GM107
(
    KernelFifo *pKernelFifo
)
{
    return NV_RAMRL_BASE_SHIFT;
}

/**
 * @brief Pre-allocate BAR1 userd space
 *
 * @param pGpu
 * @param pKernelFifo
 *
 * @returns NV_STATUS
 */
NV_STATUS
kfifoPreAllocUserD_GM107
(
    OBJGPU     *pGpu,
    KernelFifo *pKernelFifo
)
{
    OBJGPU     *pParentGpu          = gpumgrGetParentGPU(pGpu);
    KernelFifo *pParentKernelFifo   = GPU_GET_KERNEL_FIFO(pParentGpu);
    KernelBus  *pKernelBus          = GPU_GET_KERNEL_BUS(pGpu);
    NvBool      bCoherentCpuMapping = NV_FALSE;
    NV_STATUS   status              = NV_OK;
    NvU64       temp                = 0;
    NvU32       userdSize;
    NvU32       userdShift;
    NvU32       numChannels;
    NvBool      bFifoFirstInit;
    NvU32       flags    = MEMDESC_FLAGS_NONE;
    NvU32       mapFlags = BUS_MAP_FB_FLAGS_MAP_DOWNWARDS |
                           BUS_MAP_FB_FLAGS_MAP_UNICAST;
    NvU32       currentGpuInst = gpumgrGetSubDeviceInstanceFromGpu(pGpu);
    CHID_MGR   *pChidMgr       = kfifoGetChidMgr(pGpu, pKernelFifo, 0);

    MemoryManager    *pMemoryManager    = GPU_GET_MEMORY_MANAGER(pGpu);
    KernelMIGManager *pKernelMIGManager = GPU_GET_KERNEL_MIG_MANAGER(pGpu);
    PREALLOCATED_USERD_INFO *pUserdInfo = &pParentKernelFifo->userdInfo;

    NV_ASSERT_OR_RETURN(!gpumgrGetBcEnabledStatus(pGpu), NV_ERR_INVALID_STATE);

    // We don't support RM allocated USERD for vGPU guest with SRIOV
    if (IS_VIRTUAL_WITH_SRIOV(pGpu))
    {
        return NV_OK;
    }

    bCoherentCpuMapping = pGpu->getProperty(pGpu, PDB_PROP_GPU_COHERENT_CPU_MAPPING);

    if (pUserdInfo->userdBar1CpuPtr == NULL)
    {
        bFifoFirstInit = NV_TRUE;
    }
    else
    {
        mapFlags |= BUS_MAP_FB_FLAGS_MAP_OFFSET_FIXED;
        bFifoFirstInit = NV_FALSE;
    }

    //
    // Allocate the physical memory associated with the UserD if this is
    // the first GPU to init fifo. This relies on the assumption that
    // UserD is shared physmem.
    //
    if (bFifoFirstInit)
    {
        pUserdInfo->userdBar1MapStartOffset = 0;
        pUserdInfo->userdBar1MapSize        = 0;

        // This is a WAR for HW bug 600241
        if (pUserdInfo->userdAperture == ADDR_SYSMEM)
        {
            pKernelFifo->bUserdInSystemMemory = NV_TRUE;
        }
    }

    kfifoGetUserdSizeAlign_HAL(pKernelFifo, &userdSize, &userdShift);

    numChannels = kfifoChidMgrGetNumChannels(pGpu, pKernelFifo, pChidMgr);

    // Alloc USERD of size numChannels * sizeof( USERD ) for each gpu
    status = memdescCreate(&pUserdInfo->userdPhysDesc[currentGpuInst], pGpu,
                           userdSize * numChannels,
                           1ULL << userdShift,
                           NV_TRUE,
                           pUserdInfo->userdAperture,
                           pUserdInfo->userdAttr,
                           flags);
    if (status != NV_OK)
    {
        NV_PRINTF(LEVEL_ERROR,
                  "Could not memdescCreate for USERD for %x #channels\n",
                  numChannels);
        DBG_BREAKPOINT();
        goto fail;
    }
    temp = pUserdInfo->userdPhysDesc[currentGpuInst]->Size;

    //
    // For vGPU, do not allocate USERD memory in guest.
    // vGPU does all HW management in host, so host RM will
    // allocate the real USERD memory.
    //
    if (IS_VIRTUAL(pGpu))
    {
        // Force page size to 4KB to match host phys access
        memmgrSetMemDescPageSize_HAL(pGpu, pMemoryManager,
                                     pUserdInfo->userdPhysDesc[currentGpuInst],
                                     AT_GPU, RM_ATTR_PAGE_SIZE_4KB);
        if (bFifoFirstInit)
        {
            pUserdInfo->userdBar1MapStartOffset = kfifoGetUserdBar1MapStartOffset_HAL(pGpu, pKernelFifo);
        }
    }
    else
    {
        memdescTagAlloc(status, NV_FB_ALLOC_RM_INTERNAL_OWNER_UNNAMED_TAG_81,
                        pUserdInfo->userdPhysDesc[currentGpuInst]);
        if (status != NV_OK)
        {
            NV_PRINTF(LEVEL_ERROR,
                      "Could not allocate USERD for %x #channels\n",
                      numChannels);
            DBG_BREAKPOINT();
            goto fail;
        }

        // Force page size to 4KB in broadcast to match host phys access
        memmgrSetMemDescPageSize_HAL(pGpu, pMemoryManager, pUserdInfo->userdPhysDesc[currentGpuInst],
                                     AT_GPU, RM_ATTR_PAGE_SIZE_4KB);

        //
        // If coherent link is available, just get a coherent mapping to USERD and
        // lie about the BAR1 offset, since we are not using BAR1
        // TODO: Make these bar1 offsets unicast on each gpu as well
        //
        if (bCoherentCpuMapping &&
            (memdescGetAddressSpace(pUserdInfo->userdPhysDesc[currentGpuInst]) == ADDR_FBMEM))
        {
            NV_PRINTF(LEVEL_INFO, "Mapping USERD with coherent link (USERD in FBMEM).\n");
            NV_ASSERT(pGpu->getProperty(pGpu, PDB_PROP_GPU_ATS_SUPPORTED));
            NV_ASSERT(pUserdInfo->userdPhysDesc[currentGpuInst]->_flags & MEMDESC_FLAGS_PHYSICALLY_CONTIGUOUS);

            if (bFifoFirstInit)
            {
                pUserdInfo->userdBar1MapStartOffset = pUserdInfo->userdPhysDesc[currentGpuInst]->_pteArray[0] +
                                                      pUserdInfo->userdPhysDesc[currentGpuInst]->PteAdjust;
            }
        }
        //
        // get sysmem mapping for USERD if USERD is in sysmem and reflected BAR access is not allowed
        //
        else if ((bCoherentCpuMapping &&
                  memdescGetAddressSpace(pUserdInfo->userdPhysDesc[currentGpuInst]) == ADDR_SYSMEM &&
                  !kbusIsReflectedMappingAccessAllowed(pKernelBus)) ||
                 pGpu->getProperty(pGpu, PDB_PROP_GPU_BAR1_BAR2_DISABLED))
        {
            NV_PRINTF(LEVEL_INFO, "Mapping USERD with coherent link (USERD in SYSMEM).\n");

            if (bFifoFirstInit)
            {
                pUserdInfo->userdBar1MapStartOffset =
                    memdescGetPhysAddr(pUserdInfo->userdPhysDesc[currentGpuInst], AT_CPU, 0);
            }
        }
        else
        {
            // vGpu may boot with partitioning enabled but that's not true for host RM
            if ((pKernelMIGManager != NULL) && kmigmgrIsMIGMemPartitioningEnabled(pGpu, pKernelMIGManager))
            {
                status = NV_ERR_INVALID_STATE;
                NV_PRINTF(LEVEL_ERROR, "Pre-allocated USERD is not supported with MIG\n");
                DBG_BREAKPOINT();
                goto fail;
            }
            // Now BAR1 map it
            status = kbusMapFbAperture_HAL(pGpu, pKernelBus, pUserdInfo->userdPhysDesc[currentGpuInst], 0,
                                           &pUserdInfo->userdBar1MapStartOffset,
                                           &temp, mapFlags | BUS_MAP_FB_FLAGS_PRE_INIT, NULL);

            if (status != NV_OK)
            {
                NV_PRINTF(LEVEL_ERROR, "Could not map USERD to BAR1\n");
                DBG_BREAKPOINT();
                goto fail;
            }

            // Add current GPU to list of GPUs referencing pFifo userD bar1
            pUserdInfo->userdBar1RefMask |= NVBIT(pGpu->gpuInstance);
        }
    }

    if (bFifoFirstInit)
    {
        pUserdInfo->userdBar1MapSize = NvU64_LO32(temp);

        if (bCoherentCpuMapping &&
            (memdescGetAddressSpace(pUserdInfo->userdPhysDesc[currentGpuInst]) == ADDR_FBMEM))
        {
            pUserdInfo->userdBar1CpuPtr = kbusMapCoherentCpuMapping_HAL(pGpu, pKernelBus,
                                                                        pUserdInfo->userdPhysDesc[currentGpuInst]);
            status = pUserdInfo->userdBar1CpuPtr == NULL ? NV_ERR_GENERIC : NV_OK;
        }
        else if ((bCoherentCpuMapping &&
                  memdescGetAddressSpace(pUserdInfo->userdPhysDesc[currentGpuInst]) == ADDR_SYSMEM &&
                  !kbusIsReflectedMappingAccessAllowed(pKernelBus)) ||
                 pGpu->getProperty(pGpu, PDB_PROP_GPU_BAR1_BAR2_DISABLED))
        {
            status = osMapPciMemoryKernelOld(pGpu,
                                             pUserdInfo->userdBar1MapStartOffset,
                                             pUserdInfo->userdBar1MapSize,
                                             NV_PROTECT_READ_WRITE,
                                             (void**)&pUserdInfo->userdBar1CpuPtr,
                                             NV_MEMORY_UNCACHED);
        }
        else
        {
            // Cpu map the BAR1 snoop range
            status = osMapPciMemoryKernelOld(pGpu, gpumgrGetGpuPhysFbAddr(pGpu) +
                                             pUserdInfo->userdBar1MapStartOffset,
                                             pUserdInfo->userdBar1MapSize,
                                             NV_PROTECT_READ_WRITE,
                                             (void**)&pUserdInfo->userdBar1CpuPtr,
                                             NV_MEMORY_UNCACHED);
        }

        if ((pUserdInfo->userdBar1CpuPtr == NULL) && (status != NV_OK))
        {
            NV_PRINTF(LEVEL_ERROR, "Could not cpu map BAR1 snoop range\n");
            DBG_BREAKPOINT();
            goto fail;
        }
    }

    NV_PRINTF(LEVEL_INFO,
              "USERD Preallocated phys @ 0x%llx bar1 offset @ 0x%llx of size 0x%x\n",
              memdescGetPhysAddr(pUserdInfo->userdPhysDesc[currentGpuInst], AT_GPU, 0),
              pUserdInfo->userdBar1MapStartOffset,
              pUserdInfo->userdBar1MapSize);

    return status;

fail:
    kfifoFreePreAllocUserD_HAL(pGpu, pKernelFifo);

    return status;
}

/**
 * @brief Free the pre-allocated BAR1 userd space
 *
 * @param pGpu
 * @param pKernelFifo
 */
void
kfifoFreePreAllocUserD_GM107
(
    OBJGPU     *pGpu,
    KernelFifo *pKernelFifo
)
{
    OBJGPU     *pParentGpu        = gpumgrGetParentGPU(pGpu);
    KernelBus  *pKernelBus        = GPU_GET_KERNEL_BUS(pGpu);
    NvU32       currentGpuInst    = gpumgrGetSubDeviceInstanceFromGpu(pGpu);
    KernelFifo *pParentKernelFifo = GPU_GET_KERNEL_FIFO(pParentGpu);
    PREALLOCATED_USERD_INFO *pUserdInfo = &pParentKernelFifo->userdInfo;
    NvBool bCoherentCpuMapping = pGpu->getProperty(pGpu, PDB_PROP_GPU_COHERENT_CPU_MAPPING) &&
        (memdescGetAddressSpace(pUserdInfo->userdPhysDesc[currentGpuInst]) == ADDR_FBMEM);

    // We don't support RM allocated USERD for vGPU guest with SRIOV
    if (IS_VIRTUAL_WITH_SRIOV(pGpu))
    {
        return;
    }

    if (gpumgrGetBcEnabledStatus(pGpu))
    {
        DBG_BREAKPOINT();
    }

    if (bCoherentCpuMapping)
    {
        NV_PRINTF(LEVEL_INFO, "Unmapping USERD from NVLINK.\n");
        NV_ASSERT(pGpu->getProperty(pGpu, PDB_PROP_GPU_ATS_SUPPORTED));
    }

    if (pUserdInfo->userdBar1CpuPtr)
    {
        if (bCoherentCpuMapping)
        {
            kbusUnmapCoherentCpuMapping_HAL(pGpu, pKernelBus,
                                            pUserdInfo->userdPhysDesc[currentGpuInst]);
        }
        else
        {
            osUnmapPciMemoryKernelOld(pGpu, pUserdInfo->userdBar1CpuPtr);
        }

        pUserdInfo->userdBar1CpuPtr = NULL;
    }

    if (pUserdInfo->userdBar1MapSize)
    {
        if ((!IS_VIRTUAL(pGpu)) && (!bCoherentCpuMapping))
        {
            if ((pUserdInfo->userdBar1RefMask & NVBIT(pGpu->gpuInstance)) != 0)
            {
                //
                // Unmap in UC for each GPU with a pKernelFifo userd
                // reference mapped through bar1
                //
                kbusUnmapFbAperture_HAL(pGpu, pKernelBus,
                                        pUserdInfo->userdPhysDesc[currentGpuInst],
                                        pUserdInfo->userdBar1MapStartOffset,
                                        pUserdInfo->userdBar1MapSize,
                                        BUS_MAP_FB_FLAGS_MAP_UNICAST | BUS_MAP_FB_FLAGS_PRE_INIT);
                pUserdInfo->userdBar1RefMask &= (~NVBIT(pGpu->gpuInstance));
            }
        }
    }

    // Unallocated memdescFrees are allowed.
    memdescFree(pUserdInfo->userdPhysDesc[currentGpuInst]);
    memdescDestroy(pUserdInfo->userdPhysDesc[currentGpuInst]);
    pUserdInfo->userdPhysDesc[currentGpuInst] = NULL;
    NV_PRINTF(LEVEL_INFO, "Freeing preallocated USERD phys and bar1 range\n");
}

//
// Returns the BAR1 offset and size of the entire USERD mapping.
//
NV_STATUS
kfifoGetUserdBar1MapInfo_GM107
(
    OBJGPU     *pGpu,
    KernelFifo *pKernelFifo,
    NvU64      *pBar1MapOffset,
    NvU32      *pBar1MapSize
)
{
    const PREALLOCATED_USERD_INFO *pUserdInfo = kfifoGetPreallocatedUserdInfo(pKernelFifo);

    // We don't support RM allocated USERD in vGPU guest with SRIOV
    if (IS_VIRTUAL_WITH_SRIOV(pGpu))
    {
        *pBar1MapOffset = 0;
        *pBar1MapSize   = 0;

        return NV_OK;
    }

    if (pUserdInfo->userdBar1MapSize == 0)
    {
        NV_PRINTF(LEVEL_ERROR, "BAR1 map of USERD has not been set up yet\n");
        NV_ASSERT(0);
        return NV_ERR_GENERIC;
    }

    *pBar1MapOffset = pUserdInfo->userdBar1MapStartOffset;
    *pBar1MapSize   = pUserdInfo->userdBar1MapSize;

    return NV_OK;
}

/**
 * @brief Determines the aperture and attribute of the memory where USERD is located.
 *
 * @param pKernelFifo[in]
 * @param pUserdAperture[out]
 * @param pUserdAttribute[out]
 *
 * @returns NV_STATUS
 */
NV_STATUS
kfifoGetUserdLocation_GM107
(
    KernelFifo *pKernelFifo,
    NvU32      *pUserdAperture,
    NvU32      *pUserdAttribute
)
{
    const PREALLOCATED_USERD_INFO *pUserdInfo = kfifoGetPreallocatedUserdInfo(pKernelFifo);

    NV_ASSERT_OR_RETURN(pUserdAperture != NULL && pUserdAttribute != NULL,
                        NV_ERR_INVALID_POINTER);

    *pUserdAperture  = pUserdInfo->userdAperture;
    *pUserdAttribute = pUserdInfo->userdAttr;

    return NV_OK;
}

/**
 * @brief Returns size/address shift for USERD's BAR1 mapping
 *
 * @param pKernelFifo
 * @param[out] pSize       populated with USERD size if non-null
 * @param[out] pAddrShift  populated with USERD address shift if non-null
 */
void
kfifoGetUserdSizeAlign_GM107
(
    KernelFifo *pKernelFifo,
    NvU32      *pSize,
    NvU32      *pAddrShift
)
{
    if (pSize != NULL)
        *pSize = 1 << NV_RAMUSERD_BASE_SHIFT;
    if (pAddrShift != NULL)
        *pAddrShift = NV_RAMUSERD_BASE_SHIFT;
}

/**
 * @brief Determines if an engine is a host engine and, if so, whether it is present.
 *
 * @param pGpu
 * @param pKernelFifo
 * @param[in]  engDesc
 * @param[out] pPresent NV_TRUE if the engine is present, NV_FALSE if not.
 *
 * @return NV_OK if host could determine the engine's presence; error otherwise
 */
NV_STATUS
kfifoCheckEngine_GM107
(
    OBJGPU     *pGpu,
    KernelFifo *pKernelFifo,
    NvU32       engDesc,
    NvBool     *pPresent
)
{
    NvU32 bEschedDriven = NV_FALSE;
    NV_STATUS status;

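    //
    // An engine is reported present iff the engine list can translate its
    // ENG_DESC to the "is host-driven" datum and that datum is nonzero
    // (descriptive note added here; the logic below is unchanged).
    //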
    status = kfifoEngineInfoXlate_HAL(pGpu, pKernelFifo,
                                      ENGINE_INFO_TYPE_ENG_DESC, engDesc,
                                      ENGINE_INFO_TYPE_IS_HOST_DRIVEN_ENGINE, &bEschedDriven);

    *pPresent = (status == NV_OK) && bEschedDriven;

    return NV_OK;
}