1 /*
2  * SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
3  * SPDX-License-Identifier: MIT
4  *
5  * Permission is hereby granted, free of charge, to any person obtaining a
6  * copy of this software and associated documentation files (the "Software"),
7  * to deal in the Software without restriction, including without limitation
8  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
9  * and/or sell copies of the Software, and to permit persons to whom the
10  * Software is furnished to do so, subject to the following conditions:
11  *
12  * The above copyright notice and this permission notice shall be included in
13  * all copies or substantial portions of the Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
21  * DEALINGS IN THE SOFTWARE.
22  */
23 
24 #include "core/core.h"
25 #include "gpu/gpu.h"
26 #include "mem_mgr/vaspace.h"
27 #include "mem_mgr/io_vaspace.h"
28 #include "mem_mgr/gpu_vaspace.h"
29 #include "gpu/mmu/kern_gmmu.h"
30 #include "gpu/bus/kern_bus.h"
31 #include "kernel/gpu/mem_mgr/mem_mgr.h"
32 #include "kernel/gpu/mem_sys/kern_mem_sys.h"
33 #include "kernel/gpu/nvbitmask.h"
34 #include "platform/chipset/chipset.h"
35 #include "rmapi/client.h"
36 #include "nvdevid.h"
37 
38 #include "gpu/subdevice/subdevice.h"
39 #include "gpu/gsp/gsp_static_config.h"
40 #include "vgpu/rpc.h"
41 
42 #include "nvRmReg.h"
43 
44 static NV_STATUS kbusInitRegistryOverrides(OBJGPU *pGpu, KernelBus *pKernelBus);
45 
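/*!
 * @brief Construct the KernelBus engine object
 *
 * Applies registry overrides, initializes PCI BAR information, and sets the
 * BAR aperture sizes for the PF GFID.
 *
 * @param[in] pGpu
 * @param[in] pKernelBus
 * @param[in] engDesc     Engine descriptor
 *
 * @returns NV_OK on success
 */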
46 NV_STATUS
47 kbusConstructEngine_IMPL(OBJGPU *pGpu, KernelBus *pKernelBus, ENGDESCRIPTOR engDesc)
48 {
49     NV_STATUS  status;
50 
51     if (IsAMPEREorBetter(pGpu) && pGpu->getProperty(pGpu, PDB_PROP_GPU_IS_ALL_INST_IN_SYSMEM))
52     {
53         pKernelBus->bBar1PhysicalModeEnabled = NV_TRUE;
54     }
55 
56     if (IS_VIRTUAL_WITH_SRIOV(pGpu))
57     {
58         pKernelBus->bUsePhysicalBar2InitPagetable = NV_TRUE;
59     }
60 
61     // allocate HAL private info block
62     status = kbusConstructHal_HAL(pGpu, pKernelBus);
63     if (status != NV_OK)
64         return status;
65 
66     kbusInitRegistryOverrides(pGpu, pKernelBus);
67 
68     kbusInitPciBars_HAL(pKernelBus);
69 
70     // Special handling for VGPU. WAR for bug 3458057, bug 3458029.
71     if (IS_VIRTUAL(pGpu))
72     {
73         NV_CHECK_OK_OR_RETURN(LEVEL_ERROR,
74             kbusInitBarsSize_HAL(pGpu, pKernelBus));
75     }
76 
77     NV_CHECK_OK_OR_RETURN(LEVEL_ERROR,
78             kbusInitBarsBaseInfo_HAL(pKernelBus));
79 
80     NV_CHECK_OK_OR_RETURN(LEVEL_ERROR,
81             kbusSetBarsApertureSize_HAL(pGpu, pKernelBus, GPU_GFID_PF));
82 
83     return NV_OK;
84 }
85 
86 /*!
87  * @brief Initialize all registry overrides for this object
88  *
89  * @param[in]      pGpu
90  * @param[in,out]  pKernelBus
91  */
92 static NV_STATUS
93 kbusInitRegistryOverrides(OBJGPU *pGpu, KernelBus *pKernelBus)
94 {
95     NvU32 data32;
96 
97     switch (DRF_VAL(_REG_STR_RM, _INST_LOC, _BAR_PTE, pGpu->instLocOverrides))
98     {
99         default:
100         case NV_REG_STR_RM_INST_LOC_BAR_PTE_DEFAULT:
101             // Do not override on default.
102             break;
103         case NV_REG_STR_RM_INST_LOC_BAR_PTE_VID:
104             pKernelBus->PTEBAR2Aperture = ADDR_FBMEM;
105             pKernelBus->PTEBAR2Attr = NV_MEMORY_WRITECOMBINED;
106             break;
107 
108         case NV_REG_STR_RM_INST_LOC_BAR_PTE_COH:
109             if (gpuIsBarPteInSysmemSupported(pGpu) || !gpuIsRegUsesGlobalSurfaceOverridesEnabled(pGpu))
110             {
111                 pKernelBus->PTEBAR2Aperture = ADDR_SYSMEM;
112                 pKernelBus->PTEBAR2Attr = NV_MEMORY_CACHED;
113             }
114             else
115             {
116                 //
117                 // BAR PTEs in sysmem are not supported on all hardware.
118                 // HW bug 415430. Once fixed, this property will be set on supported GPUs.
119                 // On unsupported GPUs where the GlobalSurfaceOverrides regkey is used, show a warning and don't override.
120                 //
121                 NV_PRINTF(LEVEL_WARNING,
122                           "BAR PTEs not supported in sysmem. Ignoring global override request.\n");
123             }
124             break;
125 
126         case NV_REG_STR_RM_INST_LOC_BAR_PTE_NCOH:
127             if (gpuIsBarPteInSysmemSupported(pGpu) || !gpuIsRegUsesGlobalSurfaceOverridesEnabled(pGpu))
128             {
129                 pKernelBus->PTEBAR2Aperture = ADDR_SYSMEM;
130                 pKernelBus->PTEBAR2Attr = NV_MEMORY_UNCACHED;
131             }
132             else
133             {
134                 // BAR PTEs in sysmem are not supported on current hardware. See above.
135                 NV_PRINTF(LEVEL_WARNING,
136                           "BAR PTEs not supported in sysmem. Ignoring global override request.\n");
137             }
138             break;
139     }
140 
141     NV_PRINTF(LEVEL_INFO, "Using aperture %d for BAR2 PTEs\n",
142               pKernelBus->PTEBAR2Aperture);
143 
144     switch (DRF_VAL(_REG_STR_RM, _INST_LOC, _BAR_PDE, pGpu->instLocOverrides))
145     {
146         default:
147         case NV_REG_STR_RM_INST_LOC_BAR_PDE_DEFAULT:
148             // Do not override on default.
149             break;
150         case NV_REG_STR_RM_INST_LOC_BAR_PDE_VID:
151             pKernelBus->PDEBAR2Aperture = ADDR_FBMEM;
152             pKernelBus->PDEBAR2Attr = NV_MEMORY_WRITECOMBINED;
153             break;
154 
155         case NV_REG_STR_RM_INST_LOC_BAR_PDE_COH:
156             if (gpuIsBarPteInSysmemSupported(pGpu) || !gpuIsRegUsesGlobalSurfaceOverridesEnabled(pGpu))
157             {
158                 pKernelBus->PDEBAR2Aperture = ADDR_SYSMEM;
159                 pKernelBus->PDEBAR2Attr = NV_MEMORY_CACHED;
160             }
161             else
162             {
163                 // BAR PDEs in sysmem are not supported on all hardware. See above.
164                 NV_PRINTF(LEVEL_WARNING,
165                           "BAR PDEs not supported in sysmem. Ignoring global override request.\n");
166             }
167             break;
168 
169         case NV_REG_STR_RM_INST_LOC_BAR_PDE_NCOH:
170             if (gpuIsBarPteInSysmemSupported(pGpu) || !gpuIsRegUsesGlobalSurfaceOverridesEnabled(pGpu))
171             {
172                 pKernelBus->PDEBAR2Aperture = ADDR_SYSMEM;
173                 pKernelBus->PDEBAR2Attr = NV_MEMORY_UNCACHED;
174             }
175             else
176             {
177                 // BAR PDEs in sysmem are not supported on all hardware. See above.
178                 NV_PRINTF(LEVEL_WARNING,
179                           "BAR PDEs not supported in sysmem. Ignoring global override request.\n");
180             }
181             break;
182     }
183 
184     if (RMCFG_FEATURE_PLATFORM_WINDOWS_LDDM && !pGpu->getProperty(pGpu, PDB_PROP_GPU_IN_TCC_MODE))
185     {
186         //
187         // Aligns to unlinked SLI: Volta and up
188         // Next: Plan for all GPUs after validation
189         //
190         pKernelBus->bP2pMailboxClientAllocated =
191             pKernelBus->bP2pMailboxClientAllocatedBug3466714VoltaAndUp;
192     }
193 
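    // A regkey may override whether the P2P mailbox is allocated by the client.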
194     if (osReadRegistryDword(pGpu, NV_REG_STR_P2P_MAILBOX_CLIENT_ALLOCATED, &data32) == NV_OK)
195     {
196         pKernelBus->bP2pMailboxClientAllocated = !!data32;
197     }
198 
199     if (osReadRegistryDword(pGpu, NV_REG_STR_RESTORE_BAR1_SIZE_BUG_3249028_WAR, &data32) == NV_OK)
200     {
201         pKernelBus->setProperty(pKernelBus, PDB_PROP_KBUS_RESTORE_BAR1_SIZE_BUG_3249028_WAR, !!data32);
202     }
203 
204     return NV_OK;
205 }
206 
207 /**
208  * @brief  Gets the BAR1 VA range for a device
209  *
210  * @param[in] pGpu
211  * @param[in] pKernelBus
212  * @param[in] pDevice               Device pointer
213  * @param[out] pBar1VARange         BAR1 VA range
214  */
215 
216 NV_STATUS
217 kbusGetBar1VARangeForDevice_IMPL(OBJGPU *pGpu, KernelBus *pKernelBus, Device *pDevice, NV_RANGE *pBar1VARange)
218 {
219     KernelMIGManager *pKernelMIGManager = GPU_GET_KERNEL_MIG_MANAGER(pGpu);
220     OBJVASPACE       *pBar1VAS          = kbusGetBar1VASpace_HAL(pGpu, pKernelBus);
221 
222     NV_ASSERT_OR_RETURN(pBar1VAS != NULL, NV_ERR_INVALID_STATE);
223 
224    *pBar1VARange = rangeMake(vaspaceGetVaStart(pBar1VAS), vaspaceGetVaLimit(pBar1VAS));
225 
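    //
    // With MIG memory partitioning enabled, restrict the BAR1 VA range to the
    // device's GPU instance unless the client is an SMC monitor or the device
    // is using device-level profiling.
    //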
226     if ((pKernelMIGManager != NULL) && kmigmgrIsMIGMemPartitioningEnabled(pGpu, pKernelMIGManager) &&
227         !rmclientIsCapableByHandle(RES_GET_CLIENT_HANDLE(pDevice), NV_RM_CAP_SYS_SMC_MONITOR) &&
228         !kmigmgrIsDeviceUsingDeviceProfiling(pGpu, pKernelMIGManager, pDevice))
229     {
230         MemoryManager *pMemoryManager = GPU_GET_MEMORY_MANAGER(pGpu);
231         KernelMemorySystem *pKernelMemorySystem = GPU_GET_KERNEL_MEMORY_SYSTEM(pGpu);
232         MIG_INSTANCE_REF ref;
233 
234        *pBar1VARange = memmgrGetMIGPartitionableBAR1Range(pGpu, pMemoryManager);
235 
236         NV_ASSERT_OK_OR_RETURN(kmigmgrGetInstanceRefFromDevice(pGpu, pKernelMIGManager,
237                                    pDevice, &ref));
238         NV_ASSERT_OK_OR_RETURN(kmemsysSwizzIdToMIGMemRange(pGpu, pKernelMemorySystem, ref.pKernelMIGGpuInstance->swizzId,
239                                    *pBar1VARange, pBar1VARange));
240     }
241     return NV_OK;
242 }
243 
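/*!
 * @brief Set up access to a peer GPU's BAR from the local GPU
 *
 * Creates (or validates) a sysmem memory descriptor describing the peer BAR
 * region and attempts to map it through the remote GPU's IOVA space.
 *
 * @param[in]     pLocalGpu
 * @param[in]     pRemoteGpu
 * @param[in]     base        Physical base address of the BAR region (page aligned)
 * @param[in]     size        Size of the BAR region in bytes
 * @param[in,out] ppMemDesc   Memory descriptor to reuse or create
 *
 * @returns The bus address to use for peer access: the IOVA when an IOMMU
 *          mapping exists, otherwise the CPU physical address; ~0ULL on error.
 */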
244 RmPhysAddr
245 kbusSetupPeerBarAccess_IMPL
246 (
247     OBJGPU *pLocalGpu,
248     OBJGPU *pRemoteGpu,
249     RmPhysAddr base,
250     NvU64 size,
251     PMEMORY_DESCRIPTOR *ppMemDesc
252 )
253 {
254     NV_STATUS          status;
255     MEMORY_DESCRIPTOR *pMemDesc = *ppMemDesc;
256     IOVAMAPPING       *pIovaMapping;
257 
258     NV_ASSERT_OR_RETURN(((base & RM_PAGE_MASK) == 0), ~0ULL);
259 
260     if (pMemDesc == NULL)
261     {
262         status = memdescCreate(&pMemDesc, pLocalGpu, size, 0, NV_TRUE,
263                                ADDR_SYSMEM, NV_MEMORY_UNCACHED,
264                                MEMDESC_FLAGS_SKIP_RESOURCE_COMPUTE);
265         NV_ASSERT_OR_RETURN(status == NV_OK, ~0ULL);
266 
267         memdescDescribe(pMemDesc, ADDR_SYSMEM, base, size);
268     }
269     else
270     {
271         NV_ASSERT_OR_RETURN(
272             (memdescGetPhysAddr(pMemDesc, AT_GPU, 0) == base) &&
273             (memdescGetSize(pMemDesc) == size), ~0ULL);
274     }
275 
276     //
277     // Even if IOMMU-remapping fails (which it shouldn't), try to continue
278     // using the CPU physical address. In most cases, this is still sufficient.
279     //
280     status = memdescMapIommu(pMemDesc, pRemoteGpu->busInfo.iovaspaceId);
281     NV_ASSERT(status == NV_OK);
282 
283     pIovaMapping = memdescGetIommuMap(pMemDesc, pRemoteGpu->busInfo.iovaspaceId);
284 
285     *ppMemDesc = pMemDesc;
286 
287     if (pIovaMapping == NULL)
288     {
289         NV_PRINTF(LEVEL_INFO,
290                   "no IOVA mapping found for pre-existing P2P domain memdesc\n");
291         return memdescGetPhysAddr(pMemDesc, AT_GPU, 0);
292     }
293 
294     return pIovaMapping->iovaArray[0];
295 }
296 
297 /*!
298  *  @brief Get the bus flush aperture flag for the NV_ADDRESS_SPACE
299  *         For use with the kbusFlush_HAL() API
300  *
301  *  @param[in]  addrSpace         NV_ADDRESS_SPACE
302  *
303  *  @returns bus flush aperture flag
304  */
305 NvU32 kbusGetFlushAperture_IMPL(KernelBus *pKernelBus, NV_ADDRESS_SPACE addrSpace)
306 {
307     return (addrSpace == ADDR_FBMEM) ? BUS_FLUSH_VIDEO_MEMORY : BUS_FLUSH_SYSTEM_MEMORY;
308 }
309 
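/*! KernelBus destructor: tears down the virtual BAR2 mapping resources for the PF GFID. */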
310 void
311 kbusDestruct_IMPL(KernelBus *pKernelBus)
312 {
313     OBJGPU *pGpu = ENG_GET_GPU(pKernelBus);
314 
315     //
316     // We need to clean up the memory resources for BAR2 as late as possible,
317     // and after all memory descriptors have been reclaimed.
318     //
319     kbusDestructVirtualBar2_HAL(pGpu, pKernelBus, NV_TRUE, GPU_GFID_PF);
320 
321     return;
322 }
323 
324 /*! Send sysmembar to all sub-devices */
325 NV_STATUS
326 kbusSendSysmembar_IMPL
327 (
328     OBJGPU      *pGpu,
329     KernelBus   *pKernelBus
330 )
331 {
332     NV_STATUS   status  = NV_OK;
333 
334     // Nothing to be done in guest in the paravirtualization case.
335     if (IS_VIRTUAL_WITHOUT_SRIOV(pGpu))
336     {
337         return NV_OK;
338     }
339 
340     // Wait for the flush to flow through
341     SLI_LOOP_START(SLI_LOOP_FLAGS_BC_ONLY | SLI_LOOP_FLAGS_IGNORE_REENTRANCY);
342         pKernelBus = GPU_GET_KERNEL_BUS(pGpu);
343         if (kbusSendSysmembarSingle_HAL(pGpu, pKernelBus) == NV_ERR_TIMEOUT)
344         {
345             status = NV_ERR_TIMEOUT;
346         }
347     SLI_LOOP_END;
348     pKernelBus = GPU_GET_KERNEL_BUS(pGpu);
349 
350     return status;
351 }
352 
353 /**
354  * @brief Send sysmembar to a single sub-device.
355  *        Trigger RPC to Physical RM.
356  *
357  * @param[in] pGpu
358  * @param[in] pKernelBus
359  */
360 NV_STATUS
361 kbusSendSysmembarSingle_KERNEL
362 (
363     OBJGPU    *pGpu,
364     KernelBus *pKernelBus
365 )
366 {
367     RM_API    *pRmApi = GPU_GET_PHYSICAL_RMAPI(pGpu);
368     NV_STATUS  status;
369 
370     status = pRmApi->Control(pRmApi, pGpu->hInternalClient, pGpu->hInternalSubdevice,
371                                 NV2080_CTRL_CMD_INTERNAL_BUS_FLUSH_WITH_SYSMEMBAR,
372                                 NULL, 0);
373 
374     return status;
375 }
376 
377 /*!
378  * @brief Commit BAR2
379  *
380  * @param[in] pGpu
381  * @param[in] pKernelBus
382  * @param[in] flags         GPU state flag (not used by Kernel RM)
383  *
384  * @returns NV_OK on success.
385  */
386 NV_STATUS
387 kbusCommitBar2_KERNEL
388 (
389     OBJGPU    *pGpu,
390     KernelBus *pKernelBus,
391     NvU32      flags
392 )
393 {
394     if (!KBUS_BAR0_PRAMIN_DISABLED(pGpu) &&
395         !kbusIsBarAccessBlocked(pKernelBus) &&
396         !(flags & GPU_STATE_FLAGS_GC6_TRANSITION))
397     {
398         // We will initialize BAR2 to the default big page size of the system.
399         NV_ASSERT_OK_OR_RETURN(kbusInitVirtualBar2_HAL(pGpu, pKernelBus));
400         NV_ASSERT_OK_OR_RETURN(kbusSetupCpuPointerForBusFlush_HAL(pGpu, pKernelBus));
401     }
402     return NV_OK;
403 }
404 
405 /*! Get the PCI BAR size in bytes */
406 NvU64
407 kbusGetPciBarSize_IMPL(KernelBus *pKernelBus, NvU32 index)
408 {
409     if (index >= pKernelBus->totalPciBars)
410     {
411         NV_PRINTF(LEVEL_ERROR, "bad index 0x%x\n", index);
412         return 0;
413     }
414 
415     return pKernelBus->pciBarSizes[index];
416 }
417 
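/*! Get the PCI BAR offset (base bus address) for the given BAR index */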
418 RmPhysAddr
419 kbusGetPciBarOffset_IMPL(KernelBus *pKernelBus, NvU32 index)
420 {
421     RmPhysAddr offset = 0x0;
422 
423     if (index < pKernelBus->totalPciBars)
424     {
425         offset = pKernelBus->pciBars[index];
426     }
427     else
428     {
429         NV_PRINTF(LEVEL_ERROR, "bad index 0x%x\n", index);
430     }
431 
432     return offset;
433 }
434 
435 /**
436  * @brief Determine bBar1Force64KBMapping based on the regkey and BAR1 size
437  *   Determine whether 64KB mappings need to be forced based on the total BAR1 size.
438  *   The default threshold is 256MB unless overridden by the regkey:
439  *   force 64KB mappings for SKUs with BAR1 size <= 256MB.
440  *
441  * @param[in] pKernelBus
442  */
443 void
444 kbusDetermineBar1Force64KBMapping_IMPL
445 (
446     KernelBus *pKernelBus
447 )
448 {
449     OBJGPU*   pGpu = ENG_GET_GPU(pKernelBus);
450     NvU32     data;
451 
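    // Default to forcing 64KB mappings; cleared below if the regkey disables it or BAR1 is larger than 256MB.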
452     pKernelBus->bBar1Force64KBMapping = NV_TRUE;
453 
454     if (osReadRegistryDword(pGpu, NV_REG_STR_RM_64KB_BAR1_MAPPINGS,
455                             &data) == NV_OK)
456     {
457         if (data == NV_REG_STR_RM_64KB_BAR1_MAPPINGS_DISABLED)
458         {
459             pKernelBus->bBar1Force64KBMapping = NV_FALSE;
460         }
461     }
462     else
463     {
464         NvU32   bar1SizeMB;
465         bar1SizeMB = (NvU32)(kbusGetPciBarSize(pKernelBus, 1) >> 20);
466 
467         if (bar1SizeMB > 256)
468         {
469             pKernelBus->bBar1Force64KBMapping = NV_FALSE;
470         }
471     }
472 }
473 
474 
475 /**
476  * @brief Determine bar1[gfid].apertureLength based on the regkey and BAR1 size
477  *
478  * @param[in] pKernelBus
479  * @param[in] gfid
480  */
481 void
482 kbusDetermineBar1ApertureLength_IMPL
483 (
484     KernelBus *pKernelBus,
485     NvU32      gfid
486 )
487 {
488     OBJGPU   *pGpu = ENG_GET_GPU(pKernelBus);
489     NvU32     data32;
490 
491     if (IS_GFID_VF(gfid))
492     {
493         pKernelBus->bar1[gfid].apertureLength = pGpu->sriovState.vfBarSize[1];
494     }
495     else
496     {
497         pKernelBus->bar1[gfid].apertureLength = kbusGetPciBarSize(pKernelBus, 1);
498     }
499 
500     // We can shrink BAR1 using this reg key but cannot grow it.
501     if (((NV_OK == osReadRegistryDword(pGpu,
502                         NV_REG_STR_RM_BAR1_APERTURE_SIZE_MB, &data32))) &&
503             data32 && (((NvU64)data32 << 20) < pKernelBus->bar1[gfid].apertureLength))
504     {
505         // Set BAR1 aperture length based on the override
506         pKernelBus->bar1[gfid].apertureLength = (NvU64) data32 << 20;
507     }
508 
509 }
510 
511 /*!
512  * @brief Initialize pciBarSizes[], set pKernelBus->bPciBarSizesValid
513  *        Trigger an internal RMAPI to get the data from Physical RM.
514  *
515  * @param[in] pGpu
516  * @param[in] pKernelBus
517  */
518 NV_STATUS
519 kbusInitBarsSize_KERNEL
520 (
521     OBJGPU    *pGpu,
522     KernelBus *pKernelBus
523 )
524 {
525     RM_API *pRmApi = GPU_GET_PHYSICAL_RMAPI(pGpu);
526     NV2080_CTRL_BUS_GET_PCI_BAR_INFO_PARAMS params;
527     NvU32 i;
528 
529     NV_ASSERT( ! pKernelBus->bPciBarSizesValid);
530 
531     NV_CHECK_OK_OR_RETURN(LEVEL_ERROR,
532         pRmApi->Control(pRmApi, pGpu->hInternalClient, pGpu->hInternalSubdevice,
533                         NV2080_CTRL_CMD_BUS_GET_PCI_BAR_INFO,
534                         &params, sizeof(params)));
535 
536     for (i = 0; i < params.pciBarCount; i++)
537     {
538         pKernelBus->pciBarSizes[i] = params.pciBarInfo[i].barSizeBytes;
539     }
540 
541     pKernelBus->bPciBarSizesValid = NV_TRUE;
542 
543     return NV_OK;
544 }
545 
546 /*!
547  * @brief Destroy the P2P mailbox used for mappings to a given peer GPU
548  *
549  * Tears down local peer access and issues RPCs to Physical RM to destroy the
550  * mailbox and reprogram the HSHUB peer connection configuration.
551  *
552  * @param[in]   pGpu0         (Local)
553  * @param[in]   pKernelBus0   (Local)
554  * @param[in]   pGpu1         (Remote)
555  * @param[in]   peerIdx
 */
556 void
557 kbusDestroyMailbox_IMPL
558 (
559     OBJGPU      *pGpu0,
560     KernelBus   *pKernelBus0,
561     OBJGPU      *pGpu1,
562     NvU32        peerIdx
563 )
564 {
565     RM_API *pRmApi  = GPU_GET_PHYSICAL_RMAPI(pGpu0);
566     NvBool  bNeedWarBug999673 = kbusNeedWarForBug999673_HAL(pGpu0, pKernelBus0, pGpu1) ||
567                                 kbusNeedWarForBug999673_HAL(pGpu1, GPU_GET_KERNEL_BUS(pGpu1), pGpu0);
568     NV2080_CTRL_INTERNAL_BUS_DESTROY_P2P_MAILBOX_PARAMS busParams   = {0};
569     NV2080_CTRL_INTERNAL_HSHUB_PEER_CONN_CONFIG_PARAMS  hshubParams = {0};
570     NV_STATUS status;
571 
572     kbusDestroyPeerAccess_HAL(pGpu0, pKernelBus0, peerIdx);
573 
574     busParams.peerIdx           = peerIdx;
575     busParams.bNeedWarBug999673 = bNeedWarBug999673;
576     status = pRmApi->Control(pRmApi, pGpu0->hInternalClient, pGpu0->hInternalSubdevice,
577                              NV2080_CTRL_CMD_INTERNAL_BUS_DESTROY_P2P_MAILBOX,
578                              &busParams, sizeof(busParams));
579     NV_ASSERT(status == NV_OK);
580 
581     // Create a peer mask for each peer to program their respective peer_connection_cfg registers
582     hshubParams.invalidatePeerMask = NVBIT32(peerIdx);
583     // Program connection_cfg registers
584     status = pRmApi->Control(pRmApi, pGpu0->hInternalClient, pGpu0->hInternalSubdevice,
585                              NV2080_CTRL_CMD_INTERNAL_HSHUB_PEER_CONN_CONFIG,
586                              &hshubParams, sizeof(hshubParams));
587     NV_ASSERT(status == NV_OK);
588 }
589 
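/*!
 * @brief Get the CPU-mapped BAR2 address corresponding to an FBMEM memory descriptor
 *
 * Returns the CPU virtual address of the descriptor's FB offset within the
 * virtual BAR2 mapping, or NULL if BAR2 has no CPU mapping or the descriptor
 * is not in FBMEM.
 */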
590 NvU8 *
591 kbusCpuOffsetInBar2WindowGet_IMPL
592 (
593     OBJGPU            *pGpu,
594     KernelBus         *pKernelBus,
595     MEMORY_DESCRIPTOR *pMemDesc
596 )
597 {
598     NV_ASSERT_OR_RETURN(NULL != pKernelBus->virtualBar2[GPU_GFID_PF].pCpuMapping, NULL);
599     NV_ASSERT_OR_RETURN(ADDR_FBMEM == pMemDesc->_addressSpace, NULL);
600 
601     return (NvU8 *)(pKernelBus->virtualBar2[GPU_GFID_PF].pCpuMapping +
602                     memdescGetPhysAddr(pMemDesc, AT_GPU, 0));
603 }
604 
605 /*!
606  * Calculates the BAR2 VA limit (in bytes), which usually means the
607  * cpuVisible area limit in CPU-RM.  Can be safely called only after
608  * kbusSetBarsApertureSize_HAL is executed.
609  *
610  * @param   pGpu
611  * @param   pKernelBus
612  *
613  * @return VA limit of BAR2
614  */
615 NvU64
616 kbusGetVaLimitForBar2_KERNEL
617 (
618     OBJGPU    *pGpu,
619     KernelBus *pKernelBus
620 )
621 {
622     NvU64 limit = pKernelBus->bar2[GPU_GFID_PF].cpuVisibleLimit;
623 
624     NV_PRINTF(LEVEL_INFO, "va limit: 0x%llx\n", limit);
625 
626     //
627     // pKernelBus->bar2.vaLimit is set by this function.
628     // Assert to ensure that this value doesn't get changed.
629     //
630     NV_ASSERT(pKernelBus->bar2[GPU_GFID_PF].vaLimit == 0 || pKernelBus->bar2[GPU_GFID_PF].vaLimit == limit);
631 
632     return limit;
633 }
634 
635 /*!
636  * Patch CPU-RM's SW cache of BAR1 PDB to GSP-RM's BAR1 PDB so that CPU-RM can
637  * do TLB invalidation on the correct VA space.
638  *
639  * @param   pGpu
640  * @param   pKernelBus
641  *
642  * @return NV_OK if PDB is updated successfully
643  *         Or bubble up the error code returned by the callees
644  */
645 NV_STATUS
646 kbusPatchBar1Pdb_GSPCLIENT
647 (
648     OBJGPU      *pGpu,
649     KernelBus   *pKernelBus
650 )
651 {
652     NV_STATUS            status    = NV_OK;
653     OBJGVASPACE         *pGVAS     = dynamicCast(pKernelBus->bar1[GPU_GFID_PF].pVAS, OBJGVASPACE);
654     KernelGmmu          *pKernelGmmu = GPU_GET_KERNEL_GMMU(pGpu);
655     MEMORY_DESCRIPTOR   *pMemDesc  = NULL;
656     GVAS_GPU_STATE      *pGpuState = gvaspaceGetGpuState(pGVAS, pGpu);
657     const MMU_FMT_LEVEL *pRootFmt  = pGpuState->pFmt->pRoot;
658     NvU32                rootSize  = pRootFmt->entrySize;
659     MMU_WALK_USER_CTX    userCtx   = {0};
660     GspStaticConfigInfo *pGSCI     = GPU_GET_GSP_STATIC_INFO(pGpu);
661 
662     NV_CHECK_OK_OR_RETURN(LEVEL_ERROR,
663          memdescCreate(&pMemDesc, pGpu, rootSize, RM_PAGE_SIZE, NV_TRUE, ADDR_FBMEM,
664                        kgmmuGetPTEAttr(pKernelGmmu), MEMDESC_FLAGS_NONE));
665 
666     memdescDescribe(pMemDesc, ADDR_FBMEM, pGSCI->bar1PdeBase, rootSize);
667     memdescSetPageSize(pMemDesc, VAS_ADDRESS_TRANSLATION(pKernelBus->bar1[GPU_GFID_PF].pVAS), RM_PAGE_SIZE);
668 
669     gvaspaceWalkUserCtxAcquire(pGVAS, pGpu, NULL, &userCtx);
670 
671     //
672     // Modify the CPU-RM's walker state with the new backing memory.
673     // This is intended to replace CPU-RM's PDB by GSP-RM's PDB.
674     //
675     status = mmuWalkModifyLevelInstance(pGpuState->pWalk,
676                                         pRootFmt,
677                                         vaspaceGetVaStart(pKernelBus->bar1[GPU_GFID_PF].pVAS),
678                                         (MMU_WALK_MEMDESC*)pMemDesc,
679                                         mmuFmtLevelSize(pRootFmt),
680                                         NV_TRUE,
681                                         NV_TRUE,
682                                         NV_FALSE);
683     gvaspaceWalkUserCtxRelease(pGVAS, &userCtx);
684     if (NV_OK != status)
685     {
686         NV_PRINTF(LEVEL_ERROR, "Failed to modify CPU-RM's BAR1 PDB to GSP-RM's BAR1 PDB.\n");
687         return status;
688     }
689 
690     gvaspaceInvalidateTlb(pGVAS, pGpu, PTE_DOWNGRADE);
691 
692     return status;
693 }
694 
695 /*!
696  * Patch CPU-RM's SW cache of BAR2 PDB to GSP-RM's BAR2 PDB so that CPU-RM can
697  * do TLB invalidation on the correct VA space.
698  *
699  * For the BAR2 support in RM-offload model, CPU-RM owns the VA range under
700  * PDE3[0] and GSP-RM owns the VA range under PDE3[1]. GSP-RM and CPU-RM
701  * establish their own BAR2 page tables respectively. After CPU-RM establishes
702  * its own table, it passes its PDE3[0] value to GSP-RM, then GSP-RM will fill
703  * this value into PDE3[0] of GSP-RM's table (only GSP-RM's BAR2 table will be
704  * bound to HW) so that HW sees single BAR2 page table for both GSP-RM and
705  * CPU-RM.
706  *
707  * @param   pGpu
708  * @param   pKernelBus
709  *
710  * @return NV_OK if PDB is updated successfully
711  *         Or bubble up the error code returned by the callees
712  */
713 NV_STATUS
714 kbusPatchBar2Pdb_GSPCLIENT
715 (
716     OBJGPU      *pGpu,
717     KernelBus   *pKernelBus
718 )
719 {
720     NV_STATUS            status   = NV_OK;
721     PMEMORY_DESCRIPTOR   pMemDesc;
722     GspStaticConfigInfo *pGSCI    = GPU_GET_GSP_STATIC_INFO(pGpu);
723     const MMU_FMT_LEVEL *pRootFmt = pKernelBus->bar2[GPU_GFID_PF].pFmt->pRoot;
724     NvU64                entryValue;
725     MEMORY_DESCRIPTOR   *pOldPdb;
726 
727     pOldPdb = pKernelBus->virtualBar2[GPU_GFID_PF].pPDB;
728 
729     NV_CHECK_OK_OR_RETURN(LEVEL_ERROR,
730         memdescCreate(&pMemDesc, pGpu, pKernelBus->bar2[GPU_GFID_PF].pageDirSize, RM_PAGE_SIZE, NV_TRUE,
731                       ADDR_FBMEM, pKernelBus->PDEBAR2Attr, MEMDESC_FLAGS_NONE));
732 
733     memdescDescribe(pMemDesc, ADDR_FBMEM, pGSCI->bar2PdeBase, pKernelBus->bar2[GPU_GFID_PF].pageDirSize);
734 
735     // Update CPU-RM's SW cache of PDB to GSP-RM's PDB address
736     pKernelBus->virtualBar2[GPU_GFID_PF].pPDB = pMemDesc;
737 
738     //
739     // The BAR2 page table is not yet working at this point, so retrieve the
740     // PDE3[0] of the BAR2 page table via the BAR0 window, or via GSP-DMA in
741     // case BARs are blocked.
742     //
743     if (kbusIsBarAccessBlocked(pKernelBus))
744     {
745         MemoryManager *pMemoryManager = GPU_GET_MEMORY_MANAGER(pGpu);
746         TRANSFER_SURFACE surf = {0};
747 
748         surf.pMemDesc = pOldPdb;
749         surf.offset = 0;
750 
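        // Read the root PDE (PDE3[0]) from CPU-RM's old PDB via the memory transfer interface, since BAR access is blocked.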
751         NV_ASSERT_OK_OR_RETURN(
752             memmgrMemRead(pMemoryManager, &surf, &entryValue,
753                           pRootFmt->entrySize, TRANSFER_FLAGS_NONE));
754     }
755     else
756     {
757         entryValue = GPU_REG_RD32(pGpu, (NvU32)pKernelBus->bar2[GPU_GFID_PF].bar2OffsetInBar0Window) |
758                  ((NvU64)GPU_REG_RD32(pGpu, (NvU32)pKernelBus->bar2[GPU_GFID_PF].bar2OffsetInBar0Window + 4) << 32);
759     }
760 
761     //
762     // Provide the PDE3[0] value to GSP-RM so that GSP-RM can merge CPU-RM's
763     // page table to GSP-RM's page table
764     //
765     NV_RM_RPC_UPDATE_BAR_PDE(pGpu, NV_RPC_UPDATE_PDE_BAR_2, entryValue, pRootFmt->virtAddrBitLo, status);
766 
767     return status;
768 }
769 
770 
771 /*!
772  * @brief Checks whether an engine is available or not.
773  *
774  * The 'engine' is an engine descriptor
775  * This function differs from busProbeRegister in that it doesn't rely on
776  * timeouts after reading a register in the engine's register space.
777  * Instead, it
778  *  - Returns NV_TRUE for all engines that must be present in the GPU.
779  *  - Gets information about the CE, MSENC, NVJPG and OFA engines from the plugin.
780  *  - Determines the remaining engines from HAL creation data.
781  *
782  * @param[in] pGpu       OBJGPU pointer
783  * @param[in] pKernelBus KernelBus pointer
784  * @param[in] engDesc    ENGDESCRIPTOR pointer used to check Engine presence
785  *
786  * @returns NV_TRUE if engine is available.
787  *          NV_FALSE if engine is not available or floorswept.
788  *
789  */
790 NvBool
791 kbusCheckEngine_KERNEL
792 (
793     OBJGPU        *pGpu,
794     KernelBus     *pKernelBus,
795     ENGDESCRIPTOR  engDesc
796 )
797 {
798     NvU32     rmEngineCaps[NVGPU_ENGINE_CAPS_MASK_ARRAY_MAX] = {0};
799     NvU32     nv2080EngineCaps[NVGPU_ENGINE_CAPS_MASK_ARRAY_MAX] = {0};
800     NvBool    bSupported;
801     NV_STATUS status;
802 
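    // Read the engine capability masks published by GSP-RM in its static config info.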
803     {
804         NvU32 i;
805         GspStaticConfigInfo *pGSCI = GPU_GET_GSP_STATIC_INFO(pGpu);
806         if (pGSCI == NULL)
807         {
808             return NV_FALSE;
809         }
810 
811         for (i = 0; i < NVGPU_ENGINE_CAPS_MASK_ARRAY_MAX; i++)
812         {
813             nv2080EngineCaps[i] = pGSCI->engineCaps[i];
814         }
815     }
816 
817     NV_CHECK_OK_OR_ELSE(status, LEVEL_ERROR,
818         gpuGetRmEngineTypeCapMask(nv2080EngineCaps,
819                                   NVGPU_ENGINE_CAPS_MASK_ARRAY_MAX,
820                                   rmEngineCaps),
821         return NV_FALSE);
822 
823     switch (engDesc)
824     {
825         case ENG_LSFM:
826         case ENG_PMU:
827         case ENG_CLK:
828         case ENG_ACR:
829         case ENG_DISP:
830             return NV_FALSE;
831         //
832         // This function is used in two environments:
833         // (a) vGPU where display is not yet supported.
834         // (b) RM offload (Kernel RM) where display is supported.
835         //
836         case ENG_KERNEL_DISPLAY:
837             return IS_GSP_CLIENT(pGpu);
838 
839         case ENG_BIF:
840         case ENG_KERNEL_BIF:
841         case ENG_MC:
842         case ENG_KERNEL_MC:
843         case ENG_PRIV_RING:
844         case ENG_SW_INTR:
845         case ENG_TMR:
846         case ENG_DMA:
847         case ENG_BUS:
848         case ENG_GR(0):
849         case ENG_CIPHER:
850         case ENG_INTR:
851         case ENG_GPULOG:
852         case ENG_GPUMON:
853         case ENG_FIFO:
854             return NV_TRUE;
855 
856         case ENG_CE(0):
857         case ENG_CE(1):
858         case ENG_CE(2):
859         case ENG_CE(3):
860         case ENG_CE(4):
861         case ENG_CE(5):
862         case ENG_CE(6):
863         case ENG_CE(7):
864         case ENG_CE(8):
865         case ENG_CE(9):
866             return !!NVGPU_GET_ENGINE_CAPS_MASK(rmEngineCaps,
867                 RM_ENGINE_TYPE_COPY(GET_CE_IDX(engDesc)));
868 
869         case ENG_MSENC(0):
870         case ENG_MSENC(1):
871         case ENG_MSENC(2):
872             return !!NVGPU_GET_ENGINE_CAPS_MASK(rmEngineCaps,
873                 RM_ENGINE_TYPE_NVENC(GET_MSENC_IDX(engDesc)));
874         case ENG_SEC2:
875             return !!NVGPU_GET_ENGINE_CAPS_MASK(rmEngineCaps,
876                                                 RM_ENGINE_TYPE_SEC2);
877         case ENG_NVDEC(0):
878         case ENG_NVDEC(1):
879         case ENG_NVDEC(2):
880         case ENG_NVDEC(3):
881         case ENG_NVDEC(4):
882         case ENG_NVDEC(5):
883         case ENG_NVDEC(6):
884         case ENG_NVDEC(7):
885             return !!NVGPU_GET_ENGINE_CAPS_MASK(rmEngineCaps,
886                 RM_ENGINE_TYPE_NVDEC(GET_NVDEC_IDX(engDesc)));
887 
888         case ENG_OFA(0):
889             return !!NVGPU_GET_ENGINE_CAPS_MASK(rmEngineCaps,
890                 RM_ENGINE_TYPE_OFA(GET_OFA_IDX(engDesc)));
891 
892         case ENG_NVJPEG(0):
893         case ENG_NVJPEG(1):
894         case ENG_NVJPEG(2):
895         case ENG_NVJPEG(3):
896         case ENG_NVJPEG(4):
897         case ENG_NVJPEG(5):
898         case ENG_NVJPEG(6):
899         case ENG_NVJPEG(7):
900             return !!NVGPU_GET_ENGINE_CAPS_MASK(rmEngineCaps,
901                 RM_ENGINE_TYPE_NVJPEG(GET_NVJPEG_IDX(engDesc)));
902 
903         case ENG_GR(1):
904         case ENG_GR(2):
905         case ENG_GR(3):
906         case ENG_GR(4):
907         case ENG_GR(5):
908         case ENG_GR(6):
909         case ENG_GR(7):
910         {
911             KernelFifo *pKernelFifo  = GPU_GET_KERNEL_FIFO(pGpu);
912 
913             NV_ASSERT_OR_RETURN(pKernelFifo != NULL, NV_FALSE);
914 
915             return (kfifoCheckEngine_HAL(pGpu, pKernelFifo,
916                                          engDesc,
917                                          &bSupported) == NV_OK &&
918                     bSupported);
919         }
920 
921         case ENG_INVALID:
922             NV_PRINTF(LEVEL_ERROR,
923                       "Query for ENG_INVALID considered erroneous: %d\n",
924                       engDesc);
925             return NV_TRUE;
926         //
927         // Check if the engine descriptor is supported by the current GPU.
928         // Callers must not pass engine descriptors that are not on the
929         // GPU's HAL lists, so assert here.
930         //
931         default:
932             bSupported = gpuIsEngDescSupported(pGpu, engDesc);
933 
934             if (!bSupported)
935             {
936                 NV_PRINTF(LEVEL_ERROR, "Unable to check engine ID: %d\n",
937                           engDesc);
938                 NV_ASSERT(bSupported);
939             }
940             return bSupported;
941     }
942 }
943 
944 //
945 // kbusGetDeviceCaps
946 //
947 // This routine gets cap bits in unicast. If bCapsInitialized is passed as
948 // NV_FALSE, the caps will be copied into pHostCaps without OR/ANDing. Otherwise,
949 // the caps bits for the current GPU will be OR/ANDed together with pHostCaps to
950 // create a single set of caps that accurately represents the functionality of
951 // the device.
952 //
953 void
954 kbusGetDeviceCaps_IMPL
955 (
956     OBJGPU    *pGpu,
957     KernelBus *pKernelBus,
958     NvU8      *pHostCaps,
959     NvBool     bCapsInitialized
960 )
961 {
962     OBJSYS *pSys = SYS_GET_INSTANCE();
963     OBJCL  *pCl  = SYS_GET_CL(pSys);
964     NvU8 tempCaps[NV0080_CTRL_HOST_CAPS_TBL_SIZE], temp;
965     NvBool bExplicitCacheFlushRequired;
966 
967     NV_ASSERT(!gpumgrGetBcEnabledStatus(pGpu));
968 
969     portMemSet(tempCaps, 0, NV0080_CTRL_HOST_CAPS_TBL_SIZE);
970 
971     /*! DMAs to/from cached memory need to have the cache flushed explicitly */
972     bExplicitCacheFlushRequired = NVCPU_IS_ARM &&
973                                   (RMCFG_FEATURE_PLATFORM_UNIX || RMCFG_FEATURE_PLATFORM_MODS_UNIX);
974     if (bExplicitCacheFlushRequired ||
975         (!pCl->getProperty(pCl, PDB_PROP_CL_IS_CHIPSET_IO_COHERENT)))
976         RMCTRL_SET_CAP(tempCaps, NV0080_CTRL_HOST_CAPS, _EXPLICIT_CACHE_FLUSH_REQD);
977 
978     if ((pCl->FHBBusInfo.vendorID == PCI_VENDOR_ID_NVIDIA) &&
979         ((pGpu->getProperty(pGpu, PDB_PROP_GPU_IS_BR04_PRESENT)) ||
980          ((pCl->FHBBusInfo.deviceID >= NVIDIA_C73_CPU_PCI_0_DEVICE_ID_SLI2) &&
981           (pCl->FHBBusInfo.deviceID <= NVIDIA_C73_CPU_PCI_0_DEVICE_ID_RESERVED_3))))
982     {
983         RMCTRL_SET_CAP(tempCaps, NV0080_CTRL_HOST_CAPS, _CPU_WRITE_WAR_BUG_420495);
984     }
985 
986     // the RM always supports GPU-coherent mappings
987     RMCTRL_SET_CAP(tempCaps, NV0080_CTRL_HOST_CAPS, _GPU_COHERENT_MAPPING_SUPPORTED);
988 
989     // If we don't have existing caps with which to reconcile, then just return
990     if (!bCapsInitialized)
991     {
992         portMemCopy(pHostCaps, NV0080_CTRL_HOST_CAPS_TBL_SIZE, tempCaps, NV0080_CTRL_HOST_CAPS_TBL_SIZE);
993         return;
994     }
995 
996     // factor in this GPUs caps: all these are feature caps, so use AND
997     RMCTRL_AND_CAP(pHostCaps, tempCaps, temp,
998                    NV0080_CTRL_HOST_CAPS, _P2P_4_WAY);
999     RMCTRL_AND_CAP(pHostCaps, tempCaps, temp,
1000                    NV0080_CTRL_HOST_CAPS, _P2P_8_WAY);
1001     RMCTRL_AND_CAP(pHostCaps, tempCaps, temp,
1002                    NV0080_CTRL_HOST_CAPS, _GPU_COHERENT_MAPPING_SUPPORTED);
1003 
1004     RMCTRL_OR_CAP(pHostCaps, tempCaps, temp,
1005                   NV0080_CTRL_HOST_CAPS, _SEMA_ACQUIRE_BUG_105665);
1006     RMCTRL_OR_CAP(pHostCaps, tempCaps, temp,
1007                   NV0080_CTRL_HOST_CAPS, _SYS_SEMA_DEADLOCK_BUG_148216);
1008     RMCTRL_OR_CAP(pHostCaps, tempCaps, temp,
1009                   NV0080_CTRL_HOST_CAPS, _SLOWSLI);
1010     RMCTRL_OR_CAP(pHostCaps, tempCaps, temp,
1011                   NV0080_CTRL_HOST_CAPS, _SEMA_READ_ONLY_BUG);
1012     RMCTRL_OR_CAP(pHostCaps, tempCaps, temp,
1013                   NV0080_CTRL_HOST_CAPS, _MEM2MEM_BUG_365782);
1014     RMCTRL_OR_CAP(pHostCaps, tempCaps, temp,
1015                   NV0080_CTRL_HOST_CAPS, _LARGE_NONCOH_UPSTR_WRITE_BUG_114871);
1016     RMCTRL_OR_CAP(pHostCaps, tempCaps, temp,
1017                   NV0080_CTRL_HOST_CAPS, _LARGE_UPSTREAM_WRITE_BUG_115115);
1018     RMCTRL_OR_CAP(pHostCaps, tempCaps, temp,
1019                   NV0080_CTRL_HOST_CAPS, _SEP_VIDMEM_PB_NOTIFIERS_BUG_83923);
1020     RMCTRL_OR_CAP(pHostCaps, tempCaps, temp,
1021                   NV0080_CTRL_HOST_CAPS, _P2P_DEADLOCK_BUG_203825);
1022     RMCTRL_OR_CAP(pHostCaps, tempCaps, temp,
1023                   NV0080_CTRL_HOST_CAPS, _COMPRESSED_BL_P2P_BUG_257072);
1024     RMCTRL_OR_CAP(pHostCaps, tempCaps, temp,
1025                   NV0080_CTRL_HOST_CAPS, _CROSS_BLITS_BUG_270260);
1026     RMCTRL_OR_CAP(pHostCaps, tempCaps, temp,
1027                   NV0080_CTRL_HOST_CAPS, _CPU_WRITE_WAR_BUG_420495);
1028     RMCTRL_OR_CAP(pHostCaps, tempCaps, temp,
1029                   NV0080_CTRL_HOST_CAPS, _BAR1_READ_DEADLOCK_BUG_511418);
1030 
1031     return;
1032 }
1033 
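/*!
 * @brief Map a video memory allocation, identified by client and memory handles,
 *        into the BAR1 aperture
 *
 * @param[in]  pGpu
 * @param[in]  pKernelBus
 * @param[in]  hClient     Client handle owning the memory object
 * @param[in]  hMemory     Memory handle of an FBMEM allocation
 * @param[in]  offset      Offset into the allocation to map
 * @param[in]  size        Size of the mapping in bytes
 * @param[out] pBar1Va     Resulting system physical address within BAR1
 * @param[in]  pDevice     Device pointer
 *
 * @returns NV_OK on success
 */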
1034 NV_STATUS
1035 kbusMapFbApertureByHandle_IMPL
1036 (
1037     OBJGPU    *pGpu,
1038     KernelBus *pKernelBus,
1039     NvHandle   hClient,
1040     NvHandle   hMemory,
1041     NvU64      offset,
1042     NvU64      size,
1043     NvU64     *pBar1Va,
1044     Device    *pDevice
1045 )
1046 {
1047     NV_STATUS status;
1048     RsClient *pClient = NULL;
1049     RsResourceRef *pSrcMemoryRef = NULL;
1050     Memory *pSrcMemory = NULL;
1051     MEMORY_DESCRIPTOR *pMemDesc = NULL;
1052     NvU64 fbApertureOffset = 0;
1053     NvU64 fbApertureLength = size;
1054 
1055     NV_ASSERT_OK_OR_RETURN(serverGetClientUnderLock(&g_resServ, hClient, &pClient));
1056 
1057     status = clientGetResourceRef(pClient, hMemory, &pSrcMemoryRef);
1058     if (status != NV_OK)
1059     {
1060         return status;
1061     }
1062 
1063     pSrcMemory = dynamicCast(pSrcMemoryRef->pResource, Memory);
1064     if (pSrcMemory == NULL)
1065     {
1066         return NV_ERR_INVALID_OBJECT;
1067     }
1068 
1069     pMemDesc = pSrcMemory->pMemDesc;
1070 
1071     if (memdescGetAddressSpace(pMemDesc) != ADDR_FBMEM)
1072     {
1073         return NV_ERR_INVALID_ARGUMENT;
1074     }
1075 
1076     status = kbusMapFbAperture_HAL(pGpu, pKernelBus, pMemDesc, offset,
1077                                    &fbApertureOffset, &fbApertureLength,
1078                                    BUS_MAP_FB_FLAGS_MAP_UNICAST, pDevice);
1079     if (status != NV_OK)
1080     {
1081         return status;
1082     }
1083 
1084     NV_ASSERT_OR_GOTO(fbApertureLength >= size, failed);
1085 
1086     if ((!NV_IS_ALIGNED64(fbApertureOffset, osGetPageSize())) ||
1087         (!NV_IS_ALIGNED64(fbApertureLength, osGetPageSize())))
1088     {
1089         status = NV_ERR_NOT_SUPPORTED;
1090         goto failed;
1091     }
1092 
1093     *pBar1Va = gpumgrGetGpuPhysFbAddr(pGpu) + fbApertureOffset;
1094 
1095     if (!NV_IS_ALIGNED64(*pBar1Va, osGetPageSize()))
1096     {
1097         status = NV_ERR_INVALID_ADDRESS;
1098         goto failed;
1099     }
1100 
1101     return NV_OK;
1102 
1103 failed:
1104     // Note: fbApertureLength is not used by kbusUnmapFbAperture_HAL(), so it's passed as 0
1105     kbusUnmapFbAperture_HAL(pGpu, pKernelBus, pMemDesc,
1106                             fbApertureOffset, 0,
1107                             BUS_MAP_FB_FLAGS_MAP_UNICAST);
1108 
1109     return status;
1110 }
1111 
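/*!
 * @brief Unmap a BAR1 mapping previously created by kbusMapFbApertureByHandle_IMPL()
 *
 * @param[in] pGpu
 * @param[in] pKernelBus
 * @param[in] hClient    Client handle owning the memory object
 * @param[in] hMemory    Memory handle of the mapped allocation
 * @param[in] bar1Va     BAR1 address returned by the map call
 *
 * @returns NV_OK on success
 */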
1112 NV_STATUS
1113 kbusUnmapFbApertureByHandle_IMPL
1114 (
1115     OBJGPU    *pGpu,
1116     KernelBus *pKernelBus,
1117     NvHandle   hClient,
1118     NvHandle   hMemory,
1119     NvU64      bar1Va
1120 )
1121 {
1122     NV_STATUS status;
1123     RsClient *pClient = NULL;
1124     RsResourceRef *pSrcMemoryRef = NULL;
1125     Memory *pSrcMemory = NULL;
1126     MEMORY_DESCRIPTOR *pMemDesc = NULL;
1127 
1128     NV_ASSERT_OK_OR_RETURN(serverGetClientUnderLock(&g_resServ, hClient, &pClient));
1129 
1130     status = clientGetResourceRef(pClient, hMemory, &pSrcMemoryRef);
1131     if (status != NV_OK)
1132     {
1133         return status;
1134     }
1135 
1136     pSrcMemory = dynamicCast(pSrcMemoryRef->pResource, Memory);
1137     if (pSrcMemory == NULL)
1138     {
1139         return NV_ERR_INVALID_OBJECT;
1140     }
1141 
1142     pMemDesc = pSrcMemory->pMemDesc;
1143 
1144     // Note: fbApertureLength is not used by kbusUnmapFbAperture_HAL(), so it's passed as 0
1145     status = kbusUnmapFbAperture_HAL(pGpu, pKernelBus, pMemDesc,
1146                                      bar1Va - gpumgrGetGpuPhysFbAddr(pGpu),
1147                                      0, BUS_MAP_FB_FLAGS_MAP_UNICAST);
1148     if (status != NV_OK)
1149     {
1150         return status;
1151     }
1152 
1153     return NV_OK;
1154 }
1155 
1156 /*!
1157  * Helper function to determine whether the requested GET_BUS_INFO control call needs to be
1158  * served by GSP/host and, if so, to send an RPC to GSP/host. Otherwise it returns directly
1159  * so that the caller can continue execution on the CPU.
1160  *
1161  *  @param[in]       pGpu       OBJGPU pointer
1162  *  @param[in,out]   pBusInfo   Pointer to NV2080_CTRL_BUS_INFO which specifies the index to query
1163  *
1164  *  @returns RPC status
1165  */
1166 NV_STATUS
1167 kbusSendBusInfo_IMPL
1168 (
1169     OBJGPU               *pGpu,
1170     KernelBus            *pKernelBus,
1171     NV2080_CTRL_BUS_INFO *pBusInfo
1172 )
1173 {
1174     NV_STATUS status = NV_OK;
1175     NV2080_CTRL_BUS_GET_INFO_V2_PARAMS busGetInfoParams = {0};
1176 
1177     busGetInfoParams.busInfoList[0] = *pBusInfo;
1178     busGetInfoParams.busInfoListSize = 1;
1179 
1180     NV_RM_RPC_CONTROL(pGpu,
1181                       pGpu->hInternalClient,
1182                       pGpu->hInternalSubdevice,
1183                       NV2080_CTRL_CMD_BUS_GET_INFO_V2,
1184                       &busGetInfoParams,
1185                       sizeof(busGetInfoParams),
1186                       status);
1187 
1188     pBusInfo->data = busGetInfoParams.busInfoList[0].data;
1189     return status;
1190 }
1191 
1192 /*!
1193  * @brief Returns the Nvlink peer ID from pGpu0 to pGpu1
1194  *
1195  * @param[in]   pGpu0          (local GPU)
1196  * @param[in]   pKernelBus0    (local GPU)
1197  * @param[in]   pGpu1          (remote GPU)
1198  * @param[in]   pKernelBus1    (remote GPU)
1199  * @param[out]  nvlinkPeer     NvU32 pointer
1200  *
1201  * return NV_OK on success
1202  */
1203 NV_STATUS
1204 kbusGetNvlinkP2PPeerId_VGPU
1205 (
1206     OBJGPU    *pGpu0,
1207     KernelBus *pKernelBus0,
1208     OBJGPU    *pGpu1,
1209     KernelBus *pKernelBus1,
1210     NvU32     *nvlinkPeer,
1211     NvU32      flags
1212 )
1213 {
1214     *nvlinkPeer = kbusGetPeerId_HAL(pGpu0, pKernelBus0, pGpu1);
1215     if (*nvlinkPeer != BUS_INVALID_PEER)
1216     {
1217         return NV_OK;
1218     }
1219 
1220     *nvlinkPeer = kbusGetUnusedPeerId_HAL(pGpu0, pKernelBus0);
1221 
1222     // If a free peer ID could not be found, return an error
1223     if (*nvlinkPeer == BUS_INVALID_PEER)
1224     {
1225         NV_PRINTF(LEVEL_WARNING,
1226                   "GPU%d: peerID not available for NVLink P2P\n",
1227                   pGpu0->gpuInstance);
1228         return NV_ERR_GENERIC;
1229     }
1230     // Reserve the peer ID for NVLink use
1231     return kbusReserveP2PPeerIds_HAL(pGpu0, pKernelBus0, NVBIT(*nvlinkPeer));
1232 }
1233 
1234 /**
1235  * @brief     Check if the static bar1 is enabled
1236  *
1237  * @param[in] pGpu
1238  * @param[in] pKernelBus
1239  */
1240 NvBool
1241 kbusIsStaticBar1Enabled_IMPL
1242 (
1243     OBJGPU    *pGpu,
1244     KernelBus *pKernelBus
1245 )
1246 {
1247     NvU32 gfid;
1248 
1249     return ((vgpuGetCallingContextGfid(pGpu, &gfid) == NV_OK) &&
1250             pKernelBus->bar1[gfid].bStaticBar1Enabled);
1251 }
1252 
1253 /**
1254  * @brief     Check for any P2P references to remote GPUs
1255  *            that still have a P2P API object alive.
1256  *
1257  * @param[in] pGpu
1258  * @param[in] pKernelBus
1259  */
1260 NvBool
1261 kbusIsGpuP2pAlive_IMPL
1262 (
1263     OBJGPU    *pGpu,
1264     KernelBus *pKernelBus
1265 )
1266 {
1267     return (pKernelBus->totalP2pObjectsAliveRefCount > 0);
1268 }
1269