1 /*
2  * SPDX-FileCopyrightText: Copyright (c) 2021-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
3  * SPDX-License-Identifier: MIT
4  *
5  * Permission is hereby granted, free of charge, to any person obtaining a
6  * copy of this software and associated documentation files (the "Software"),
7  * to deal in the Software without restriction, including without limitation
8  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
9  * and/or sell copies of the Software, and to permit persons to whom the
10  * Software is furnished to do so, subject to the following conditions:
11  *
12  * The above copyright notice and this permission notice shall be included in
13  * all copies or substantial portions of the Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
21  * DEALINGS IN THE SOFTWARE.
22  */
23 
24 #include "core/core.h"
25 #include "gpu/gpu.h"
26 
27 #include <class/cl00fc.h>      // FABRIC_VASPACE_A
28 #include "gpu/bus/kern_bus.h"
29 #include "gpu/bus/p2p_api.h"
30 #include "gpu/bif/kernel_bif.h"
31 #include "gpu/mmu/kern_gmmu.h"
32 #include "gpu/mem_mgr/mem_mgr.h"
33 #include "gpu/mem_sys/kern_mem_sys.h"
34 #include "kernel/gpu/nvlink/kernel_nvlink.h"
35 #include "kernel/gpu/mem_mgr/virt_mem_allocator_common.h"
36 #include "mem_mgr/fabric_vaspace.h"
37 #include "mem_mgr/virt_mem_mgr.h"
38 #include "vgpu/rpc.h"
39 #include "virtualization/hypervisor/hypervisor.h"
40 #include "os/os.h"
41 
42 #include "mem_mgr/mem_multicast_fabric.h"
43 
44 #include "gpu/gpu_fabric_probe.h"
45 #include "published/hopper/gh100/dev_ram.h"
46 #include "published/hopper/gh100/pri_nv_xal_ep.h"
47 #include "published/hopper/gh100/pri_nv_xal_ep_p2p.h"
48 #include "published/hopper/gh100/dev_vm.h"
49 #include "published/hopper/gh100/dev_mmu.h"
50 #include "ctrl/ctrl2080/ctrl2080fla.h" // NV2080_CTRL_CMD_FLA_SETUP_INSTANCE_MEM_BLOCK
51 
52 #include "nvRmReg.h"
53 
// Defines for P2P
55 #define HOPPER_WRITE_MAILBOX_SIZE            ((NvU64)64 * 1024)
56 #define HOPPER_MAX_WRITE_MAILBOX_ADDR(pGpu)                                         \
57     ((HOPPER_WRITE_MAILBOX_SIZE << kbusGetP2PWriteMailboxAddressSize_HAL(pGpu)) - \
58      HOPPER_WRITE_MAILBOX_SIZE)
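
//
// Worked sketch (hypothetical field width; the real value comes from
// kbusGetP2PWriteMailboxAddressSize_HAL(pGpu)): with a 64KB mailbox and an
// assumed 27-bit address field, HOPPER_MAX_WRITE_MAILBOX_ADDR evaluates to
//
//     NvU64 maxAddr = ((NvU64)64 * 1024 << 27) - ((NvU64)64 * 1024);
//
// i.e. the highest 64KB-aligned mailbox address that still fits in the
// NV_XAL_EP_P2P_WMBOX_ADDR_ADDR field.
//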
59 
60 // RM reserved memory region is mapped separately as it is not added to the kernel
61 #define COHERENT_CPU_MAPPING_RM_RESV_REGION   COHERENT_CPU_MAPPING_REGION_1
62 
63 /*!
64  * @brief Gets the P2P write mailbox address size (NV_XAL_EP_P2P_WMBOX_ADDR_ADDR)
65  *
66  * @returns P2P write mailbox address size (NV_XAL_EP_P2P_WMBOX_ADDR_ADDR)
67  */
68 NvU32
69 kbusGetP2PWriteMailboxAddressSize_GH100(OBJGPU *pGpu)
70 {
71     return DRF_SIZE(NV_XAL_EP_P2P_WMBOX_ADDR_ADDR);
72 }
73 
74 /*!
75  * @brief Writes NV_XAL_EP_BAR0_WINDOW_BASE
76  *
77  * @param[in] pGpu
78  * @param[in] pKernelBus
79  * @param[in] base       base address to write
80  *
81  * @returns NV_OK
82  */
83 NV_STATUS
84 kbusWriteBAR0WindowBase_GH100
85 (
86     OBJGPU    *pGpu,
87     KernelBus *pKernelBus,
88     NvU32      base
89 )
90 {
91     GPU_FLD_WR_DRF_NUM(pGpu, _XAL_EP, _BAR0_WINDOW, _BASE, base);
92     return NV_OK;
93 }
94 
95 /*!
96  * @brief Reads NV_XAL_EP_BAR0_WINDOW_BASE
97  *
98  * @param[in] pGpu
99  * @param[in] pKernelBus
100  *
101  * @returns Contents of NV_XAL_EP_BAR0_WINDOW_BASE
102  */
103 NvU32
104 kbusReadBAR0WindowBase_GH100
105 (
106     OBJGPU    *pGpu,
107     KernelBus *pKernelBus
108 )
109 {
110     return GPU_REG_RD_DRF(pGpu, _XAL_EP, _BAR0_WINDOW, _BASE);
111 }
112 
113 /*!
114  * @brief Validates that the given base fits within the width of the window base
115  *
116  * @param[in] pGpu
117  * @param[in] pKernelBus
118  * @param[in] base       base offset to validate
119  *
120  * @returns Whether given base fits within the width of the window base.
121  */
122 NvBool
123 kbusValidateBAR0WindowBase_GH100
124 (
125     OBJGPU    *pGpu,
126     KernelBus *pKernelBus,
127     NvU32      base
128 )
129 {
130     return base <= DRF_MASK(NV_XAL_EP_BAR0_WINDOW_BASE);
131 }
132 
133 NV_STATUS
134 kbusSetBAR0WindowVidOffset_GH100
135 (
136     OBJGPU      *pGpu,
137     KernelBus   *pKernelBus,
138     NvU64        vidOffset
139 )
140 {
141     if (KBUS_BAR0_PRAMIN_DISABLED(pGpu))
142     {
143         NV_ASSERT_FAILED("kbusSetBAR0WindowVidOffset_HAL call in coherent path\n");
144         return NV_ERR_INVALID_STATE;
145     }
146 
    NV_ASSERT((vidOffset & 0xffff) == 0);
148     NV_ASSERT(kbusValidateBAR0WindowBase_HAL(pGpu, pKernelBus, vidOffset >> NV_XAL_EP_BAR0_WINDOW_BASE_SHIFT));
149 
150     //
151     // RM initialises cachedBar0WindowVidOffset with 0. Refresh its value with
152     // current NV_XAL_EP_BAR0_WINDOW_BASE.
153     //
154     if (pKernelBus->cachedBar0WindowVidOffset == 0)
155     {
156         pKernelBus->cachedBar0WindowVidOffset = ((NvU64) kbusReadBAR0WindowBase_HAL(pGpu, pKernelBus))
157             << NV_XAL_EP_BAR0_WINDOW_BASE_SHIFT;
158     }
159 
160     // Update only if the new offset is different from the cached value
161     if (pKernelBus->cachedBar0WindowVidOffset != vidOffset)
162     {
163         NV_PRINTF(LEVEL_INFO,
164                   "mapping BAR0_WINDOW to VID:%x'%08x\n",
165                   NvU64_HI32(vidOffset), NvU64_LO32(vidOffset));
166 
167         // _BAR0_WINDOW_TARGET field is removed. It's always VIDMEM
168         kbusWriteBAR0WindowBase_HAL(pGpu, pKernelBus, NvU64_LO32(vidOffset >> 16));
169 
170         pKernelBus->cachedBar0WindowVidOffset = vidOffset;
171     }
172 
173     return (NV_OK);
174 }
175 
176 NvU64
177 kbusGetBAR0WindowVidOffset_GH100
178 (
179     OBJGPU      *pGpu,
180     KernelBus   *pKernelBus
181 )
182 {
183     NvU64 vidOffset;
184 
185     //
186     // RM initialises cachedBar0WindowVidOffset with 0. Refresh its value with
187     // current NV_XAL_EP_BAR0_WINDOW_BASE.
188     //
189     if (pKernelBus->cachedBar0WindowVidOffset == 0)
190     {
191         pKernelBus->cachedBar0WindowVidOffset = ((NvU64) kbusReadBAR0WindowBase_HAL(pGpu, pKernelBus))
192             << NV_XAL_EP_BAR0_WINDOW_BASE_SHIFT;
193     }
194 
195     vidOffset = pKernelBus->cachedBar0WindowVidOffset;
196 
197     return (vidOffset);
198 }
199 
200 /*!
201  * @brief Tests BAR2 against BAR0.
202  *
203  * @param[in] pGpu
204  * @param[in] pKernelBus
205  * @param[in] pMemDescIn If memDescIn is NULL, a test mem desc is created and map/unmapped.
206  *                       If memDescIn is not NULL and provided, this method assumes that it has
207  *                       already been alloc'ed and mapping/unmapping is handled outside
208  *                       this method.
209  * @param[in] offset     offset of the test memory
210  * @param[in] size       size of the test memory
211  *
212  * @returns NV_OK on success.
213  */
214 NV_STATUS
215 kbusVerifyBar2_GH100
216 (
217     OBJGPU      *pGpu,
218     KernelBus   *pKernelBus,
219     PMEMORY_DESCRIPTOR pMemDescIn,
220     NvU8        *pCpuPtrIn,
221     NvU64        offset,
222     NvU64        size
223 )
224 {
225     MEMORY_DESCRIPTOR memDesc, *pMemDesc = NULL;
226     NvU8             *pOffset          = NULL;
227     NvU32             index            = 0;
228     NvU64             bar0Window       = 0;
229     NvU64             testMemoryOffset = 0;
230     NvU32             testMemorySize   = 0;
231     NV_STATUS         status           = NV_OK;
232     NvU32             testData         = 0;
233     NvU32             temp             = 0;
234     NV_ADDRESS_SPACE  testAddrSpace    = ADDR_FBMEM;
235     NvBool            bIsStandaloneTest;
236     const NvU32       SAMPLEDATA       = 0xabcdabcd;
237     const NvU32       FBSIZETESTED     = 0x10;
238     NvU64             bar0TestAddr     = 0;
239     KernelMemorySystem *pKernelMemorySystem = GPU_GET_KERNEL_MEMORY_SYSTEM(pGpu);
240     NvU32             flagsClean       = 0;
241     NvU64             bar2VirtualAddr  = 0;
242 
243     NV_ASSERT_OR_RETURN(pGpu->getProperty(pGpu, PDB_PROP_GPU_COHERENT_CPU_MAPPING) == NV_FALSE, NV_ERR_INVALID_STATE);
244 
245     //
246     // kbusVerifyBar2 will test BAR0 against sysmem on Tegra; otherwise skip
247     // the test if inst_in_sys is used
248     //
249     if (pGpu->getProperty(pGpu, PDB_PROP_GPU_IS_ALL_INST_IN_SYSMEM) &&
250         !IsTEGRA(pGpu))
251     {
252         return NV_OK;
253     }
254 
255     // In L2 Cache only mode or FB broken, don't verify Bar2
256     if (gpuIsCacheOnlyModeEnabled(pGpu) ||
257         pGpu->getProperty(pGpu, PDB_PROP_GPU_BROKEN_FB) ||
258         kbusIsBar2TestSkipped(pKernelBus))
259     {
260         return NV_OK;
261     }
262 
263     NV_PRINTF(LEVEL_INFO, "\n");
264 
265     flagsClean = NV2080_CTRL_INTERNAL_MEMSYS_L2_INVALIDATE_EVICT_FLAGS_ALL |
266                  NV2080_CTRL_INTERNAL_MEMSYS_L2_INVALIDATE_EVICT_FLAGS_CLEAN;
267     if (kmemsysIsL2CleanFbPull(pKernelMemorySystem))
268     {
269         flagsClean |= NV2080_CTRL_INTERNAL_MEMSYS_L2_INVALIDATE_EVICT_FLAGS_WAIT_FB_PULL;
270     }
271 
272     if (pMemDescIn && pCpuPtrIn)
273     {
274         if ((size + offset) > pMemDescIn->Size)
275         {
276             NV_PRINTF(LEVEL_ERROR,
277                       "input offset 0x%llx size 0x%llx exceeds surface size 0x%llx\n",
278                       offset, size, pMemDescIn->Size);
279             DBG_BREAKPOINT();
280             return NV_ERR_INVALID_ARGUMENT;
281         }
282         bIsStandaloneTest = NV_FALSE;
283         pOffset = pCpuPtrIn;
284         pMemDesc = pMemDescIn;
285     }
286     else
287     {
288         offset = 0;
289         size = FBSIZETESTED;
290         // Allocate some memory to test virtual BAR2 with
291         if (pGpu->getProperty(pGpu, PDB_PROP_GPU_IS_ALL_INST_IN_SYSMEM))
292         {
293             memdescCreateExisting(&memDesc, pGpu, size, ADDR_SYSMEM, pGpu->instCacheOverride, MEMDESC_FLAGS_NONE);
294         }
295         else
296         {
297             memdescCreateExisting(&memDesc, pGpu, size, ADDR_FBMEM, NV_MEMORY_UNCACHED, MEMDESC_FLAGS_NONE);
298         }
299         status = memdescAlloc(&memDesc);
300         if (status != NV_OK)
301         {
302             NV_PRINTF(LEVEL_ERROR,
303                       "Could not allocate vidmem to test bar2 with\n");
304             DBG_BREAKPOINT();
305             return NV_ERR_GENERIC;
306         }
307 
308         bIsStandaloneTest = NV_TRUE;
309         pOffset = kbusMapRmAperture_HAL(pGpu, &memDesc);
310         if (pOffset == NULL)
311         {
312             status = NV_ERR_INSUFFICIENT_RESOURCES;
313             goto kbusVerifyBar2_failed;
314         }
315         pMemDesc = &memDesc;
316     }
317     testMemoryOffset = memdescGetPhysAddr(pMemDesc, AT_GPU, 0) + offset;
318     testMemorySize   = NvU64_LO32(size);
319     testAddrSpace    = kgmmuGetHwPteApertureFromMemdesc(GPU_GET_KERNEL_GMMU(pGpu), pMemDesc);
320 
321     if (testAddrSpace != NV_MMU_PTE_APERTURE_VIDEO_MEMORY)
322     {
323         NV_PRINTF(LEVEL_ERROR,
324             "Test is not supported. NV_XAL_EP_BAR0_WINDOW only supports vidmem\n");
325         DBG_BREAKPOINT();
326         status = NV_ERR_NOT_SUPPORTED;
327         goto kbusVerifyBar2_failed;
328     }
329 
330     // ==========================================================
331     // Does the BAR0 window work?
332 
333     NV_PRINTF((IS_EMULATION(pGpu)) ? LEVEL_ERROR : LEVEL_INFO,
334         "Testing BAR0 window...\n");
335 
336     bar0Window = kbusGetBAR0WindowVidOffset_HAL(pGpu, pKernelBus);
337     bar0TestAddr = memdescGetPhysAddr(pMemDesc, AT_GPU, 0);
338 
339     kbusWriteBAR0WindowBase_HAL(pGpu, pKernelBus, NvU64_LO32(bar0TestAddr >> 16));
340 
341     testData = GPU_REG_RD32(pGpu, DRF_BASE(NV_PRAMIN) + NvU64_LO32(bar0TestAddr & 0xffff));
342 
343     GPU_REG_WR32(pGpu, DRF_BASE(NV_PRAMIN) + NvU64_LO32(bar0TestAddr & 0xffff), SAMPLEDATA);
344 
345     if (GPU_REG_RD32(pGpu, DRF_BASE(NV_PRAMIN) + NvU64_LO32(bar0TestAddr & 0xffff)) != SAMPLEDATA)
346     {
347         //
348         // Ideally, this should hit the L2 cache and even if memory is bad,
349         // unless something in the path up to L2 is messed up, we should not
350         // get here.
351         //
352         NV_PRINTF(LEVEL_ERROR,
353             "Pre-L2 invalidate evict: Address 0x%llx programmed through the bar0 "
354             "window with value 0x%x did not read back the last write.\n",
355             bar0TestAddr, SAMPLEDATA);
356         DBG_BREAKPOINT_REASON(NV_ERR_MEMORY_ERROR);
357         status = NV_ERR_MEMORY_ERROR;
358         goto kbusVerifyBar2_failed;
359     }
360 
361     //
362     // Evict L2 to ensure that the next read doesn't hit L2 and mistakenly
363     // assume that the BAR0 window to vidmem works
364     //
365     status = kmemsysSendL2InvalidateEvict(pGpu, pKernelMemorySystem, flagsClean);
366     if (NV_OK != status)
367     {
368         NV_PRINTF(LEVEL_ERROR, "L2 evict failed\n");
369         goto kbusVerifyBar2_failed;
370     }
371 
372     if (GPU_REG_RD32(pGpu, DRF_BASE(NV_PRAMIN) + NvU64_LO32(bar0TestAddr & 0xffff)) != SAMPLEDATA)
373     {
374         NV_PRINTF(LEVEL_ERROR,
375             "Post-L2 invalidate evict: Address 0x%llx programmed through the bar0 "
376             "window with value 0x%x did not read back the last write\n",
377             bar0TestAddr, SAMPLEDATA);
378         if (IS_EMULATION(pGpu))
379         {
380             NV_PRINTF(LEVEL_ERROR,
381                       "Setup a trigger on write<Bar0+0x1700, 0x40> with a 3 quarters post "
382                       "trigger capture\n");
383             NV_PRINTF(LEVEL_ERROR,
384                       "and search for the last bar0 window write not returning the same value"
385                       " in a subsequent read\n");
386         }
387         DBG_BREAKPOINT_REASON(NV_ERR_MEMORY_ERROR);
388         status = NV_ERR_MEMORY_ERROR;
389         goto kbusVerifyBar2_failed;
390     }
391 
    NV_PRINTF((IS_EMULATION(pGpu)) ? LEVEL_ERROR : LEVEL_INFO,
        "BAR0 window test passed\n");
394     GPU_REG_WR32(pGpu, DRF_BASE(NV_PRAMIN) + NvU64_LO32(bar0TestAddr & 0xffff), testData);
395 
396     kbusWriteBAR0WindowBase_HAL(pGpu, pKernelBus, NvU64_LO32(bar0Window >> 16));
397 
398     // ==========================================================
399     // Does MMU's translation logic work?
400 
401     bar2VirtualAddr = (NvU64)(pOffset - pKernelBus->virtualBar2[GPU_GFID_PF].pCpuMapping);
402     NV_PRINTF(LEVEL_INFO,
403               "MMUTest Writing test data through virtual BAR2 starting at bar2 offset"
404               " (%p - %p) = %p and of size 0x%x\n", (NvU8 *)pOffset,
405               (NvU8 *)pKernelBus->virtualBar2[GPU_GFID_PF].pCpuMapping,
406               (NvU8 *)bar2VirtualAddr, testMemorySize);
407 
    NV_PRINTF((IS_EMULATION(pGpu) == NV_TRUE) ? LEVEL_ERROR : LEVEL_INFO,
              "MMUTest The physical address being targeted is 0x%llx\n",
              testMemoryOffset);
411 
412     for(index = 0; index < testMemorySize; index += 4)
413     {
414         MEM_WR32(pOffset + index, SAMPLEDATA);
415     }
416 
    // Flush the bar2 writes
    // A uflush should not be required since a bar0 window read follows this
419     osFlushCpuWriteCombineBuffer();
420 
421     status = kmemsysSendL2InvalidateEvict(pGpu, pKernelMemorySystem, flagsClean);
422     if (NV_OK != status)
423     {
424         NV_PRINTF(LEVEL_ERROR, "L2 evict failed\n");
425         goto kbusVerifyBar2_failed;
426     }
427 
428     //
429     // Read back using the same BAR2 VA. This can make sure the writes have
430     // gotten to memory after MMU translation.
431     //
432     // What it will catch:
433     // - FB dead (also caught by BAR0 test above)
434     // - MMU translation fails on BAR2
435     // - MMU translation works but other parts of memsys having issues
436     //
437     // It will not verify whether the mapping points to the right physical
438     // memory. The BAR0 readback test will do that next.
439     //
440     for(index = 0; index < testMemorySize; index += 4)
441     {
442         NvU32 bar2ReadbackData = 0;
443         bar2ReadbackData = MEM_RD32(pOffset + index);
444 
445         if (bar2ReadbackData != SAMPLEDATA)
446         {
447             NV_PRINTF(LEVEL_ERROR,
448                       "MMUTest BAR2 readback VA = 0x%llx returned garbage 0x%x\n",
449                       (bar2VirtualAddr + index), bar2ReadbackData);
450 
451             DBG_BREAKPOINT_REASON(NV_ERR_MEMORY_ERROR);
452             status = NV_ERR_MEMORY_ERROR;
453             goto kbusVerifyBar2_failed;
454         }
455     }
456 
457     // Readback through the bar0 window
458     bar0Window = kbusGetBAR0WindowVidOffset_HAL(pGpu, pKernelBus);
459 
460     kbusWriteBAR0WindowBase_HAL(pGpu, pKernelBus, NvU64_LO32(testMemoryOffset >> 16));
461 
462     NV_PRINTF(LEVEL_INFO,
463               "bar0Window = 0x%llx, testMemoryOffset = 0x%llx, testAddrSpace = %d, "
464               "_XAL_EP_BAR0_WINDOW = 0x%08x\n", bar0Window, testMemoryOffset,
465               testAddrSpace, GPU_REG_RD32(pGpu, NV_XAL_EP_BAR0_WINDOW));
466 
467     temp = (DRF_BASE(NV_PRAMIN) + (NvU32)(testMemoryOffset & 0xffff));
468     for(index = 0; index < testMemorySize; index += 4)
469     {
470         NvU32 bar0WindowData = GPU_REG_RD32(pGpu, temp + index);
471         if (bar0WindowData != SAMPLEDATA)
472         {
473             NV_PRINTF(LEVEL_ERROR,
474                       "MMUTest BAR0 window offset 0x%x returned garbage 0x%x\n",
475                       temp + index, bar0WindowData);
476             if (IS_EMULATION(pGpu) == NV_TRUE)
477             {
478                 NV_PRINTF(LEVEL_ERROR,
479                           "Setup a trigger for write<bar0 + 0x1700, 0x40> and in the waves search"
480                           " the last few bar2 virtual writes mixed with bar0 window reads\n");
481             }
482             DBG_BREAKPOINT_REASON(NV_ERR_MEMORY_ERROR);
483             status = NV_ERR_MEMORY_ERROR;
484             goto kbusVerifyBar2_failed;
485         }
486         // Write through the BAR0 window to be readback through BAR2 later
487         GPU_REG_WR32(pGpu, temp + index, SAMPLEDATA + 0x10);
488     }
489 
490     kbusWriteBAR0WindowBase_HAL(pGpu, pKernelBus, NvU64_LO32(bar0Window >> 16));
491 
492     status = kbusFlush_HAL(pGpu, pKernelBus, BUS_FLUSH_VIDEO_MEMORY | BUS_FLUSH_USE_PCIE_READ);
493 
494     // Bail now if we have encountered any error
495     if (status != NV_OK)
496     {
497         goto kbusVerifyBar2_failed;
498     }
499 
500     status = kmemsysSendL2InvalidateEvict(pGpu, pKernelMemorySystem, flagsClean);
501     if (NV_OK != status)
502     {
503         goto kbusVerifyBar2_failed;
504     }
505 
506     // Verify BAR2 virtual reads
507     for(index = 0; index < testMemorySize; index +=4)
508     {
509         temp = MEM_RD32(pOffset + index);
510         if (temp != (SAMPLEDATA + 0x10))
511         {
512             NV_PRINTF(LEVEL_ERROR,
513                       "MMUTest BAR2 Read of virtual addr 0x%x returned garbage 0x%x\n",
514                       (NvU32)(pOffset - pKernelBus->virtualBar2[GPU_GFID_PF].pCpuMapping) + index,
515                       temp);
516             DBG_BREAKPOINT_REASON(NV_ERR_MEMORY_ERROR);
517             status = NV_ERR_MEMORY_ERROR;
518             goto kbusVerifyBar2_failed;
519         }
520     }
521 
522 kbusVerifyBar2_failed:
523     if (bIsStandaloneTest)
524     {
525         if (pOffset != NULL)
526         {
527             kbusUnmapRmAperture_HAL(pGpu, pMemDesc, &pOffset, NV_TRUE);
528         }
529         memdescFree(pMemDesc);
530         memdescDestroy(pMemDesc);
531     }
532 
533     if (status == NV_OK)
534     {
535         NV_PRINTF(IS_EMULATION(pGpu) ? LEVEL_ERROR : LEVEL_INFO,
536                   "BAR2 virtual test passes\n");
537     }
538 
539     return status;
540 }
541 
542 /*!
543  * @brief Tear down BAR2 CPU aperture
544  *
545  * 1. Release BAR2 GPU vaspace mappings.
546  * 2. Release BAR2 CPU mapping.
547  *
548  * @param[in] pGpu
549  * @param[in] pKernelBus
550  * @param[in] gfid
551  *
552  * @returns NV_OK on success.
553  */
554 NV_STATUS
555 kbusTeardownBar2CpuAperture_GH100
556 (
557     OBJGPU    *pGpu,
558     KernelBus *pKernelBus,
559     NvU32      gfid
560 )
561 {
562     // Nothing to be done in guest in the paravirtualization case.
563     if (IS_VIRTUAL_WITHOUT_SRIOV(pGpu) || IS_GFID_VF(gfid))
564     {
565         return NV_OK;
566     }
567 
568     if (KBUS_BAR2_TUNNELLED(pKernelBus))
569     {
570         // KBUS-TODO -- dead code path?
571         //   KBUS_BAR2_TUNNELLED is never true on HOPPER+
572 
573         // Unmap bar2 space
574         if (pKernelBus->virtualBar2[gfid].pCpuMapping)
575         {
576             // Remove the memory access filter
577             osMemRemoveFilter((NvU64)((NvUPtr)(pKernelBus->virtualBar2[gfid].pCpuMapping)));
578             portMemFree(pKernelBus->virtualBar2[gfid].pCpuMapping);
579             pKernelBus->virtualBar2[gfid].pCpuMapping = NULL;
580         }
581     }
582     else
583     {
584         if (pKernelBus->virtualBar2[gfid].pPageLevels != NULL &&
585             pKernelBus->virtualBar2[gfid].pPageLevelsMemDesc != NULL)
586         {
587             memmgrMemDescEndTransfer(GPU_GET_MEMORY_MANAGER(pGpu),
588                          pKernelBus->virtualBar2[gfid].pPageLevelsMemDesc,
589                          TRANSFER_FLAGS_NONE);
590             pKernelBus->virtualBar2[gfid].pPageLevels = NULL;
591         }
592 
593         kbusDestroyCpuPointerForBusFlush_HAL(pGpu, pKernelBus);
594 
595         kbusFlushVirtualBar2_HAL(pGpu, pKernelBus, NV_FALSE, gfid);
596 
597         if (pKernelBus->virtualBar2[gfid].pCpuMapping)
598         {
599             osUnmapPciMemoryKernelOld(pGpu, (void*)pKernelBus->virtualBar2[gfid].pCpuMapping);
600             // Mark the BAR as un-initialized so that a later call
601             // to initbar2 can succeed.
602             pKernelBus->virtualBar2[gfid].pCpuMapping = NULL;
603         }
604 
605         //
606         // make sure that the bar2 mode is physical so that the vesa extended
607         // linear framebuffer works after driver unload.  Clear other bits to force
608         // vid.
609         //
610         // if BROKEN_FB, merely rewriting this to 0 (as it already was) causes
611         // FBACKTIMEOUT -- don't do it (Bug 594539)
612         //
613         if (!pGpu->getProperty(pGpu, PDB_PROP_GPU_BROKEN_FB))
614         {
615             GPU_VREG_FLD_WR_DRF_DEF(pGpu, _VIRTUAL_FUNCTION_PRIV_FUNC, _BAR2_BLOCK_LOW_ADDR, _MODE, _PHYSICAL);
616             // bug 1738008: temporary fix to unblock -inst_in_sys argument
617             // we tried to correct bar2 unbind sequence but didn't fix the real issue
618             // will fix this soon 4/8/16
619             GPU_VREG_RD32(pGpu, NV_VIRTUAL_FUNCTION_PRIV_FUNC_BAR2_BLOCK_LOW_ADDR);
620         }
621     }
622 
623     return NV_OK;
624 }
625 
626 //
627 // Returns the P2P mailbox attributes such as:
628 // - pMailboxAreaSize: total size
629 // - pMailboxAlignmentBits: aligment in number of bits
630 // - pMailboxMaxOffset: max supported offset
631 //
632 void
633 kbusGetP2PMailboxAttributes_GH100
634 (
635     OBJGPU    *pGpu,
636     KernelBus *pKernelBus,
637     NvU32*     pMailboxAreaSize,
638     NvU32*     pMailboxAlignmentSize,
639     NvU32*     pMailboxBar1MaxOffset64KB
640 )
641 {
642     KernelBif *pKernelBif = GPU_GET_KERNEL_BIF(pGpu);
643 
644     // Initialize null values by default
645     if (pMailboxAreaSize != NULL)
646     {
647         *pMailboxAreaSize = 0;
648     }
649     if (pMailboxAlignmentSize != NULL)
650     {
651         *pMailboxAlignmentSize = 0;
652     }
653     if (pMailboxBar1MaxOffset64KB != NULL)
654     {
655         *pMailboxBar1MaxOffset64KB = 0;
656     }
657 
658     if (pKernelBif->getProperty(pKernelBif, PDB_PROP_KBIF_P2P_READS_DISABLED) &&
659         pKernelBif->getProperty(pKernelBif, PDB_PROP_KBIF_P2P_WRITES_DISABLED))
660     {
661         // Return null values
662         return;
663     }
664 
665     // Retrieve attributes
666     if (pMailboxAreaSize != NULL)
667     {
668         *pMailboxAreaSize = HOPPER_WRITE_MAILBOX_SIZE * P2P_MAX_NUM_PEERS;
669     }
670 
671     if (pMailboxAlignmentSize != NULL)
672     {
673         // Write mailbox data window needs to be 64KB aligned.
674         *pMailboxAlignmentSize = 0x10000;
675     }
676 
677     if (pMailboxBar1MaxOffset64KB != NULL)
678     {
679         *pMailboxBar1MaxOffset64KB =
680             NvU64_LO32(
681                 (HOPPER_MAX_WRITE_MAILBOX_ADDR(pGpu) + HOPPER_WRITE_MAILBOX_SIZE) >> 16
682             );
683     }
684 
685     return;
686 }
687 
688 RmPhysAddr
689 kbusSetupP2PDomainAccess_GH100
690 (
691     OBJGPU    *pGpu0,
692     KernelBus *pKernelBus0,
693     OBJGPU    *pGpu1,
694     PMEMORY_DESCRIPTOR *ppP2PDomMemDesc
695 )
696 {
697     return kbusSetupPeerBarAccess(pGpu0, pGpu1,
698                 pGpu0->busInfo.gpuPhysAddr + DRF_BASE(NV_XAL_EP_P2P),
699                 DRF_SIZE(NV_XAL_EP_P2P), ppP2PDomMemDesc);
700 }
701 
702 NV_STATUS
703 kbusFlushPcieForBar0Doorbell_GH100
704 (
705     OBJGPU      *pGpu,
706     KernelBus   *pKernelBus
707 )
708 {
709     return kbusFlush_HAL(pGpu, pKernelBus, BUS_FLUSH_VIDEO_MEMORY | BUS_FLUSH_USE_PCIE_READ);
710 }
711 
712 /*!
713  * @brief Create a P2P mapping to a given peer GPU
714  *
715  * @param[in]   pGpu0          (local GPU)
716  * @param[in]   pKernelBus0    (local GPU)
717  * @param[in]   pGpu1          (remote GPU)
718  * @param[in]   pKernelBus1    (remote GPU)
719  * @param[out]  peer0  Peer ID (local to remote)
720  * @param[out]  peer1  Peer ID (remote to local)
 * @param[in]   attributes     Special attributes for the mapping
722  *
723  * return NV_OK on success
724  */
725 NV_STATUS
726 kbusCreateP2PMapping_GH100
727 (
728     OBJGPU    *pGpu0,
729     KernelBus *pKernelBus0,
730     OBJGPU    *pGpu1,
731     KernelBus *pKernelBus1,
732     NvU32     *peer0,
733     NvU32     *peer1,
734     NvU32      attributes
735 )
736 {
737     if (FLD_TEST_DRF(_P2PAPI, _ATTRIBUTES, _CONNECTION_TYPE, _C2C, attributes))
738     {
739         return kbusCreateP2PMappingForC2C_HAL(pGpu0, pKernelBus0, pGpu1, pKernelBus1, peer0, peer1, attributes);
740     }
741 
742     if (FLD_TEST_DRF(_P2PAPI, _ATTRIBUTES, _CONNECTION_TYPE, _NVLINK, attributes) ||
743         FLD_TEST_DRF(_P2PAPI, _ATTRIBUTES, _CONNECTION_TYPE, _NVLINK_INDIRECT, attributes))
744     {
745         return kbusCreateP2PMappingForNvlink_HAL(pGpu0, pKernelBus0, pGpu1, pKernelBus1, peer0, peer1, attributes);
746     }
747 
748     if (FLD_TEST_DRF(_P2PAPI, _ATTRIBUTES, _CONNECTION_TYPE, _PCIE_BAR1, attributes))
749     {
750         return kbusCreateP2PMappingForBar1P2P_HAL(pGpu0, pKernelBus0, pGpu1, pKernelBus1, attributes);
751     }
752 
753     if (FLD_TEST_DRF(_P2PAPI, _ATTRIBUTES, _CONNECTION_TYPE, _PCIE, attributes))
754     {
755         return kbusCreateP2PMappingForMailbox_HAL(pGpu0, pKernelBus0, pGpu1, pKernelBus1, peer0, peer1, attributes);
756     }
757 
758     NV_PRINTF(LEVEL_ERROR, "P2P type %d is not supported\n", DRF_VAL(_P2PAPI, _ATTRIBUTES, _CONNECTION_TYPE, attributes));
759 
760     return NV_ERR_NOT_SUPPORTED;
761 }
762 
763 /*!
764  * @brief Remove the P2P mapping to a given peer GPU
765  *
766  * @param[in]   pGpu0          (local GPU)
767  * @param[in]   pKernelBus0    (local GPU)
768  * @param[in]   pGpu1          (remote GPU)
769  * @param[in]   pKernelBus1    (remote GPU)
 * @param[in]   peer0  Peer ID (local to remote)
 * @param[in]   peer1  Peer ID (remote to local)
 * @param[in]   attributes     Special attributes for the mapping
773  *
774  * return NV_OK on success
775  */
776 NV_STATUS
777 kbusRemoveP2PMapping_GH100
778 (
779     OBJGPU    *pGpu0,
780     KernelBus *pKernelBus0,
781     OBJGPU    *pGpu1,
782     KernelBus *pKernelBus1,
783     NvU32      peer0,
784     NvU32      peer1,
785     NvU32      attributes
786 )
787 {
788     if (FLD_TEST_DRF(_P2PAPI, _ATTRIBUTES, _CONNECTION_TYPE, _C2C, attributes))
789     {
790         return kbusRemoveP2PMappingForC2C_HAL(pGpu0, pKernelBus0, pGpu1, pKernelBus1, peer0, peer1, attributes);
791     }
792 
793     if (FLD_TEST_DRF(_P2PAPI, _ATTRIBUTES, _CONNECTION_TYPE, _NVLINK, attributes) ||
794         FLD_TEST_DRF(_P2PAPI, _ATTRIBUTES, _CONNECTION_TYPE, _NVLINK_INDIRECT, attributes))
795     {
796         return kbusRemoveP2PMappingForNvlink_HAL(pGpu0, pKernelBus0, pGpu1, pKernelBus1, peer0, peer1, attributes);
797     }
798 
799     if (FLD_TEST_DRF(_P2PAPI, _ATTRIBUTES, _CONNECTION_TYPE, _PCIE_BAR1, attributes))
800     {
801         return kbusRemoveP2PMappingForBar1P2P_HAL(pGpu0, pKernelBus0, pGpu1, pKernelBus1, attributes);
802     }
803 
804     if (FLD_TEST_DRF(_P2PAPI, _ATTRIBUTES, _CONNECTION_TYPE, _PCIE, attributes))
805     {
806         return kbusRemoveP2PMappingForMailbox_HAL(pGpu0, pKernelBus0, pGpu1, pKernelBus1, peer0, peer1, attributes);
807     }
808 
809     NV_PRINTF(LEVEL_ERROR, "P2P type %d is not supported\n", DRF_VAL(_P2PAPI, _ATTRIBUTES, _CONNECTION_TYPE, attributes));
810 
811     return NV_ERR_NOT_SUPPORTED;
812 }
813 
814 /*!
815  * @brief Returns the peer number from pGpu (Local) to pGpuPeer
816  *
817  * @param[in] pGpu          Local
818  * @param[in] pKernelBus    Local
819  * @param[in] pGpuPeer      Remote
820  *
821  * @returns NvU32 bus peer number
822  */
823 NvU32
824 kbusGetPeerId_GH100
825 (
826     OBJGPU    *pGpu,
827     KernelBus *pKernelBus,
828     OBJGPU    *pGpuPeer
829 )
830 {
831     NvU32   gpuPeerInst = gpuGetInstance(pGpuPeer);
832     NvU32   peerId      = pKernelBus->c2cPeerInfo.busC2CPeerNumberMask[gpuPeerInst];
833 
834     // Fall back to Nvlink
835     if (peerId == 0)
836     {
837         NV_PRINTF(LEVEL_INFO,
838                   "C2C P2P not set up between GPU%u and GPU%u, checking for Nvlink...\n",
839                   gpuGetInstance(pGpu), gpuPeerInst);
840         return kbusGetPeerId_GP100(pGpu, pKernelBus, pGpuPeer);
841     }
842 
843     LOWESTBITIDX_32(peerId);
844     return peerId;
845 }
846 
847 /**
848  * @brief      Returns if the given peerId is a valid for a given GPU
849  *
850  * @param[in]  pGpu
851  * @param[in]  pKernelBus
852  * @param[in]  peerId       The peer identifier
853  *
854  * @return     return NV_OK is valid
855  */
856 NV_STATUS
857 kbusIsPeerIdValid_GH100
858 (
859     OBJGPU    *pGpu,
860     KernelBus *pKernelBus,
861     NvU32      peerId
862 )
863 {
864     NV_ASSERT_OR_RETURN(peerId < P2P_MAX_NUM_PEERS, NV_ERR_INVALID_INDEX);
865     if (pKernelBus->c2cPeerInfo.busC2CPeerNumberMask[gpuGetInstance(pGpu)] & NVBIT(peerId))
866         return NV_OK;
867     return kbusIsPeerIdValid_GP100(pGpu, pKernelBus, peerId);
868 }
869 
870 /*!
871  * @brief Create C2C mappings for FB memory
872  * When this is called, we should not have any BAR1/BAR2 mappings
873  *
874  * @param[in] pGpu                  OBJGPU pointer
875  * @param[in] pKernelBus            Kernel bus pointer
876  * @param[in] numaOnlineMemorySize  Size of FB memory to online in
877  *                                  kernel as a NUMA node
878  * @param[in] bFlush                Flush CPU cache or not
879  *
880  * @return 'NV_OK' if successful, an RM error code otherwise.
881  */
882 NV_STATUS
883 kbusCreateCoherentCpuMapping_GH100
884 (
885     OBJGPU    *pGpu,
886     KernelBus *pKernelBus,
887     NvU64     numaOnlineMemorySize,
888     NvBool    bFlush
889 )
890 {
891     MemoryManager      *pMemoryManager             = GPU_GET_MEMORY_MANAGER(pGpu);
892     KernelMemorySystem *pKernelMemorySystem        = GPU_GET_KERNEL_MEMORY_SYSTEM(pGpu);
893     NV_STATUS           status                     = NV_OK;
894     KernelBif          *pKernelBif                 = GPU_GET_KERNEL_BIF(pGpu);
895     NvP64               pCpuMapping                = NvP64_NULL;
896     NvU64               fbSize;
897     NvU64               busAddrStart;
898     NvU64               busAddrSize;
899     NvU32               i;
900     NvU64               memblockSize;
901     NvU32               cachingMode[COHERENT_CPU_MAPPING_TOTAL_REGIONS];
902 
903     NV_ASSERT_OR_RETURN(gpuIsSelfHosted(pGpu) && pKernelBif->getProperty(pKernelBif, PDB_PROP_KBIF_IS_C2C_LINK_UP), NV_ERR_INVALID_STATE);
904 
905     // Assert no BAR1/BAR2 mappings
906     NV_ASSERT_OR_RETURN(kbusGetBar1VASpace_HAL(pGpu, pKernelBus) == NULL,
907                         NV_ERR_INVALID_STATE);
908     NV_ASSERT_OR_RETURN(listCount(&pKernelBus->virtualBar2[GPU_GFID_PF].usedMapList) == 0,
909                         NV_ERR_INVALID_STATE);
910 
911     fbSize = (pMemoryManager->Ram.fbTotalMemSizeMb << 20);
912 
913     NV_ASSERT_OK_OR_RETURN(osNumaMemblockSize(&memblockSize));
914 
915     pKernelBus->coherentCpuMapping.nrMapping = 2;
916 
917     pKernelBus->coherentCpuMapping.physAddr[COHERENT_CPU_MAPPING_REGION_0] = pMemoryManager->Ram.fbRegion[0].base;
918     pKernelBus->coherentCpuMapping.size[COHERENT_CPU_MAPPING_REGION_0] = numaOnlineMemorySize;
919     cachingMode[COHERENT_CPU_MAPPING_REGION_0] = NV_MEMORY_CACHED;
920 
921     pKernelBus->coherentCpuMapping.physAddr[COHERENT_CPU_MAPPING_RM_RESV_REGION] =
922         pKernelBus->coherentCpuMapping.physAddr[COHERENT_CPU_MAPPING_REGION_0] +
923         pKernelBus->coherentCpuMapping.size[COHERENT_CPU_MAPPING_REGION_0];
924     pKernelBus->coherentCpuMapping.size[COHERENT_CPU_MAPPING_RM_RESV_REGION] =
925         fbSize - pKernelBus->coherentCpuMapping.size[COHERENT_CPU_MAPPING_REGION_0];
926 
927     if (pKernelMemorySystem->bBug3656943WAR)
928     {
929         //
930         // RM reserved region should be mapped as Normal Non-cacheable as a SW WAR
931         // for the bug 3656943. NV_MEMORY_WRITECOMBINED translates to linux
932         // kernel ioremap_wc which actually uses the normal non-cacheable type
933         // PROT_NORMAL_NC
934         //
935         cachingMode[COHERENT_CPU_MAPPING_RM_RESV_REGION] = NV_MEMORY_WRITECOMBINED;
936     }
937     else
938     {
939         cachingMode[COHERENT_CPU_MAPPING_RM_RESV_REGION] = NV_MEMORY_CACHED;
940     }
941 
942     for (i = COHERENT_CPU_MAPPING_REGION_0; i < pKernelBus->coherentCpuMapping.nrMapping; ++i)
943     {
944         busAddrStart = pKernelMemorySystem->coherentCpuFbBase + pKernelBus->coherentCpuMapping.physAddr[i];
945         busAddrSize  = pKernelBus->coherentCpuMapping.size[i];
946 
        // In SHH, the CPU uses the coherent C2C link to access GPU memory, so the mapping can be cached.
948         status = osMapPciMemoryKernel64(pGpu,
949                                         (NvUPtr)busAddrStart,
950                                         (NvU64)busAddrSize,
951                                         NV_PROTECT_READ_WRITE,
952                                         &(pCpuMapping),
953                                         cachingMode[i]);
954 
955         NV_ASSERT_OR_RETURN(status == NV_OK, NV_ERR_GENERIC);
956 
957         pKernelBus->coherentCpuMapping.pCpuMapping[i] = (NvP64)pCpuMapping;
958         pKernelBus->coherentCpuMapping.size[i] = busAddrSize;
959 
960         NV_ASSERT_OR_RETURN(bFlush == NV_FALSE, NV_ERR_NOT_SUPPORTED);
961 
962         // Counts the number of outstanding mappings in FB.
963         pKernelBus->coherentCpuMapping.refcnt[i] = 0;
964     }
965 
966     pKernelBus->coherentCpuMapping.bCoherentCpuMapping  = NV_TRUE;
967 
968     NV_PRINTF(LEVEL_INFO, "Enabling CPU->C2C->FBMEM path\n");
969 
970     return status;
971 }
972 
973 /*!
974  * @brief Sanity test coherent link between CPU and GPU.
975  *
976  * @param[in] pGpu       OBJGPU pointer
977  * @param[in] pKernelBus Kernel bus pointer
978  *
979  * @returns NV_OK on success.
980  */
981 NV_STATUS
982 kbusVerifyCoherentLink_GH100
983 (
984     OBJGPU    *pGpu,
985     KernelBus *pKernelBus
986 )
987 {
988     NvU64             size             = BUS_COHERENT_LINK_TEST_BUFFER_SIZE;
989     MEMORY_DESCRIPTOR *pMemDesc        = NULL;
990     NvU8              *pOffset         = NULL;
991     const NvU32       sampleData       = 0x12345678;
992     NV_STATUS         status           = NV_OK;
993     KernelMemorySystem *pKernelMemorySystem = GPU_GET_KERNEL_MEMORY_SYSTEM(pGpu);
994     NvU32             index            = 0;
995     NvU32             flagsClean       = 0;
996     MEMORY_DESCRIPTOR memDesc;
997 
998     // Skip the test if 0FB configuration is used.
999     if (pGpu->getProperty(pGpu, PDB_PROP_GPU_IS_ALL_INST_IN_SYSMEM))
1000     {
1001         NV_PRINTF(IS_EMULATION(pGpu) ? LEVEL_ERROR : LEVEL_INFO,
1002                   "Skipping Coherent link test\n");
1003         return NV_OK;
1004     }
1005 
1006     NV_ASSERT_OR_RETURN(pKernelBus->coherentLinkTestBufferBase != 0, NV_ERR_INVALID_STATE);
1007     memdescCreateExisting(&memDesc, pGpu, size, ADDR_FBMEM, NV_MEMORY_CACHED, MEMDESC_FLAGS_NONE);
1008     memdescDescribe(&memDesc, ADDR_FBMEM, pKernelBus->coherentLinkTestBufferBase, size);
1009 
1010     pOffset = kbusMapRmAperture_HAL(pGpu, &memDesc);
1011     if (pOffset == NULL)
1012     {
1013         status = NV_ERR_INSUFFICIENT_RESOURCES;
1014         goto busVerifyCoherentLink_failed;
1015     }
1016     pMemDesc = &memDesc;
1017 
1018     for(index = 0; index < size; index += 4)
1019     {
1020         MEM_WR32(pOffset + index, sampleData);
1021     }
1022 
1023     // Ensure the writes are flushed out of the CPU caches.
1024     osFlushGpuCoherentCpuCacheRange(pGpu->pOsGpuInfo, (NvUPtr)pOffset, size);
1025 
1026     flagsClean = NV2080_CTRL_INTERNAL_MEMSYS_L2_INVALIDATE_EVICT_FLAGS_ALL |
1027                  NV2080_CTRL_INTERNAL_MEMSYS_L2_INVALIDATE_EVICT_FLAGS_CLEAN;
1028     if (kmemsysIsL2CleanFbPull(pKernelMemorySystem))
1029     {
1030         flagsClean |= NV2080_CTRL_INTERNAL_MEMSYS_L2_INVALIDATE_EVICT_FLAGS_WAIT_FB_PULL;
1031     }
1032     status = kmemsysSendL2InvalidateEvict(pGpu, pKernelMemorySystem, flagsClean);
1033     if (NV_OK != status)
1034     {
1035         NV_PRINTF(LEVEL_ERROR, "L2 evict failed\n");
1036         goto busVerifyCoherentLink_failed;
1037     }
1038 
1039     for(index = 0; index < size; index += 4)
1040     {
1041         NvU32 readbackData = MEM_RD32(pOffset + index);
1042 
1043         if (readbackData != sampleData)
1044         {
1045             NV_PRINTF(LEVEL_ERROR,
1046                       "Coherent Link test readback VA = 0x%llx returned garbage 0x%x\n",
1047                       (NvUPtr)(pOffset + index), readbackData);
1048 
1049             DBG_BREAKPOINT_REASON(NV_ERR_MEMORY_ERROR);
1050             status = NV_ERR_GENERIC;
1051         }
1052     }
1053 
1054 busVerifyCoherentLink_failed:
1055     if (pOffset != NULL)
1056     {
1057         kbusUnmapRmAperture_HAL(pGpu, pMemDesc, &pOffset, NV_TRUE);
1058     }
1059     memdescDestroy(pMemDesc);
1060 
1061     if (status == NV_OK)
1062     {
1063         NV_PRINTF(IS_EMULATION(pGpu) ? LEVEL_ERROR : LEVEL_INFO,
1064                   "Coherent link test passes\n");
1065     }
1066 
    return status;
}
1070 
1071 /**
1072  * @brief Setup BAR1 P2P capability property.
1073  * All Hopper+ are BAR1 P2P capable.
1074  *
1075  * @param pGpu
 * @param pKernelBus
1077  *
1078  * @return void
1079  */
1080 void kbusSetupBar1P2PCapability_GH100
1081 (
1082     OBJGPU *pGpu,
1083     KernelBus *pKernelBus
1084 )
1085 {
1086     NvU64 bar1Size = kbusGetPciBarSize(pKernelBus, 1);
1087     MemoryManager *pMemoryManager = GPU_GET_MEMORY_MANAGER(pGpu);
1088     NvU64 fbSize = pMemoryManager->Ram.fbUsableMemSize;
1089 
1090     // Make sure the BAR1 size is big enough to cover all FB
1091     if((bar1Size >= fbSize) && (fbSize != 0))
1092     {
1093          NV_PRINTF(LEVEL_INFO, "The gpu %d is BAR1 P2P capable.\n", pGpu->gpuInstance);
1094          kbusSetBar1P2pCapable(pKernelBus, NV_TRUE);
1095     }
1096     else
1097     {
1098          NV_PRINTF(LEVEL_INFO, "The gpu %d is not BAR1 P2P capable.\n", pGpu->gpuInstance);
1099          kbusSetBar1P2pCapable(pKernelBus, NV_FALSE);
1100     }
1101 }
1102 
1103 /*!
1104  * @brief check if it can support BAR1 P2P between the GPUs
1105  *        At the point this function is called, the system do not support C2C and
1106  *        NVLINK P2P and the BAR1 P2P is the preferred option.
1107  *
1108  * @param[in]   pGpu0         (local GPU)
1109  * @param[in]   pKernelBus0   (local GPU)
1110  * @param[in]   pGpu1         (remote GPU)
1111  * @param[in]   pKernelBus1   (remote GPU)
1112  *
1113  * return NV_TRUE if the GPU support BAR1 P2P
1114  */
1115 NvBool
1116 kbusIsPcieBar1P2PMappingSupported_GH100
1117 (
1118     OBJGPU    *pGpu0,
1119     KernelBus *pKernelBus0,
1120     OBJGPU    *pGpu1,
1121     KernelBus *pKernelBus1
1122 )
1123 {
1124     NvU32   gpuInst0 = gpuGetInstance(pGpu0);
1125     NvU32   gpuInst1 = gpuGetInstance(pGpu1);
1126     KernelBif *pKernelBif0 = GPU_GET_KERNEL_BIF(pGpu0);
1127     NvU32   gpu0Gfid;
1128     NvU32   gpu1Gfid;
1129     NV_STATUS  status = NV_OK;
1130 
1131     // Check if BAR1 P2P is disabled by a regkey
1132     if ((pKernelBif0->forceP2PType != NV_REG_STR_RM_FORCE_P2P_TYPE_DEFAULT) &&
1133         (pKernelBif0->forceP2PType != NV_REG_STR_RM_FORCE_P2P_TYPE_BAR1P2P))
1134     {
1135         return NV_FALSE;
1136     }
1137 
    // Loopback is not supported
1139     if (pGpu0 == pGpu1)
1140     {
1141         return NV_FALSE;
1142     }
1143 
    // Both GPUs need to support BAR1 P2P
1145     if (!kbusIsBar1P2PCapable(pKernelBus0) ||
1146         !kbusIsBar1P2PCapable(pKernelBus1))
1147     {
1148         return NV_FALSE;
1149     }
1150 
1151     //
1152     // TODO: To move this check to kbusSetupBar1P2PCapability. It should check bStaticBar1Enabled
1153     //       to determine if the GPU is Bar1P2P Capable.
1154     //
1155     NV_ASSERT_OK_OR_ELSE(status, vgpuGetCallingContextGfid(pGpu0, &gpu0Gfid), return NV_FALSE);
1156     NV_ASSERT_OK_OR_ELSE(status, vgpuGetCallingContextGfid(pGpu1, &gpu1Gfid), return NV_FALSE);
1157     if (!pKernelBus0->bar1[gpu0Gfid].bStaticBar1Enabled ||
1158         !pKernelBus1->bar1[gpu1Gfid].bStaticBar1Enabled)
1159     {
1160         return NV_FALSE;
1161     }
1162 
1163     //
1164     // RM only supports one type of PCIE P2P protocol, either BAR1 P2P or mailbox P2P, between
1165     // two GPUs at a time. For more info on this topic, please check bug 3274549 comment 10
1166     //
1167     // Check if there is p2p mailbox connection between the GPUs.
1168     //
1169     if ((pKernelBus0->p2pPcie.peerNumberMask[gpuInst1] != 0) ||
1170         (pKernelBus1->p2pPcie.peerNumberMask[gpuInst0] != 0))
1171     {
1172         return NV_FALSE;
1173     }
1174 
1175     return NV_TRUE;
1176 }
1177 
1178 /*!
1179  *  @brief Remove source GPU IOMMU mapping for the peer GPU
1180  *
1181  *  @param[in]  pSrcGpu             The source GPU
1182  *  @param[in]  pSrcKernelBus       The source Kernel Bus
 *  @param[in]  pPeerGpu            The peer GPU
 *  @param[in]  pPeerKernelBus      The peer Kernel Bus
 *
1185  *  @returns void
1186  */
1187 static void
1188 _kbusRemoveStaticBar1IOMMUMapping
1189 (
1190     OBJGPU    *pSrcGpu,
1191     KernelBus *pSrcKernelBus,
1192     OBJGPU    *pPeerGpu,
1193     KernelBus *pPeerKernelBus
1194 )
1195 {
1196     NvU32 peerGfid;
1197 
1198     NV_CHECK_OR_RETURN_VOID(LEVEL_ERROR,
1199                             vgpuGetCallingContextGfid(pPeerGpu, &peerGfid) == NV_OK);
1200 
1201     NV_ASSERT_OR_RETURN_VOID(pPeerKernelBus->bar1[peerGfid].staticBar1.pDmaMemDesc != NULL);
1202 
1203     memdescUnmapIommu(pPeerKernelBus->bar1[peerGfid].staticBar1.pDmaMemDesc,
1204                       pSrcGpu->busInfo.iovaspaceId);
1205 }
1206 
1207 /*!
1208  *  @brief Remove GPU IOMMU mapping between the pair of GPUs
1209  *
1210  *  @param[in]  pGpu0
1211  *  @param[in]  pKernelBus0
1212  *  @param[in]  pGpu1
 *  @param[in]  pKernelBus1
1214  *
1215  *  @returns void
1216  */
1217 static void
1218 _kbusRemoveStaticBar1IOMMUMappingForGpuPair
1219 (
1220     OBJGPU    *pGpu0,
1221     KernelBus *pKernelBus0,
1222     OBJGPU    *pGpu1,
1223     KernelBus *pKernelBus1
1224 )
1225 {
1226     _kbusRemoveStaticBar1IOMMUMapping(pGpu0, pKernelBus0, pGpu1, pKernelBus1);
1227     _kbusRemoveStaticBar1IOMMUMapping(pGpu1, pKernelBus1, pGpu0, pKernelBus0);
1228 }
1229 
1230 /*!
1231  *  @brief Create source GPU IOMMU mapping for the peer GPU
1232  *
1233  *  @param[in]  pSrcGpu             The source GPU
1234  *  @param[in]  pSrcKernelBus       The source Kernel Bus
1235  *  @param[in]  pPeerGpu            The peer GPU
1236  *  @param[in]  pPeerKernelBus      The peer Kernel Bus
1237  *
1238  *  @returns NV_OK on success
1239  */
1240 static NV_STATUS
1241 _kbusCreateStaticBar1IOMMUMapping
1242 (
1243     OBJGPU    *pSrcGpu,
1244     KernelBus *pSrcKernelBus,
1245     OBJGPU    *pPeerGpu,
1246     KernelBus *pPeerKernelBus
1247 )
1248 {
1249     NvU32 peerGpuGfid;
1250     MEMORY_DESCRIPTOR *pPeerDmaMemDesc = NULL;
1251     RmPhysAddr peerDmaAddr;
1252 
1253     NV_ASSERT_OK_OR_RETURN(vgpuGetCallingContextGfid(pPeerGpu, &peerGpuGfid));
1254 
1255     pPeerDmaMemDesc = pPeerKernelBus->bar1[peerGpuGfid].staticBar1.pDmaMemDesc;
1256 
1257     NV_ASSERT_OR_RETURN(pPeerDmaMemDesc != NULL, NV_ERR_INVALID_STATE);
1258 
1259     // Create the source GPU IOMMU mapping on the peer static bar1
1260     NV_ASSERT_OK_OR_RETURN(memdescMapIommu(pPeerDmaMemDesc,
1261                                            pSrcGpu->busInfo.iovaspaceId));
1262 
    // Get the DMA address through which the source GPU accesses the peer memory
1264     memdescGetPhysAddrsForGpu(pPeerDmaMemDesc, pSrcGpu,
1265                               AT_GPU, 0, 0, 1, &peerDmaAddr);
1266 
    // Check that the address is aligned to the maximum RM page size (512MB).
1268     if (!NV_IS_ALIGNED64(peerDmaAddr, RM_PAGE_SIZE_512M))
1269     {
1270         NV_PRINTF(LEVEL_ERROR, "The peer DMA address 0x%llx is not aligned at 0x%llx\n",
1271                                peerDmaAddr, RM_PAGE_SIZE_512M);
1272 
1273         memdescUnmapIommu(pPeerDmaMemDesc, pSrcGpu->busInfo.iovaspaceId);
1274 
1275         return NV_ERR_INVALID_ADDRESS;
1276     }
1277 
1278     return NV_OK;
1279 }
1280 
1281 /*!
1282  *  @brief To create IOMMU mapping between the pair of GPUs
1283  *
1284  *  @param[in]  pGpu0
1285  *  @param[in]  pKernelBus0
1286  *  @param[in]  pGpu1
 *  @param[in]  pKernelBus1
1288  *
1289  *  @returns NV_OK on success
1290  */
1291 static NV_STATUS
1292 _kbusCreateStaticBar1IOMMUMappingForGpuPair
1293 (
1294     OBJGPU    *pGpu0,
1295     KernelBus *pKernelBus0,
1296     OBJGPU    *pGpu1,
1297     KernelBus *pKernelBus1
1298 )
1299 {
1300     NvU32 gpuInst0 = gpuGetInstance(pGpu0);
1301     NvU32 gpuInst1 = gpuGetInstance(pGpu1);
1302     NV_STATUS status;
1303 
1304     // Create GPU0 IOMMU mapping to GPU1 BAR1
1305     status = _kbusCreateStaticBar1IOMMUMapping(pGpu0, pKernelBus0, pGpu1, pKernelBus1);
1306     if (status != NV_OK)
1307     {
1308         NV_PRINTF(LEVEL_ERROR, "IOMMU mapping failed from GPU%u to GPU%u\n",
1309                   gpuInst0, gpuInst1);
1310         return status;
1311     }
1312 
1313     // Create GPU1 IOMMU mapping to GPU0 BAR1
1314     status = _kbusCreateStaticBar1IOMMUMapping(pGpu1, pKernelBus1, pGpu0, pKernelBus0);
1315     if (status != NV_OK)
1316     {
1317         NV_PRINTF(LEVEL_ERROR, "IOMMU mapping failed from GPU%u to GPU%u\n",
1318                   gpuInst1, gpuInst0);
1319 
1320         // Remove the previous created IOMMU mapping
1321         _kbusRemoveStaticBar1IOMMUMapping(pGpu0, pKernelBus0, pGpu1, pKernelBus1);
1322     }
1323 
1324     return status;
1325 }
1326 
1327 /*!
1328  *  @brief To get the DMA information from the source GPU to the peer GPU
1329  *
1330  *  @param[in]  pSrcGpu             The source GPU
1331  *  @param[in]  pPeerGpu            The peer GPU
1332  *  @param[in]  pPeerKernelBus      The peer Kernel Bus
1333  *  @param[out] pDmaAddress         The start DMA address for the source GPU
1334  *                                  to access the peer GPU
1335  *  @param[out] pDmaSize            The size of the DMA transfer range
1336  *
1337  *  @returns NV_OK on success
1338  */
1339 NV_STATUS kbusGetBar1P2PDmaInfo_GH100
1340 (
1341     OBJGPU      *pSrcGpu,
1342     OBJGPU      *pPeerGpu,
1343     KernelBus   *pPeerKernelBus,
1344     NvU64       *pDmaAddress,
1345     NvU64       *pDmaSize
1346 )
1347 {
1348     NvU32 peerGfid;
1349     MEMORY_DESCRIPTOR *pPeerDmaMemDesc;
1350 
1351     NV_ASSERT_OR_RETURN((pDmaAddress != NULL) && (pDmaSize != NULL),
1352                         NV_ERR_INVALID_ARGUMENT);
1353 
1354     // Set the default value
1355     *pDmaAddress = NV_U64_MAX;
1356     *pDmaSize = 0;
1357 
1358     NV_ASSERT_OK_OR_RETURN(vgpuGetCallingContextGfid(pPeerGpu, &peerGfid));
1359 
1360     pPeerDmaMemDesc = pPeerKernelBus->bar1[peerGfid].staticBar1.pDmaMemDesc;
1361     NV_ASSERT_OR_RETURN(pPeerDmaMemDesc != NULL, NV_ERR_NOT_SUPPORTED);
1362 
1363     // Get the peer GPU DMA address for the source GPU
1364     memdescGetPhysAddrsForGpu(pPeerDmaMemDesc, pSrcGpu,
1365                               AT_GPU, 0, 0, 1, pDmaAddress);
1366 
1367     *pDmaSize = memdescGetSize(pPeerDmaMemDesc);
1368 
1369     return NV_OK;
1370 }
1371 
1372 /*!
1373  * @brief check if there is BAR1 P2P mapping between given GPUs
1374  *
1375  * @param[in]   pGpu0         (local GPU)
1376  * @param[in]   pKernelBus0   (local GPU)
1377  * @param[in]   pGpu1         (remote GPU)
1378  * @param[in]   pKernelBus1   (remote GPU)
1379  *
1380  * return NV_TRUE if the P2P is using BAR1
1381  */
1382 NvBool
1383 kbusHasPcieBar1P2PMapping_GH100
1384 (
1385     OBJGPU    *pGpu0,
1386     KernelBus *pKernelBus0,
1387     OBJGPU    *pGpu1,
1388     KernelBus *pKernelBus1
1389 )
1390 {
1391     return ((pKernelBus0->p2pPcieBar1.busBar1PeerRefcount[gpuGetInstance(pGpu1)] != 0) &&
1392             (pKernelBus1->p2pPcieBar1.busBar1PeerRefcount[gpuGetInstance(pGpu0)] != 0));
1393 }
1394 
1395 /*!
1396  * @brief Create a Bar1 P2P mapping to given GPUs
1397  *
1398  * @param[in]   pGpu0         (local GPU)
1399  * @param[in]   pKernelBus0   (local GPU)
1400  * @param[in]   pGpu1         (remote GPU)
1401  * @param[in]   pKernelBus1   (remote GPU)
1402  * @param[in]   attributes    attributes to control the mapping
1403  *
1404  * return NV_OK on success
1405  *        NV_ERR_NOT_SUPPORTED if it fails
1406  */
1407 NV_STATUS
1408 kbusCreateP2PMappingForBar1P2P_GH100
1409 (
1410     OBJGPU    *pGpu0,
1411     KernelBus *pKernelBus0,
1412     OBJGPU    *pGpu1,
1413     KernelBus *pKernelBus1,
1414     NvU32      attributes
1415 )
1416 {
1417     NvU32 gpuInst0 = gpuGetInstance(pGpu0);
1418     NvU32 gpuInst1 = gpuGetInstance(pGpu1);
1419     NV_STATUS status = NV_OK;
1420 
1421     if (IS_VIRTUAL(pGpu0) || IS_VIRTUAL(pGpu1))
1422     {
1423         return NV_ERR_NOT_SUPPORTED;
1424     }
1425 
1426     if (!kbusIsPcieBar1P2PMappingSupported_HAL(pGpu0, pKernelBus0, pGpu1, pKernelBus1))
1427     {
1428         return NV_ERR_NOT_SUPPORTED;
1429     }
1430 
    // Create the IOMMU mapping between the pair of GPUs only the first time.
1432     if ((pKernelBus0->p2pPcieBar1.busBar1PeerRefcount[gpuInst1] == 0) &&
1433         (pKernelBus1->p2pPcieBar1.busBar1PeerRefcount[gpuInst0] == 0))
1434     {
1435         NV_ASSERT_OK_OR_RETURN(_kbusCreateStaticBar1IOMMUMappingForGpuPair(pGpu0, pKernelBus0,
1436                                                                            pGpu1, pKernelBus1));
1437     }
1438 
1439     pKernelBus0->p2pPcieBar1.busBar1PeerRefcount[gpuInst1]++;
1440     pKernelBus1->p2pPcieBar1.busBar1PeerRefcount[gpuInst0]++;
1441 
1442     NV_PRINTF(LEVEL_INFO, "added PCIe BAR1 P2P mapping between GPU%u and GPU%u\n",
1443               gpuInst0, gpuInst1);
1444 
1445     return status;
1446 }
1447 
1448 /*!
1449  * @brief remove a Bar1 P2P mapping to given GPUs
1450  *
1451  * @param[in]   pGpu0         (local GPU)
1452  * @param[in]   pKernelBus0   (local GPU)
1453  * @param[in]   pGpu1         (remote GPU)
1454  * @param[in]   pKernelBus1   (remote GPU)
1455  * @param[in]   attributes    attributes of the P2P
1456  *
1457  * return NV_OK on success
1458  */
1459 NV_STATUS
1460 kbusRemoveP2PMappingForBar1P2P_GH100
1461 (
1462     OBJGPU    *pGpu0,
1463     KernelBus *pKernelBus0,
1464     OBJGPU    *pGpu1,
1465     KernelBus *pKernelBus1,
1466     NvU32      attributes
1467 )
1468 {
1469     NvU32 gpuInst0, gpuInst1;
1470 
1471     if (IS_VIRTUAL(pGpu0) || IS_VIRTUAL(pGpu1))
1472     {
1473         return NV_ERR_NOT_SUPPORTED;
1474     }
1475 
1476     gpuInst0 = gpuGetInstance(pGpu0);
1477     gpuInst1 = gpuGetInstance(pGpu1);
1478 
1479     if ((pKernelBus0->p2pPcieBar1.busBar1PeerRefcount[gpuInst1] == 0) ||
1480         (pKernelBus1->p2pPcieBar1.busBar1PeerRefcount[gpuInst0] == 0))
1481     {
1482         return NV_ERR_INVALID_STATE;
1483     }
1484 
1485     pKernelBus0->p2pPcieBar1.busBar1PeerRefcount[gpuInst1]--;
1486     pKernelBus1->p2pPcieBar1.busBar1PeerRefcount[gpuInst0]--;
1487 
1488     // Only remove the IOMMU mapping between the pair of GPUs when it is the last mapping.
1489     if ((pKernelBus0->p2pPcieBar1.busBar1PeerRefcount[gpuInst1] == 0) &&
1490         (pKernelBus1->p2pPcieBar1.busBar1PeerRefcount[gpuInst0] == 0))
1491     {
1492         _kbusRemoveStaticBar1IOMMUMappingForGpuPair(pGpu0, pKernelBus0, pGpu1, pKernelBus1);
1493     }
1494 
1495     NV_PRINTF(LEVEL_INFO, "removed PCIe BAR1 P2P mapping between GPU%u and GPU%u\n",
1496                           gpuInst0, gpuInst1);
1497 
1498     return NV_OK;
1499 }
1500 
1501 /*!
1502  * @brief Returns the C2C peer ID from pGpu0 to pGpu1 after
1503  *        reserving it if peer mapping doesn't exist already
1504  *        for the GPU pair
1505  *
1506  * @param[in]   pGpu0          (local GPU)
1507  * @param[in]   pKernelBus0    (local GPU)
1508  * @param[in]   pGpu1          (remote GPU)
1509  * @param[in]   pKernelBus1    (remote GPU)
1510  * @param[out]  c2cPeer        NvU32  pointer contains the peer ID to use
1511  *                             for local GPU to remote GPU when return value
1512  *                             is NV_OK
1513  *
1514  * return NV_OK on success
1515  */
1516 static NV_STATUS
1517 _kbusGetC2CP2PPeerId
1518 (
1519     OBJGPU    *pGpu0,
1520     KernelBus *pKernelBus0,
1521     OBJGPU    *pGpu1,
1522     KernelBus *pKernelBus1,
1523     NvU32     *c2cPeer
1524 )
1525 {
1526     NV_STATUS  status       = NV_OK;
1527     return status;
1528 }
1529 
1530 /*!
1531  * @brief Create C2C mapping to a given peer GPU
1532  *
1533  *
1534  * @param[in]   pGpu0         (Local)
1535  * @param[in]   pKernelBus0   (Local)
1536  * @param[in]   pGpu1         (Remote)
1537  * @param[in]   peerId  Peer ID to use for local GPU to
1538  *              remote GPU mapping.
1539  *
1540  * return NV_OK on success
1541  */
1542 static NV_STATUS
1543 _kbusCreateC2CPeerMapping
1544 (
1545     OBJGPU    *pGpu0,
1546     KernelBus *pKernelBus0,
1547     OBJGPU    *pGpu1,
1548     NvU32      peerId
1549 )
1550 {
1551     NvU32      gpuInstance = gpuGetInstance(pGpu1);
1552     RM_API    *pRmApi      = GPU_GET_PHYSICAL_RMAPI(pGpu0);
1553     NV2080_CTRL_INTERNAL_BUS_CREATE_C2C_PEER_MAPPING_PARAMS params = {0};
1554     NV_STATUS  status = NV_OK;
1555 
1556     //
1557     // Increment the mapping refcount per peerID - since there is a new mapping that
1558     // will use this peerID
1559     //
1560     pKernelBus0->c2cPeerInfo.busC2CMappingRefcountPerPeerId[peerId]++;
1561 
1562     // Set the peer IDs in the corresponding peer number masks
1563     pKernelBus0->c2cPeerInfo.busC2CPeerNumberMask[gpuInstance] |= NVBIT(peerId);
1564 
1565     params.peerId = peerId;
1566     status = pRmApi->Control(pRmApi,
1567                              pGpu0->hInternalClient,
1568                              pGpu0->hInternalSubdevice,
1569                              NV2080_CTRL_CMD_INTERNAL_BUS_CREATE_C2C_PEER_MAPPING,
1570                              &params,
1571                              sizeof(NV2080_CTRL_INTERNAL_BUS_CREATE_C2C_PEER_MAPPING_PARAMS));
1572     NV_ASSERT(status == NV_OK);
1573 
1574     return status;
1575 }
1576 
1577 /*!
1578  * @brief Create a C2C P2P mapping to a given peer GPU
1579  *
1580  * @param[in]   pGpu0          (local GPU)
1581  * @param[in]   pKernelBus0    (local GPU)
1582  * @param[in]   pGpu1          (remote GPU)
1583  * @param[in]   pKernelBus1    (remote GPU)
 * @param[out]  peer0          Peer ID (local to remote)
 * @param[out]  peer1          Peer ID (remote to local)
 * @param[in]   attributes     Mapping attributes
1586  *
1587  * return NV_OK on success
1588  */
1589 NV_STATUS
1590 kbusCreateP2PMappingForC2C_GH100
1591 (
1592     OBJGPU    *pGpu0,
1593     KernelBus *pKernelBus0,
1594     OBJGPU    *pGpu1,
1595     KernelBus *pKernelBus1,
1596     NvU32     *peer0,
1597     NvU32     *peer1,
1598     NvU32      attributes
1599 )
1600 {
1601     NvU32              gpu0Instance   = gpuGetInstance(pGpu0);
1602     NvU32              gpu1Instance   = gpuGetInstance(pGpu1);
1603     NvU32              c2cPeer0;
1604     NvU32              c2cPeer1;
1605     NV_STATUS          status;
1606 
1607     if (IS_VIRTUAL(pGpu0) || IS_VIRTUAL(pGpu1))
1608     {
1609         return NV_ERR_NOT_SUPPORTED;
1610     }
1611 
1612     if (peer0 == NULL || peer1 == NULL)
1613     {
1614         return NV_ERR_INVALID_ARGUMENT;
1615     }
1616 
1617     if ((*peer0 != BUS_INVALID_PEER && *peer0 >= P2P_MAX_NUM_PEERS) ||
1618         (*peer1 != BUS_INVALID_PEER && *peer1 >= P2P_MAX_NUM_PEERS))
1619     {
1620         return NV_ERR_INVALID_ARGUMENT;
1621     }
1622 
1623     c2cPeer0 = *peer0;
1624     c2cPeer1 = *peer1;
1625 
1626     // Get the peer ID pGpu0 should use for P2P over C2C to pGpu1
1627     if ((status = _kbusGetC2CP2PPeerId(pGpu0, pKernelBus0,
1628                                        pGpu1, pKernelBus1,
1629                                        &c2cPeer0)) != NV_OK)
1630     {
1631         return status;
1632     }
1633 
1634     // Get the peer ID pGpu1 should use for P2P over C2C to pGpu0
1635     if ((status = _kbusGetC2CP2PPeerId(pGpu1, pKernelBus1,
1636                                        pGpu0, pKernelBus0,
1637                                        &c2cPeer1)) != NV_OK)
1638     {
1639         return status;
1640     }
1641 
1642     if ((c2cPeer0 == BUS_INVALID_PEER) || (c2cPeer1 == BUS_INVALID_PEER))
1643     {
1644         NV_PRINTF(LEVEL_ERROR, "Failed to create C2C P2P mapping between GPU%u and GPU%u\n",
1645                                 gpu0Instance, gpu1Instance);
1646 
1647         return NV_ERR_INVALID_REQUEST;
1648     }
1649 
1650     *peer0 = c2cPeer0;
1651     *peer1 = c2cPeer1;
1652 
1653     //
1654     // Does the mapping already exist between the given pair of GPUs using the peerIDs
    // *peer0 and *peer1, respectively?
1656     //
1657     if ((pKernelBus0->c2cPeerInfo.busC2CPeerNumberMask[gpu1Instance] & NVBIT(*peer0)) &&
1658         (pKernelBus1->c2cPeerInfo.busC2CPeerNumberMask[gpu0Instance] & NVBIT(*peer1)))
1659     {
1660         //
1661         // Increment the mapping refcount per peerID - since there is another usage
1662         // of a mapping that is using this peerID
1663         //
1664         pKernelBus0->c2cPeerInfo.busC2CMappingRefcountPerPeerId[*peer0]++;
1665         pKernelBus1->c2cPeerInfo.busC2CMappingRefcountPerPeerId[*peer1]++;
1666 
1667         NV_PRINTF(LEVEL_INFO,
1668                   "- P2P: Peer mapping is already in use for gpu instances %x and %x "
1669                   "with peer id's %d and %d. Increasing the mapping refcounts for the"
1670                   " peer IDs to %d and %d respectively.\n",
1671                   gpu0Instance, gpu1Instance, *peer0, *peer1,
1672                   pKernelBus0->c2cPeerInfo.busC2CMappingRefcountPerPeerId[*peer0],
1673                   pKernelBus1->c2cPeerInfo.busC2CMappingRefcountPerPeerId[*peer1]);
1674 
1675         return NV_OK;
1676     }
1677 
1678     //
    // Reaching here implies that no mapping exists between the given pair of GPUs
    // using the peer IDs *peer0 and *peer1. Create the mapping.
1681     //
1682 
1683     NV_ASSERT_OK_OR_RETURN(_kbusCreateC2CPeerMapping(pGpu0, pKernelBus0, pGpu1, *peer0));
1684     NV_ASSERT_OK_OR_RETURN(_kbusCreateC2CPeerMapping(pGpu1, pKernelBus1, pGpu0, *peer1));
1685 
1686     NV_PRINTF(LEVEL_INFO,
1687               "added C2C P2P mapping between GPU%u (peer %u) and GPU%u (peer %u)\n",
1688               gpu0Instance, *peer0, gpu1Instance, *peer1);
1689 
1690     return NV_OK;
1691 }
1692 
1693 /*!
1694  * @brief Remove C2C mapping to a given peer GPU
1695  *
1696  * @param[in]   pGpu0          (local GPU)
1697  * @param[in]   pKernelBus0    (local GPU)
1698  * @param[in]   pGpu1          (remote GPU)
1699  * @param[in]   peerId         Peer ID for local to remote GPU
1700  *
1701  * return NV_OK on success
1702  */
1703 static NV_STATUS
1704 _kbusRemoveC2CPeerMapping
1705 (
1706     OBJGPU    *pGpu0,
1707     KernelBus *pKernelBus0,
1708     OBJGPU    *pGpu1,
1709     NvU32      peerId
1710 )
1711 {
1712     NV_STATUS          status          = NV_OK;
1713 
1714     if (IS_VIRTUAL(pGpu0) || IS_VIRTUAL(pGpu1))
1715     {
1716         return NV_ERR_NOT_SUPPORTED;
1717     }
1718 
1719     return status;
1720 }
1721 
1722 /*!
 * @brief Remove the C2C P2P mapping for a pair of GPUs
1724  *
1725  * @param[in]   pGpu0          (local GPU)
1726  * @param[in]   pKernelBus0    (local GPU)
1727  * @param[in]   pGpu1          (remote GPU)
1728  * @param[in]   pKernelBus1    (remote GPU)
 * @param[in]   peer0          Peer ID (local to remote)
 * @param[in]   peer1          Peer ID (remote to local)
 * @param[in]   attributes     Mapping attributes
1731  *
1732  * return NV_OK on success
1733  */
1734 NV_STATUS
1735 kbusRemoveP2PMappingForC2C_GH100
1736 (
1737     OBJGPU    *pGpu0,
1738     KernelBus *pKernelBus0,
1739     OBJGPU    *pGpu1,
1740     KernelBus *pKernelBus1,
1741     NvU32      peer0,
1742     NvU32      peer1,
1743     NvU32      attributes
1744 )
1745 {
1746     NV_STATUS          status        = NV_OK;
1747 
1748     // Check if there's C2C mapping
1749     if (((pKernelBus0->c2cPeerInfo.busC2CPeerNumberMask[pGpu1->gpuInstance] & NVBIT(peer0)) == 0) ||
1750         ((pKernelBus1->c2cPeerInfo.busC2CPeerNumberMask[pGpu0->gpuInstance] & NVBIT(peer1)) == 0))
1751     {
1752         return NV_ERR_INVALID_STATE;
1753     }
1754 
1755     // C2C mapping exists, remove the C2C mapping
1756     NV_ASSERT_OK_OR_RETURN(_kbusRemoveC2CPeerMapping(pGpu0, pKernelBus0, pGpu1, peer0));
1757     NV_ASSERT_OK_OR_RETURN(_kbusRemoveC2CPeerMapping(pGpu1, pKernelBus1, pGpu0, peer1));
1758 
1759     return status;
1760 }
1761 
1762 NvBool
1763 kbusNeedStaticBar1Mapping_GH100(OBJGPU *pGpu, KernelBus *pKernelBus)
1764 {
1765     KernelBif *pKernelBif = GPU_GET_KERNEL_BIF(pGpu);
1766 
1767     // Check if BAR1 P2P is enabled by a regkey
1768     if (pKernelBif->forceP2PType != NV_REG_STR_RM_FORCE_P2P_TYPE_BAR1P2P)
1769     {
1770         return NV_FALSE;
1771     }
1772 
1773     // We need static Bar1 only when the GPU is BAR1 P2P capable.
1774     return kbusIsBar1P2PCapable(pKernelBus);
1775 }
1776 
1777 /*!
1778  * @brief Setup static Bar1 mapping.
1779  *
1780  * @param[in]   pGpu                GPU pointer
1781  * @param[in]   pKernelBus          Kernel bus pointer
 * @param[in]   reservedFbSize      The size to reserve in FB starting from address 0
1783  * @param[in]   gfid                The GFID
1784  *
1785  * @returns NV_OK on success, or rm_status from called functions on failure.
1786  */
1787 NV_STATUS
1788 kbusEnableStaticBar1Mapping_GH100
1789 (
1790     OBJGPU *pGpu,
1791     KernelBus *pKernelBus,
1792     NvU64 reservedFbSize,
1793     NvU32 gfid
1794 )
1795 {
1796     MemoryManager *pMemoryManager = GPU_GET_MEMORY_MANAGER(pGpu);
1797     MEMORY_DESCRIPTOR *pMemDesc = NULL;
1798     MEMORY_DESCRIPTOR *pDmaMemDesc = NULL;
1799     NvU64 bar1Size = kbusGetPciBarSize(pKernelBus, 1);
1800     NV_STATUS status = NV_OK;
1801     OBJEHEAP   *pVASpaceHeap = vaspaceGetHeap(pKernelBus->bar1[gfid].pVAS);
1802     NvU64 bar1Offset = RM_ALIGN_UP(reservedFbSize, RM_PAGE_SIZE_2M);
1803     NvU64 bar1MapSize;
1804     NvU64 staticBar1PhyAddr;
1805 
1806     if (!kbusNeedStaticBar1Mapping_HAL(pGpu, pKernelBus))
1807     {
1808         return NV_ERR_INVALID_STATE;
1809     }
1810 
1811     NV_PRINTF(LEVEL_INFO, "Static bar1 size 0x%llx fb size 0x%llx\n",
1812                            bar1Size, pMemoryManager->Ram.fbUsableMemSize);
1813 
1814     // BAR1 VA size can be smaller than BAR1 size
1815     bar1MapSize = NV_MIN(bar1Size, pVASpaceHeap->rangeHi);
1816     bar1MapSize = NV_MIN(bar1MapSize, pMemoryManager->Ram.fbUsableMemSize);
1817 
1818     NV_ASSERT_OR_RETURN(bar1MapSize > bar1Offset, NV_ERR_INVALID_STATE);
1819 
1820     // Adjust the offset
1821     bar1MapSize -= bar1Offset;
1822 
1823     //
    // GPU BAR1 VA also supports SYSMEM mappings, so we need to reserve some
    // space for such cases, like the doorbell mapping, which is not backed
    // by FBMEM.
1827     //
1828     if ((bar1Size - (bar1MapSize + bar1Offset)) < (4 * RM_PAGE_SIZE_2M))
1829     {
1830         //
        // When the BAR1 size is much bigger than FB, there is plenty of
        // BAR1 VA left over for other types of mappings.
        // When the BAR1 size is only slightly bigger than or equal to FB,
        // the available BAR1 VA is very limited.
        // Here RM reserves 4 * 2MB blocks.
        // !!! NOTE: Not sure how much RM needs to reserve
1837         // TODO: Need to find a better solution, bug 3869651
1838         //
1839         bar1MapSize -= 4 * RM_PAGE_SIZE_2M;
1840 
1841         NV_PRINTF(LEVEL_INFO, "Static bar1 reserved 8 MB from the top of FB\n");
1842     }
1843 
1844     // align to 2MB page size
1845     bar1MapSize  = RM_ALIGN_UP(bar1MapSize, RM_PAGE_SIZE_2M);
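
    //
    // Worked example with illustrative numbers: for a 64GB BAR1, 16GB of
    // usable FB and reservedFbSize = 0, bar1MapSize clamps to 16GB; the
    // remaining 48GB of BAR1 VA leaves far more than 4 * 2MB of headroom,
    // so nothing extra is reserved. For BAR1 == FB == 16GB the headroom
    // check above fails and 8MB is carved off the top of the static mapping
    // for non-FBMEM mappings such as doorbells.
    //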
1846 
1847     //
1848     // The static mapping is not backed by an allocated physical FB.
1849     // Here RM describes the memory for the static mapping.
1850     //
1851     NV_ASSERT_OK_OR_RETURN(memdescCreate(&pMemDesc, pGpu, bar1MapSize, 0,
1852                                          NV_MEMORY_CONTIGUOUS, ADDR_FBMEM,
1853                                          NV_MEMORY_UNCACHED, MEMDESC_FLAGS_NONE));
1854 
1855     memdescDescribe(pMemDesc, ADDR_FBMEM, bar1Offset, bar1MapSize);
1856 
    // Set the page size to RM_PAGE_SIZE_HUGE (2MB)
1858     memdescSetPageSize(pMemDesc, AT_GPU, RM_PAGE_SIZE_HUGE);
1859 
    // Set up the GMK PTE kind for this memory
1861     memdescSetPteKind(pMemDesc, NV_MMU_PTE_KIND_GENERIC_MEMORY);
1862 
1863     // Deploy the static mapping.
1864     NV_ASSERT_OK_OR_GOTO(status,
1865                          kbusMapFbAperture_HAL(pGpu, pKernelBus, pMemDesc, 0,
1866                              &bar1Offset, &bar1MapSize,
1867                              BUS_MAP_FB_FLAGS_MAP_UNICAST | BUS_MAP_FB_FLAGS_MAP_OFFSET_FIXED,
1868                              NV01_NULL_OBJECT),
1869                          cleanup_mem);
1870 
    // Get the system physical address of the base of the static BAR1 mapping
1872     staticBar1PhyAddr = gpumgrGetGpuPhysFbAddr(pGpu) + bar1Offset;
1873 
1874     //
1875     // Create a memory descriptor to describe a SYSMEM target of the GPU
1876     // BAR1 region. This memDesc will be used for P2P DMA related mapping.
1877     //
1878     NV_ASSERT_OK_OR_GOTO(status,
1879                          memdescCreate(&pDmaMemDesc,
1880                                        pGpu,
1881                                        bar1MapSize,
1882                                        0,
1883                                        NV_MEMORY_CONTIGUOUS,
1884                                        ADDR_SYSMEM,
1885                                        NV_MEMORY_UNCACHED,
1886                                        MEMDESC_FLAGS_NONE),
1887                         cleanup_bus_map);
1888 
1889     memdescDescribe(pDmaMemDesc, ADDR_SYSMEM, staticBar1PhyAddr, bar1MapSize);
1890 
1891     pKernelBus->bar1[gfid].bStaticBar1Enabled = NV_TRUE;
1892     pKernelBus->bar1[gfid].staticBar1.pVidMemDesc = pMemDesc;
1893     pKernelBus->bar1[gfid].staticBar1.pDmaMemDesc = pDmaMemDesc;
1894     pKernelBus->bar1[gfid].staticBar1.base = bar1Offset;
1895     pKernelBus->bar1[gfid].staticBar1.size = bar1MapSize;
1896 
1897     NV_PRINTF(LEVEL_INFO, "Static bar1 mapped offset 0x%llx size 0x%llx\n",
1898                            bar1Offset, bar1MapSize);
1899 
1900     return NV_OK;
1901 
1902 cleanup_bus_map:
1903     NV_ASSERT_OK(kbusUnmapFbAperture_HAL(pGpu, pKernelBus,
1904                                          pMemDesc, bar1Offset, bar1MapSize,
1905                                          BUS_MAP_FB_FLAGS_MAP_UNICAST |
1906                                          BUS_MAP_FB_FLAGS_MAP_OFFSET_FIXED));
1907 
1908 cleanup_mem:
1909     NV_PRINTF(LEVEL_ERROR, "Failed to create the static bar1 mapping offset"
1910                            "0x%llx size 0x%llx\n", bar1Offset, bar1MapSize);
1911 
1912     pKernelBus->bar1[gfid].bStaticBar1Enabled = NV_FALSE;
1913     pKernelBus->bar1[gfid].staticBar1.pVidMemDesc = NULL;
1914     pKernelBus->bar1[gfid].staticBar1.pDmaMemDesc = NULL;
1915 
1916     memdescDestroy(pDmaMemDesc);
1917     memdescDestroy(pMemDesc);
1918 
1919     return status;
1920 }
1921 
1922 /*!
1923  * @brief tear down static Bar1 mapping.
1924  *
1925  * @param[in]   pGpu                GPU pointer
1926  * @param[in]   pKernelBus          Kernel bus pointer
1927  * @param[in]   gfid                The GFID
1928  *
1929  * @returns NV_OK on success, or rm_status from called functions on failure.
1930  */
1931 NV_STATUS
1932 kbusDisableStaticBar1Mapping_GH100(OBJGPU *pGpu, KernelBus *pKernelBus, NvU32 gfid)
1933 {
1934     if (pKernelBus->bar1[gfid].bStaticBar1Enabled)
1935     {
1936         if (pKernelBus->bar1[gfid].staticBar1.pVidMemDesc != NULL)
1937         {
1938             NV_CHECK_OK_OR_RETURN(LEVEL_ERROR,
1939                                   kbusUnmapFbAperture_HAL(pGpu, pKernelBus,
1940                                      pKernelBus->bar1[gfid].staticBar1.pVidMemDesc,
1941                                      pKernelBus->bar1[gfid].staticBar1.base,
1942                                      pKernelBus->bar1[gfid].staticBar1.size,
1943                                      BUS_MAP_FB_FLAGS_MAP_UNICAST | BUS_MAP_FB_FLAGS_MAP_OFFSET_FIXED));
1944 
1945             memdescDestroy(pKernelBus->bar1[gfid].staticBar1.pVidMemDesc);
1946 
1947             pKernelBus->bar1[gfid].staticBar1.pVidMemDesc = NULL;
1948         }
1949 
1950         memdescDestroy(pKernelBus->bar1[gfid].staticBar1.pDmaMemDesc);
1951         pKernelBus->bar1[gfid].staticBar1.pDmaMemDesc = NULL;
1952 
1953         pKernelBus->bar1[gfid].bStaticBar1Enabled = NV_FALSE;
1954     }
1955 
1956     return NV_OK;
1957 }
1958 
1959 /*!
 * @brief Update the static BAR1 PTE kind for the specified memory.
 *
 *        Static BAR1 supports only GMK (generic memory kind) and compressed kinds.
 *        By default, when static BAR1 is enabled, BAR1 is statically mapped with GMK at boot.
 *
 *        When mapping memory of an uncompressed kind, RM simply returns the static BAR1
 *        address that is already mapped to the specified memory.
 *
 *        When mapping memory of a compressed kind, RM must call this function to change the
 *        statically mapped BAR1 range covering that memory from GMK to the compressed kind,
 *        and must call it again to change it back to GMK once the mapping is released.
1971  *
1972  * @param[in]   pGpu            GPU pointer
1973  * @param[in]   pKernelBus      Kernel bus pointer
1974  * @param[in]   pMemDesc        The memory to update
1975  * @param[in]   offset          The offset of the memory to update
1976  * @param[in]   length          The length of the memory to update
1977  * @param[in]   bRelease        Call to release the mapping
1978  * @param[in]   gfid            The GFID
1979  *
1980  * return NV_OK on success
1981  */
1982 NV_STATUS
1983 _kbusUpdateStaticBAR1VAMapping_GH100
1984 (
1985     OBJGPU *pGpu,
1986     KernelBus *pKernelBus,
1987     MEMORY_DESCRIPTOR  *pMemDesc,
1988     NvU64   offset,
1989     NvU64   length,
1990     NvBool  bRelease,
1991     NvU32   gfid
1992 )
1993 {
1994     NV_STATUS           status = NV_OK;
1995     VirtMemAllocator   *pDma = GPU_GET_DMA(pGpu);
1996     MemoryManager      *pMemoryManager = GPU_GET_MEMORY_MANAGER(pGpu);
1997     OBJVASPACE         *pVAS = pKernelBus->bar1[gfid].pVAS;
1998     NvU32               kind;
1999     MEMORY_DESCRIPTOR  *pTempMemDesc;
2000     NvU64               vAddr;
2001     NvU64               vaLo;
2002     NvU64               vaHi;
2003     NvU64               physAddr;
2004     NvU64               pageOffset;
2005     NvU64               mapLength;
2006     NvU64               pageSize;
2007     DMA_PAGE_ARRAY      pageArray = {0};
2008     COMPR_INFO          comprInfo;
2009     NvBool              bCompressed;
2010 
2011     NV_ASSERT_OR_RETURN(pMemDesc != NULL, NV_ERR_INVALID_ARGUMENT);
2012 
2013     NV_ASSERT_OR_RETURN(memdescGetAddressSpace(pMemDesc) == ADDR_FBMEM, NV_ERR_INVALID_ARGUMENT);
2014 
    // Only contiguous memory is supported
2016     NV_ASSERT_OR_RETURN(memdescGetPteArraySize(pMemDesc, AT_GPU) == 1, NV_ERR_INVALID_ARGUMENT);
2017 
2018     pTempMemDesc = memdescGetMemDescFromGpu(pMemDesc, pGpu);
2019 
2020     pageSize = memdescGetPageSize(pTempMemDesc, VAS_ADDRESS_TRANSLATION(pVAS));
2021 
2022     NV_ASSERT_OK_OR_RETURN(memmgrGetKindComprFromMemDesc(pMemoryManager, pTempMemDesc, 0, &kind, &comprInfo));
2023     bCompressed = memmgrIsKind_HAL(pMemoryManager, FB_IS_KIND_COMPRESSIBLE, kind);
2024 
    // Static BAR1 mapping only supports >=2MB page sizes for compressed memory
2026     NV_CHECK_OR_RETURN(LEVEL_WARNING, bCompressed && (pageSize >= RM_PAGE_SIZE_HUGE), NV_ERR_INVALID_STATE);
2027 
2028     if (bRelease)
2029     {
2030         // update the PTE kind to be the uncompressed kind
2031         comprInfo.kind = memmgrGetUncompressedKind_HAL(pGpu, pMemoryManager, kind, NV_FALSE);
2032     }
2033 
    // Under the static BAR1 mapping, the BAR1 VA is equal to the physical address
2035     physAddr    = memdescGetPhysAddr(pTempMemDesc, VAS_ADDRESS_TRANSLATION(pVAS), offset);
2036     vAddr       = RM_ALIGN_DOWN(physAddr, pageSize);
2037 
2038     pageOffset  = physAddr & (pageSize - 1);
2039     mapLength   = RM_ALIGN_UP(pageOffset + length, pageSize);
2040 
2041     vaLo        = vAddr;
2042     vaHi        = vaLo + mapLength - 1;
2043 
2044     pageArray.count = 1;
2045     pageArray.pData = &physAddr;
2046 
2047     status = dmaUpdateVASpace_HAL(pGpu, pDma, pVAS,
2048                                   pTempMemDesc, NULL,
2049                                   vaLo, vaHi,
2050                                   DMA_UPDATE_VASPACE_FLAGS_UPDATE_KIND, // only change KIND
2051                                   &pageArray, 0,
2052                                   &comprInfo, 0,
2053                                   NV_MMU_VER3_PTE_VALID_TRUE,
2054                                   NV_MMU_VER3_PTE_APERTURE_VIDEO_MEMORY,
2055                                   BUS_INVALID_PEER,
2056                                   NVLINK_INVALID_FABRIC_ADDR,
2057                                   DMA_TLB_INVALIDATE,
2058                                   NV_FALSE,
2059                                   pageSize);
2060 
2061     if (status != NV_OK)
2062     {
2063         NV_PRINTF(LEVEL_ERROR, "error updating static bar1 VA space.\n");
2064     }
2065 
2066     return status;
2067 }
2068 
2069 /*!
 * @brief Unmap the FB aperture for the specified memory under the static mapping.
2071  *
2072  * @param[in]   pGpu            GPU pointer
2073  * @param[in]   pKernelBus      Kernel bus pointer
2074  * @param[in]   pMemDesc        The memory to update
2075  * @param[in]   gfid            The GFID
2076  *
2077  * return NV_OK on success
2078  */
2079 NV_STATUS
2080 kbusStaticUnmapFbAperture_GH100
2081 (
2082     OBJGPU             *pGpu,
2083     KernelBus          *pKernelBus,
2084     MEMORY_DESCRIPTOR  *pMemDesc,
2085     NvU32               gfid
2086 )
2087 {
2088     MemoryManager *pMemoryManager = GPU_GET_MEMORY_MANAGER(pGpu);
2089     NvBool bCompressedkind = memmgrIsKind_HAL(pMemoryManager, FB_IS_KIND_COMPRESSIBLE,
2090                                               memdescGetPteKind(pMemDesc));
2091 
2092     //
2093     // For uncompressed type, nothing to do
2094     // For compressed type, restore PTE kind to GMK
2095     //
2096     if (bCompressedkind)
2097     {
2098         NV_ASSERT_OK_OR_RETURN(_kbusUpdateStaticBAR1VAMapping_GH100(pGpu, pKernelBus,
2099                                pMemDesc, 0, memdescGetSize(pMemDesc), NV_TRUE, gfid));
2100     }
2101 
    // Nothing else to do in static mapping mode
2103     NV_PRINTF(LEVEL_INFO,
2104               "StaticBar1 unmapped at 0x%llx size 0x%llx%s\n",
2105               memdescGetPhysAddr(pMemDesc, AT_GPU, 0),
2106               memdescGetSize(pMemDesc),
2107               bCompressedkind ? " [compressed]" : "");
2108 
2109     return NV_OK;
2110 }
2111 
2112 /*!
 * @brief Map the FB aperture for the specified memory under the static mapping.
2114  *
2115  * @param[in]   pGpu            GPU pointer
2116  * @param[in]   pKernelBus      Kernel bus pointer
2117  * @param[in]   pMemDesc        The memory to update
2118  * @param[in]   offset          The offset of the memory to map
 * @param[out]  pAperOffset     The FB aperture (BAR1) offset of the mapped vidmem
2120  * @param[in]   pLength         The size of vidmem to map
2121  * @param[in]   gfid            The GFID
2122  *
2123  * return NV_OK on success
2124  */
2125 NV_STATUS
2126 kbusStaticMapFbAperture_GH100
2127 (
2128     OBJGPU     *pGpu,
2129     KernelBus  *pKernelBus,
2130     MEMORY_DESCRIPTOR *pMemDesc,
2131     NvU64       offset,
2132     NvU64      *pAperOffset,
2133     NvU64      *pLength,
2134     NvU32       gfid
2135 )
2136 {
2137     MemoryManager *pMemoryManager = GPU_GET_MEMORY_MANAGER(pGpu);
2138     NvU64 physAddr;
    NV_STATUS status = NV_OK;
2140     NvBool bCompressedkind = memmgrIsKind_HAL(pMemoryManager, FB_IS_KIND_COMPRESSIBLE, memdescGetPteKind(pMemDesc));
2141 
    // Only contiguous memory is supported
2143     NV_ASSERT_OR_RETURN(memdescGetPteArraySize(pMemDesc, AT_GPU) == 1, NV_ERR_INVALID_ARGUMENT);
2144 
2145     physAddr = memdescGetPhysAddr(pMemDesc, AT_GPU, offset);
2146 
    if (physAddr < pKernelBus->bar1[gfid].staticBar1.base ||
        physAddr + *pLength > pKernelBus->bar1[gfid].staticBar1.base +
                              pKernelBus->bar1[gfid].staticBar1.size)
2149     {
2150         NV_PRINTF(LEVEL_ERROR,
2151                   "0x%llx + 0x%llx is out of the range of the StaticBar1 map [0x%llx, 0x%llx]\n",
2152                   physAddr, *pLength, pKernelBus->bar1[gfid].staticBar1.base,
2153                   pKernelBus->bar1[gfid].staticBar1.base + pKernelBus->bar1[gfid].staticBar1.size);
2154 
2155         return NV_ERR_INVALID_ARGUMENT;
2156     }
2157 
2158     if (bCompressedkind)
2159     {
2160         // Update PTE to be the compressed kind
2161         NV_ASSERT_OK_OR_RETURN(_kbusUpdateStaticBAR1VAMapping_GH100(pGpu, pKernelBus, pMemDesc,
2162                                                                     offset, *pLength, NV_FALSE, gfid));
2163     }
2164 
    // When static BAR1 is enabled, the FB aperture offset is the physical address.
2166     *pAperOffset = physAddr;
2167 
2168     NV_PRINTF(LEVEL_INFO, "StaticBar1 mapped at 0x%llx size 0x%llx%s\n",
2169                           physAddr, *pLength,
2170                           bCompressedkind ? " [compressed]" : "");
2171 
2172     return status;
2173 }
2174 
2175 void
2176 kbusWriteP2PWmbTag_GH100
2177 (
2178     OBJGPU    *pGpu,
2179     KernelBus *pKernelBus,
2180     NvU32      remote2Local,
2181     NvU64      p2pWmbTag
2182 )
2183 {
    // See bug 3558208 comments 34 and 50
2185     GPU_REG_RD32(pGpu, NV_XAL_EP_P2P_WREQMB_L(remote2Local));
2186     GPU_REG_WR32(pGpu, NV_XAL_EP_P2P_WREQMB_L(remote2Local), NvU64_LO32(p2pWmbTag));
2187     GPU_REG_WR32(pGpu, NV_XAL_EP_P2P_WREQMB_H(remote2Local), NvU64_HI32(p2pWmbTag));
2188 }
2189 
2190 /*!
2191  * @brief Determine FLA Base and Size for direct-connected and NvSwitch systems.
2192  *
2193  * @param[in]  base       VASpace base
2194  * @param[in]  size       VASpace size
2195  *
2196  * @return NV_OK if successful
2197  */
2198 NV_STATUS
2199 kbusDetermineFlaRangeAndAllocate_GH100
2200 (
2201     OBJGPU    *pGpu,
2202     KernelBus *pKernelBus,
2203     NvU64      base,
2204     NvU64      size
2205 )
2206 {
2207     NV_STATUS      status        = NV_OK;
2208 
2209     OBJSYS *pSys = SYS_GET_INSTANCE();
2210 
2211     if ((pSys->getProperty(pSys, PDB_PROP_SYS_NVSWITCH_IS_PRESENT) ||
2212          GPU_IS_NVSWITCH_DETECTED(pGpu)) && !gpuFabricProbeIsSupported(pGpu))
2213     {
2214         return kbusDetermineFlaRangeAndAllocate_GA100(pGpu, pKernelBus, base, size);
2215     }
2216 
2217     NV_ASSERT_OK_OR_RETURN(kbusAllocateFlaVaspace_HAL(pGpu, pKernelBus, 0x0, NVBIT64(52)));
2218 
2219     return status;
2220 }
2221 
2222 /*!
 * @brief Sets up the fabric FLA state for the GPU. This function allocates the
 *        fabric VASpace, allocates the PDB for the fabric VAS, allocates the
 *        instance block, initializes it with the fabric VAS, and binds the
 *        instance block to HW.
2226  *
2227  * @param[in]  base       VASpace base
2228  * @param[in]  size       VASpace size
2229  *
2230  * @return NV_OK if successful
2231  */
2232 NV_STATUS
2233 kbusAllocateFlaVaspace_GH100
2234 (
2235     OBJGPU    *pGpu,
2236     KernelBus *pKernelBus,
2237     NvU64      base,
2238     NvU64      size
2239 )
2240 {
2241     NV_STATUS    status = NV_OK;
2242     OBJVMM      *pVmm   = SYS_GET_VMM(SYS_GET_INSTANCE());
2243     KernelGmmu  *pKernelGmmu  = GPU_GET_KERNEL_GMMU(pGpu);
2244     INST_BLK_INIT_PARAMS pInstblkParams = {0};
2245     FABRIC_VASPACE *pFabricVAS;
2246     RM_API   *pRmApi = rmapiGetInterface(RMAPI_GPU_LOCK_INTERNAL);
2247 
2248     NV_ASSERT_OR_RETURN(pGpu != NULL, NV_ERR_INVALID_ARGUMENT);
2249     NV_ASSERT_OR_RETURN(size != 0, NV_ERR_INVALID_ARGUMENT);
2250     NV_ASSERT_OR_RETURN(!pKernelBus->flaInfo.bFlaAllocated, NV_ERR_INVALID_ARGUMENT);
2251 
2252     pKernelBus->flaInfo.base = base;
2253     pKernelBus->flaInfo.size = size;
2254 
2255     OBJSYS *pSys = SYS_GET_INSTANCE();
2256 
2257     if ((pSys->getProperty(pSys, PDB_PROP_SYS_NVSWITCH_IS_PRESENT) ||
2258          GPU_IS_NVSWITCH_DETECTED(pGpu)) && !gpuFabricProbeIsSupported(pGpu))
2259     {
2260         return kbusAllocateFlaVaspace_GA100(pGpu, pKernelBus, base, size);
2261     }
2262 
    // TODO: Remove allocating the legacy FLA VASpace once CUDA removes the dependency
2264     NV_ASSERT_OK_OR_RETURN(kbusAllocateLegacyFlaVaspace_HAL(pGpu, pKernelBus, base, size));
2265 
2266     // Allocate a FABRIC_VASPACE_A object
2267     status = vmmCreateVaspace(pVmm, FABRIC_VASPACE_A, pGpu->gpuId, gpumgrGetGpuMask(pGpu),
2268                               base, base + size - 1, 0, 0, NULL, 0,
2269                               &pGpu->pFabricVAS);
2270 
2271     if (status != NV_OK)
2272     {
2273         NV_PRINTF(LEVEL_ERROR, "failed allocating fabric vaspace, status=0x%x\n",
2274                   status);
2275         goto cleanup;
2276     }
2277 
2278     // Pin the VASPACE page directory for pFabricVAS before writing the instance block
2279     status = vaspacePinRootPageDir(pGpu->pFabricVAS, pGpu);
2280     if (status != NV_OK)
2281     {
2282         NV_PRINTF(LEVEL_ERROR, "failed pinning down fabric vaspace, status=0x%x\n",
2283                     status);
2284         goto cleanup;
2285     }
2286 
2287     // Construct instance block
2288     status = kbusConstructFlaInstBlk_HAL(pGpu, pKernelBus, GPU_GFID_PF);
2289     if (status != NV_OK)
2290     {
2291         NV_PRINTF(LEVEL_ERROR,
2292                 "failed constructing instblk for FLA, status=0x%x\n",
2293                 status);
2294         goto unpin_rootpagedir;
2295     }
2296 
2297     pFabricVAS = dynamicCast(pGpu->pFabricVAS, FABRIC_VASPACE);
2298 
    // Instantiate the instance block for the fabric VAS
2300     status = kgmmuInstBlkInit(pKernelGmmu,
2301                                 pKernelBus->flaInfo.pInstblkMemDesc,
2302                                 pFabricVAS->pGVAS, FIFO_PDB_IDX_BASE,
2303                                 &pInstblkParams);
2304     if (status != NV_OK)
2305     {
2306         NV_PRINTF(LEVEL_ERROR,
2307                 "failed instantiating instblk for FLA, status=0x%x\n",
2308                 status);
2309         goto free_instblk;
2310     }
2311 
2312     //
    // For an SRIOV PF/VF system, always check for the P2P allocation to determine
    // whether this function is allowed to bind FLA
2315     //
2316     if (gpuIsSriovEnabled(pGpu) || IS_VIRTUAL(pGpu))
2317     {
2318         if (gpuCheckIsP2PAllocated_HAL(pGpu))
2319         {
2320             status = kbusSetupBindFla(pGpu, pKernelBus, pGpu->sriovState.pP2PInfo->gfid);
2321         }
2322         else
2323         {
2324             NV_PRINTF(LEVEL_INFO, "Skipping binding FLA, because no P2P GFID is"
2325                       " validated yet\n");
2326         }
2327     }
2328     else
2329     {
2330         status = kbusSetupBindFla(pGpu, pKernelBus, GPU_GFID_PF);
2331     }
2332 
2333     if (status != NV_OK)
2334     {
2335         NV_PRINTF(LEVEL_ERROR,
2336                   "failed binding instblk for FLA, status=0x%x\n", status);
2337         goto free_instblk;
2338     }
2339     if (GPU_GET_KERNEL_NVLINK(pGpu) != NULL)
2340     {
2341         NVLINK_INBAND_MSG_CALLBACK inbandMsgCbParams;
2342 
2343         inbandMsgCbParams.messageType = NVLINK_INBAND_MSG_TYPE_MC_TEAM_SETUP_RSP;
2344         inbandMsgCbParams.pCallback = &memorymulticastfabricTeamSetupResponseCallback;
2345         inbandMsgCbParams.wqItemFlags = OS_QUEUE_WORKITEM_FLAGS_LOCK_SEMA |
2346                                         OS_QUEUE_WORKITEM_FLAGS_LOCK_GPUS_RW;
2347 
2348         status = knvlinkRegisterInbandCallback(pGpu,
2349                                                GPU_GET_KERNEL_NVLINK(pGpu),
2350                                                &inbandMsgCbParams);
2351         if (status != NV_OK)
2352         {
2353             NV_PRINTF(LEVEL_ERROR, "GPU (ID: %d) Registering Inband Cb failed\n",
2354                     gpuGetInstance(pGpu));
2355             goto free_instblk;
2356         }
2357 
2358     }
2359 
    // Set up the unicast FLA range in the fabric VAS object
2361     if (!GPU_IS_NVSWITCH_DETECTED(pGpu))
2362     {
2363         size = gpuGetFlaVasSize_HAL(pGpu, NV_FALSE);
2364         base = pGpu->gpuInstance * size;
2365 
2366         NV_ASSERT_OK_OR_GOTO(status, fabricvaspaceInitUCRange(
2367                                      dynamicCast(pGpu->pFabricVAS, FABRIC_VASPACE), pGpu,
2368                                      base, size), free_instblk);
2369     }
2370 
2371     pKernelBus->flaInfo.bFlaAllocated       = NV_TRUE;
2372 
2373     return NV_OK;
2374 
2375 free_instblk:
2376     kbusDestructFlaInstBlk_HAL(pGpu, pKernelBus);
2377 
2378 unpin_rootpagedir:
2379     if (pGpu->pFabricVAS != NULL)
2380     {
2381         vaspaceUnpinRootPageDir(pGpu->pFabricVAS, pGpu);
2382     }
2383 
2384 cleanup:
2385     if (pGpu->pFabricVAS != NULL)
2386     {
2387         vmmDestroyVaspace(pVmm, pGpu->pFabricVAS);
2388         pGpu->pFabricVAS = NULL;
2389     }
2390 
2391     // TODO: remove this once legacy FLA VAS support is removed.
2392     pRmApi->Free(pRmApi, pKernelBus->flaInfo.hClient, pKernelBus->flaInfo.hClient);
2393 
2394     pKernelBus->flaInfo.bFlaAllocated = NV_FALSE;
2395 
2396     NV_PRINTF(LEVEL_ERROR, "failed allocating FLA VASpace status=0x%x\n",
2397               status);
2398 
2399     return status;
2400 }
2401 
2402 void
2403 kbusDestroyFla_GH100
2404 (
2405     OBJGPU    *pGpu,
2406     KernelBus *pKernelBus
2407 )
2408 {
2409     OBJSYS *pSys   = SYS_GET_INSTANCE();
2410     OBJVMM *pVmm   = SYS_GET_VMM(pSys);
2411     RM_API   *pRmApi = rmapiGetInterface(RMAPI_GPU_LOCK_INTERNAL);
2412 
2413     if (pGpu->pFabricVAS != NULL)
2414     {
2415         if (pKernelBus->flaInfo.bFlaBind)
2416         {
2417             if (IS_VIRTUAL(pGpu) || IS_GSP_CLIENT(pGpu))
2418             {
2419                 kbusSetupUnbindFla_HAL(pGpu, pKernelBus);
2420             }
2421         }
2422 
2423         if (pKernelBus->flaInfo.bFlaAllocated)
2424         {
            vaspaceUnpinRootPageDir(pGpu->pFabricVAS, pGpu);
            kbusDestructFlaInstBlk_HAL(pGpu, pKernelBus);
            vmmDestroyVaspace(pVmm, pGpu->pFabricVAS);

            pGpu->pFabricVAS = NULL;
            // TODO: Remove this once legacy FLA VAS support is deprecated
            pRmApi->Free(pRmApi, pKernelBus->flaInfo.hClient, pKernelBus->flaInfo.hClient);
            portMemSet(&pKernelBus->flaInfo, 0, sizeof(pKernelBus->flaInfo));
            if (GPU_GET_KERNEL_NVLINK(pGpu) != NULL)
            {
                // Unregister the receive callback
                NV_ASSERT_OK(knvlinkUnregisterInbandCallback(pGpu, GPU_GET_KERNEL_NVLINK(pGpu),
                                NVLINK_INBAND_MSG_TYPE_MC_TEAM_SETUP_RSP));
            }
2439         }
2440     }
2441 }
2442 
2443 /*!
 * @brief Helper function to extract information from the FLA data structure and
 *        to trigger an RPC to Physical RM to bind the FLA VASpace
2446  *
2447  * @param[in]  gfid     GFID
2448  *
2449  * @return NV_OK if successful
2450  */
2451 NV_STATUS
2452 kbusSetupBindFla_GH100
2453 (
2454     OBJGPU    *pGpu,
2455     KernelBus *pKernelBus,
2456     NvU32      gfid
2457 )
2458 {
2459     NV_STATUS status = NV_OK;
2460     NV2080_CTRL_FLA_SETUP_INSTANCE_MEM_BLOCK_PARAMS params = {0};
2461     MEMORY_DESCRIPTOR  *pMemDesc;
2462     RM_API *pRmApi = IS_GSP_CLIENT(pGpu) ? GPU_GET_PHYSICAL_RMAPI(pGpu)
2463                                          : rmapiGetInterface(RMAPI_GPU_LOCK_INTERNAL);
2464 
2465     pMemDesc = pKernelBus->flaInfo.pInstblkMemDesc;
2466 
    switch (memdescGetAddressSpace(pMemDesc))
2468     {
2469         case ADDR_FBMEM:
2470             params.addrSpace = NV2080_CTRL_FLA_ADDRSPACE_FBMEM;
2471             break;
2472         case ADDR_SYSMEM:
2473             params.addrSpace = NV2080_CTRL_FLA_ADDRSPACE_SYSMEM;
2474             break;
2475     }
2476     params.imbPhysAddr = memdescGetPhysAddr(pMemDesc, AT_GPU, 0);
2477     params.flaAction   = NV2080_CTRL_FLA_ACTION_BIND;
2478 
2479     status = pRmApi->Control(pRmApi,
2480                              pGpu->hInternalClient,
2481                              pGpu->hInternalSubdevice,
2482                              NV2080_CTRL_CMD_FLA_SETUP_INSTANCE_MEM_BLOCK,
2483                              &params,
2484                              sizeof(params));
2485 
    if (status != NV_OK)
    {
        NV_PRINTF(LEVEL_ERROR, "FLA bind failed, status: 0x%x\n", status);
2489         return status;
2490     }
2491 
2492     // Since FLA state is tracked in the Guest, Guest RM needs to set it here
2493     pKernelBus->flaInfo.bFlaBind = NV_TRUE;
2494     pKernelBus->bFlaEnabled      = NV_TRUE;
2495 
2496     return status;
2497 }
2498 
2499 /*!
2500  * @brief Helper function to trigger RPC to Physical RM to unbind FLA VASpace
2501  *
2502  * @return NV_OK if successful
2503  */
2504 NV_STATUS
2505 kbusSetupUnbindFla_GH100
2506 (
2507     OBJGPU    *pGpu,
2508     KernelBus *pKernelBus
2509 )
2510 {
2511     NV_STATUS status = NV_OK;
2512     NV2080_CTRL_FLA_SETUP_INSTANCE_MEM_BLOCK_PARAMS params = { 0 };
2513     RM_API *pRmApi = IS_GSP_CLIENT(pGpu) ? GPU_GET_PHYSICAL_RMAPI(pGpu)
2514                                          : rmapiGetInterface(RMAPI_GPU_LOCK_INTERNAL);
2515 
2516     if (!pKernelBus->flaInfo.bFlaBind)
2517         return NV_OK;
2518 
2519     params.flaAction = NV2080_CTRL_FLA_ACTION_UNBIND;
2520 
2521     status = pRmApi->Control(pRmApi,
2522                              pGpu->hInternalClient,
2523                              pGpu->hInternalSubdevice,
2524                              NV2080_CTRL_CMD_FLA_SETUP_INSTANCE_MEM_BLOCK,
2525                              &params,
2526                              sizeof(params));
2527 
2528     pKernelBus->flaInfo.bFlaBind = NV_FALSE;
2529     pKernelBus->bFlaEnabled      = NV_FALSE;
2530 
2531     return status;
2532 }
2533 
2534 NV_STATUS
2535 kbusGetFlaRange_GH100
2536 (
2537     OBJGPU    *pGpu,
2538     KernelBus *pKernelBus,
2539     NvU64     *ucFlaBase,
2540     NvU64     *ucFlaSize,
    NvBool     bIsConnectedToNvswitch
2542 )
2543 {
2544     if (!GPU_IS_NVSWITCH_DETECTED(pGpu))
2545     {
2546         *ucFlaSize = gpuGetFlaVasSize_HAL(pGpu, NV_FALSE);
2547         *ucFlaBase = pGpu->gpuInstance * (*ucFlaSize);
2548     }
2549     else
2550     {
2551         FABRIC_VASPACE *pFabricVAS = dynamicCast(pGpu->pFabricVAS, FABRIC_VASPACE);
2552         NvU64           ucFlaLimit;
2553 
2554         if (pFabricVAS == NULL)
2555             return NV_ERR_INVALID_STATE;
2556 
2557         ucFlaLimit = fabricvaspaceGetUCFlaLimit(pFabricVAS);
2558         if (ucFlaLimit == 0)
2559             return NV_ERR_INVALID_STATE;
2560 
2561         *ucFlaBase = fabricvaspaceGetUCFlaStart(pFabricVAS);
2562         *ucFlaSize = ucFlaLimit - *ucFlaBase + 1;
2563     }
2564 
2565     return NV_OK;
2566 }
2567 
2568 /*!
2569  * @brief Returns the EGM peer ID of pRemoteGpu if it was
2570  *        reserved already.
2571  *
 * @param[in]  pLocalGpu        local OBJGPU pointer
 * @param[in]  pLocalKernelBus  local KernelBus pointer
 * @param[in]  pRemoteGpu       remote OBJGPU pointer
 *
 * return the reserved EGM peer ID on success
2577  *        BUS_INVALID_PEER otherwise
2578  *
2579  */
2580 NvU32
2581 kbusGetEgmPeerId_GH100
2582 (
2583     OBJGPU    *pLocalGpu,
2584     KernelBus *pLocalKernelBus,
2585     OBJGPU    *pRemoteGpu
2586 )
2587 {
2588     NvU32 gpuPeerInst = gpuGetInstance(pRemoteGpu);
2589     NvU32 peerMask    = pLocalKernelBus->p2p.busNvlinkPeerNumberMask[gpuPeerInst];
2590     NvU32 peerId;
2591 
2592     if (peerMask == 0)
2593     {
2594         NV_PRINTF(LEVEL_INFO,
2595                   "NVLINK P2P not set up between GPU%u and GPU%u\n",
2596                   gpuGetInstance(pLocalGpu), gpuPeerInst);
2597         return BUS_INVALID_PEER;
2598     }
2599 
2600     FOR_EACH_INDEX_IN_MASK(32, peerId, peerMask)
2601     {
2602         if (pLocalKernelBus->p2p.bEgmPeer[peerId])
2603         {
2604             return peerId;
2605         }
2606     }
2607     FOR_EACH_INDEX_IN_MASK_END;
2608 
2609     return BUS_INVALID_PEER;
2610 }
2611