1 /*
2  * SPDX-FileCopyrightText: Copyright (c) 2004-2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
3  * SPDX-License-Identifier: MIT
4  *
5  * Permission is hereby granted, free of charge, to any person obtaining a
6  * copy of this software and associated documentation files (the "Software"),
7  * to deal in the Software without restriction, including without limitation
8  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
9  * and/or sell copies of the Software, and to permit persons to whom the
10  * Software is furnished to do so, subject to the following conditions:
11  *
12  * The above copyright notice and this permission notice shall be included in
13  * all copies or substantial portions of the Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
21  * DEALINGS IN THE SOFTWARE.
22  */
23 
24 #include "core/core.h"
25 #include "gpu/gpu.h"
26 #include "gpu/mmu/kern_gmmu.h"
27 #include "gpu/bus/kern_bus.h"
28 #include "gpu/bif/kernel_bif.h"
29 #include "kernel/gpu/mig_mgr/kernel_mig_manager.h"
30 #include "kernel/gpu/nvlink/kernel_nvlink.h"
31 #include "gpu/mem_mgr/mem_mgr.h"
32 #include "mem_mgr/vaspace.h"
33 #include "mem_mgr/gpu_vaspace.h"
34 #include "gpu/mem_mgr/virt_mem_allocator.h"
35 #include "gpu/mem_sys/kern_mem_sys.h"
36 #include "core/system.h"
37 #include "mem_mgr/virt_mem_mgr.h"
38 #include "rmapi/rs_utils.h"
39 #include "vgpu/rpc.h"
40 #include "nvRmReg.h"
41 
42 #include "gpu/mem_mgr/fermi_dma.h"
43 
44 #include "published/maxwell/gm107/dev_ram.h"
45 #include "published/maxwell/gm107/dev_bus.h"
46 #include "published/maxwell/gm107/dev_mmu.h"
47 
48 #include "class/cl90f1.h"  // FERMI_VASPACE_A
49 
50 //
51 // forwards
52 //
53 static NV_STATUS _kbusInitP2P_GM107(OBJGPU *, KernelBus *);
54 static NV_STATUS _kbusDestroyP2P_GM107(OBJGPU *, KernelBus *);
55 static void _kbusLinkP2P_GM107(OBJGPU *, KernelBus *);
56 
57 static NvU32 _kbusGetSizeOfBar2PageDir_GM107(NvU64 vaBase, NvU64 vaLimit, NvU64 vaPerEntry, NvU32 entrySize);
58 
59 static NV_STATUS _kbusBar0TunnelCb_GM107(void *pPrivData, NvU64 addr, void *pData, NvU64 size, NvBool bRead);
60 
61 NV_STATUS _kbusMapAperture_GM107(OBJGPU *, PMEMORY_DESCRIPTOR, OBJVASPACE *, NvU64, NvU64 *,
62                                  NvU64 *, NvU32 mapFlags, NvHandle hClient);
63 NV_STATUS _kbusUnmapAperture_GM107(OBJGPU *, OBJVASPACE *, PMEMORY_DESCRIPTOR, NvU64);
64 MEMORY_DESCRIPTOR* kbusCreateStagingMemdesc(OBJGPU *pGpu);
65 
// This is the peer number assignment for SLI with
// 8 GPUs. The peer IDs should be symmetrical.
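// Entry [i][j] is the peer ID that GPU i (local peer index) uses when
// targeting GPU j (remote peer index).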
68 static const NvU32 peerNumberTable_GM107[8][8] =
69 {
70     {0, 0, 1, 2, 3, 4, 5, 6},
71     {0, 0, 2, 3, 4, 5, 6, 7},
72     {1, 2, 0, 4, 5, 6, 7, 0},
73     {2, 3, 4, 0, 6, 7, 0, 1},
74     {3, 4, 5, 6, 0, 0, 1, 2},
75     {4, 5, 6, 7, 0, 0, 2, 3},
76     {5, 6, 7, 0, 1, 2, 0, 4},
77     {6, 7, 0, 1, 2, 3, 4, 0}
78 };
79 
// Helper to create a one-page staging buffer memdesc for BAR2 page table bootstrap.
// Stubbed here to return NULL, so the MMU walker runs without a staging buffer.
81 MEMORY_DESCRIPTOR*
82 kbusCreateStagingMemdesc(OBJGPU *pGpu)
83 {
84     return NULL;
85 }
86 
87 NV_STATUS
88 kbusConstructHal_GM107(OBJGPU *pGpu, KernelBus *pKernelBus)
89 {
90 
91     NV_PRINTF(LEVEL_INFO, "Entered \n");
92 
93     pKernelBus->p2pPcie.writeMailboxBar1Addr = PCIE_P2P_INVALID_WRITE_MAILBOX_ADDR;
94 
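    // Sentinel values; the real BAR2 PDE/PTE bases are filled in later
    // (see the "not initialized by fbPreInit_FERMI" check in kbusSetupBar2CpuAperture_GM107).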
95     pKernelBus->bar2[GPU_GFID_PF].pdeBase   = 0xdeadbeef;
96     pKernelBus->bar2[GPU_GFID_PF].pteBase   = 0xdeadbeef;
97 
98     pKernelBus->bar2[GPU_GFID_PF].cpuInvisibleBase = 0;
99     pKernelBus->bar2[GPU_GFID_PF].cpuInvisibleLimit = 0;
100 
101     pKernelBus->virtualBar2[GPU_GFID_PF].pVASpaceHeap = NULL;
102     pKernelBus->virtualBar2[GPU_GFID_PF].pMapListMemory = NULL;
103 
104     if (pGpu->getProperty(pGpu, PDB_PROP_GPU_BROKEN_FB) &&
105         !pGpu->getProperty(pGpu, PDB_PROP_GPU_ZERO_FB))
106     {
107         pKernelBus->bFbFlushDisabled = NV_TRUE;
108     }
109 
110     //
111     // Conditions to disable CPU pointer for flushing VBAR2:
112     // 1. If inst_in_sys is passed in (regkey setting)
113     // 2. If FB flushing is disabled (brokenFB or regkey setting)
114     // 3. If we are on GSP firmware
115     //
116     if (pGpu->getProperty(pGpu, PDB_PROP_GPU_IS_ALL_INST_IN_SYSMEM) ||
117         kbusIsFbFlushDisabled(pKernelBus) ||
118         RMCFG_FEATURE_PLATFORM_GSP)
119     {
120         pKernelBus->bReadCpuPointerToFlush = NV_FALSE;
121     }
122 
123     // indicate that Bar2 is not initialized yet
124     pKernelBus->virtualBar2[GPU_GFID_PF].pCpuMapping = NULL;
125 
126     pKernelBus->numPeers = P2P_MAX_NUM_PEERS;
127 
128     //
    // Default apertures for BAR2 PTEs/PDEs
130     //
131     pKernelBus->PTEBAR2Aperture = ADDR_FBMEM;
132     pKernelBus->PTEBAR2Attr = NV_MEMORY_WRITECOMBINED;
133     pKernelBus->PDEBAR2Aperture = ADDR_FBMEM;
134     pKernelBus->PDEBAR2Attr = NV_MEMORY_WRITECOMBINED;
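    // Note: these defaults may be overridden later, e.g. if instance memory is
    // forced into sysmem.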
135 
136     return NV_OK;
137 }
138 
139 NV_STATUS
140 kbusStatePreInitLocked_GM107
141 (
142     OBJGPU    *pGpu,
143     KernelBus *pKernelBus
144 )
145 {
146     NV_PRINTF(LEVEL_INFO, "gpu:%d\n", pGpu->gpuInstance);
147 
    // For VGPU, kbusInitBarsSize_HAL is called in an earlier phase
149     if (! IS_VIRTUAL(pGpu))
150     {
151         NV_CHECK_OK_OR_RETURN(LEVEL_ERROR,
152             kbusInitBarsSize_HAL(pGpu, pKernelBus));
153     }
154 
155     kbusDetermineBar1Force64KBMapping(pKernelBus);
156 
157     kbusDetermineBar1ApertureLength(pKernelBus, GPU_GFID_PF);
158 
159     kbusSetupBar1P2PCapability(pGpu, pKernelBus);
160 
161     if (NV_OK != kbusConstructVirtualBar2_HAL(pGpu, pKernelBus, GPU_GFID_PF))
162     {
163         return NV_ERR_INSUFFICIENT_RESOURCES;
164     }
165 
166     return NV_OK;
167 }
168 
/*!
 * Stub eheap free for the address-reuse case below. Installing this stub makes
 * frees a no-op (so addresses are never reused) without adding conditionals
 * around the code.
 */
172 static NV_STATUS nullEHeapFree(OBJEHEAP *thisHeap, NvU64 offset)
173 {
174     return NV_OK;
175 }
176 
177 /*!
 * @brief Program the default BAR0 window based on the mode we are running in.
179  */
180 static void
181 kbusSetupDefaultBar0Window
182 (
183     OBJGPU    *pGpu,
184     KernelBus *pKernelBus
185 )
186 {
187     MemoryManager *pMemoryManager = GPU_GET_MEMORY_MANAGER(pGpu);
188     const MEMORY_SYSTEM_STATIC_CONFIG *pMemorySystemConfig =
189         kmemsysGetStaticConfig(pGpu, GPU_GET_KERNEL_MEMORY_SYSTEM(pGpu));
190     NvU64 offsetBar0;
191 
192     //
193     // Initialize BAR0 window to the last 1MB of FB. Since this is where it
194     // should already be positioned by the vbios, this should not be an issue.
195     // Do NOT ever move this BAR0 window away from the last 1MB since it's
196     // shared with the vbios
197     //
198     if (pMemorySystemConfig->bReservedMemAtBottom)
199     {
200         offsetBar0 = memmgrGetRsvdMemoryBase(pMemoryManager);
201     }
202     else
203     {
204         offsetBar0 = (pMemoryManager->Ram.fbAddrSpaceSizeMb << 20) - DRF_SIZE(NV_PRAMIN);
205     }
206 
207     //
208     // However, when running in L2 cache only mode, there is typically
209     // less than 1MB of L2 cache, so just position the BAR0 either at
210     // the start of FB or at the end of L2 depending on how big the
211     // window is compared to the size of L2.  We want to make sure that
212     // the window overlaps reserved memory.
213     //
214     if (gpuIsCacheOnlyModeEnabled(pGpu) ||
215         !(pMemorySystemConfig->bFbpaPresent))
216     {
217         if (pMemorySystemConfig->l2CacheSize < DRF_SIZE(NV_PRAMIN))
218         {
            // L2 cache size is < BAR0 window size, so just set its offset to 0
220             offsetBar0 = 0;
221         }
222         else
223         {
224             //
225             // L2 Cache size is > BAR0 window, so position it at the end of L2 to
226             // make sure it overlaps reserved memory, which is at the end of L2
227             //
228             offsetBar0 = pMemorySystemConfig->l2CacheSize - DRF_SIZE(NV_PRAMIN);
229         }
230     }
231 
232     if (!IS_VIRTUAL_WITH_SRIOV(pGpu))
233     {
234         DEVICE_MAPPING *pDeviceMapping = gpuGetDeviceMapping(pGpu, DEVICE_INDEX_GPU, 0);
235         pKernelBus->pUncachedBar0Window = (NvU8*) &pDeviceMapping->gpuNvAddr->Reg008[DRF_BASE(NV_PRAMIN)];
236         pKernelBus->pDefaultBar0Pointer = pKernelBus->pUncachedBar0Window;
237         pKernelBus->physicalBar0WindowSize = DRF_SIZE(NV_PRAMIN);
238 
239         kbusSetBAR0WindowVidOffset_HAL(pGpu, pKernelBus, offsetBar0);
240 
241     }
242 }
243 
244 /*!
245  * @brief  kbusStateInit routine for Kernel RM functionality.
246  */
247 NV_STATUS
248 kbusStateInitLockedKernel_GM107
249 (
250     OBJGPU    *pGpu,
251     KernelBus *pKernelBus
252 )
253 {
254     KernelBif        *pKernelBif = GPU_GET_KERNEL_BIF(pGpu);
255     VirtMemAllocator *pDma       = GPU_GET_DMA(pGpu);
256     NvU32             data;
257 
258     if ((pKernelBif != NULL) && (!pKernelBif->getProperty(pKernelBif, PDB_PROP_KBIF_P2P_READS_DISABLED) ||
259                                  !pKernelBif->getProperty(pKernelBif, PDB_PROP_KBIF_P2P_WRITES_DISABLED)))
260     {
261         if (osReadRegistryDword(pGpu, NV_REG_STR_RM_MAP_P2P_PEER_ID, &data) == NV_OK)
262         {
263             pKernelBus->p2pMapSpecifyId = NV_TRUE;
264             pKernelBus->p2pMapPeerId = data;
265         }
266         else
267         {
268             pKernelBus->p2pMapSpecifyId = NV_FALSE;
269         }
270 
271         if (gpumgrGetGpuLinkCount(pGpu->gpuInstance) > 0)
272         {
273             if (!kbusIsP2pInitialized(pKernelBus) &&
274                 !kbusIsP2pMailboxClientAllocated(pKernelBus))
275             {
276                 _kbusInitP2P_GM107(pGpu, pKernelBus);
277             }
278         }
279         else
280         {
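            // No GPU links reported, so there is no mailbox P2P to initialize;
            // just mark it as initialized.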
281             pKernelBus->bP2pInitialized = NV_TRUE;
282         }
283     }
284 
285     kbusSetupDefaultBar0Window(pGpu, pKernelBus);
286 
287     //
288     // Initialize BAR2 before initializing BAR1.  That way, we can use BAR2
289     // rather than BAR0 to set up the BAR1 page table.  This is faster because
290     // BAR2 can be write-combined
291     //
292     NV_ASSERT_OK_OR_RETURN(kbusInitBar2_HAL(pGpu, pKernelBus, GPU_GFID_PF));
293 
294     if (IS_VGPU_GSP_PLUGIN_OFFLOAD_ENABLED(pGpu) && IS_VIRTUAL_WITH_SRIOV(pGpu))
295     {
296         vgpuGspSetupBuffers(pGpu);
297     }
298 
299     if (!pGpu->getProperty(pGpu, PDB_PROP_GPU_COHERENT_CPU_MAPPING))
300     {
        // Verify that BAR2 and the MMU actually work
302         NV_ASSERT_OK_OR_RETURN(kbusVerifyBar2_HAL(pGpu, pKernelBus, NULL, NULL, 0, 0));
303     }
304 
305     //
306     // For "unsupported" mmu invalidate skipping mode, we align virtual BAR2
307     // to avoid false TLB fills and disallow address reuse.
308     //
309     if (pDma->getProperty(pDma, PDB_PROP_DMA_MMU_INVALIDATE_DISABLE))
310     {
311         pKernelBus->virtualBar2[GPU_GFID_PF].vAlignment = 16 * RM_PAGE_SIZE;
312         pKernelBus->virtualBar2[GPU_GFID_PF].pVASpaceHeap->eheapFree = nullEHeapFree;
313     }
314 
315     return NV_OK;
316 }
317 
318 NV_STATUS
319 kbusStateInitLocked_IMPL(OBJGPU *pGpu, KernelBus *pKernelBus)
320 {
321     // Nothing to be done in guest for the paravirtualization case.
322     if (IS_VIRTUAL_WITHOUT_SRIOV(pGpu))
323     {
324         return NV_OK;
325     }
326 
327     if (RMCFG_FEATURE_PLATFORM_GSP)
328     {
329         NV_ASSERT_OK_OR_RETURN(kbusInitBar2_HAL(pGpu, pKernelBus, GPU_GFID_PF));
330     }
331 
332     NV_ASSERT_OK_OR_RETURN(kbusStateInitLockedKernel_HAL(pGpu, pKernelBus));
333 
334     NV_ASSERT_OK_OR_RETURN(kbusStateInitLockedPhysical_HAL(pGpu, pKernelBus));
335 
336     return NV_OK;
337 }
338 
339 NV_STATUS
340 kbusStateLoad_GM107
341 (
342     OBJGPU *pGpu,
343     KernelBus *pKernelBus,
344     NvU32 flags
345 )
346 {
347 
348     if (flags & GPU_STATE_FLAGS_PRESERVING)
349     {
350         MemoryManager *pMemoryManager = GPU_GET_MEMORY_MANAGER(pGpu);
351 
352         // FB address space may not be available on Tegra (see fbInitFbRegions)
353         if (pMemoryManager->Ram.fbAddrSpaceSizeMb != 0)
354         {
            // Bind the BAR0 window to its default location.
            // Note: we can't move the window for all intents and purposes since VBIOS
            //       will also use the window at arbitrary locations (e.g. during an SMI event).
358             NvU64 offsetBar0 = (pMemoryManager->Ram.fbAddrSpaceSizeMb << 20) - DRF_SIZE(NV_PRAMIN);
359             kbusSetBAR0WindowVidOffset_HAL(pGpu, pKernelBus, offsetBar0);
360         }
361         else
362         {
363             NV_ASSERT(IsTEGRA(pGpu));
364         }
365 
366         if (!(flags & GPU_STATE_FLAGS_GC6_TRANSITION))
367         {
368             if (NULL == pKernelBus->virtualBar2[GPU_GFID_PF].pCpuMapping)
369             {
370                 NV_ASSERT_OK_OR_RETURN(kbusSetupBar2CpuAperture_HAL(pGpu, pKernelBus, GPU_GFID_PF));
371             }
372         }
373         NV_ASSERT_OK_OR_RETURN(kbusCommitBar2_HAL(pGpu, pKernelBus, flags));
374 
375         //
376         // If we are exiting GC6 and the SKIP_BAR2_TEST_GC6 is set for the
        // chip, then don't verify BAR2. The time taken to verify causes a
        // hit on the GC6 exit times, so this verif-only feature does not
379         // come for free.
380         //
381         if (!pGpu->getProperty(pGpu, PDB_PROP_GPU_COHERENT_CPU_MAPPING) &&
382             !(IS_GPU_GC6_STATE_EXITING(pGpu) && pKernelBus->bSkipBar2TestOnGc6Exit))
383         {
            // Verify that BAR2 and the MMU actually work
385             NV_ASSERT_OK_OR_RETURN(kbusVerifyBar2_HAL(pGpu, pKernelBus, NULL, NULL, 0, 0));
386 
            // Failing to return NV_OK above means kbusVerifyBar2_HAL() failed
388         }
389     }
390 
391     return NV_OK;
392 }
393 
394 NV_STATUS
395 kbusStatePostLoad_GM107
396 (
397     OBJGPU *pGpu,
398     KernelBus *pKernelBus,
399     NvU32 flags
400 )
401 {
402     NV_STATUS  status     = NV_OK;
403     KernelBif *pKernelBif = GPU_GET_KERNEL_BIF(pGpu);
404 
405     if ( ! IS_GPU_GC6_STATE_EXITING(pGpu))
406     {
        // BAR1 is created once per GPU, on each GPU's call to kbusStatePostLoad_GM107
408         if ((status = kbusInitBar1_HAL(pGpu, pKernelBus, GPU_GFID_PF)) != NV_OK)
409         {
410             return status;
411         }
412     }
413 
414     if ((pKernelBif != NULL)
415         &&
416         // RM managed P2P or restoring the HW state for OS resume
417         (!kbusIsP2pMailboxClientAllocated(pKernelBus) ||
418          (flags & GPU_STATE_FLAGS_PM_TRANSITION))
419         &&
420         (!pKernelBif->getProperty(pKernelBif, PDB_PROP_KBIF_P2P_READS_DISABLED) ||
421          !pKernelBif->getProperty(pKernelBif, PDB_PROP_KBIF_P2P_WRITES_DISABLED)))
422     {
423         _kbusLinkP2P_GM107(pGpu, pKernelBus);
424     }
425 
426     return status;
427 }
428 
429 NV_STATUS
430 kbusStatePreUnload_GM107
431 (
432     OBJGPU    *pGpu,
433     KernelBus *pKernelBus,
434     NvU32      flags
435 )
436 {
437     if (!((flags & GPU_STATE_FLAGS_PRESERVING) ||
438           IS_GPU_GC6_STATE_ENTERING(pGpu)))
439     {
440         //
        // Get rid of the BARs if this is not PM. There were/are issues with user-mode
        // OGL on XP not knowing that the system has entered suspend and so continuing to
        // run (and issue APIs, touch BAR1 resources, whatever). Therefore we cannot
        // tear down the BAR1 path when entering suspend.
445         //
446         kbusDestroyBar1_HAL(pGpu, pKernelBus, GPU_GFID_PF);
447     }
448 
449     return NV_OK;
450 }
451 
452 NV_STATUS
453 kbusStateUnload_GM107
454 (
455     OBJGPU    *pGpu,
456     KernelBus *pKernelBus,
457     NvU32      flags
458 )
459 {
460     NV_STATUS          status     = NV_OK;
461     KernelBif         *pKernelBif = GPU_GET_KERNEL_BIF(pGpu);
462 
463     if ((pKernelBif != NULL)
464         &&
465         (!pKernelBif->getProperty(pKernelBif, PDB_PROP_KBIF_P2P_READS_DISABLED) ||
466          !pKernelBif->getProperty(pKernelBif, PDB_PROP_KBIF_P2P_WRITES_DISABLED))
467         &&
468         // RM managed P2P or unconfiguring HW P2P for OS suspend/hibernate
469         (!kbusIsP2pMailboxClientAllocated(pKernelBus) ||
470          (flags & GPU_STATE_FLAGS_PM_TRANSITION)))
471     {
472         kbusUnlinkP2P_HAL(pGpu, pKernelBus);
473     }
474 
475     if (flags & GPU_STATE_FLAGS_PRESERVING)
476     {
477         if (!IS_GPU_GC6_STATE_ENTERING(pGpu))
478         {
479             status = kbusTeardownBar2CpuAperture_HAL(pGpu, pKernelBus, GPU_GFID_PF);
480             // Do not use BAR2 physical mode for bootstrapping BAR2 across S/R.
481             pKernelBus->bUsePhysicalBar2InitPagetable = NV_FALSE;
482         }
483     }
484     else
485     {
486         // Clear write mailbox data window info.
487         pKernelBus->p2pPcie.writeMailboxBar1Addr  = PCIE_P2P_INVALID_WRITE_MAILBOX_ADDR;
488         pKernelBus->p2pPcie.writeMailboxTotalSize = 0;
489     }
490 
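    // Reset the cached BAR0 window offset; the window is re-programmed on the
    // next state load.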
491     pKernelBus->cachedBar0WindowVidOffset = 0x0;
492 
493     return status;
494 }
495 
496 /*!
497  * @brief Init BAR1.
498  *
499  *  - Inits FERMI BUS HALINFO Bar1 structure
500  *  - Sets up BAR1 address space
 *  - The function is skipped during a GC6 cycle.  It can update page tables in
 *    VIDMEM/SYSMEM, but all register access should be avoided in the function
503  *
504  * @param[in] pGpu
505  * @param[in] pKernelBus
506  *
507  * @returns NV_OK on success, or rm_status from called functions on failure.
508  */
509 NV_STATUS
510 kbusInitBar1_GM107(OBJGPU *pGpu, KernelBus *pKernelBus, NvU32 gfid)
511 {
512     OBJEHEAP         *pVASpaceHeap              = NULL;
513     NV_STATUS         rmStatus                  = NV_OK;
514     NvU64             apertureVirtAddr, apertureVirtLength;
515     NvU64             vaRangeMax;
516     NvU32             vaflags;
517     KernelBif        *pKernelBif                = GPU_GET_KERNEL_BIF(pGpu);
518     NvU32             vaSpaceBigPageSize        = 0;
519     OBJSYS           *pSys                      = SYS_GET_INSTANCE();
520     OBJVMM           *pVmm                      = SYS_GET_VMM(pSys);
521     NvU32             gpuMask                   = 0;
522     NvBool            bSmoothTransitionEnabled  = ((pGpu->uefiScanoutSurfaceSizeInMB != 0) &&
523                                                    RMCFG_FEATURE_PLATFORM_WINDOWS_LDDM);
524 
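    // The BAR1 VA limit is the last addressable byte of the aperture.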
525     vaRangeMax = pKernelBus->bar1[gfid].apertureLength - 1;
526 
527     //
    // In sleep-resume, BAR1 is not destroyed, but we do have to rebind it.
    // Since that has been done already, exit early from here.
530     //
531     if (pKernelBus->bar1[gfid].pVAS != NULL)
532     {
533         return rmStatus;
534     }
535 
536     //
537     // For BAR address spaces, leave a valid PTE pointed to page 0.
538     // According to page 196 of revision 2.1 of the PCI spec, prefetchable
539     // memory must have no side effects on reads, return all bytes on reads
540     // regardless of byte enables, and host bridges can merge processor
541     // writes without errors.
542     //
543     // Setting this is done by a combination of two steps. Sparsify the VAS
544     // to prevent faults during CPU access and set FULL_PTE.
545     //
546     // For front door simulation and mods emulation however this leads to an
547     // excessive amount of time updating BAR1 PTEs.  So for mods in simulation
548     // and emulation we don't set the FULL_PTE flag.  The VMA code will only
549     // validate the used parts of the PDE in this case, but will make sure to
550     // leave one unused scratch page at the end of the valid range.
551     //
552     vaflags = VASPACE_FLAGS_BAR | VASPACE_FLAGS_BAR_BAR1;
553     vaflags |= VASPACE_FLAGS_ALLOW_ZERO_ADDRESS; // BAR1 requires a zero VAS base.
554     vaflags |= VASPACE_FLAGS_ENABLE_VMM;
555 
556 #if defined(DEVELOP) || defined(DEBUG) || RMCFG_FEATURE_MODS_FEATURES
557     {
558         NvU32 data32 = 0;
559         //
        // The BAR1 page size can only be configured for mods verification.
        // For mods only, we will override the default BAR1 big page size if this regkey is set.
562         // This is the mods plan for testing interop between clients with multiple
563         // big page sizes.
564         //
565         if (osReadRegistryDword(pGpu,
566                     NV_REG_STR_RM_SET_BAR1_ADDRESS_SPACE_BIG_PAGE_SIZE, &data32) == NV_OK)
567         {
568             KernelGmmu *pKernelGmmu = GPU_GET_KERNEL_GMMU(pGpu);
569             if (kgmmuIsPerVaspaceBigPageEn(pKernelGmmu))
570             {
571                 vaSpaceBigPageSize = data32;
572             }
            else
            {
                NV_PRINTF(LEVEL_ERROR,
                          "Arch doesn't support BAR1 big page override - using defaults\n");
                NV_ASSERT(0);
                vaSpaceBigPageSize = 0;
            }
580         }
581     }
582 #endif // defined(DEVELOP) || defined(DEBUG) || RMCFG_FEATURE_MODS_FEATURES
583 
584     switch (vaSpaceBigPageSize)
585     {
586         case FERMI_BIG_PAGESIZE_64K:
587             vaflags |= DRF_DEF(_VASPACE, _FLAGS, _BIG_PAGE_SIZE, _64K);
588             break;
589         case FERMI_BIG_PAGESIZE_128K:
590             vaflags |= DRF_DEF(_VASPACE, _FLAGS, _BIG_PAGE_SIZE, _128K);
591             break;
592         default:
593             vaflags |= DRF_DEF(_VASPACE, _FLAGS, _BIG_PAGE_SIZE, _DEFAULT);
594             break;
595     }
596 
597     //
598     // kbusIsP2pMailboxClientAllocated:
599     //     The client allocates the mailbox area
    //     It is not safe to disable smooth transition from RM as it is assumed to be enabled in KMD
601     //
602     if (kbusIsP2pMailboxClientAllocated(pKernelBus))
603     {
604         // KMD requires smooth transition to have a reverse BAR1 VA space
605         if (bSmoothTransitionEnabled)
606             vaflags |= VASPACE_FLAGS_REVERSE;
607     }
608     else
609     {
610         //
        // Smooth transition is enabled:
        //     Bug 3208922: for BAR1 ranges > 4 GiB on notebooks.
        //     For BAR1 ranges less than 4 GiB otherwise.
614         //
615         if (bSmoothTransitionEnabled && (IsMobile(pGpu) || (vaRangeMax < NV_U32_MAX)))
616         {
617             //
            // If the UEFI scanout surface size is configured to be non-zero,
            // we are going to move all BAR1 vaspace requests to the higher
            // address range so they do not conflict with the UEFI scanout
            // surface at offset 0.
            //
            // P2P mailbox registers are 34 bits wide and hence can only address
            // the first 16 GiB of BAR1 due to the limited address width. Hence,
            // they cannot always be moved to the top of BAR1.
            //
            // We are restricting this feature only to those SKUs whose
            // BAR1 aperture is within a 4 GiB range, because this feature is
            // notebook only, and the expectation is that the BAR1 VA range will
            // not be that huge. Once the BAR1 VA range crosses 4 GiB (even though
            // still smaller than 16 GiB), we may have to revisit the P2P mailbox and
            // expand it to the full FB range - as there will be new features such as
            // dynamic BAR1.
            //
            // Choosing the smallest 4 GiB range for now.
635             //
636             vaflags |= VASPACE_FLAGS_REVERSE;
637         }
638         else
639         {
640             bSmoothTransitionEnabled = NV_FALSE;
641             pGpu->uefiScanoutSurfaceSizeInMB = 0;
642         }
643     }
644 
645     if (IS_GFID_VF(gfid))
646     {
647         vaflags |= VASPACE_FLAGS_ALLOW_PAGES_IN_PHYS_MEM_SUBALLOCATOR;
648     }
649 
650     gpuMask = NVBIT(pGpu->gpuInstance);
651 
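    // Create the BAR1 VA space covering [0, vaRangeMax], associated with this GPU only.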
652     rmStatus = vmmCreateVaspace(pVmm, FERMI_VASPACE_A, 0, gpuMask,
653                                 0, vaRangeMax, 0, 0, NULL,
654                                 vaflags, &pKernelBus->bar1[gfid].pVAS);
655     if (NV_OK != rmStatus)
656     {
657         NV_PRINTF(LEVEL_ERROR,
658                     "Could not construct BAR1 VA space object.\n");
659         pKernelBus->bar1[gfid].pVAS = NULL;
660         DBG_BREAKPOINT();
661         return rmStatus;
662     }
663 
664     // Restrict normal BAR1 alloc to be within the aperture
665     pVASpaceHeap = vaspaceGetHeap(pKernelBus->bar1[gfid].pVAS);
666 
667 
668     //
669     // Reduce BAR1 VA space by FERMI_SMALL_PAGESIZE for host overfetch bug
670     // WAR (Bug 529932/525381). (FERMI_SMALL_PAGESIZE is sufficient to
671     // avoid big pagesize allocations at the end of BAR1 VA space.)
672     //
673     vaRangeMax -= FERMI_SMALL_PAGESIZE;
674     rmStatus = pVASpaceHeap->eheapSetAllocRange(pVASpaceHeap,
675                                                 0, vaRangeMax);
676     if (rmStatus != NV_OK)
677     {
678         NV_PRINTF(LEVEL_ERROR,
679                     "Unable to set BAR1 alloc range to aperture size!\n");
680         goto kbusInitBar1_failed;
681     }
682 
683     //
    // Make sure the aperture length we are using is not larger than the maximum length available.
685     // Usually, bar1.apertureLength should be equal to the return value of kbusGetPciBarSize, however,
686     // in L2 cache only mode, the aperture length being used may have been overridden to a smaller size,
687     // so take that into account in the assert.
688     //
689     NV_ASSERT(pKernelBus->bar1[gfid].apertureLength <= kbusGetPciBarSize(pKernelBus, 1));
690 
691     //
692     // If we need to preserve a console mapping at the start of BAR1, we
693     // need to allocate the VA space before anything else gets allocated.
694     //
695     if (IS_GFID_PF(gfid) &&
696         (kbusIsPreserveBar1ConsoleEnabled(pKernelBus) || bSmoothTransitionEnabled))
697     {
698         MemoryManager     *pMemoryManager  = GPU_GET_MEMORY_MANAGER(pGpu);
699         NvU64              bar1VAOffset    = 0;
700         NvU64              fbPhysOffset    = 0;
701         NvU64              consoleSize     = 0;
702         PMEMORY_DESCRIPTOR pConsoleMemDesc = NULL;
703         MEMORY_DESCRIPTOR  memdesc;
704 
705         if (bSmoothTransitionEnabled)
706         {
707             //
708             // Smooth transition - The physical fb offset 0 to uefiScanoutSurfaceSize(InMB) should be identity mapped.
709             // The lower FB region at offset 0 is owned by PMA and OS in wddm and hence RM will not reserve the physical
710             // FB memory but only describe it.
711             //
712             pConsoleMemDesc = &memdesc;
713             memdescCreateExisting(pConsoleMemDesc, pGpu, pGpu->uefiScanoutSurfaceSizeInMB * 1024 * 1024, ADDR_FBMEM, NV_MEMORY_UNCACHED, MEMDESC_FLAGS_NONE);
714             memdescDescribe(pConsoleMemDesc, ADDR_FBMEM, 0, pGpu->uefiScanoutSurfaceSizeInMB * 1024 * 1024);
715             pConsoleMemDesc->_pageSize = RM_PAGE_SIZE;
716         }
717         else if (kbusIsPreserveBar1ConsoleEnabled(pKernelBus))
718         {
719             pConsoleMemDesc = memmgrGetReservedConsoleMemDesc(pGpu, pMemoryManager);
720         }
721 
722         if (pConsoleMemDesc)
723         {
724             consoleSize = memdescGetSize(pConsoleMemDesc);
725 
726             NV_PRINTF(LEVEL_INFO,
727                         "preserving console BAR1 mapping (0x%llx)\n",
728                         consoleSize);
729 
730             rmStatus = kbusMapFbAperture_HAL(pGpu, pKernelBus, pConsoleMemDesc, fbPhysOffset,
731                                              &bar1VAOffset, &consoleSize,
732                                              BUS_MAP_FB_FLAGS_MAP_UNICAST | BUS_MAP_FB_FLAGS_MAP_OFFSET_FIXED,
733                                              NV01_NULL_OBJECT);
734             if (rmStatus != NV_OK)
735             {
736                 NV_PRINTF(LEVEL_ERROR,
737                             "cannot preserve console mapping in BAR1 (0x%llx, 0x%x)\n",
738                             consoleSize, rmStatus);
739                 goto kbusInitBar1_failed;
740             }
741 
742             //
743             // The reserved console is assumed by the console-driving code to
744             // be at offset 0 of BAR1; anything else will break it.
745             // NOTE: Since BUS_MAP_FB_FLAGS_MAP_OFFSET_FIXED is passed we should never
746             // get here. But this is legacy code leaving it here.
747             //
748             if (bar1VAOffset != 0)
749             {
750                 NV_PRINTF(LEVEL_ERROR,
751                             "expected console @ BAR1 offset 0 (0x%llx, 0x%x)\n",
752                             bar1VAOffset, rmStatus);
753                 DBG_BREAKPOINT();
754                 kbusUnmapFbAperture_HAL(pGpu, pKernelBus, pConsoleMemDesc,
755                                         bar1VAOffset, consoleSize,
756                                         BUS_MAP_FB_FLAGS_MAP_UNICAST | BUS_MAP_FB_FLAGS_PRE_INIT);
757                 goto kbusInitBar1_failed;
758             }
759 
760             pKernelBus->bBar1ConsolePreserved = NV_TRUE;
761         }
762         else
763         {
764             NV_PRINTF(LEVEL_ERROR,
765                         "no console memdesc available to preserve\n");
766             DBG_BREAKPOINT();
767             goto kbusInitBar1_failed;
768         }
769     }
770 
771     // Reserve space for max number of peers regardless of SLI config
772     if ((!pKernelBif->getProperty(pKernelBif, PDB_PROP_KBIF_P2P_READS_DISABLED) ||
773          !pKernelBif->getProperty(pKernelBif, PDB_PROP_KBIF_P2P_WRITES_DISABLED))
774         &&
775         IS_GFID_PF(gfid)
776         &&
777         !kbusIsP2pMailboxClientAllocated(pKernelBus))
778     {
779         rmStatus = kbusAllocP2PMailboxBar1_HAL(pGpu, pKernelBus, gfid, vaRangeMax);
780 
781         if (NV_OK != rmStatus)
782         {
783             goto kbusInitBar1_failed;
784         }
785     }
786 
787     //
    // BAR1 vaspace is sparsified during vaspace creation,
    // and hence its PDB is allocated at vaspace creation and destroyed
    // when BAR1 is destroyed. During a power-save/restore cycle
    // BAR1 is not destroyed; only the instance memory is
    // unbound, put in physical mode, and rebound upon restore.
    // Hence the PDB of BAR1 is static and can be updated here during
    // initialization, instead of in the MMU update-PDB path as was done previously.
795     //
796     rmStatus = kbusBar1InstBlkVasUpdate_HAL(pGpu, pKernelBus);
797 
798     if (NV_OK != rmStatus)
799     {
800         goto kbusInitBar1_failed;
801     }
802 
803     kbusPatchBar1Pdb_HAL(pGpu, pKernelBus);
804 
805     apertureVirtAddr   = pKernelBus->p2pPcie.writeMailboxBar1Addr;
806     apertureVirtLength = pKernelBus->p2pPcie.writeMailboxTotalSize;
807 
808     //
809     // Copy the mailbox setup to other GPUs
810     //
    // This SLI_LOOP is only necessary because _kbusLinkP2P_GM107 is called
    // after each call to kbusInitBar1_GM107 in kbusStatePostLoad_GM107.
    // _kbusLinkP2P_GM107 requires that the writeMailboxAddr of every GPU be set, but
    // that can only happen after kbusInitBar1_GM107 is called on every GPU. In the
    // future, if we can separate the function that kbusInitBar1_GM107 is called in
    // from the function that _kbusLinkP2P_GM107 is called in, then all of the
    // kbusInitBar1_GM107 calls can finish and create write mailboxes, and we can
    // remove this SLI_LOOP.
819     //
820     if (gpumgrIsParentGPU(pGpu) &&
821         !kbusIsP2pMailboxClientAllocated(pKernelBus))
822     {
823         SLI_LOOP_START(SLI_LOOP_FLAGS_IGNORE_REENTRANCY)
824         {
825             pKernelBus = GPU_GET_KERNEL_BUS(pGpu);
826             pKernelBus->p2pPcie.writeMailboxBar1Addr  = apertureVirtAddr;
827             pKernelBus->p2pPcie.writeMailboxTotalSize = apertureVirtLength;
828         }
829         SLI_LOOP_END
830         pKernelBus = GPU_GET_KERNEL_BUS(pGpu);
831     }
832 
833 kbusInitBar1_failed:
834     if (rmStatus != NV_OK)
835     {
836         kbusDestroyBar1_HAL(pGpu, pKernelBus, gfid);
837     }
838 
839     if (!bSmoothTransitionEnabled || (rmStatus != NV_OK))
840     {
841         pGpu->uefiScanoutSurfaceSizeInMB = 0;
842     }
843 
844     return rmStatus;
845 }
846 
847 /*!
848  * @brief Destroy BAR1
849  *
850  * Destroys Bar1 VA Space. BAR1 vaspace is not destroyed during
851  * Power save.
852  *
853  * @param[in] pGpu
854  * @param[in] pKernelBus
855  *
856  * @returns NV_OK always.
857  */
858 NV_STATUS
859 kbusDestroyBar1_GM107
860 (
861     OBJGPU      *pGpu,
862     KernelBus   *pKernelBus,
863     NvU32        gfid
864 )
865 {
866     NV_STATUS   status = NV_OK;
867     OBJSYS     *pSys = SYS_GET_INSTANCE();
868     OBJVMM     *pVmm = SYS_GET_VMM(pSys);
869 
870     if (pKernelBus->bar1[gfid].pVAS != NULL)
871     {
872 
873         // Remove the P2P write mailbox alloc, if it exists
874         if ((pKernelBus->p2pPcie.writeMailboxBar1Addr != PCIE_P2P_INVALID_WRITE_MAILBOX_ADDR) &&
875             IS_GFID_PF(gfid))
876         {
877             if (!kbusIsP2pMailboxClientAllocated(pKernelBus))
878                 vaspaceFree(pKernelBus->bar1[gfid].pVAS, pKernelBus->p2pPcie.writeMailboxBar1Addr);
879             pKernelBus->p2pPcie.writeMailboxBar1Addr  = PCIE_P2P_INVALID_WRITE_MAILBOX_ADDR;
880             pKernelBus->p2pPcie.writeMailboxTotalSize = 0;
881         }
882 
883         // Remove the preserved BAR1 console mapping, if it exists
884         if (pKernelBus->bBar1ConsolePreserved && IS_GFID_PF(gfid))
885         {
886             MemoryManager *pMemoryManager = GPU_GET_MEMORY_MANAGER(pGpu);
887             PMEMORY_DESCRIPTOR pConsoleMemDesc =
888                 memmgrGetReservedConsoleMemDesc(pGpu, pMemoryManager);
889 
890             if (pConsoleMemDesc != NULL)
891             {
892                 NvU64 consoleSize = memdescGetSize(pConsoleMemDesc);
893 
894                 kbusUnmapFbAperture_HAL(pGpu, pKernelBus, pConsoleMemDesc,
895                                         0, consoleSize, BUS_MAP_FB_FLAGS_MAP_UNICAST | BUS_MAP_FB_FLAGS_PRE_INIT);
896             }
897             else if (pGpu->uefiScanoutSurfaceSizeInMB)
898             {
899                 vaspaceFree(pKernelBus->bar1[gfid].pVAS, 0);
900             }
901             else
902             {
903                 NV_ASSERT(pConsoleMemDesc != NULL);
904             }
905 
906             pKernelBus->bBar1ConsolePreserved = NV_FALSE;
907         }
908 
909         vmmDestroyVaspace(pVmm, pKernelBus->bar1[gfid].pVAS);
910 
911         pKernelBus->bar1[gfid].pVAS = NULL;
912     }
913 
914     if (IS_GFID_VF(gfid) && (pKernelBus->bar1[gfid].pInstBlkMemDesc != NULL))
915     {
916         memdescFree(pKernelBus->bar1[gfid].pInstBlkMemDesc);
917         memdescDestroy(pKernelBus->bar1[gfid].pInstBlkMemDesc);
918         pKernelBus->bar1[gfid].pInstBlkMemDesc = NULL;
919     }
920 
921     return status;
922 }
923 
924 /*!
925  * @brief Initialize BAR2
926  *
927  * 1. Setup Bar2 VA Space.
928  * 2. Setup Bar2 in HW.
929  * 3. Host over fetch WAR.
930  *
931  * @param[in] pGpu
932  * @param[in] pKernelBus
933  * @param[in] gfid          GFID for VF
934  *
935  * @returns NV_OK on success.
936  */
937 NV_STATUS
938 kbusInitBar2_GM107
939 (
940     OBJGPU      *pGpu,
941     KernelBus   *pKernelBus,
942     NvU32        gfid
943 )
944 {
945     NV_STATUS  status     = NV_OK;
946 
947     //
948     // Nothing to be done in guest in the paravirtualization case or
949     // if guest is running in SRIOV heavy mode.
950     //
951     if (IS_VIRTUAL_WITHOUT_SRIOV(pGpu) ||
952         (IS_VIRTUAL(pGpu) && gpuIsWarBug200577889SriovHeavyEnabled(pGpu)))
953     {
954         return NV_OK;
955     }
956 
957     status = kbusSetupBar2CpuAperture_HAL(pGpu, pKernelBus, gfid);
958     NV_ASSERT_OR_GOTO(status == NV_OK,  cleanup);
959 
960     if (KBUS_BAR2_ENABLED(pKernelBus))
961     {
962         status = kbusSetupBar2GpuVaSpace_HAL(pGpu, pKernelBus, gfid);
963         NV_ASSERT_OR_GOTO(status == NV_OK,  cleanup);
964     }
965 
966     status = kbusCommitBar2_HAL(pGpu, pKernelBus, GPU_STATE_DEFAULT);
967     NV_ASSERT_OR_GOTO(status == NV_OK,  cleanup);
968 
969     if (IS_GFID_PF(gfid))
970     {
971         pKernelBus->bIsBar2Initialized = NV_TRUE;
972     }
973 
974 cleanup:
975     if (status != NV_OK)
976     {
977         kbusDestroyBar2_HAL(pGpu, pKernelBus, gfid);
978     }
979 
980     return status;
981 }
982 
983 /*!
984  * @brief Destroy BAR2
985  *
986  * 1. Tear down BAR2 Cpu Aperture.
987  * 2. Destroy Bar2 Gpu VA Space.
988  *
989  * @param[in] pGpu
990  * @param[in] pKernelBus
991  *
992  * @returns NV_OK on success.
993  */
994 NV_STATUS
995 kbusDestroyBar2_GM107(OBJGPU *pGpu, KernelBus *pKernelBus, NvU32 gfid)
996 {
997     NV_STATUS  status = NV_OK;
998 
999     if (IS_VGPU_GSP_PLUGIN_OFFLOAD_ENABLED(pGpu) && IS_VIRTUAL_WITH_SRIOV(pGpu))
1000     {
1001         vgpuGspTeardownBuffers(pGpu);
1002     }
1003 
1004     //
1005     // Nothing to be done in guest in the paravirtualization case or
1006     // if guest is running in SRIOV heavy mode.
1007     //
1008     if (IS_VIRTUAL_WITHOUT_SRIOV(pGpu) ||
1009         (IS_VIRTUAL(pGpu) && gpuIsWarBug200577889SriovHeavyEnabled(pGpu)))
1010     {
1011         return NV_OK;
1012     }
1013 
1014     if (kbusTeardownBar2CpuAperture_HAL(pGpu, pKernelBus, gfid) != NV_OK)
1015     {
1016         status = NV_ERR_GENERIC;
1017     }
1018 
1019     if (KBUS_BAR2_ENABLED(pKernelBus))
1020     {
1021         if (kbusTeardownBar2GpuVaSpace_HAL(pGpu, pKernelBus, gfid) != NV_OK)
1022         {
1023             status = NV_ERR_GENERIC;
1024         }
1025     }
1026 
1027     if (IS_GFID_PF(gfid))
1028     {
1029         pKernelBus->bIsBar2Initialized = NV_FALSE;
1030     }
1031 
1032     //
1033     // In cache only mode, do a video memory flush after unbinding BARS to
1034     // make sure that during capture, we don't get stuck waiting on L2.
1035     // This could probably just be done all the time, but currently limiting
1036     // to cache only mode.
1037     //
1038     if (gpuIsCacheOnlyModeEnabled(pGpu) &&
1039         !pGpu->getProperty(pGpu, PDB_PROP_GPU_BROKEN_FB))
1040     {
1041         kbusFlush_HAL(pGpu, pKernelBus, BUS_FLUSH_VIDEO_MEMORY);
1042         kbusFlush_HAL(pGpu, pKernelBus, BUS_FLUSH_VIDEO_MEMORY);
1043     }
1044 
1045     return status;
1046 }
1047 
1048 /*!
1049  * @brief Setup BAR2 aperture for CPU access
1050  *
1051  * 1. Acquire BAR2 CPU mapping.
1052  * 2. Initialize BAR2 GPU vaspace.
1053  *
1054  * @param[in] pGpu
1055  * @param[in] pKernelBus
1056  *
1057  * @returns NV_OK on success.
1058  */
1059 NV_STATUS
1060 kbusSetupBar2CpuAperture_GM107(OBJGPU *pGpu, KernelBus *pKernelBus, NvU32 gfid)
1061 {
1062     NV_STATUS         status  = NV_OK;
1063 
1064     // Nothing to be done in guest in the paravirtualization case.
1065     if (IS_VIRTUAL_WITHOUT_SRIOV(pGpu) || IS_GFID_VF(gfid) ||
1066         (IS_VIRTUAL(pGpu) && gpuIsWarBug200577889SriovHeavyEnabled(pGpu)))
1067     {
1068         return NV_OK;
1069     }
1070 
1071     if (pKernelBus->virtualBar2[gfid].pCpuMapping != NULL)
1072     {
1073         NV_PRINTF(LEVEL_ERROR, "BAR2 already initialized!\n");
1074         return NV_ERR_GENERIC;
1075     }
1076 
1077     if (0 == pKernelBus->bar2[gfid].pteBase)
1078     {
1079         NV_PRINTF(LEVEL_ERROR,
1080                   "BAR2 pteBase not initialized by fbPreInit_FERMI!\n");
1081         DBG_BREAKPOINT();
1082         return NV_ERR_GENERIC;
1083     }
1084 
1085     if (pGpu->getProperty(pGpu, PDB_PROP_GPU_BROKEN_FB) &&
1086         !gpuIsCacheOnlyModeEnabled(pGpu))
1087     {
1088         pKernelBus->virtualBar2[gfid].pCpuMapping = NULL;
1089         return NV_OK;
1090     }
1091 
1092     if (KBUS_BAR2_TUNNELLED(pKernelBus))
1093     {
1094         //
1095         // Since GK20A doesn't support BAR2 accesses we tunnel all RM BAR2 accesses
1096         // through the BAR0 window. For this we register a callback function with the
1097         // OS layer which is called when RM accesses an address in the CPU BAR2 VA range.
        // We skip the normal steps we do to init BAR2 (like initializing the BAR2 inst block) since
1099         // they are not needed for GK20A.
1100         //
1101 
1102         //
1103         // Map bar2 space -- only map the space we use in the RM.  Some 32b OSes are *cramped*
1104         // for kernel virtual addresses. On GK20A, we just alloc CPU VA space since there is no
1105         // actual bar2, and tunnel the "fake" bar2 accesses through the bar0 window.
1106         //
1107         pKernelBus->virtualBar2[gfid].pCpuMapping = portMemAllocNonPaged(pKernelBus->bar2[gfid].rmApertureLimit + 1);
1108         if (pKernelBus->virtualBar2[gfid].pCpuMapping == NULL)
1109         {
1110             NV_PRINTF(LEVEL_ERROR, "- Unable to map bar2!\n");
1111             DBG_BREAKPOINT();
1112             return NV_ERR_NO_MEMORY;
1113         }
1114 
1115         //
1116         // Call the OS add mem filter routine now that bar2 is mapped
1117         // Currently this is used to route bar2 accesses through bar0 on gk20A
1118         //
1119         status = osMemAddFilter((NvU64)((NvUPtr)(pKernelBus->virtualBar2[gfid].pCpuMapping)),
1120                                 (NvU64)((NvUPtr)(pKernelBus->virtualBar2[gfid].pCpuMapping)) +
1121                                 (pKernelBus->bar2[gfid].rmApertureLimit + 1),
1122                                 _kbusBar0TunnelCb_GM107,
1123                                 (void *)pGpu);
1124         if (status != NV_OK)
1125         {
1126             NV_PRINTF(LEVEL_ERROR,
1127                       "Cannot add os mem filter for bar2 tunneling\n");
1128             DBG_BREAKPOINT();
1129             goto cleanup;
1130         }
1131     }
1132     else
1133     {
1134         //
1135         // Map bar2 space -- only map the space we use in the RM.  Some 32b OSes are *cramped*
1136         // for kernel virtual addresses.
1137         //
1138         if (NV_OK != osMapPciMemoryKernelOld(pGpu, pKernelBus->bar2[gfid].physAddr,
1139                                              (pKernelBus->bar2[gfid].rmApertureLimit + 1),
1140                                              NV_PROTECT_READ_WRITE,
1141                                              (void**)&(pKernelBus->virtualBar2[gfid].pCpuMapping),
1142                                              NV_MEMORY_WRITECOMBINED))
1143         {
1144             NV_PRINTF(LEVEL_ERROR, "- Unable to map bar2!\n");
1145             DBG_BREAKPOINT();
1146             return NV_ERR_GENERIC;
1147         }
1148 
1149         NV_PRINTF_COND(IS_EMULATION(pGpu), LEVEL_NOTICE, LEVEL_INFO,
1150                        "BAR0 Base Cpu Mapping @ 0x%p and BAR2 Base Cpu Mapping @ 0x%p\n",
1151                        pGpu->deviceMappings[0].gpuNvAddr->Reg032,
1152                        pKernelBus->virtualBar2[gfid].pCpuMapping);
1153 
1154 
1155     }
1156 
1157 cleanup:
1158     if (status != NV_OK)
1159     {
1160         kbusTeardownBar2CpuAperture_HAL(pGpu, pKernelBus, gfid);
1161     }
1162 
1163     return status;
1164 }
1165 
1166 /*!
1167  * @brief Tear down BAR2 CPU aperture
1168  *
1169  * 1. Release BAR2 GPU vaspace mappings.
1170  * 2. Release BAR2 CPU mapping.
1171  *
1172  * @param[in] pGpu
1173  * @param[in] pKernelBus
1174  * @param[in] gfid
1175  *
1176  * @returns NV_OK on success.
1177  */
1178 NV_STATUS
1179 kbusTeardownBar2CpuAperture_GM107
1180 (
1181     OBJGPU    *pGpu,
1182     KernelBus *pKernelBus,
1183     NvU32      gfid
1184 )
1185 {
1186     // Nothing to be done in guest in the paravirtualization case.
1187     if (IS_VIRTUAL_WITHOUT_SRIOV(pGpu) || IS_GFID_VF(gfid) ||
1188         (IS_VIRTUAL(pGpu) && gpuIsWarBug200577889SriovHeavyEnabled(pGpu)))
1189     {
1190         return NV_OK;
1191     }
1192 
1193     if (KBUS_BAR2_TUNNELLED(pKernelBus))
1194     {
1195         // Unmap bar2 space
1196         if (pKernelBus->virtualBar2[gfid].pCpuMapping)
1197         {
1198             // Remove the memory access filter
1199             osMemRemoveFilter((NvU64)((NvUPtr)(pKernelBus->virtualBar2[gfid].pCpuMapping)));
1200             portMemFree(pKernelBus->virtualBar2[gfid].pCpuMapping);
1201             pKernelBus->virtualBar2[gfid].pCpuMapping = NULL;
1202         }
1203     }
1204     else
1205     {
1206         if (pKernelBus->virtualBar2[gfid].pPageLevels)
1207         {
1208             memmgrMemDescEndTransfer(GPU_GET_MEMORY_MANAGER(pGpu),
1209                          pKernelBus->virtualBar2[gfid].pPageLevelsMemDesc,
1210                          TRANSFER_FLAGS_NONE);
1211             pKernelBus->virtualBar2[gfid].pPageLevels = NULL;
1212         }
1213 
1214         kbusDestroyCpuPointerForBusFlush_HAL(pGpu, pKernelBus);
1215 
1216         kbusFlushVirtualBar2_HAL(pGpu, pKernelBus, NV_FALSE, gfid);
1217 
1218         if (pKernelBus->virtualBar2[gfid].pCpuMapping)
1219         {
1220             osUnmapPciMemoryKernelOld(pGpu, (void*)pKernelBus->virtualBar2[gfid].pCpuMapping);
1221             // Mark the BAR as un-initialized so that a later call
1222             // to initbar2 can succeed.
1223             pKernelBus->virtualBar2[gfid].pCpuMapping = NULL;
1224         }
1225 
1226         //
1227         // make sure that the bar2 mode is physical so that the vesa extended
1228         // linear framebuffer works after driver unload.  Clear other bits to force
1229         // vid.
1230         //
1231         // if BROKEN_FB, merely rewriting this to 0 (as it already was) causes
1232         // FBACKTIMEOUT -- don't do it (Bug 594539)
1233         //
1234         if (!pGpu->getProperty(pGpu, PDB_PROP_GPU_BROKEN_FB))
1235         {
1236             GPU_FLD_WR_DRF_DEF(pGpu, _PBUS, _BAR2_BLOCK, _MODE, _PHYSICAL);
1237             // bug 1738008: temporary fix to unblock -inst_in_sys argument
1238             // we tried to correct bar2 unbind sequence but didn't fix the real issue
1239             // will fix this soon 4/8/16
1240             GPU_REG_RD32(pGpu, NV_PBUS_BAR2_BLOCK);
1241         }
1242     }
1243 
1244     return NV_OK;
1245 }
1246 
1247 /*!
1248  * @brief Setup BAR2 GPU vaspace
1249  *
1250  * 1. Allocate & initialize BAR2 GPU vaspace page directories & tables.
1251  *
1252  * @param[in] pGpu
1253  * @param[in] pKernelBus
1254  *
1255  * @returns NV_OK on success.
1256  */
1257 NV_STATUS
1258 kbusSetupBar2GpuVaSpace_GM107
1259 (
1260     OBJGPU    *pGpu,
1261     KernelBus *pKernelBus,
1262     NvU32      gfid
1263 )
1264 {
1265     NV_STATUS               status             = NV_OK;
1266     MemoryManager          *pMemoryManager     = GPU_GET_MEMORY_MANAGER(pGpu);
1267     KernelGmmu             *pKernelGmmu        = GPU_GET_KERNEL_GMMU(pGpu);
1268     MMU_WALK               *pWalk              = NULL;
1269     MMU_WALK_FLAGS          walkFlags          = {0};
1270     MMU_WALK_USER_CTX       userCtx            = {0};
1271     const MMU_FMT_LEVEL    *pLevelFmt          = NULL;
1272     NvU64                   origVidOffset      = 0;
1273     OBJEHEAP               *pVASpaceHeap;
1274     MEMORY_DESCRIPTOR      *pPageLevelsMemDesc = NULL;
1275     NvU32                   allocSize;
1276 
1277     //
    // Nothing to be done in guest in the paravirtualization case or
    // if guest is running in SRIOV heavy mode.
1280     //
1281     if (IS_VIRTUAL_WITHOUT_SRIOV(pGpu) ||
1282         (IS_VIRTUAL(pGpu) && gpuIsWarBug200577889SriovHeavyEnabled(pGpu)))
1283     {
1284         return NV_OK;
1285     }
1286 
1287     if (IS_GFID_VF(gfid))
1288     {
1289         //
        // VF BAR2 instance block cannot be in PF sysmem as the latter
1291         // is not mapped into VF's IOMMU domain
1292         //
1293         NV_ASSERT_OR_RETURN(pKernelBus->InstBlkAperture == ADDR_FBMEM, NV_ERR_INVALID_ARGUMENT);
1294 
1295         if ((status = memdescCreate(&pKernelBus->bar2[gfid].pInstBlkMemDesc,
1296                                     pGpu,
1297                                     GF100_BUS_INSTANCEBLOCK_SIZE,
1298                                     GF100_BUS_INSTANCEBLOCK_SIZE,
1299                                     NV_TRUE,
1300                                     pKernelBus->InstBlkAperture,
1301                                     pKernelBus->InstBlkAttr,
1302                                     MEMDESC_FLAGS_OWNED_BY_CURRENT_DEVICE)) != NV_OK)
1303         {
1304             NV_ASSERT_OR_RETURN(status == NV_OK, status);
1305         }
1306 
1307         status = memdescAlloc(pKernelBus->bar2[gfid].pInstBlkMemDesc);
1308         NV_ASSERT_OR_RETURN(status == NV_OK, status);
1309 
1310         pKernelBus->bar2[gfid].instBlockBase =
1311                      memdescGetPhysAddr(pKernelBus->bar2[gfid].pInstBlkMemDesc,
1312                                     AT_GPU, 0);
1313     }
1314     // Add the reserved memory base, converting from relative to absolute addresses.
1315     else
1316     {
1317         if (ADDR_FBMEM == pKernelBus->PDEBAR2Aperture)
1318             pKernelBus->bar2[gfid].pdeBase += memmgrGetRsvdMemoryBase(pMemoryManager);
1319         if (ADDR_FBMEM == pKernelBus->PTEBAR2Aperture)
1320             pKernelBus->bar2[gfid].pteBase += memmgrGetRsvdMemoryBase(pMemoryManager);
1321     }
1322 
1323     if (IS_GFID_PF(gfid))
1324     {
1325         // Setup BAR0 window for page directory/table updates during BAR2 bootstrap
1326         status = kbusSetupBar0WindowBeforeBar2Bootstrap_HAL(pGpu, pKernelBus, &origVidOffset);
1327         NV_ASSERT_OR_RETURN(NV_OK == status, status);
1328     }
1329 
1330     // Get Bar2 VA limit.
1331     pKernelBus->bar2[gfid].vaLimit = kbusGetVaLimitForBar2_HAL(pGpu, pKernelBus);
1332 
1333     //
1334     // Reduce BAR2 VA space by FERMI_SMALL_PAGESIZE for host overfetch
1335     // bug WAR (Bug 529932/525381); the last BAR2 page will remain
1336     // mapped to the scratch page.
1337     //
1338     pVASpaceHeap = pKernelBus->virtualBar2[gfid].pVASpaceHeap;
1339 
1340     if (pVASpaceHeap != NULL)
1341     {
1342         if (pVASpaceHeap->eheapSetAllocRange(pVASpaceHeap, pKernelBus->bar2[gfid].rmApertureBase,
1343             pKernelBus->bar2[gfid].rmApertureLimit - FERMI_SMALL_PAGESIZE) != NV_OK)
1344         {
1345             DBG_BREAKPOINT();
1346         }
1347     }
1348 
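    // Total size of all BAR2 page directories followed by all BAR2 page tables,
    // used to back them with a single allocation when they live in FB.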
1349     allocSize = kbusGetSizeOfBar2PageDirs_HAL(pGpu, pKernelBus) +
1350                 kbusGetSizeOfBar2PageTables_HAL(pGpu, pKernelBus);
1351 
1352     if (pKernelBus->PDEBAR2Aperture == ADDR_FBMEM)
1353     {
1354         //
1355         // The page directories and page tables should all be within
1356         // the same type of memory.
1357         //
1358         NV_ASSERT_OR_GOTO(pKernelBus->PDEBAR2Aperture == pKernelBus->PTEBAR2Aperture,
1359                          cleanup);
1360 
1361         status = memdescCreate(&pPageLevelsMemDesc, pGpu,
1362                                allocSize,
1363                                RM_PAGE_SIZE,
1364                                NV_TRUE,
1365                                pKernelBus->PDEBAR2Aperture,
1366                                pKernelBus->PDEBAR2Attr,
1367                                MEMDESC_FLAGS_OWNED_BY_CURRENT_DEVICE);
1368         NV_ASSERT_OR_GOTO(NV_OK == status, cleanup);
1369 
1370         if (IS_GFID_VF(gfid))
1371         {
1372             status = memdescAlloc(pPageLevelsMemDesc);
1373             NV_ASSERT_OR_GOTO(status == NV_OK, cleanup);
1374 
1375             pKernelBus->bar2[gfid].pdeBase = memdescGetPhysAddr(pPageLevelsMemDesc,
1376                                                                 AT_GPU, 0);
1377 
1378             pKernelBus->virtualBar2[gfid].pPageLevelsMemDesc = pPageLevelsMemDesc;
1379 
1380             pKernelBus->bar2[gfid].pteBase = pKernelBus->bar2[gfid].pdeBase +
1381                                              kbusGetSizeOfBar2PageDirs_HAL(pGpu, pKernelBus);
1382 
1383             pKernelBus->bar2[gfid].pteBase = NV_ROUNDUP(pKernelBus->bar2[gfid].pteBase, RM_PAGE_SIZE);
1384 
1385             pKernelBus->virtualBar2[gfid].pPageLevels = kbusMapRmAperture_HAL(pGpu,
1386                                                                         pKernelBus->virtualBar2[gfid].pPageLevelsMemDesc);
1387             NV_ASSERT_OR_RETURN(pKernelBus->virtualBar2[gfid].pPageLevels,
1388                           NV_ERR_INSUFFICIENT_RESOURCES);
1389         }
1390     }
1391 
1392     // Get the MMU format for BAR2.
1393     pKernelBus->bar2[gfid].pFmt = kgmmuFmtGet(pKernelGmmu, GMMU_FMT_VERSION_DEFAULT, 0);
1394     NV_ASSERT_OR_GOTO(NULL != pKernelBus->bar2[gfid].pFmt, cleanup);
1395     walkFlags.bUseIterative = gpuIsIterativeMmuWalkerEnabled(pGpu);
1396 
1397     //
1398     // Initialize/allocate walker staging buffer only if PTEs in FBMEM
1399     // and we are currently bootstrapping BAR2.
1400     //
1401     if (pKernelBus->bar2[gfid].pWalkStagingBuffer == NULL &&
1402         pKernelBus->PTEBAR2Aperture == ADDR_FBMEM &&
1403         pKernelBus->bar2[gfid].bBootstrap)
1404     {
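        // kbusCreateStagingMemdesc() is currently a stub that returns NULL, so the
        // walker is created without a staging buffer.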
1405         pKernelBus->bar2[gfid].pWalkStagingBuffer = kbusCreateStagingMemdesc(pGpu);
1406     }
1407 
1408     // Create the MMU_WALKER state
1409     status = mmuWalkCreate(pKernelBus->bar2[gfid].pFmt->pRoot,
1410                            NULL,
1411                            &g_bar2WalkCallbacks,
1412                            walkFlags,
1413                            &pWalk,
1414                            (struct MMU_WALK_MEMDESC *) pKernelBus->bar2[gfid].pWalkStagingBuffer);
1415     NV_ASSERT_OR_GOTO(NV_OK == status, cleanup);
1416     pKernelBus->bar2[gfid].pWalk = pWalk;
1417 
1418     // We want to lock the small page table
1419     pLevelFmt = mmuFmtFindLevelWithPageShift(pKernelBus->bar2[gfid].pFmt->pRoot,
1420                                              RM_PAGE_SHIFT);
1421 
1422     // Setup walk user context.
1423     userCtx.pGpu = pGpu;
1424     userCtx.gfid = gfid;
1425 
1426     NV_ASSERT_OR_RETURN(pWalk != NULL, NV_ERR_INVALID_STATE);
1427 
1428     // Pre-reserve and init 4K tables through BAR0 window (bBootstrap) mode.
1429     mmuWalkSetUserCtx(pWalk, &userCtx);
1430 
1431     if (pKernelBus->bar2[gfid].cpuVisibleLimit != 0)
1432     {
1433         status = mmuWalkReserveEntries(pWalk, pLevelFmt, pKernelBus->bar2[gfid].cpuVisibleBase,
1434                                        pKernelBus->bar2[gfid].cpuVisibleLimit, NV_FALSE);
1435         NV_ASSERT_OR_GOTO(NV_OK == status, cleanup);
1436         status = mmuWalkSparsify(pWalk, pKernelBus->bar2[gfid].cpuVisibleBase, pKernelBus->bar2[gfid].cpuVisibleLimit, NV_TRUE);
1437         NV_ASSERT_OR_GOTO(NV_OK == status, cleanup);
1438     }
1439 
1440     if (pKernelBus->bar2[gfid].cpuInvisibleLimit != 0)
1441     {
1442         status = mmuWalkReserveEntries(pWalk, pLevelFmt, pKernelBus->bar2[gfid].cpuInvisibleBase,
1443                                        pKernelBus->bar2[gfid].cpuInvisibleLimit, NV_FALSE);
1444         NV_ASSERT_OR_GOTO(NV_OK == status, cleanup);
1445         status = mmuWalkSparsify(pWalk, pKernelBus->bar2[gfid].cpuInvisibleBase, pKernelBus->bar2[gfid].cpuInvisibleLimit, NV_TRUE);
1446         NV_ASSERT_OR_GOTO(NV_OK == status, cleanup);
1447     }
1448 
1449     NV_PRINTF(LEVEL_INFO, "(BAR2 0x%llx, PDB 0x%llx): vaLimit = 0x%llx\n",
1450               pKernelBus->bar2[gfid].instBlockBase, pKernelBus->bar2[gfid].pdeBase,
1451               pKernelBus->bar2[gfid].vaLimit);
1452 
1453     if (NULL != pKernelBus->bar2[gfid].pPDEMemDescForBootstrap)
1454     {
1455         memdescSetPageSize(pKernelBus->bar2[gfid].pPDEMemDescForBootstrap, AT_GPU,
1456                        FERMI_SMALL_PAGESIZE);
1457         pKernelBus->virtualBar2[gfid].pPDB = pKernelBus->bar2[gfid].pPDEMemDescForBootstrap;
1458     }
1459     else
1460     {
1461         memdescSetPageSize(pKernelBus->bar2[gfid].pPDEMemDesc, AT_GPU,
1462                        FERMI_SMALL_PAGESIZE);
1463         pKernelBus->virtualBar2[gfid].pPDB = pKernelBus->bar2[gfid].pPDEMemDesc;
1464     }
1465 
1466     //
1467     // Setup a memdesc that covers all of BAR2's page levels.
1468     //
1469     // The following is based on _bar2WalkCBLevelAlloc().
1470     //
1471     if (IS_GFID_PF(gfid))
1472     {
1473         switch (pKernelBus->PDEBAR2Aperture)
1474         {
1475             default:
1476             case ADDR_FBMEM:
1477                 if (pPageLevelsMemDesc != NULL)
1478                 {
1479                     memdescDescribe(pPageLevelsMemDesc,
1480                                     pKernelBus->PDEBAR2Aperture,
1481                                     pKernelBus->bar2[gfid].pdeBase,
1482                                     allocSize);
1483                 }
1484                 break;
1485 
1486             case ADDR_SYSMEM:
1487                 //
1488                 // In SYSMEM, page level instances are allocated one at a time. It is
1489                 // not guaranteed that they are contiguous. Thus, SYSMEM page level
1490                 // instances are dynamically mapped-in via memmap as needed instead of
1491                 // having one static mapping.
1492                 //
1493                 pPageLevelsMemDesc = NULL;
1494                 break;
1495         }
1496         pKernelBus->virtualBar2[gfid].pPageLevelsMemDesc = pPageLevelsMemDesc;
1497     }
1498 
1499     kbusPatchBar2Pdb_HAL(pGpu, pKernelBus);
1500 
1501 cleanup:
1502 
1503     if (IS_GFID_VF(gfid) && (pKernelBus->virtualBar2[gfid].pPageLevels != NULL))
1504     {
1505         kbusUnmapRmAperture_HAL(pGpu,
1506                                 pKernelBus->virtualBar2[gfid].pPageLevelsMemDesc,
1507                                 &pKernelBus->virtualBar2[gfid].pPageLevels, NV_TRUE);
1508         pKernelBus->virtualBar2[gfid].pPageLevels = NULL;
1509     }
1510 
1511     if (pWalk != NULL)
1512     {
1513         mmuWalkSetUserCtx(pWalk, NULL);
1514     }
1515 
1516     if (!kbusIsPhysicalBar2InitPagetableEnabled(pKernelBus) && IS_GFID_PF(gfid))
1517     {
1518         // Restore BAR0 window after BAR2 bootstrap
1519         kbusRestoreBar0WindowAfterBar2Bootstrap_HAL(pGpu, pKernelBus, origVidOffset);
1520     }
1521 
1522     if (status != NV_OK)
1523     {
1524         if (kbusTeardownBar2GpuVaSpace_HAL(pGpu, pKernelBus, gfid) != NV_OK)
1525         {
1526             DBG_BREAKPOINT();
1527         }
1528     }
1529 
1530     if (kbusIsPhysicalBar2InitPagetableEnabled(pKernelBus))
1531     {
1532         pKernelBus->bar2[gfid].bBootstrap = NV_FALSE;
1533     }
1534 
1535     return status;
1536 }
1537 
1538 /*!
1539  * @brief Destroy BAR2 GPU vaspace
1540  *
1541  * 1. Free BAR2 GPU vaspace page directories & tables.
1542  *
1543  * @param[in] pGpu
 * @param[in] pKernelBus
 * @param[in] gfid
1545  *
1546  * @returns NV_OK on success.
1547  */
1548 NV_STATUS
1549 kbusTeardownBar2GpuVaSpace_GM107
1550 (
1551     OBJGPU    *pGpu,
1552     KernelBus *pKernelBus,
1553     NvU32      gfid
1554 )
1555 {
1556     NV_STATUS         status  = NV_OK;
1557 
1558     //
    // Nothing to be done in the guest in the paravirtualization case or if
    // the guest is running in SRIOV heavy mode.
1561     //
1562     if (IS_VIRTUAL_WITHOUT_SRIOV(pGpu) ||
1563         (IS_VIRTUAL(pGpu) && gpuIsWarBug200577889SriovHeavyEnabled(pGpu)))
1564     {
1565         return NV_OK;
1566     }
1567 
1568     if (NULL != pKernelBus->bar2[gfid].pWalk)
1569     {
1570         const MMU_FMT_LEVEL *pLevelFmt     = NULL;
1571         MMU_WALK_USER_CTX    userCtx       = {0};
1572         NvU64                origVidOffset = 0;
1573 
1574         pLevelFmt = mmuFmtFindLevelWithPageShift(pKernelBus->bar2[gfid].pFmt->pRoot, RM_PAGE_SHIFT);
1575 
1576         userCtx.pGpu = pGpu;
1577 
1578         mmuWalkSetUserCtx(pKernelBus->bar2[gfid].pWalk, &userCtx);
1579 
1580         if (kbusIsPhysicalBar2InitPagetableEnabled(pKernelBus) || IS_GFID_VF(gfid))
1581         {
1582             mmuWalkLevelInstancesForceFree(pKernelBus->bar2[gfid].pWalk);
1583         }
1584         else
1585         {
1586             status = kbusSetupBar0WindowBeforeBar2Bootstrap_HAL(pGpu, pKernelBus, &origVidOffset);
1587             NV_ASSERT_OR_RETURN(NV_OK == status, status);
1588 
1589             if (pKernelBus->bar2[gfid].cpuVisibleLimit != 0)
1590             {
1591                 status = mmuWalkUnmap(pKernelBus->bar2[gfid].pWalk, pKernelBus->bar2[gfid].cpuVisibleBase, pKernelBus->bar2[gfid].cpuVisibleLimit);
1592                 NV_ASSERT(NV_OK == status);
1593                 mmuWalkReleaseEntries(pKernelBus->bar2[gfid].pWalk, pLevelFmt, pKernelBus->bar2[gfid].cpuVisibleBase, pKernelBus->bar2[gfid].cpuVisibleLimit);
1594             }
1595 
1596             if (pKernelBus->bar2[gfid].cpuInvisibleLimit != 0)
1597             {
1598                 status = mmuWalkUnmap(pKernelBus->bar2[gfid].pWalk, pKernelBus->bar2[gfid].cpuInvisibleBase, pKernelBus->bar2[gfid].cpuInvisibleLimit);
1599                 NV_ASSERT(NV_OK == status);
1600                 mmuWalkReleaseEntries(pKernelBus->bar2[gfid].pWalk, pLevelFmt, pKernelBus->bar2[gfid].cpuInvisibleBase, pKernelBus->bar2[gfid].cpuInvisibleLimit);
1601             }
1602 
1603             kbusRestoreBar0WindowAfterBar2Bootstrap_HAL(pGpu, pKernelBus, origVidOffset);
1604         }
1605 
1606         mmuWalkSetUserCtx(pKernelBus->bar2[gfid].pWalk, NULL);
1607 
1608         mmuWalkDestroy(pKernelBus->bar2[gfid].pWalk);
1609         pKernelBus->bar2[gfid].pWalk                    = NULL;
1610         pKernelBus->bar2[gfid].pPDEMemDesc              = NULL;
1611         pKernelBus->bar2[gfid].pPDEMemDescForBootstrap  = NULL;
1612         pKernelBus->virtualBar2[gfid].pPTEMemDesc       = NULL;
1613 
1614         // Free staging buffer
1615         memdescFree(pKernelBus->bar2[gfid].pWalkStagingBuffer);
1616         memdescDestroy(pKernelBus->bar2[gfid].pWalkStagingBuffer);
1617         pKernelBus->bar2[gfid].pWalkStagingBuffer = NULL;
1618 
1619         if (IS_GFID_VF(gfid) && (pKernelBus->virtualBar2[gfid].pPageLevels != NULL))
1620         {
1621             kbusUnmapRmAperture_HAL(pGpu,
1622                                     pKernelBus->virtualBar2[gfid].pPageLevelsMemDesc,
1623                                     &pKernelBus->virtualBar2[gfid].pPageLevels,
1624                                     NV_TRUE);
1625             pKernelBus->virtualBar2[gfid].pPageLevels = NULL;
1626         }
1627 
1628         // Free the overall page levels memdesc.
1629         if (pKernelBus->virtualBar2[gfid].pPageLevelsMemDesc != NULL)
1630         {
1631             memdescFree(pKernelBus->virtualBar2[gfid].pPageLevelsMemDesc);
1632             memdescDestroy(pKernelBus->virtualBar2[gfid].pPageLevelsMemDesc);
1633             pKernelBus->virtualBar2[gfid].pPageLevelsMemDesc = NULL;
1634         }
1635 
1636         if (IS_GSP_CLIENT(pGpu))
1637         {
1638             //
            // Normally virtualBar2.pPDB (the memdesc of the BAR2 root page
            // directory) is freed when the BAR2 page tables are destroyed.
            // But in RM-offload, virtualBar2.pPDB in CPU-RM is patched to
            // GSP-RM's address, so it is not freed when the BAR2 page tables
            // are destroyed. We therefore need to free it explicitly at
            // destruct time.
1645             //
1646             if (pKernelBus->virtualBar2[gfid].pPDB != NULL)
1647             {
1648                 memdescFree(pKernelBus->virtualBar2[gfid].pPDB);
1649                 memdescDestroy(pKernelBus->virtualBar2[gfid].pPDB);
1650                 pKernelBus->virtualBar2[gfid].pPDB = NULL;
1651             }
1652 
1653             //
            // CPU-RM's page table is no longer needed, so request GSP-RM to
            // clear the PDE3[0] value in GSP-RM's page table (by writing 0
            // to GSP-RM's PDE3[0]).
1657             //
1658             NV_RM_RPC_UPDATE_BAR_PDE(pGpu, NV_RPC_UPDATE_PDE_BAR_2, 0, pKernelBus->bar2[gfid].pFmt->pRoot->virtAddrBitLo, status);
1659         }
1660 
1661         if (IS_GFID_VF(gfid) && (pKernelBus->bar2[gfid].pInstBlkMemDesc != NULL))
1662         {
1663             memdescFree(pKernelBus->bar2[gfid].pInstBlkMemDesc);
1664             memdescDestroy(pKernelBus->bar2[gfid].pInstBlkMemDesc);
1665             pKernelBus->bar2[gfid].pInstBlkMemDesc = NULL;
1666         }
1667     }
1668 
1669     return status;
1670 }
1671 
1672 /*!
1673  * @brief Setup BAR0 window for BAR2 setup
1674  *
1675  * We point the BAR0 window to the start of the BAR2 page directory
1676  *
1677  * @param[in]  pGpu
1678  * @param[in]  pKernelBus
 * @param[out] pOrigVidOffset Location to save the original BAR0 window offset
1680  *
1681  * @returns NV_OK on success.
1682  */
1683 NV_STATUS
1684 kbusSetupBar0WindowBeforeBar2Bootstrap_GM107
1685 (
1686     OBJGPU      *pGpu,
1687     KernelBus   *pKernelBus,
1688     NvU64       *pOrigVidOffset
1689 )
1690 {
1691     NV_STATUS         status  = NV_OK;
1692 
    // The BAR0 window is only needed if the BAR2 page directories or page tables live in FBMEM.
1694     if (ADDR_FBMEM == pKernelBus->PDEBAR2Aperture ||
1695         ADDR_FBMEM == pKernelBus->PTEBAR2Aperture)
1696     {
1697         // Right now, PDE needs to be in FBMEM for BAR0 window to work.
1698         NV_ASSERT_OR_RETURN(ADDR_FBMEM == pKernelBus->PDEBAR2Aperture, NV_ERR_NOT_SUPPORTED);
1699 
1700         // Save original BAR0 window base (restored in cleanup).
1701         *pOrigVidOffset = kbusGetBAR0WindowVidOffset_HAL(pGpu, pKernelBus);
1702 
1703         // Set BAR0 window base to memory region reserved for BAR2 page level instances.
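        // The window base must be 64KB-aligned, hence pdeBase is masked down to a 64KB boundary below.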
1704         status = kbusSetBAR0WindowVidOffset_HAL(pGpu, pKernelBus,
1705                                                 pKernelBus->bar2[GPU_GFID_PF].pdeBase & ~0xffffULL);
1706         NV_ASSERT_OR_RETURN(NV_OK == status, status);
1707 
1708         // Get BAR0 window offset to be used for BAR2 init.
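        // bar2OffsetInBar0Window is the PRAMIN-relative CPU offset of the page directory: the delta of
        // pdeBase from the (64KB-aligned) window base, plus the start of the PRAMIN data aperture.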
1709         pKernelBus->bar2[GPU_GFID_PF].bar2OffsetInBar0Window =
1710             (pKernelBus->bar2[GPU_GFID_PF].pdeBase - kbusGetBAR0WindowVidOffset_HAL(pGpu, pKernelBus)) +
1711              NV_PRAMIN_DATA008(0);
1712     }
1713 
1714     pKernelBus->bar2[GPU_GFID_PF].bBootstrap = NV_TRUE;
1715 
1716     return NV_OK;
1717 }
1718 
1719 /*!
1720  * @brief Restore BAR0 window after BAR2 setup
1721  *
1722  * Restore the BAR0 window to the original offset
1723  *
1724  * @param[in]  pGpu
1725  * @param[in]  pKernelBus
 * @param[in]  origVidOffset  Original BAR0 window offset to restore
1727  *
1728  * @returns NV_OK on success.
1729  */
1730 void
1731 kbusRestoreBar0WindowAfterBar2Bootstrap_GM107
1732 (
1733     OBJGPU      *pGpu,
1734     KernelBus   *pKernelBus,
1735     NvU64        origVidOffset
1736 )
1737 {
1738     NV_ASSERT(pKernelBus->bar2[GPU_GFID_PF].bBootstrap);
1739     pKernelBus->bar2[GPU_GFID_PF].bBootstrap = NV_FALSE;
1740 
1741     if (ADDR_FBMEM == pKernelBus->PDEBAR2Aperture ||
1742         ADDR_FBMEM == pKernelBus->PTEBAR2Aperture)
1743     {
1744         NV_STATUS status;
1745         status = kbusSetBAR0WindowVidOffset_HAL(pGpu, pKernelBus, origVidOffset);
1746         NV_ASSERT(NV_OK == status);
1747         pKernelBus->bar2[GPU_GFID_PF].bar2OffsetInBar0Window = 0;
1748     }
1749 }
1750 
1751 /*!
1752  * Defines the data needed to iterate over the last level during map VA op.
1753  * Note: Used only in the new VMM code path.
1754  */
1755 struct MMU_MAP_ITERATOR
1756 {
1757     /*!
1758      * @copydoc GMMU_FMT
1759      */
1760     const GMMU_FMT *pFmt;
1761 
1762     /*!
1763      * Physical aperture of the pages.
1764      */
1765     GMMU_APERTURE aperture;
1766 
1767     /*!
1768      * Physical pages to map. Always points to 4K-sized pages.
1769      */
1770     DMA_PAGE_ARRAY *pPageArray;
1771 
1772     /*!
1773      * The index of pPageArray that needs to be mapped.
1774      */
1775     NvU32 currIdx;
1776 
1777     /*!
1778      * Physical address of the last page mapped.
1779      */
1780     NvU64 physAddr;
1781 
1782     /*!
1783      * Template used to initialize PTEs. Contains values that do not change
1784      * across one map operation.
1785      */
1786     GMMU_ENTRY_VALUE pteTemplate;
1787 
1788     /*!
1789      * The PTE physical address field to use based on the PTE aperture.
1790      */
1791     const GMMU_FIELD_ADDRESS *pAddrField;
1792 };
1793 
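/*!
 * Helper for the BAR2 map callback: fills in the physical address field of a
 * PTE. The address is taken from pPageArray when an entry is available;
 * otherwise (a single-entry page array describing contiguous memory) the
 * previous address is advanced by the mapping page size. The 4K-based
 * iterator index is advanced accordingly.
 */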
1794 static void
1795 _busWalkCBMapNextEntries_UpdatePhysAddr
1796 (
1797     OBJGPU           *pGpu,
1798     GMMU_ENTRY_VALUE *pEntryValue,
1799     MMU_MAP_ITERATOR *pIter,
1800     const NvU64       pageSize
1801 )
1802 {
1803     // Update the PTE with the physical address.
1804     if (pIter->currIdx < pIter->pPageArray->count)
1805     {
1806         pIter->physAddr = dmaPageArrayGetPhysAddr(pIter->pPageArray,
1807                                                   pIter->currIdx);
1808         pIter->physAddr = NV_ALIGN_DOWN64(pIter->physAddr, pageSize);
1809     }
1810     else
1811     {
1812         //
1813         // As BAR2 page tables are physically contiguous, physAddr can be
1814         // incremented.
1815         //
1816         // Should not be the first page (currIdx == 0) being mapped.
1817         //
1818         NV_ASSERT_OR_RETURN_VOID((pIter->pPageArray->count == 1) &&
1819                                (pIter->currIdx > 0));
1820         pIter->physAddr += pageSize;
1821     }
1822 
1823     gmmuFieldSetAddress(pIter->pAddrField,
1824         kgmmuEncodePhysAddr(GPU_GET_KERNEL_GMMU(pGpu),
1825             pIter->aperture, pIter->physAddr, NVLINK_INVALID_FABRIC_ADDR),
1826         pEntryValue->v8);
1827 
1828     //
1829     // pPageArray deals in 4K-pages. Increment by the ratio of mapping page
1830     // size to 4K.
1831     //
1832     pIter->currIdx += (NvU32)(pageSize / RM_PAGE_SIZE);
1833 }
1834 
1835 /*!
1836  * Implementation of @ref MmuWalkCBMapNextEntries for BAR2
1837  */
1838 static void
1839 _kbusWalkCBMapNextEntries_RmAperture
1840 (
1841     MMU_WALK_USER_CTX        *pUserCtx,
1842     const MMU_MAP_TARGET     *pTarget,
1843     const MMU_WALK_MEMDESC   *pLevelMem,
1844     const NvU32               entryIndexLo,
1845     const NvU32               entryIndexHi,
1846     NvU32                    *pProgress
1847 )
1848 {
1849     OBJGPU              *pGpu        = pUserCtx->pGpu;
1850     KernelBus           *pKernelBus  = GPU_GET_KERNEL_BUS(pGpu);
1851     MMU_MAP_ITERATOR    *pIter       = pTarget->pIter;
1852     const MMU_FMT_LEVEL *pLevelFmt   = pTarget->pLevelFmt;
1853     NvU8                *pMap        = NULL;
1854     void                *pPriv       = NULL;
1855     MEMORY_DESCRIPTOR   *pMemDesc    = (MEMORY_DESCRIPTOR*)pLevelMem;
1856     const NvU64          pageSize    = mmuFmtLevelPageSize(pLevelFmt);
1857     NV_STATUS            status      = NV_OK;
1858     GMMU_ENTRY_VALUE     entryValue;
1859     NvU32                entryIndex;
1860     NvU32                entryOffset;
1861     NvU32                sizeInDWord = 0;
1862     NvU64                entry = 0;
1863     NvU32                gfid = pUserCtx->gfid;
1864 
1865     NV_PRINTF(LEVEL_INFO, "[GPU%u]: PA 0x%llX, Entries 0x%X-0x%X\n",
1866               pUserCtx->pGpu->gpuInstance,
1867               memdescGetPhysAddr(pMemDesc, AT_GPU, 0), entryIndexLo,
1868               entryIndexHi);
1869 
1870     //
1871     // Initialize the PTE with the template. The template contains the values
1872     // that do not change across PTEs for this map operation.
1873     //
1874     portMemCopy(entryValue.v8, sizeof(pIter->pteTemplate), pIter->pteTemplate.v8, sizeof(pIter->pteTemplate));
1875 
1876     if (memdescGetAddressSpace(pMemDesc) == ADDR_FBMEM)
1877     {
1878         if (pKernelBus->virtualBar2[gfid].pPageLevels != NULL)
1879         {
1880             //
            // Compute the CPU pointer for this page level instance as an
            // offset from the start of the BAR2 VAS page levels CPU mapping.
1883             //
1884             if (pKernelBus->bar2[gfid].bMigrating)
1885             {
1886                 // In the migration phase. HW is using the page tables at bottom of FB.
1887                 NV_ASSERT_OR_RETURN_VOID(NULL != pKernelBus->virtualBar2[gfid].pPageLevelsForBootstrap);
1888                 pMap = memdescGetPhysAddr(pMemDesc, AT_GPU, 0) -
1889                                       pKernelBus->bar2[gfid].pdeBaseForBootstrap +
1890                                       pKernelBus->virtualBar2[gfid].pPageLevelsForBootstrap;
1891             }
1892             else
1893             {
1894                 // Migration is done. HW is using the page tables at top of FB.
1895                 pMap = memdescGetPhysAddr(pMemDesc, AT_GPU, 0) -
1896                                       pKernelBus->bar2[gfid].pdeBase +
1897                                       pKernelBus->virtualBar2[gfid].pPageLevels;
1898             }
1899 
1900             for (entryIndex = entryIndexLo; entryIndex <= entryIndexHi; entryIndex++)
1901             {
1902                 // Update the PTE with the physical address.
1903                 _busWalkCBMapNextEntries_UpdatePhysAddr(pGpu,
1904                                                        &entryValue,
1905                                                         pIter,
1906                                                         pageSize);
1907 
1908                 entryOffset = entryIndex * pLevelFmt->entrySize;
1909 
1910                 // Commit to memory.
1911                 portMemCopy(pMap + entryOffset, pLevelFmt->entrySize, entryValue.v8, pLevelFmt->entrySize);
1912             }
1913         }
1914         else if (pKernelBus->bar2[gfid].bBootstrap)
1915         {
1916 
1917             for ( entryIndex = entryIndexLo; entryIndex <= entryIndexHi; entryIndex++ )
1918             {
1919                 // Update the PTE with the physical address.
1920                 _busWalkCBMapNextEntries_UpdatePhysAddr(pGpu,
1921                                                         &entryValue,
1922                                                         pIter,
1923                                                         pageSize);
1924 
1925                 entryOffset = entryIndex * pLevelFmt->entrySize;
1926 
1927                 if (kbusIsPhysicalBar2InitPagetableEnabled(pKernelBus))
1928                 {
1929                     pMap = kbusCpuOffsetInBar2WindowGet(pGpu, pKernelBus, pMemDesc);
1930                     NV_ASSERT_OR_RETURN_VOID(NULL != pMap);
1931                     portMemCopy(pMap + entryOffset,
1932                                 pLevelFmt->entrySize, entryValue.v8,
1933                                 pLevelFmt->entrySize);
1934                 }
1935                 else
1936                 {
1937                     // Use BAR0 or nvlink if available
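                    // Entries are written one 32-bit dword at a time; e.g. an 8-byte PTE takes two such writes.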
1938                     sizeInDWord = (NvU32)NV_CEIL(pLevelFmt->entrySize, sizeof(NvU32));
1939                     NvU64 entryStart = memdescGetPhysAddr(pMemDesc, FORCE_VMMU_TRANSLATION(pMemDesc, AT_GPU), entryOffset);
1940                     NvU32 i;
1941                     NvU8 *pMapping = NULL;
1942 
1943                     if (pKernelBus->coherentCpuMapping.bCoherentCpuMapping)
1944                     {
1945                         NV_ASSERT_OR_RETURN_VOID(pGpu->getProperty(pGpu, PDB_PROP_GPU_COHERENT_CPU_MAPPING));
1946                         pMapping = kbusMapCoherentCpuMapping_HAL(pGpu, pKernelBus, pMemDesc);
1947                         NV_ASSERT_OR_RETURN_VOID(pMapping != NULL);
1948                         for (i = 0; i < sizeInDWord; i++)
1949                         {
1950                             MEM_WR32(pMapping + entryOffset + sizeof(NvU32)*i,
1951                                     entryValue.v32[i]);
1952                         }
1953                         kbusUnmapCoherentCpuMapping_HAL(pGpu, pKernelBus, pMemDesc);
1954                     }
1955                     else
1956                     {
1957                         for (i = 0; i < sizeInDWord; i++)
1958                         {
1959                             // BAR0 write.
1960                             status = kbusMemAccessBar0Window_HAL(pGpu, pKernelBus,
1961                                                   (entryStart + (sizeof(NvU32) * i)),
1962                                                   &entryValue.v32[i],
1963                                                   sizeof(NvU32),
1964                                                   NV_FALSE,
1965                                                   ADDR_FBMEM);
1966                             NV_ASSERT_OR_RETURN_VOID(NV_OK == status);
1967                         }
1968                     }
1969 
1970                     entry = entryStart;
1971                 }
1972             }
1973             //
            // Use a PRAMIN read-back to make sure the BAR0 window writes have reached memory
1975             //
1976             if (pKernelBus->bar2[gfid].bBootstrap &&
1977                 !kbusIsPhysicalBar2InitPagetableEnabled(pKernelBus) &&
1978                 !RMCFG_FEATURE_PLATFORM_GSP)
1979             {
1980                 NvU32 data = 0;
1981                 NvU32 i;
1982                 for (i = 0; i < sizeInDWord; i++)
1983                 {
1984                     NV_ASSERT_OR_RETURN_VOID(kbusMemAccessBar0Window_HAL(pGpu, pKernelBus,
1985                                                         (entry + (sizeof(NvU32) * i)), &data, sizeof(NvU32),
1986                                                         NV_TRUE, ADDR_FBMEM) == NV_OK);
1987                 }
1988             }
1989         }
1990         else
1991         {
1992             //
1993             // We are migrating and old page tables are currently active. So, PTE
1994             // updates should be made in the old page tables at the bottom of FB.
1995             //
1996             NV_ASSERT_OR_RETURN_VOID(pKernelBus->bar2[gfid].bMigrating);
1997             NV_ASSERT_OR_RETURN_VOID(NULL == pKernelBus->virtualBar2[gfid].pPageLevels);
1998             NV_ASSERT_OR_RETURN_VOID(NULL != pKernelBus->virtualBar2[gfid].pPageLevelsForBootstrap);
1999 
2000             pMap = memdescGetPhysAddr(pMemDesc, AT_GPU, 0) -
2001                                   pKernelBus->bar2[gfid].pdeBaseForBootstrap +
2002                                   pKernelBus->virtualBar2[gfid].pPageLevelsForBootstrap;
2003 
2004             for (entryIndex = entryIndexLo; entryIndex <= entryIndexHi; entryIndex++)
2005             {
2006                 // Update the PTE with the physical address.
2007                 _busWalkCBMapNextEntries_UpdatePhysAddr(pGpu,
2008                                                         &entryValue,
2009                                                         pIter,
2010                                                         pageSize);
2011 
2012                 entryOffset = entryIndex * pLevelFmt->entrySize;
2013 
2014                 // Commit to memory.
2015                 portMemCopy(pMap + entryOffset, pLevelFmt->entrySize, entryValue.v8, pLevelFmt->entrySize);
2016             }
2017         }
2018     }
2019     else
2020     {
2021         NV_ASSERT(memdescGetAddressSpace(pMemDesc) == ADDR_SYSMEM &&
2022                   pKernelBus->virtualBar2[gfid].pPageLevels == NULL);
2023 
2024         // Plain old memmap.
2025         status = memdescMapOld(pMemDesc, 0,
2026                                pMemDesc->Size,
2027                                NV_TRUE, // kernel,
2028                                NV_PROTECT_READ_WRITE,
2029                                (void **)&pMap,
2030                                &pPriv);
2031         NV_ASSERT_OR_RETURN_VOID(NV_OK == status);
2032 
2033         for ( entryIndex = entryIndexLo; entryIndex <= entryIndexHi; entryIndex++ )
2034         {
2035             // Update the PTE with the physical address.
2036             _busWalkCBMapNextEntries_UpdatePhysAddr(pGpu,
2037                                                     &entryValue,
2038                                                     pIter,
2039                                                     pageSize);
2040 
2041             entryOffset = entryIndex * pLevelFmt->entrySize;
2042 
2043             // Memory-mapped write.
2044             portMemCopy(pMap + entryOffset,
2045                         pLevelFmt->entrySize,
2046                         entryValue.v8,
2047                         pLevelFmt->entrySize);
2048         }
2049 
2050         memdescUnmapOld(pMemDesc, 1, 0, pMap, pPriv);
2051     }
2052 
2053     *pProgress = entryIndexHi - entryIndexLo + 1;
2054 }
2055 
2056 /*!
2057  *  @brief Third level of RmAperture support. This routine writes BAR2 PTEs.
2058  *
2059  *  @param[in]   pGpu
2060  *  @param[in]   pKernelBus
2061  *  @param[in]   pMemDesc    The memory area to copy from.
2062  *  @param[in]   vaddr       Offset into bar2 to program
2063  *  @param[in]   vaSize      Amount of VA to write (can be greater than pMemDesc size)
2064  *  @param[in]   flags       Defined by UPDATE_RM_APERTURE_FLAGS_*
2065  *
2066  *  @returns NV_OK on success, failure in some bootstrapping cases.
2067  */
2068 NV_STATUS
2069 kbusUpdateRmAperture_GM107
2070 (
2071     OBJGPU      *pGpu,
2072     KernelBus   *pKernelBus,
2073     PMEMORY_DESCRIPTOR pMemDesc,
2074     NvU64        vaddr,
2075     NvU64        vaSize,
2076     NvU32        flags
2077 )
2078 {
2079     KernelGmmu         *pKernelGmmu  = GPU_GET_KERNEL_GMMU(pGpu);
2080     PMEMORY_DESCRIPTOR  pSubDevMemDesc;
2081     NV_STATUS           status      = NV_OK;
2082     NvBool              bInvalidate = !!(flags & UPDATE_RM_APERTURE_FLAGS_INVALIDATE);
2083     NvBool              bDiscard    = !!(flags & UPDATE_RM_APERTURE_FLAGS_DISCARD);
2084     NvBool              bSparsify   = !!(flags & UPDATE_RM_APERTURE_FLAGS_SPARSIFY);
2085     MMU_MAP_TARGET      mapTarget = {0};
2086     MMU_MAP_ITERATOR    mapIter   = {0};
2087     MMU_WALK_USER_CTX   userCtx   = {0};
2088     DMA_PAGE_ARRAY      pageArray;
2089     NvU64               origVidOffset = 0;
2090     NvU64               vaLo;
2091     NvU64               vaHi;
2092     NvU32               gfid;
2093     const NvU32         pageSize  = FERMI_SMALL_PAGESIZE;
2094     const GMMU_FMT     *pFmt;
2095     ADDRESS_TRANSLATION addressTranslation;
2096     NvBool              bCallingContextPlugin;
2097 
2098     //
2099     // In case of SR-IOV heavy, host RM must update VF BAR2 page tables
2100     // only for CPU invisible range. VF BAR2's CPU visible range is not
2101     // in use on host RM.
2102     //
2103     if (!(flags & UPDATE_RM_APERTURE_FLAGS_CPU_INVISIBLE_RANGE))
2104     {
2105         gfid = GPU_GFID_PF;
2106     }
2107     else
2108     {
2109         NV_ASSERT_OK_OR_RETURN(vgpuGetCallingContextGfid(pGpu, &gfid));
2110         NV_ASSERT_OK_OR_RETURN(vgpuIsCallingContextPlugin(pGpu, &bCallingContextPlugin));
2111         if (bCallingContextPlugin)
2112         {
2113             gfid = GPU_GFID_PF;
2114         }
2115     }
2116 
2117     pFmt = pKernelBus->bar2[gfid].pFmt;
2118 
    // The math below requires a non-zero, page-aligned VA size.
2120     if (vaSize == 0 || vaSize & RM_PAGE_MASK)
2121     {
2122         NV_PRINTF(LEVEL_ERROR, "unsupported VA size (0x%llx)\n", vaSize);
2123         DBG_BREAKPOINT();
2124         return NV_ERR_INVALID_ARGUMENT;
2125     }
2126 
2127     // Currently don't do anything at unmap.
2128     if (bDiscard && !bSparsify)
2129         return NV_OK;
2130 
2131     vaLo = NV_ALIGN_DOWN64(vaddr, pageSize);
2132     vaHi = NV_ALIGN_UP64(vaddr + vaSize, pageSize) - 1;
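    // For example (illustrative): vaddr = 0x10200 and vaSize = 0x2000 give vaLo = 0x10000 and vaHi = 0x12FFF (inclusive).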
2133     pSubDevMemDesc = memdescGetMemDescFromGpu(pMemDesc, pGpu);
2134     //
2135     // In case of SR-IOV heavy, host RM updates VF BAR2, so
2136     // if the update is for VF BAR2 (IS_GFID_PF(gfid) is false),
2137     // use GPA, otherwise use SPA.
2138     //
2139     if (IS_GFID_PF(gfid))
2140     {
2141         addressTranslation = FORCE_VMMU_TRANSLATION(pSubDevMemDesc, AT_GPU);
2142     }
2143     else
2144     {
2145         addressTranslation = AT_GPU;
2146     }
2147 
2148     dmaPageArrayInitFromMemDesc(&pageArray, pSubDevMemDesc, addressTranslation);
2149     userCtx.pGpu = pGpu;
2150     userCtx.gfid = gfid;
2151     mmuWalkSetUserCtx(pKernelBus->bar2[gfid].pWalk, &userCtx);
2152 
2153     if (bSparsify)
2154     {
2155         NV_PRINTF(LEVEL_INFO,
2156                   "mmuWalkSparsify pwalk=%p, vaLo=%llx, vaHi = %llx\n",
2157                   pKernelBus->bar2[gfid].pWalk, vaLo, vaHi);
2158 
2159         status = mmuWalkSparsify(pKernelBus->bar2[gfid].pWalk, vaLo, vaHi, NV_FALSE);
2160         if (status != NV_OK)
2161         {
2162             NV_PRINTF(LEVEL_ERROR,
2163                       "mmuWalkSparsify status=%x pwalk=%p, vaLo=%llx, vaHi = %llx\n",
2164                       status, pKernelBus->bar2[gfid].pWalk, vaLo, vaHi);
2165         }
2166     }
2167     else
2168     {
2169         // MMU_MAP_CTX
2170         mapTarget.pLevelFmt      = mmuFmtFindLevelWithPageShift(pFmt->pRoot,
2171                                                                 BIT_IDX_32(pageSize));
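        // BIT_IDX_32(pageSize) is 12 for the 4K page size used here, selecting the small-page PTE level.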
2172         mapTarget.pIter          = &mapIter;
2173         mapTarget.MapNextEntries = _kbusWalkCBMapNextEntries_RmAperture;
2174 
2175         // MMU_MAP_ITER
2176         mapIter.pFmt       = pFmt;
2177         mapIter.aperture   = kgmmuGetMemAperture(pKernelGmmu, pMemDesc);
2178         mapIter.pPageArray = &pageArray;
2179 
2180         //
2181         // Setup a template PTE with those values that will not change across
2182         // PTEs during mapping.
2183         //
2184         nvFieldSetBool(&pFmt->pPte->fldValid, NV_TRUE, mapIter.pteTemplate.v8);
2185         if (pFmt->version == GMMU_FMT_VERSION_3)
2186         {
2187             NvU32 ptePcfHw  = 0;
2188             NvU32 ptePcfSw  = 0;
2189 
2190             if (memdescGetVolatility(pMemDesc))
2191             {
2192                 ptePcfSw = 1 << SW_MMU_PCF_UNCACHED_IDX;
2193             }
2194             else
2195             {
2196                 //
                // For internal DMA mappings to sysmem we should always use
                // GPU-uncached (volatile) because RM won't invalidate L2 upon
                // unmap, so a non-volatile memdesc here must be vidmem.
2199                 //
2200                 NV_ASSERT(mapIter.aperture == GMMU_APERTURE_VIDEO);
2201             }
2202 
2203             ptePcfSw |= (1 << SW_MMU_PCF_REGULAR_IDX);
2204 
2205             NV_ASSERT_OR_RETURN(kgmmuTranslatePtePcfFromSw_HAL(pKernelGmmu, ptePcfSw,
2206                 &ptePcfHw) == NV_OK, NV_ERR_INVALID_ARGUMENT);
2207             nvFieldSet32(&pFmt->pPte->fldPtePcf, ptePcfHw, mapIter.pteTemplate.v8);
2208         }
2209         else
2210         {
2211             nvFieldSetBool(&pFmt->pPte->fldVolatile, memdescGetVolatility(pMemDesc), mapIter.pteTemplate.v8);
2212         }
2213 
2214         gmmuFieldSetAperture(&pFmt->pPte->fldAperture,
2215                              mapIter.aperture,
2216                              mapIter.pteTemplate.v8);
2217 
2218         //
2219         // Determine the PTE physical address field to use based on the PTE
2220         // aperture. Physical addresses themselves will get added to the PTE
2221         // during mapping.
2222         //
2223         mapIter.pAddrField =
2224             gmmuFmtPtePhysAddrFld(pFmt->pPte,
2225                                   gmmuFieldGetAperture(
2226                                       &pFmt->pPte->fldAperture,
2227                                       mapIter.pteTemplate.v8));
2228 
2229 
2230         // Write PTE kind.
2231         nvFieldSet32(&pFmt->pPte->fldKind, memdescGetPteKind(pMemDesc),
2232                      mapIter.pteTemplate.v8);
2233 
2234         //
        // The BAR2 page tables have not yet been self-mapped; this call
        // performs that mapping, so keep BAR2 in bootstrap mode so the
        // updates can go through the BAR0 window.
2238         //
2239         if ((ADDR_FBMEM == pKernelBus->PDEBAR2Aperture ||
2240              ADDR_FBMEM == pKernelBus->PTEBAR2Aperture) &&
2241              !kbusIsPhysicalBar2InitPagetableEnabled(pKernelBus) &&
2242              pKernelBus->virtualBar2[gfid].pPageLevels == NULL && IS_GFID_PF(gfid))
2243         {
2244             status = kbusSetupBar0WindowBeforeBar2Bootstrap_HAL(pGpu, pKernelBus, &origVidOffset);
2245             NV_ASSERT_OR_RETURN(NV_OK == status, status);
2246         }
2247         status = mmuWalkMap(pKernelBus->bar2[gfid].pWalk, vaLo, vaHi, &mapTarget);
2248         NV_ASSERT(NV_OK == status);
2249     }
2250 
2251     mmuWalkSetUserCtx(pKernelBus->bar2[gfid].pWalk, NULL);
2252 
2253     if (pKernelBus->bar2[gfid].bBootstrap &&
2254         !kbusIsPhysicalBar2InitPagetableEnabled(pKernelBus))
2255     {
2256         kbusRestoreBar0WindowAfterBar2Bootstrap_HAL(pGpu, pKernelBus, origVidOffset);
2257     }
2258 
2259     //
2260     // Synchronize BAR2 address space to memory and then invalidate TLB
2261     // to invalidate any cached PTEs.
2262     //
2263     if (bInvalidate)
2264     {
2265         osFlushCpuWriteCombineBuffer();
2266 
        // A PCIE_READ-based kbusFlush is more efficient and preferred; when pReadToFlush is not yet available, fall back to kbusSendSysmembar().
2268         if (pKernelBus->pReadToFlush != NULL)
2269         {
2270             NvU32 flushFlag = BUS_FLUSH_USE_PCIE_READ |
2271                               kbusGetFlushAperture(pKernelBus,
2272                                                    memdescGetAddressSpace(pKernelBus->virtualBar2[gfid].pPTEMemDesc));
2273             kbusFlush_HAL(pGpu, pKernelBus, flushFlag);
2274         }
2275         else
2276         {
2277             kbusSendSysmembar(pGpu, pKernelBus);
2278         }
2279 
2280         SLI_LOOP_START(SLI_LOOP_FLAGS_BC_ONLY | SLI_LOOP_FLAGS_IGNORE_REENTRANCY)
2281         pKernelGmmu = GPU_GET_KERNEL_GMMU(pGpu);
2282         pKernelBus  = GPU_GET_KERNEL_BUS(pGpu);
2283         kgmmuInvalidateTlb_HAL(pGpu, pKernelGmmu,
2284                               pKernelBus->virtualBar2[gfid].pPDB,
2285                               pKernelBus->virtualBar2[gfid].flags,
2286                               PTE_DOWNGRADE, 0,
2287                               NV_GMMU_INVAL_SCOPE_NON_LINK_TLBS);
2288         SLI_LOOP_END
2289         pKernelBus  = GPU_GET_KERNEL_BUS(pGpu);
2290     }
2291 
2292     return status;
2293 }
2294 
2295 /**
 * @brief Return the BAR1 VA space.
 *        The BAR1 VA space is per-GPU and is no longer shared.
2298  */
2299 OBJVASPACE *kbusGetBar1VASpace_GM107(OBJGPU *pGpu, KernelBus *pKernelBus)
2300 {
2301     NvU32             gfid;
2302     NvBool            bCallingContextPlugin;
2303 
2304     NV_ASSERT_OR_RETURN(vgpuGetCallingContextGfid(pGpu, &gfid) == NV_OK, NULL);
2305     NV_ASSERT_OR_RETURN(vgpuIsCallingContextPlugin(pGpu, &bCallingContextPlugin) == NV_OK, NULL);
2306     if (bCallingContextPlugin || !gpuIsWarBug200577889SriovHeavyEnabled(pGpu))
2307     {
2308         gfid = GPU_GFID_PF;
2309     }
2310 
2311     return pKernelBus->bar1[gfid].pVAS;
2312 }
2313 
2314 static NV_STATUS
2315 _kbusUpdateDebugStatistics(OBJGPU *pGpu)
2316 {
2317     KernelBus *pKernelBus = GPU_GET_KERNEL_BUS(pGpu);
2318     OBJVASPACE *pBar1VAS = kbusGetBar1VASpace_HAL(pGpu, pKernelBus);
2319     OBJEHEAP *pVASHeap;
2320     NV00DE_SHARED_DATA *pSharedData = gpushareddataWriteStart(pGpu);
2321     NV_RANGE bar1VARange = NV_RANGE_EMPTY;
2322 
2323     pVASHeap = vaspaceGetHeap(pBar1VAS);
2324     bar1VARange = rangeMake(vaspaceGetVaStart(pBar1VAS), vaspaceGetVaLimit(pBar1VAS));
2325 
2326     pSharedData->bar1Size = (NvU32)(rangeLength(bar1VARange) / 1024);
2327     pSharedData->bar1AvailSize = 0;
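    // Sizes in the shared data are reported in KiB (hence the division by 1024).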
2328 
2329     if (pVASHeap != NULL)
2330     {
2331         NvU64 freeSize = 0;
2332 
2333         pVASHeap->eheapInfoForRange(pVASHeap, bar1VARange, NULL, NULL, NULL, &freeSize);
2334         pSharedData->bar1AvailSize = (NvU32)(freeSize / 1024);
2335     }
2336 
2337     gpushareddataWriteFinish(pGpu);
2338 
2339     return NV_OK;
2340 }
2341 
2342 NV_STATUS
2343 kbusMapFbAperture_GM107
2344 (
2345     OBJGPU     *pGpu,
2346     KernelBus  *pKernelBus,
2347     MEMORY_DESCRIPTOR *pMemDesc,
2348     NvU64       offset,
2349     NvU64      *pAperOffset,
2350     NvU64      *pLength,
2351     NvU32       flags,
2352     NvHandle    hClient
2353 )
2354 {
2355     NvBool           bBcState = gpumgrGetBcEnabledStatus(pGpu);
2356     OBJVASPACE      *pVAS;
2357     NV_STATUS        rmStatus   = NV_OK;
2358     NV_STATUS        failStatus = NV_OK;
2359     OBJGPU          *pLoopGpu   = NULL;
2360     NvU64            newAperOffset = 0;
2361     // Track which gpus have mapped so we can free in case of error
2362     NvU32            gpuMappingSuccessMask = 0;
2363 
2364     NV_ASSERT((flags & BUS_MAP_FB_FLAGS_FERMI_INVALID) == 0);
2365 
2366     pVAS = kbusGetBar1VASpace_HAL(pGpu, pKernelBus);
2367 
    // Set BC to enabled if the UC flag is not passed
2369     if ((IsSLIEnabled(pGpu) && ((flags & BUS_MAP_FB_FLAGS_MAP_UNICAST) == 0)) &&
2370         ((flags & BUS_MAP_FB_FLAGS_PRE_INIT) == 0))
2371     {
2372         gpumgrSetBcEnabledStatus(pGpu, NV_TRUE);
2373         flags |= BUS_MAP_FB_FLAGS_MAP_UNICAST;
2374     }
2375     else
2376     {
2377         gpumgrSetBcEnabledStatus(pGpu, NV_FALSE);
2378     }
2379 
2380     // Call _kbusMapAperture_GM107 multiple times in UC for BC mapping
2381     SLI_LOOP_START(SLI_LOOP_FLAGS_BC_ONLY)
2382     {
2383         pKernelBus = GPU_GET_KERNEL_BUS(pGpu);
2384         pLoopGpu = pGpu;
2385 
2386         pVAS = kbusGetBar1VASpace_HAL(pGpu, pKernelBus);
2387         rmStatus = _kbusMapAperture_GM107(pGpu, pMemDesc,
2388                                           pVAS, offset, pAperOffset,
2389                                           pLength, flags, hClient);
2390 
2391         //
2392         // Ensure that all returned VA offsets are the same on each GPU
2393         // The _OFFSET_FIXED flag ensures this is true unless one GPU has
2394         // no free extent starting at the bar1 vAddr mapped by the parent
2395         // GPU.
2396         //
2397         // This can and should be updated later to enable multiple Bar1 vAddr
2398         // returns. The client functions must then be updated to handle
2399         // multiple returns, and the OFFSET_FIXED flag can be removed from here
2400         // and /resman/kernel/inc/gpu/bus/kern_bus.h.
2401         //
2402         if (gpuMappingSuccessMask == 0)
2403         {
2404             newAperOffset = *pAperOffset;
2405             flags |= BUS_MAP_FB_FLAGS_MAP_OFFSET_FIXED;
2406         }
2407         else
2408         {
2409             NV_ASSERT(newAperOffset == *pAperOffset);
2410         }
2411 
2412         if (rmStatus != NV_OK)
2413         {
2414             SLI_LOOP_BREAK;
2415         }
        gpuMappingSuccessMask |= NVBIT(pGpu->gpuInstance);
2417     }
2418     SLI_LOOP_END
2419 
2420     gpumgrSetBcEnabledStatus(pGpu, bBcState);
2421 
2422     if (rmStatus == NV_OK)
2423     {
2424         _kbusUpdateDebugStatistics(pGpu);
2425         return rmStatus;
2426     }
2427 
2428     NV_PRINTF(LEVEL_ERROR,
2429               "Failed: [GPU%u] Could not map pAperOffset: 0x%llx\n",
2430               pLoopGpu->gpuInstance, newAperOffset);
2431 
2432     // Unmap mapped addresses after BC mapping failure in SLI
2433     SLI_LOOP_START(SLI_LOOP_FLAGS_NONE)
2434     {
2435         if ((NVBIT(pGpu->gpuInstance) & gpuMappingSuccessMask) == 0)
2436         {
2437             SLI_LOOP_CONTINUE;
2438         }
2439         pKernelBus = GPU_GET_KERNEL_BUS(pGpu);
2440         failStatus = kbusUnmapFbAperture_HAL(pGpu, pKernelBus,
2441                                              pMemDesc, newAperOffset,
2442                                              *pLength,
2443                                              BUS_MAP_FB_FLAGS_MAP_UNICAST);
2444         // Failure to unmap mapped address
2445         if (failStatus != NV_OK)
2446         {
2447             NV_PRINTF(LEVEL_ERROR,
2448                       "[GPU%u] Could not unmap on failure to map Bar1\n",
2449                       pGpu->gpuInstance);
2450         }
2451     }
2452     SLI_LOOP_END
2453 
2454     return rmStatus;
2455 }
2456 
2457 NV_STATUS
2458 kbusUnmapFbAperture_GM107
2459 (
2460     OBJGPU     *pGpu,
2461     KernelBus  *pKernelBus,
2462     MEMORY_DESCRIPTOR *pMemDesc,
2463     NvU64       aperOffset,
2464     NvU64       length,
2465     NvU32       flags
2466 )
2467 {
2468     NV_STATUS       rmStatus    = NV_OK;
2469     NvBool          bBcState    = gpumgrGetBcEnabledStatus(pGpu);
2470     OBJVASPACE     *pVAS        = NULL;
2471     OBJGPU         *pLoopGpu    = NULL;
2472 
2473     NV_ASSERT(pMemDesc);
2474 
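    // BAR1 mappings are page-granular, so drop any sub-page offset before freeing the VA mapping.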
2475     aperOffset &= ~RM_PAGE_MASK;
2476 
2477     // Set BC to enabled if UC flag not passed
2478     if ((IsSLIEnabled(pGpu) && ((flags & BUS_MAP_FB_FLAGS_MAP_UNICAST) == 0)) &&
2479         ((flags & BUS_MAP_FB_FLAGS_PRE_INIT) == 0))
2480     {
2481         gpumgrSetBcEnabledStatus(pGpu, NV_TRUE);
2482     }
2483     else
2484     {
2485         gpumgrSetBcEnabledStatus(pGpu, NV_FALSE);
2486     }
2487 
2488     // Call _kbusUnmapAperture_GM107 in UC for each GPU when BC is called
2489     SLI_LOOP_START(SLI_LOOP_FLAGS_BC_ONLY)
2490     {
2491         pKernelBus = GPU_GET_KERNEL_BUS(pGpu);
2492         pLoopGpu = pGpu;
2493         pVAS = kbusGetBar1VASpace_HAL(pGpu, pKernelBus);
2494 
2495         if (pVAS == NULL)
2496         {
2497             rmStatus = NV_ERR_GENERIC;
2498             SLI_LOOP_BREAK;
2499         }
2500         memdescFlushCpuCaches(pGpu, pMemDesc);
2501         rmStatus = _kbusUnmapAperture_GM107(pGpu, pVAS, pMemDesc, aperOffset);
2502 
2503         if (rmStatus != NV_OK)
2504         {
2505             SLI_LOOP_BREAK;
2506         }
2507     }
2508     SLI_LOOP_END
2509 
2510     _kbusUpdateDebugStatistics(pGpu);
2511 
2512     if (rmStatus == NV_OK)
2513     {
2514         NV_PRINTF(LEVEL_INFO,
2515                   "unmapped BAR1 offset 0x%llx\n",
2516                   aperOffset);
2517     }
2518     else
2519     {
2520         NV_PRINTF(LEVEL_ERROR, "[GPU%u] Unable to unmap aperOffset: 0x%llx\n",
2521                   pLoopGpu->gpuInstance, aperOffset);
2522     }
2523 
2524     gpumgrSetBcEnabledStatus(pGpu, bBcState);
2525 
2526     return rmStatus;
2527 }
2528 
2529 /*!
2530  * @brief Lower level FB flush to push pending writes to FB/sysmem
2531  *
2532  * NOTE: Must be called inside a SLI loop
2533  *
2534  * @param[in]   pGpu
 * @param[in]   pKernelBus
2536  * @param[in]   flags   Flags to indicate aperture and other behaviors
2537  * @return      NV_OK on success
2538  *
2539  */
2540 NV_STATUS
2541 kbusFlushSingle_GM107
2542 (
2543     OBJGPU      *pGpu,
2544     KernelBus   *pKernelBus,
2545     NvU32        flags
2546 )
2547 {
2548     NvBool  bCoherentCpuMapping = pGpu->getProperty(pGpu, PDB_PROP_GPU_COHERENT_CPU_MAPPING);
2549 
2550     //
2551     // Nothing to be done in the guest in the paravirtualization case or
2552     // if guest is running in SRIOV heavy mode.
2553     //
2554     if (IS_VIRTUAL_WITHOUT_SRIOV(pGpu) ||
2555         (IS_VIRTUAL(pGpu) && gpuIsWarBug200577889SriovHeavyEnabled(pGpu)))
2556     {
2557         return NV_OK;
2558     }
2559 
2560     if (bCoherentCpuMapping)
2561     {
2562         //
2563         // This function issues an HWSYNC. This is needed for synchronizing read/writes
2564         // with NVLINK mappings.
2565         //
2566         portAtomicMemoryFenceFull();
2567         return NV_OK;
2568     }
2569 
2570     if (flags & BUS_FLUSH_SYSTEM_MEMORY)
2571     {
2572         portAtomicMemoryFenceFull();
2573     }
2574 
2575     if (API_GPU_IN_RESET_SANITY_CHECK(pGpu) || API_GPU_IN_RECOVERY_SANITY_CHECK(pGpu) ||
2576         !API_GPU_ATTACHED_SANITY_CHECK(pGpu))
2577     {
2578         //
2579         // When the GPU is in full chip reset or lost
2580         // We cannot expect to flush successfully so early return here
2581         //
2582         return NV_OK;
2583     }
2584 
2585     if (flags & BUS_FLUSH_VIDEO_MEMORY)
2586     {
2587         //
2588         // Read the FB address 0 in order to trigger a flush.
2589         // This will not work with reflected mappings so only enable on VOLTA+
2590         // Note SRIOV guest does not have access to uflush register.
2591         //
2592         // TODO: remove the BUS_FLUSH_USE_PCIE_READ flag from RM and do this
2593         // everywhere since it's faster than uflush.
2594         //
2595         if (IS_VIRTUAL(pGpu) ||
2596             (kbusIsReadCpuPointerToFlushEnabled(pKernelBus) &&
2597              (flags & BUS_FLUSH_USE_PCIE_READ)))
2598         {
2599             volatile NvU32 data;
2600             NV_ASSERT(pKernelBus->pReadToFlush != NULL || pKernelBus->virtualBar2[GPU_GFID_PF].pCpuMapping != NULL);
2601 
2602             if (pKernelBus->pReadToFlush != NULL)
2603             {
2604                 data = MEM_RD32(pKernelBus->pReadToFlush);
2605             }
2606             else if (pKernelBus->virtualBar2[GPU_GFID_PF].pCpuMapping != NULL)
2607             {
2608                 //
                // pReadToFlush is not ready for use yet, so use pCpuMapping
                // instead; it should already be mapped to FB address 0 since
                // BAR2 is still in physical mode.
2612                 //
2613                 data = MEM_RD32(pKernelBus->virtualBar2[GPU_GFID_PF].pCpuMapping);
2614             }
2615             (void) data;
2616             return NV_OK;
2617         }
2618         else
2619         {
2620             if (IS_GSP_CLIENT(pGpu))
2621             {
2622                 //
                // On a GSP client, we should use PCIE_READ to flush video memory.
                // A sysmembar flush that touches registers goes through RPC and is
                // less efficient. Call sites that actually need a sysmembar should
                // use kbusSendSysmembarSingle_HAL explicitly.
2627                 //
2628                 NV_ASSERT(0);
2629 
2630                 // This will dump a stack trace to assist debug on certain
2631                 // platforms.
2632                 osAssertFailed();
2633             }
2634 
2635             return kbusSendSysmembarSingle_HAL(pGpu, pKernelBus);
2636         }
2637     }
2638 
2639     return NV_OK;
2640 }
2641 
2642 /*!
2643  * @brief Properly flush written PDEs, PTEs, or other
2644  * instance memory data or context buffers. See bug 152868
2645  *
2646  * NOTE: Must call kbusFlush BEFORE any calls to busInvalidate
2647  *
2648  * @param[in] pGpu
2649  * @param[in] pKernelBus
2650  * @param[in] flags     NvU32 flags to indicate flush behavior
2651  *
2652  */
2653 NV_STATUS
2654 kbusFlush_GM107(OBJGPU *pGpu, KernelBus *pKernelBus, NvU32 flags)
2655 {
2656     NV_STATUS           status  = NV_OK;
2657 
    // Nothing to be done in the guest in the paravirtualization case.
2659     if (IS_VIRTUAL_WITHOUT_SRIOV(pGpu))
2660     {
2661         return NV_OK;
2662     }
2663 
2664     if (kbusIsFbFlushDisabled(pKernelBus))
2665     {
2666         // Eliminate FB flushes, but keep mmu invalidates
2667         NV_PRINTF(LEVEL_INFO, "disable_fb_flush flag, skipping flush.\n");
2668         return status;
2669     }
2670 
2671     // Wait for the flush to flow through
2672     SLI_LOOP_START(SLI_LOOP_FLAGS_BC_ONLY | SLI_LOOP_FLAGS_IGNORE_REENTRANCY);
2673         pKernelBus = GPU_GET_KERNEL_BUS(pGpu);
2674         if (kbusFlushSingle_HAL(pGpu, pKernelBus, flags) == NV_ERR_TIMEOUT)
2675         {
2676             status = NV_ERR_TIMEOUT;
2677         }
2678     SLI_LOOP_END;
2679     pKernelBus = GPU_GET_KERNEL_BUS(pGpu);
2680 
2681     return status;
2682 }
2683 
2684 //
2685 // _kbusMapAperture_GM107
2686 // Helper function: Given offset and range, alloc VA address space and update it.
2687 //
2688 NV_STATUS
2689 _kbusMapAperture_GM107
2690 (
2691     OBJGPU            *pGpu,
2692     MEMORY_DESCRIPTOR *pMemDesc,
2693     OBJVASPACE        *pVAS,
2694     NvU64              offset,
2695     NvU64             *pAperOffset,
2696     NvU64             *pLength,
2697     NvU32              mapFlags,
2698     NvHandle           hClient
2699 )
2700 {
2701     NV_STATUS           rmStatus = NV_ERR_GENERIC;
2702     VirtMemAllocator   *pDma;
2703     NvBool              bBcState = gpumgrGetBcEnabledStatus(pGpu);
2704     NvU32               flags = DRF_DEF(OS46, _FLAGS, _DMA_UNICAST_REUSE_ALLOC, _FALSE);
2705     MEMORY_DESCRIPTOR  *pTempMemDesc;
2706     NvU32               swizzId = KMIGMGR_SWIZZID_INVALID;
2707 
2708     // Ensure that the BAR1 VA space is the same across all subdevices
2709     if (IsSLIEnabled(pGpu) && ((mapFlags & BUS_MAP_FB_FLAGS_MAP_UNICAST) == 0))
2710     {
2711         pGpu  = gpumgrGetParentGPU(pGpu);
2712         gpumgrSetBcEnabledStatus(pGpu, NV_TRUE);
2713     }
2714 
2715     if (mapFlags & BUS_MAP_FB_FLAGS_MAP_OFFSET_FIXED)
2716     {
2717         flags = FLD_SET_DRF(OS46, _FLAGS, _DMA_OFFSET_FIXED, _TRUE, flags);
2718     }
2719 
2720     pDma  = GPU_GET_DMA(pGpu);
2721 
2722     //
2723     // Valid client handle should be associated with a BAR1 mapping request if SMC memory
2724     // partitioning is enabled. That's because BAR1 VA space is split among SMC partitions.
2725     //
    // Internal allocations like RM-allocated USERD which require a BAR1 mapping are done during RM init,
    // before SMC is enabled and the BAR1 VA space is split. They therefore work without an associated
    // hClient; such BAR1 VA space allocations must happen before BAR1 is split.
2729     //
2730     if (IS_MIG_IN_USE(pGpu))
2731     {
2732         MIG_INSTANCE_REF ref;
2733         KernelMIGManager *pKernelMIGManager = GPU_GET_KERNEL_MIG_MANAGER(pGpu);
2734 
2735         NV_ASSERT_OR_RETURN(hClient != NV01_NULL_OBJECT, NV_ERR_INVALID_ARGUMENT);
2736         NV_ASSERT_OK_OR_RETURN(kmigmgrGetInstanceRefFromClient(pGpu, pKernelMIGManager,
2737                                    hClient, &ref));
2738         swizzId = ref.pKernelMIGGpuInstance->swizzId;
2739     }
2740 
2741     if (memdescGetCpuCacheAttrib(pMemDesc) == NV_MEMORY_CACHED)
2742     {
2743         flags = FLD_SET_DRF(OS46, _FLAGS, _CACHE_SNOOP, _ENABLE, flags);
2744     }
2745 
2746     if (mapFlags & BUS_MAP_FB_FLAGS_MAP_DOWNWARDS)
2747     {
2748         flags = FLD_SET_DRF(OS46, _FLAGS, _DMA_OFFSET_GROWS, _DOWN, flags);
2749     }
2750 
    // Disable encryption if a DIRECT mapping is requested; currently this is only for testing purposes.
2752     if (mapFlags & BUS_MAP_FB_FLAGS_DISABLE_ENCRYPTION)
2753     {
2754         // !!!! Nasty hack
2755         //
        // NVOS46_FLAGS_PTE_COALESCE_LEVEL_CAP is used to convey the encryption info to dmaAllocMapping_HAL(),
        // since we have no bit fields left in NVOS46_FLAGS_* to specify encryption info.
2758         // This is applicable to FERMI+ chips.
2759         //
2760         // NVOS46_FLAGS_PTE_COALESCE_LEVEL_CAP is _NV50 specific, and is not used in FERMI+.
2761         // NVOS46_FLAGS_PTE_COALESCE_LEVEL_CAP_DEFAULT means use default encryption status
2762         // NVOS46_FLAGS_PTE_COALESCE_LEVEL_CAP_1       means disable encryption
2763         flags = FLD_SET_DRF(OS46, _FLAGS, _PTE_COALESCE_LEVEL_CAP, _1, flags);
2764     }
2765 
2766     NV_ASSERT(!((mapFlags & BUS_MAP_FB_FLAGS_READ_ONLY) &&
2767                 (mapFlags & BUS_MAP_FB_FLAGS_WRITE_ONLY)));
2768     if (mapFlags & BUS_MAP_FB_FLAGS_READ_ONLY)
2769     {
2770         flags = FLD_SET_DRF(OS46, _FLAGS, _ACCESS, _READ_ONLY, flags);
2771     }
2772     else if (mapFlags & BUS_MAP_FB_FLAGS_WRITE_ONLY)
2773     {
2774         flags = FLD_SET_DRF(OS46, _FLAGS, _ACCESS, _WRITE_ONLY, flags);
2775     }
2776 
2777     rmStatus = memdescCreateSubMem(&pTempMemDesc, pMemDesc, pGpu, offset, *pLength);
2778     if (NV_OK == rmStatus)
2779     {
2780         rmStatus = dmaAllocMapping_HAL(pGpu, pDma, pVAS, pTempMemDesc, pAperOffset, flags, NULL, swizzId);
2781         memdescFree(pTempMemDesc);
2782         memdescDestroy(pTempMemDesc);
2783     }
2784 
2785     gpumgrSetBcEnabledStatus(pGpu, bBcState);
2786 
2787     return rmStatus;
2788 }
2789 
2790 //
2791 // _kbusUnmapAperture_GM107
2792 // Helper function: Given offset and range, free VA address space.
2793 //
2794 NV_STATUS
2795 _kbusUnmapAperture_GM107
2796 (
2797     OBJGPU            *pGpu,
2798     OBJVASPACE        *pVAS,
2799     MEMORY_DESCRIPTOR *pMemDesc,
2800     NvU64              aperOffset
2801 )
2802 {
2803     NV_STATUS           rmStatus = NV_OK;
2804     VirtMemAllocator   *pDma = GPU_GET_DMA(pGpu);
2805 
2806     rmStatus = dmaFreeMapping_HAL(pGpu, pDma, pVAS, aperOffset, pMemDesc, 0, NULL);
2807 
2808     return rmStatus;
2809 }
2810 
2811 NV_STATUS
2812 _kbusInitP2P_GM107
2813 (
2814     OBJGPU    *pGpu,
2815     KernelBus *pKernelBusUnused
2816 )
2817 {
2818     NV_STATUS status = NV_OK;
2819     KernelBus *pLocalKernelBus;
2820     KernelBus *pRemoteKernelBus;
2821 
2822     NvU32 deviceInstance, gpuMask;
2823     OBJGPU *pLocalGpu, *pRemoteGpu;
2824     NvU32 localGpuInstance, remoteGpuInstance;
2825     NvU32 localPeerIndex, remotePeerIndex, localPeerCount, remotePeerCount;
2826     NvU32 numSubdevices;
2827 
2828     deviceInstance = gpuGetDeviceInstance(pGpu);
2829     gpuMask = gpumgrGetDeviceGpuMask(deviceInstance);
2830     numSubdevices = gpumgrGetSubDeviceCount(gpuMask);
2831 
2832     if ((numSubdevices < 1) || (numSubdevices > P2P_MAX_NUM_PEERS))
2833     {
2834         NV_PRINTF(LEVEL_ERROR,
2835                   "Fermi only supports P2P with up to 8 subdevices in SLI configuration.\n");
2836         return NV_ERR_GENERIC;
2837     }
2838 
2839     // Link all the GPUs.
2840     localGpuInstance = 0;
2841     localPeerIndex = 0;
2842     localPeerCount = 0;
2843 
2844     while ((pLocalGpu = gpumgrGetNextGpu(gpuMask, &localGpuInstance)) != NULL)
2845     {
2846         pLocalKernelBus = GPU_GET_KERNEL_BUS(pLocalGpu);
2847 
2848         remoteGpuInstance = localGpuInstance;
2849         remotePeerIndex = localPeerIndex + 1;
2850         remotePeerCount = 0;
2851 
2852         while ((pRemoteGpu = gpumgrGetNextGpu(gpuMask, &remoteGpuInstance)) != NULL)
2853         {
2854             NvU32 locPeerId;
2855             NvU32 remPeerId;
2856 
2857             NV_ASSERT(localPeerIndex != remotePeerIndex);
2858             NV_ASSERT((localPeerCount < P2P_MAX_NUM_PEERS) &&
2859                       (remotePeerCount < P2P_MAX_NUM_PEERS));
2860 
2861             pRemoteKernelBus  = GPU_GET_KERNEL_BUS(pRemoteGpu);
2862 
2863             locPeerId = kbusGetPeerIdFromTable_HAL(pLocalGpu, pLocalKernelBus,
2864                                                   localPeerIndex, remotePeerIndex);
2865             remPeerId = kbusGetPeerIdFromTable_HAL(pRemoteGpu, pRemoteKernelBus,
2866                                                   remotePeerIndex, localPeerIndex);
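            // Note: the two directions may use different peer IDs; each side records the other's ID in remotePeerId below.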
2867 
2868             NV_ASSERT((locPeerId < P2P_MAX_NUM_PEERS) &&
2869                       (remPeerId < P2P_MAX_NUM_PEERS));
2870 
2871             pLocalKernelBus->p2pPcie.peerNumberMask[pRemoteGpu->gpuInstance] |=
2872                 NVBIT(locPeerId);
2873             pRemoteKernelBus->p2pPcie.peerNumberMask[pLocalGpu->gpuInstance] |=
2874                 NVBIT(remPeerId);
2875 
2876             pLocalKernelBus->p2pPcie.busPeer[locPeerId].refCount++;
2877             pLocalKernelBus->p2pPcie.busPeer[locPeerId].remotePeerId = remPeerId;
2878             pRemoteKernelBus->p2pPcie.busPeer[remPeerId].refCount++;
2879             pRemoteKernelBus->p2pPcie.busPeer[remPeerId].remotePeerId = locPeerId;
2880 
2881             remotePeerIndex++;
2882             remotePeerCount++;
2883         }
2884 
2885         pLocalKernelBus->bP2pInitialized = NV_TRUE;
2886 
2887         localPeerIndex++;
2888         localPeerCount++;
2889     }
2890 
2891     return status;
2892 }
2893 
2894 NV_STATUS
2895 _kbusDestroyP2P_GM107
2896 (
2897     OBJGPU    *pGpu,
2898     KernelBus *pKernelBus
2899 )
2900 {
2901     NV_STATUS status = NV_OK;
2902 
2903     OBJGPU *pRemoteGpu;
2904     KernelBus *pRemoteKernelBus;
2905     NvU32 i;
2906 
2907 
2908     // Clear all peer numbers.
2909     for (i = 0; i < NV_MAX_DEVICES; i++)
2910     {
2911         if (pKernelBus->p2pPcie.peerNumberMask[i] != 0)
2912         {
2913             NvU32 locPeerId, remPeerId, gpuInst;
2914 
2915             pRemoteGpu = gpumgrGetGpu(i);
2916             NV_ASSERT_OR_RETURN(pRemoteGpu != NULL, NV_ERR_INVALID_STATE);
2917             pRemoteKernelBus = GPU_GET_KERNEL_BUS(pRemoteGpu);
2918             locPeerId = kbusGetPeerId_HAL(pGpu, pKernelBus, pRemoteGpu);
2919             remPeerId = kbusGetPeerId_HAL(pRemoteGpu, pRemoteKernelBus, pGpu);
2920 
2921             NV_ASSERT_OR_RETURN(locPeerId < P2P_MAX_NUM_PEERS,
2922                               NV_ERR_INVALID_STATE);
2923             NV_ASSERT_OR_RETURN(remPeerId < P2P_MAX_NUM_PEERS,
2924                               NV_ERR_INVALID_STATE);
2925             NV_ASSERT_OR_RETURN(pRemoteKernelBus->p2pPcie.busPeer[remPeerId].remotePeerId == locPeerId,
2926                               NV_ERR_INVALID_STATE);
2927 
2928             pKernelBus->p2pPcie.busPeer[locPeerId].refCount--;
2929             pRemoteKernelBus->p2pPcie.busPeer[remPeerId].refCount--;
2930 
2931             gpuInst = gpuGetInstance(pGpu);
2932             pKernelBus->p2pPcie.peerNumberMask[i] &= ~NVBIT(locPeerId);
2933             pRemoteKernelBus->p2pPcie.peerNumberMask[gpuInst] &= ~NVBIT(remPeerId);
2934 
            // That should have been the only peer ID associated with the remote GPU
2936             NV_ASSERT(pKernelBus->p2pPcie.peerNumberMask[i] == 0);
2937             NV_ASSERT(pRemoteKernelBus->p2pPcie.peerNumberMask[gpuInst] == 0);
2938         }
2939 
2940         // Clear NVlink related data structures as well.
2941         if (kbusGetNvlinkPeerNumberMask_HAL(pGpu, pKernelBus, i) != 0)
2942         {
2943             NvU32 locPeerId, remPeerId, gpuInst;
2944 
2945             pRemoteGpu = gpumgrGetGpu(i);
2946             NV_ASSERT_OR_RETURN(pRemoteGpu != NULL, NV_ERR_INVALID_STATE);
2947             pRemoteKernelBus = GPU_GET_KERNEL_BUS(pRemoteGpu);
2948             locPeerId = kbusGetPeerId_HAL(pGpu, pKernelBus, pRemoteGpu);
2949             remPeerId = kbusGetPeerId_HAL(pRemoteGpu, pRemoteKernelBus, pGpu);
2950             gpuInst = gpuGetInstance(pGpu);
2951 
2952             NV_ASSERT_OR_RETURN(locPeerId < P2P_MAX_NUM_PEERS,
2953                               NV_ERR_INVALID_STATE);
2954             NV_ASSERT_OR_RETURN(remPeerId < P2P_MAX_NUM_PEERS,
2955                               NV_ERR_INVALID_STATE);
2956 
2957             pKernelBus->p2p.busNvlinkMappingRefcountPerGpu[i]--;
2958             pRemoteKernelBus->p2p.busNvlinkMappingRefcountPerGpu[gpuInst]--;
2959             pKernelBus->p2p.busNvlinkPeerNumberMask[i] &= ~NVBIT(locPeerId);
2960             pRemoteKernelBus->p2p.busNvlinkPeerNumberMask[gpuInst] &= ~NVBIT(remPeerId);
2961             pKernelBus->p2p.busNvlinkMappingRefcountPerPeerId[locPeerId]--;
2962             pRemoteKernelBus->p2p.busNvlinkMappingRefcountPerPeerId[remPeerId]--;
2963         }
2964     }
2965 
2966     for (i = 0; i < P2P_MAX_NUM_PEERS; ++i)
2967     {
2968         if (pKernelBus->p2pPcie.busPeer[i].refCount)
2969         {
2970             NV_PRINTF(LEVEL_ERROR,
2971                       "non-zero peer refcount(%d) on GPU 0x%x peer %d\n",
2972                       pKernelBus->p2pPcie.busPeer[i].refCount, pGpu->gpuInstance, i);
2973         }
2974         pKernelBus->p2pPcie.busPeer[i].refCount = 0;
2975     }
2976 
2977     pKernelBus->bP2pInitialized = NV_FALSE;
2978 
2979     return status;
2980 }
2981 
2982 
2983 //
2984 // Link P2P for all GPUs
2985 //
2986 void
2987 _kbusLinkP2P_GM107
2988 (
2989     OBJGPU    *pGpu,
2990     KernelBus *pKernelBus
2991 )
2992 {
2993     OBJGPU     *pRemoteGpu;
2994     NV_STATUS   status;
2995     NvU32       i;
2996 
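    //
    // For each GPU we have a P2P mapping with, re-establish the link:
    // prefer NVLINK when both ends report an NVLINK P2P connection, otherwise
    // fall back to programming the PCIe mailboxes and the PEER_CONNECTION_CFG
    // registers on both GPUs.
    //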
2997     for ( i = 0; i < NV_MAX_DEVICES; ++i)
2998     {
2999         if ((pKernelBus->p2pPcie.peerNumberMask[i] != 0) ||
3000             (kbusGetNvlinkPeerNumberMask_HAL(pGpu, pKernelBus, i) != 0))
3001         {
3002             pRemoteGpu = gpumgrGetGpu(i);
3003             NV_ASSERT(pRemoteGpu != NULL);
3004 
3005             //
            // If there is a loopback mapping, pRemoteGpu will report !fullPower
            // since we are currently in the process of resuming it.
            // Therefore, we special-case it and restore the mapping anyway.
3009             //
3010             if (gpuIsGpuFullPower(pRemoteGpu) ||
3011                     pRemoteGpu == pGpu)
3012             {
3013                 KernelNvlink *pKernelNvlink = GPU_GET_KERNEL_NVLINK(pGpu);
3014                 KernelNvlink *pRemoteKernelNvlink = GPU_GET_KERNEL_NVLINK(pRemoteGpu);
3015                 NvU32 locPeerId = kbusGetPeerId_HAL(pGpu, pKernelBus, pRemoteGpu);
3016                 NvU32 remPeerId = kbusGetPeerId_HAL(pRemoteGpu, GPU_GET_KERNEL_BUS(pRemoteGpu), pGpu);
3017 
3018                 NV_ASSERT(locPeerId < P2P_MAX_NUM_PEERS);
3019                 NV_ASSERT(remPeerId < P2P_MAX_NUM_PEERS);
3020                 NV_ASSERT(pKernelBus->p2pPcie.busPeer[locPeerId].remotePeerId == remPeerId);
3021 
3022                 if ((pKernelNvlink != NULL) && (pRemoteKernelNvlink != NULL) &&
3023                     (knvlinkGetP2pConnectionStatus(pGpu, pKernelNvlink, pRemoteGpu) == NV_OK))
3024                 {
3025                     //
3026                     // These variables should only be updated for RM Managed P2P.
3027                     // And only once during RmInit, not during resume as while
3028                     // going to S3/S4, these variables are not cleared.
3029                     //
3030                     if (!kbusIsP2pMailboxClientAllocated(pKernelBus) &&
3031                         !pGpu->getProperty(pGpu, PDB_PROP_GPU_IN_PM_CODEPATH))
3032                     {
3033                         KernelBus *pRemoteKernelBus = GPU_GET_KERNEL_BUS(pRemoteGpu);
3034 
3035                         pKernelBus->p2p.busNvlinkPeerNumberMask[pRemoteGpu->gpuInstance] |=
3036                             NVBIT(locPeerId);
3037                         pRemoteKernelBus->p2p.busNvlinkPeerNumberMask[pGpu->gpuInstance] |=
3038                             NVBIT(remPeerId);
3039                         pKernelBus->p2p.busNvlinkMappingRefcountPerGpu[pRemoteGpu->gpuInstance]++;
3040                         pRemoteKernelBus->p2p.busNvlinkMappingRefcountPerGpu[pGpu->gpuInstance]++;
3041                         pKernelBus->p2p.busNvlinkMappingRefcountPerPeerId[locPeerId]++;
3042                         pRemoteKernelBus->p2p.busNvlinkMappingRefcountPerPeerId[remPeerId]++;
3043                     }
3044 
3045                     // Train the links to ACTIVE
3046                     if ((knvlinkTrainP2pLinksToActive(pGpu, pRemoteGpu, pKernelNvlink)) != NV_OK)
3047                     {
3048                         NV_ASSERT(0);
3049                     }
3050 
3051                     // Use NVLINK if available
3052                     knvlinkSetupPeerMapping_HAL(pGpu, pKernelNvlink, pRemoteGpu, locPeerId);
3053                     knvlinkSetupPeerMapping_HAL(pRemoteGpu, pRemoteKernelNvlink, pGpu, remPeerId);
3054                 }
3055                 else
3056                 {
3057                     RM_API *pRmApi;
3058                     NV2080_CTRL_INTERNAL_HSHUB_PEER_CONN_CONFIG_PARAMS params;
3059 
3060                     //
3061                     // Fall back to PCIe otherwise
3062                     // We only expect one PCIE peer ID per remote GPU for SLI
3063                     //
3064                     NV_ASSERT(nvPopCount32(pKernelBus->p2pPcie.peerNumberMask[i]) == 1);
3065 
3066                     kbusSetupMailboxes_HAL(pGpu, pKernelBus,
3067                                            pRemoteGpu, GPU_GET_KERNEL_BUS(pRemoteGpu),
3068                                            locPeerId, remPeerId);
3069                     kbusSetupMailboxes_HAL(pRemoteGpu, GPU_GET_KERNEL_BUS(pRemoteGpu),
3070                                            pGpu, pKernelBus,
3071                                            remPeerId, locPeerId);
3072                     // Program the registers
3073                     pRmApi = GPU_GET_PHYSICAL_RMAPI(pGpu);
3074                     portMemSet(&params, 0, sizeof(params));
3075                     params.programPciePeerMask = NVBIT32(locPeerId);
3076                     status = pRmApi->Control(pRmApi,
3077                                              pGpu->hInternalClient,
3078                                              pGpu->hInternalSubdevice,
3079                                              NV2080_CTRL_CMD_INTERNAL_HSHUB_PEER_CONN_CONFIG,
3080                                              &params,
3081                                              sizeof(params));
3082                     if (status != NV_OK)
3083                     {
3084                         NV_PRINTF(LEVEL_ERROR, "Error in programming the local PEER_CONNECTION_CFG registers\n");
3085                     }
3086                     pRmApi = GPU_GET_PHYSICAL_RMAPI(pRemoteGpu);
3087                     portMemSet(&params, 0, sizeof(params));
3088                     params.programPciePeerMask = NVBIT32(remPeerId);
3089                     status = pRmApi->Control(pRmApi,
3090                                              pRemoteGpu->hInternalClient,
3091                                              pRemoteGpu->hInternalSubdevice,
3092                                              NV2080_CTRL_CMD_INTERNAL_HSHUB_PEER_CONN_CONFIG,
3093                                              &params,
3094                                              sizeof(params));
3095                     if (status != NV_OK)
3096                     {
3097                         NV_PRINTF(LEVEL_ERROR, "Error in programming the remote PEER_CONNECTION_CFG registers\n");
3098                     }
3099                 }
3100             }
3101         }
3102     }
3103 }
3104 
3105 static NV_STATUS
3106 kbusSendMemsysDisableNvlinkPeers
3107 (
3108     OBJGPU    *pGpu
3109 )
3110 {
3111     RM_API *pRmApi = GPU_GET_PHYSICAL_RMAPI(pGpu);
3112 
3113     if (API_GPU_IN_RESET_SANITY_CHECK(pGpu))
3114         return NV_OK;
3115 
3116     return pRmApi->Control(pRmApi, pGpu->hInternalClient, pGpu->hInternalSubdevice,
3117                            NV2080_CTRL_CMD_INTERNAL_MEMSYS_DISABLE_NVLINK_PEERS,
3118                            NULL, 0);
3119 }
3120 
3121 //
3122 // Unlink P2P for all GPUs
3123 //
3124 void
3125 kbusUnlinkP2P_GM107
3126 (
3127     OBJGPU    *pGpu,
3128     KernelBus *pKernelBus
3129 )
3130 {
3131     KernelBus *pRemoteKernelBus;
3132     OBJGPU *pRemoteGpu;
3133     NvU32 i;
3134 
3135     for ( i = 0; i < NV_MAX_DEVICES; ++i)
3136     {
3137         if ((pKernelBus->p2pPcie.peerNumberMask[i] != 0) ||
3138             (kbusGetNvlinkPeerNumberMask_HAL(pGpu, pKernelBus, i) != 0))
3139         {
3140             pRemoteGpu = gpumgrGetGpu(i);
3141             if (pRemoteGpu == NULL)
3142             {
3143                 //
3144                 // There is a P2P mapping involving an unloaded GPU
                // Has NV50_P2P been properly freed?
3146                 //
3147                 NV_PRINTF(LEVEL_ERROR, "There is a P2P mapping involving an unloaded GPU\n");
3148                 continue;
3149             }
3150 
3151             pRemoteKernelBus = GPU_GET_KERNEL_BUS(pRemoteGpu);
3152 
3153             if (gpuIsGpuFullPower(pRemoteGpu) &&
3154                 kbusIsP2pInitialized(pRemoteKernelBus))
3155             {
3156                 //
3157                 // NVLINK mappings are static and cannot be torn down, but make
3158                 // sure we tear down any PCIe P2P mappings created.
3159                 //
3160                 if (pKernelBus->p2pPcie.peerNumberMask[i] != 0)
3161                 {
3162                     NvU32 locPeerId = kbusGetPeerId_HAL(pGpu, pKernelBus, pRemoteGpu);
3163                     NvU32 remPeerId = kbusGetPeerId_HAL(pRemoteGpu, pRemoteKernelBus, pGpu);
3164 
3165                     // We only expect one PCIE peer ID per remote GPU for SLI
3166                     NV_ASSERT(nvPopCount32(pKernelBus->p2pPcie.peerNumberMask[i]) == 1);
3167 
3168                     NV_ASSERT(locPeerId < P2P_MAX_NUM_PEERS);
3169                     NV_ASSERT(remPeerId < P2P_MAX_NUM_PEERS);
3170                     NV_ASSERT(pKernelBus->p2pPcie.busPeer[locPeerId].remotePeerId == remPeerId);
3171 
3172                     kbusDestroyMailbox(pGpu, pKernelBus, pRemoteGpu, locPeerId);
3173                     kbusDestroyMailbox(pRemoteGpu, pRemoteKernelBus, pGpu, remPeerId);
3174                 }
3175 
3176                 //
                // Since NVLINK mappings cannot be torn down, just disable the NVLINK peers instead
3178                 //
3179                 NV_ASSERT_OK(kbusSendMemsysDisableNvlinkPeers(pGpu));
3180                 NV_ASSERT_OK(kbusSendMemsysDisableNvlinkPeers(pRemoteGpu));
3181             }
3182         }
3183     }
3184 }
3185 
3186 /*!
3187  * @brief  Calculates the memory needed for allocating BAR2 Page Tables.
3188  *
 * Size calculation is optimized for @ref GMMU_FMT_VER_1 due to the
 * large % overhead of a full Page Table size over the size
 * actually needed for BAR2. The UVM replayable fault buffer size is
 * also accommodated in this calculation.
3193  *
3194  * @return Size in Bytes, needed for BAR2 Page Tables.
3195  */
3196 NvU32
3197 kbusGetSizeOfBar2PageTables_GM107
3198 (
3199     OBJGPU    *pGpu,
3200     KernelBus *pKernelBus
3201 )
3202 {
3203     KernelGmmu          *pKernelGmmu = GPU_GET_KERNEL_GMMU(pGpu);
3204     const GMMU_FMT      *pFmt  = NULL;
3205     NvU64                vaLimit;
3206     NvU32                numPgTblsCeil;
3207     NvU32                numPgTblsFloor;
3208     NvU32                pgTblSize;
3209     NvU32                numEntries;
3210     NvU64                vaPerEntry;
3211     const MMU_FMT_LEVEL *pPgTbl = NULL;
3212     NvU32                gfid;
3213     NvU32                cpuVisibleApertureSize = 0;
    NvU32                cpuInvisibleApertureSize = 0;
3215 
3216     NV_ASSERT_OR_RETURN(vgpuGetCallingContextGfid(pGpu, &gfid) == NV_OK, 0);
3217 
3218     // Return 0 from the guest in the paravirtualization case.
3219     if (IS_VIRTUAL_WITHOUT_SRIOV(pGpu))
3220     {
3221         return 0;
3222     }
3223 
3224     // Get the @ref GMMU_FMT for this chip
3225     pFmt = kgmmuFmtGet(pKernelGmmu, GMMU_FMT_VERSION_DEFAULT, 0);
3226     NV_ASSERT_OR_RETURN(NULL != pFmt, 0);
3227 
3228     // Get 4K page size Page Table
3229     pPgTbl = mmuFmtFindLevelWithPageShift(pFmt->pRoot, RM_PAGE_SHIFT);
3230 
3231     if (pKernelBus->bar2[gfid].cpuVisibleLimit != 0)
3232         cpuVisibleApertureSize  = pKernelBus->bar2[gfid].cpuVisibleLimit - pKernelBus->bar2[gfid].cpuVisibleBase + 1;
3233     if (pKernelBus->bar2[gfid].cpuInvisibleLimit != 0)
        cpuInvisibleApertureSize = pKernelBus->bar2[gfid].cpuInvisibleLimit - pKernelBus->bar2[gfid].cpuInvisibleBase + 1;
3235 
    vaLimit = cpuVisibleApertureSize + cpuInvisibleApertureSize;
3237 
3238 
3239     numPgTblsCeil  = (NvU32)(NV_CEIL(vaLimit, NVBIT64(pPgTbl->virtAddrBitHi + 1)));
3240     numPgTblsFloor = (NvU32)vaLimit / NVBIT64(pPgTbl->virtAddrBitHi + 1);
3241 
3242     //
    // Let's optimize the space calculation on GMMU_FMT_VER_1
3244     // if the Page Table is not fully used.
3245     //
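    // Illustrative example (exact spans depend on the chip's GMMU format):
    // if a full Page Table spans 128MB of VA but BAR2 only needs 32MB,
    // numPgTblsFloor is 0 and the single table is trimmed to just the
    // entries that cover 32MB; a 256MB BAR2 would instead get two
    // full-size tables.
    //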
3246     if (0 == numPgTblsFloor)
3247     {
3248         vaPerEntry = mmuFmtEntryVirtAddrMask(pPgTbl) + 1;
3249         numEntries = (NvU32)(NV_CEIL(vaLimit, vaPerEntry));
3250         pgTblSize  = numEntries * pPgTbl->entrySize;
3251         pKernelBus->bar2[gfid].pageTblSize = pgTblSize;
3252     }
3253     else
3254     {
3255         pKernelBus->bar2[gfid].pageTblSize = mmuFmtLevelSize(pPgTbl);
3256         pgTblSize = numPgTblsCeil * pKernelBus->bar2[gfid].pageTblSize;
3257     }
3258 
3259     pKernelBus->bar2[gfid].numPageTbls = numPgTblsCeil;
3260 
3261     return pgTblSize;
3262 }
3263 
3264 void
3265 kbusStateDestroy_GM107
3266 (
3267     OBJGPU    *pGpu,
3268     KernelBus *pKernelBus
3269 )
3270 {
3271     KernelBif           *pKernelBif     = GPU_GET_KERNEL_BIF(pGpu);
3272     MemoryManager       *pMemoryManager = GPU_GET_MEMORY_MANAGER(pGpu);
3273     NvU64                offsetBar0;
3274 
3275     (void)kbusDestroyBar2_HAL(pGpu, pKernelBus, GPU_GFID_PF);
3276 
3277     // Bind the BAR0 window to its default location
3278     // note: we can't move the window for all intents and purposes since VBIOS
    //       will also use the window at arbitrary locations (e.g., during an SMI event)
3280     if (pMemoryManager->Ram.fbAddrSpaceSizeMb)
3281     {
3282         offsetBar0 = (pMemoryManager->Ram.fbAddrSpaceSizeMb << 20) - DRF_SIZE(NV_PRAMIN);
3283         (void)kbusSetBAR0WindowVidOffset_HAL(pGpu, pKernelBus, offsetBar0);
3284     }
3285 
3286     // Unmap BAR0 Writecombined Window
3287     if(pKernelBus->pWriteCombinedBar0Window != NULL)
3288     {
3289         osUnmapPciMemoryKernelOld(pGpu, (void*)pKernelBus->pWriteCombinedBar0Window);
3290         pKernelBus->pWriteCombinedBar0Window = NULL;
3291         pKernelBus->pDefaultBar0Pointer = pKernelBus->pUncachedBar0Window;
3292     }
3293 
3294     NV_PRINTF(LEVEL_INFO, "FLA Supported: %x \n", kbusIsFlaSupported(pKernelBus));
3295 
    // Clean up FLA here:
    // if FLA is supported, destroy the FLA VAS
3298     if (IS_VIRTUAL(pGpu) && kbusIsFlaSupported(pKernelBus))
3299     {
3300         NV_PRINTF(LEVEL_INFO, "Trying to destroy FLA VAS\n");
3301         kbusDestroyFla_HAL(pGpu, pKernelBus);
3302     }
3303     //
3304     // clean up private info block
3305     //
3306 
3307     if ((pKernelBif != NULL) && ((!pKernelBif->getProperty(pKernelBif, PDB_PROP_KBIF_P2P_READS_DISABLED) ||
3308                                   !pKernelBif->getProperty(pKernelBif, PDB_PROP_KBIF_P2P_WRITES_DISABLED)) &&
3309                                  (kbusIsP2pInitialized(pKernelBus))))
3310     {
3311         (void)_kbusDestroyP2P_GM107(pGpu, pKernelBus);
3312     }
3313 }
3314 
3315 //
3316 //
3317 // Tests BAR2 against BAR0.
// If pMemDescIn is NULL, a test memdesc is created and mapped/unmapped.
// If pMemDescIn is provided, this method assumes that it has already been
// allocated and that mapping/unmapping is handled outside this method.
3322 //
3323 NV_STATUS
3324 kbusVerifyBar2_GM107
3325 (
3326     OBJGPU      *pGpu,
3327     KernelBus   *pKernelBus,
3328     PMEMORY_DESCRIPTOR pMemDescIn,
3329     NvU8        *pCpuPtrIn,
3330     NvU64        offset,
3331     NvU64        size
3332 )
3333 {
3334     MEMORY_DESCRIPTOR memDesc, *pMemDesc = NULL;
3335     NvU8             *pOffset          = NULL;
3336     NvU32             index            = 0;
3337     NvU64             bar0Window       = 0;
3338     NvU64             testMemoryOffset = 0;
3339     NvU32             testMemorySize   = 0;
3340     NV_STATUS         status           = NV_OK;
3341     NvU32             testData         = 0;
3342     NvU32             temp             = 0;
3343     NV_ADDRESS_SPACE  testAddrSpace    = ADDR_FBMEM;
3344     NV_ADDRESS_SPACE  oldAddrSpace     = ADDR_FBMEM;
3345     NvBool            bIsStandaloneTest;
3346     const NvU32       SAMPLEDATA       = 0xabcdabcd;
3347     const NvU32       FBSIZETESTED     = 0x10;
3348     NvU64             bar0TestAddr     = 0;
3349     KernelMemorySystem *pKernelMemorySystem = GPU_GET_KERNEL_MEMORY_SYSTEM(pGpu);
3350     NvU32             flagsClean       = 0;
3351 
3352     //
3353     // kbusVerifyBar2 will test BAR0 against sysmem on Tegra; otherwise skip
3354     // the test if inst_in_sys is used
3355     //
3356     if (pGpu->getProperty(pGpu, PDB_PROP_GPU_IS_ALL_INST_IN_SYSMEM) &&
3357         !IsTEGRA(pGpu))
3358     {
3359         return NV_OK;
3360     }
3361 
3362     // In L2 Cache only mode or FB broken, don't verify Bar2
3363     if (gpuIsCacheOnlyModeEnabled(pGpu) ||
3364         pGpu->getProperty(pGpu, PDB_PROP_GPU_BROKEN_FB) ||
3365         kbusIsBar2TestSkipped(pKernelBus))
3366     {
3367         return NV_OK;
3368     }
3369 
3370     NV_PRINTF(LEVEL_INFO, "\n");
3371 
3372     flagsClean = NV2080_CTRL_INTERNAL_MEMSYS_L2_INVALIDATE_EVICT_FLAGS_ALL |
3373                  NV2080_CTRL_INTERNAL_MEMSYS_L2_INVALIDATE_EVICT_FLAGS_CLEAN;
3374     if (kmemsysIsL2CleanFbPull(pKernelMemorySystem))
3375     {
3376         flagsClean |= NV2080_CTRL_INTERNAL_MEMSYS_L2_INVALIDATE_EVICT_FLAGS_WAIT_FB_PULL;
3377     }
3378 
3379     if (pMemDescIn && pCpuPtrIn)
3380     {
3381         if ((size + offset) > pMemDescIn->Size)
3382         {
3383             NV_PRINTF(LEVEL_ERROR,
3384                       "input offset 0x%llx size 0x%llx exceeds surface size 0x%llx\n",
3385                       offset, size, pMemDescIn->Size);
3386             DBG_BREAKPOINT();
3387             return NV_ERR_INVALID_ARGUMENT;
3388         }
3389         bIsStandaloneTest = NV_FALSE;
3390         pOffset = pCpuPtrIn;
3391         pMemDesc = pMemDescIn;
3392     }
3393     else
3394     {
3395         offset = 0;
3396         size = FBSIZETESTED;
3397         // Allocate some memory to test virtual BAR2 with
3398         if (pGpu->getProperty(pGpu, PDB_PROP_GPU_IS_ALL_INST_IN_SYSMEM))
3399         {
3400             memdescCreateExisting(&memDesc, pGpu, size, ADDR_SYSMEM, pGpu->instCacheOverride, MEMDESC_FLAGS_NONE);
3401         }
3402         else
3403         {
3404             memdescCreateExisting(&memDesc, pGpu, size, ADDR_FBMEM, NV_MEMORY_UNCACHED, MEMDESC_FLAGS_NONE);
3405         }
3406         status = memdescAlloc(&memDesc);
3407         if (status != NV_OK)
3408         {
3409             NV_PRINTF(LEVEL_ERROR,
3410                       "Could not allocate vidmem to test bar2 with\n");
3411             DBG_BREAKPOINT();
3412             return NV_ERR_INSUFFICIENT_RESOURCES;
3413         }
3414 
3415         bIsStandaloneTest = NV_TRUE;
3416         pOffset = kbusMapRmAperture_HAL(pGpu, &memDesc);
3417         if (pOffset == NULL)
3418         {
3419             status = NV_ERR_INSUFFICIENT_RESOURCES;
3420             goto kbusVerifyBar2_failed;
3421         }
3422         pMemDesc = &memDesc;
3423     }
3424     testMemoryOffset = memdescGetPhysAddr(pMemDesc, AT_GPU, 0) + offset;
3425     testMemorySize   = NvU64_LO32(size);
3426     testAddrSpace    = kgmmuGetHwPteApertureFromMemdesc(GPU_GET_KERNEL_GMMU(pGpu), pMemDesc);
3427 
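    //
    // The test proceeds in stages: (1) write/read the target page through the
    // BAR0 window, (2) write through virtual BAR2 and read back through the
    // BAR0 window, (3) write through the BAR0 window and read back through
    // BAR2, evicting L2 between stages so reads hit memory rather than cache.
    //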
3428     // ==========================================================
3429     // Does the BAR0 window work?
3430 
3431     NV_PRINTF_COND(IS_EMULATION(pGpu), LEVEL_NOTICE, LEVEL_INFO, "Testing BAR0 window...\n");
3432 
3433     bar0Window = kbusGetBAR0WindowVidOffset_HAL(pGpu, pKernelBus);
3434     oldAddrSpace = DRF_VAL( _PBUS, _BAR0_WINDOW, _TARGET, GPU_REG_RD32(pGpu, NV_PBUS_BAR0_WINDOW));
3435     bar0TestAddr = memdescGetPhysAddr(pMemDesc, AT_GPU, 0);
3436     GPU_FLD_WR_DRF_NUM(pGpu, _PBUS, _BAR0_WINDOW, _BASE, NvU64_LO32(bar0TestAddr >> 16));
3437     GPU_FLD_WR_DRF_NUM(pGpu, _PBUS, _BAR0_WINDOW, _TARGET, testAddrSpace);
3438     testData = GPU_REG_RD32(pGpu, DRF_BASE(NV_PRAMIN) + NvU64_LO32(bar0TestAddr & 0xffff));
3439 
3440     GPU_REG_WR32(pGpu, DRF_BASE(NV_PRAMIN) + NvU64_LO32(bar0TestAddr & 0xffff), SAMPLEDATA);
3441 
3442     if (GPU_REG_RD32(pGpu, DRF_BASE(NV_PRAMIN) + NvU64_LO32(bar0TestAddr & 0xffff)) != SAMPLEDATA)
3443     {
3444         //
3445         // Ideally, this should hit the L2 cache and even if memory is bad,
3446         // unless something in the path up to L2 is messed up, we should not
3447         // get here.
3448         //
3449         NV_PRINTF(LEVEL_ERROR,
3450             "Pre-L2 invalidate evict: Address 0x%llx programmed through the bar0 "
3451             "window with value 0x%x did not read back the last write.\n",
3452             bar0TestAddr, SAMPLEDATA);
3453         DBG_BREAKPOINT_REASON(NV_ERR_MEMORY_ERROR);
3454         status = NV_ERR_MEMORY_ERROR;
3455         goto kbusVerifyBar2_failed;
3456     }
3457 
3458     //
3459     // Evict L2 to ensure that the next read doesn't hit L2 and mistakenly
3460     // assume that the BAR0 window to vidmem works
3461     //
3462     status = kmemsysSendL2InvalidateEvict(pGpu, pKernelMemorySystem, flagsClean);
3463     if (NV_OK != status)
3464     {
3465         NV_PRINTF(LEVEL_ERROR, "L2 evict failed\n");
3466         goto kbusVerifyBar2_failed;
3467     }
3468 
3469     if (GPU_REG_RD32(pGpu, DRF_BASE(NV_PRAMIN) + NvU64_LO32(bar0TestAddr & 0xffff)) != SAMPLEDATA)
3470     {
3471         NV_PRINTF(LEVEL_ERROR,
3472             "Post-L2 invalidate evict: Address 0x%llx programmed through the bar0 "
3473             "window with value 0x%x did not read back the last write\n",
3474             bar0TestAddr, SAMPLEDATA);
3475         if (IS_EMULATION(pGpu))
3476         {
3477             NV_PRINTF(LEVEL_ERROR,
3478                       "Setup a trigger on write<Bar0+0x1700, 0x40> with a 3 quarters post "
3479                       "trigger capture\n");
3480             NV_PRINTF(LEVEL_ERROR,
3481                       "and search for the last bar0 window write not returning the same value"
3482                       " in a subsequent read\n");
3483         }
3484         DBG_BREAKPOINT_REASON(NV_ERR_MEMORY_ERROR);
3485         status = NV_ERR_MEMORY_ERROR;
3486         goto kbusVerifyBar2_failed;
3487     }
3488 
    NV_PRINTF_COND(IS_EMULATION(pGpu), LEVEL_NOTICE, LEVEL_INFO, "Bar0 window test passed\n");
3490     GPU_REG_WR32(pGpu, DRF_BASE(NV_PRAMIN) + NvU64_LO32(bar0TestAddr & 0xffff), testData);
3491     GPU_FLD_WR_DRF_NUM(pGpu, _PBUS, _BAR0_WINDOW, _BASE, NvU64_LO32(bar0Window >> 16));
3492     GPU_FLD_WR_DRF_NUM(pGpu, _PBUS, _BAR0_WINDOW, _TARGET, oldAddrSpace);
3493 
3494     if ((testAddrSpace == NV_MMU_PTE_APERTURE_SYSTEM_COHERENT_MEMORY) ||
3495         (testAddrSpace == NV_MMU_PTE_APERTURE_SYSTEM_NON_COHERENT_MEMORY))
3496     {
3497         // Flush GPU write before proceeding to next test (otherwise it may stomp over following CPU writes)
3498         kbusFlush_HAL(pGpu, pKernelBus, BUS_FLUSH_VIDEO_MEMORY | BUS_FLUSH_USE_PCIE_READ);
3499     }
3500     // ==========================================================
3501 
3502 
3503     // ==========================================================
3504     // Does MMU's translation logic work?
3505     NV_PRINTF(LEVEL_INFO,
3506               "MMUTest Writing test data through virtual BAR2 starting at bar2 offset"
3507               " (%p - %p) = %p and of size 0x%x\n", (NvU8 *)pOffset,
3508               (NvU8 *)pKernelBus->virtualBar2[GPU_GFID_PF].pCpuMapping,
3509               (NvU8 *)(pOffset - pKernelBus->virtualBar2[GPU_GFID_PF].pCpuMapping),
3510               testMemorySize);
3511     NV_PRINTF_COND(IS_EMULATION(pGpu), LEVEL_NOTICE, LEVEL_INFO,
                   "MMUTest The physical address being targeted is 0x%llx\n",
3513                    testMemoryOffset);
3514     for(index = 0; index < testMemorySize; index += 4)
3515     {
3516         MEM_WR32( pOffset + index, SAMPLEDATA );
3517     }
3518     // Flush the bar2 writes
    // A uflush should not be required since a bar0 window read follows this
3520     if ((testAddrSpace == NV_MMU_PTE_APERTURE_SYSTEM_COHERENT_MEMORY) ||
3521         (testAddrSpace == NV_MMU_PTE_APERTURE_SYSTEM_NON_COHERENT_MEMORY))
3522     {
3523         kbusFlush_HAL(pGpu, pKernelBus, BUS_FLUSH_VIDEO_MEMORY | BUS_FLUSH_USE_PCIE_READ);
3524     }
3525     osFlushCpuWriteCombineBuffer();
3526 
3527     status = kmemsysSendL2InvalidateEvict(pGpu, pKernelMemorySystem, flagsClean);
3528     if (NV_OK != status)
3529     {
3530         NV_PRINTF(LEVEL_ERROR, "L2 evict failed\n");
3531         goto kbusVerifyBar2_failed;
3532     }
3533 
3534     // Readback through the bar0 window
3535     bar0Window = kbusGetBAR0WindowVidOffset_HAL(pGpu, pKernelBus);
3536     oldAddrSpace = DRF_VAL( _PBUS, _BAR0_WINDOW, _TARGET, GPU_REG_RD32(pGpu, NV_PBUS_BAR0_WINDOW));
3537     GPU_FLD_WR_DRF_NUM(pGpu, _PBUS, _BAR0_WINDOW, _BASE, NvU64_LO32(testMemoryOffset >> 16));
3538     GPU_FLD_WR_DRF_NUM(pGpu, _PBUS, _BAR0_WINDOW, _TARGET, testAddrSpace);
3539 
3540     NV_PRINTF(LEVEL_INFO,
3541               "bar0Window = 0x%llx, testMemoryOffset = 0x%llx, testAddrSpace = %d, "
3542               "_PBUS_BAR0_WINDOW = 0x%08x\n", bar0Window, testMemoryOffset,
3543               testAddrSpace, GPU_REG_RD32(pGpu, NV_PBUS_BAR0_WINDOW));
3544 
3545     temp = (DRF_BASE(NV_PRAMIN) + (NvU32)(testMemoryOffset & 0xffff));
3546     for(index = 0; index < testMemorySize; index += 4)
3547     {
3548         NvU32 bar0WindowData = GPU_REG_RD32(pGpu, temp + index);
3549         if (bar0WindowData != SAMPLEDATA)
3550         {
3551             NV_PRINTF(LEVEL_ERROR,
3552                       "MMUTest BAR0 window offset 0x%x returned garbage 0x%x\n",
3553                       temp + index, bar0WindowData);
3554             NV_PRINTF_COND(IS_EMULATION(pGpu), LEVEL_ERROR, LEVEL_INFO,
3555                            "Setup a trigger for write<bar0 + 0x1700, 0x40> and in the waves search"
3556                            " the last few bar2 virtual writes mixed with bar0 window reads\n");
3557             DBG_BREAKPOINT_REASON(NV_ERR_MEMORY_ERROR);
3558             status = NV_ERR_MEMORY_ERROR;
3559             goto kbusVerifyBar2_failed;
3560         }
3561         // Write through the BAR0 window to be readback through BAR2 later
3562         GPU_REG_WR32(pGpu, temp + index, SAMPLEDATA + 0x10);
3563     }
3564 
3565     GPU_FLD_WR_DRF_NUM(pGpu, _PBUS, _BAR0_WINDOW, _BASE, NvU64_LO32(bar0Window >> 16));
3566     GPU_FLD_WR_DRF_NUM(pGpu, _PBUS, _BAR0_WINDOW, _TARGET, oldAddrSpace);
3567 
3568     status = kbusFlush_HAL(pGpu, pKernelBus, BUS_FLUSH_VIDEO_MEMORY | BUS_FLUSH_USE_PCIE_READ);
3569 
3570     // Bail now if we have encountered any error
3571     if (status != NV_OK)
3572     {
3573         goto kbusVerifyBar2_failed;
3574     }
3575 
3576     status = kmemsysSendL2InvalidateEvict(pGpu, pKernelMemorySystem, flagsClean);
3577     if (NV_OK != status)
3578     {
3579         goto kbusVerifyBar2_failed;
3580     }
3581 
3582     // Verify BAR2 virtual reads
3583     for(index = 0; index < testMemorySize; index +=4)
3584     {
3585         temp = MEM_RD32(pOffset + index);
3586         if (temp != (SAMPLEDATA + 0x10))
3587         {
3588             NV_PRINTF(LEVEL_ERROR,
3589                       "MMUTest BAR2 Read of virtual addr 0x%x returned garbage 0x%x\n",
3590                       (NvU32)(pOffset - pKernelBus->virtualBar2[GPU_GFID_PF].pCpuMapping) + index,
3591                       temp);
3592             DBG_BREAKPOINT_REASON(NV_ERR_MEMORY_ERROR);
3593             status = NV_ERR_MEMORY_ERROR;
3594             goto kbusVerifyBar2_failed;
3595         }
3596     }
3597 
3598 kbusVerifyBar2_failed:
3599     if (bIsStandaloneTest)
3600     {
3601         if (pOffset != NULL)
3602         {
3603             kbusUnmapRmAperture_HAL(pGpu, pMemDesc, &pOffset, NV_TRUE);
3604         }
3605         memdescFree(pMemDesc);
3606         memdescDestroy(pMemDesc);
3607     }
3608 
3609     if (status == NV_OK)
3610     {
3611         NV_PRINTF_COND(IS_EMULATION(pGpu), LEVEL_NOTICE, LEVEL_INFO, "BAR2 virtual test passes\n");
3612     }
3613 
3614     return status;
3615 }
3616 
3617 /*!
 * @brief Initializes the physical addresses of the BAR1 and BAR2 structures
 *
 * @param[in] pKernelBus
3621  */
3622 NV_STATUS
3623 kbusInitBarsBaseInfo_GM107
3624 (
3625     KernelBus  *pKernelBus
3626 )
3627 {
3628     // pKernelBus->pciBars[] should be initialized before the function gets called
3629     NV_ASSERT_OR_RETURN(pKernelBus->pciBars[BUS_BAR_1] != 0, NV_ERR_INVALID_STATE);
3630     NV_ASSERT_OR_RETURN(pKernelBus->pciBars[BUS_BAR_2] != 0, NV_ERR_INVALID_STATE);
3631 
3632     pKernelBus->bar1[GPU_GFID_PF].physAddr   = pKernelBus->pciBars[BUS_BAR_1];
3633     pKernelBus->bar2[GPU_GFID_PF].physAddr   = pKernelBus->pciBars[BUS_BAR_2];
3634 
3635     return NV_OK;
3636 }
3637 
3638 /**
3639  * @brief Set BAR1/BAR2 virtual aperture size and BAR2 CPU visible limit
3640  *
3641  * @param pGpu
3642  * @param pKernelBus
3643  * @param gfid
3644  *
 * @return NV_OK
3646  */
3647 NV_STATUS kbusSetBarsApertureSize_GM107
3648 (
3649     OBJGPU    *pGpu,
3650     KernelBus *pKernelBus,
3651     NvU32      gfid
3652 )
3653 {
3654     NvU32             data32;
3655 
3656     //
3657     // Setup BAR1 aperture size only for GFID_VF
3658     // GFID_PF is done in StateInit phase
3659     //
3660     if (IS_GFID_VF(gfid))
3661     {
3662         kbusDetermineBar1ApertureLength(pKernelBus, gfid);
3663     }
3664 
3665     //
3666     // Setup BAR2 aperture size
3667     // Check to see if a BAR2 aperture size override has been specified.
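    // For example, an override value of 8 (assuming it does not exceed
    // BUS_BAR2_RM_APERTURE_MB) yields an aperture limit of
    // (8 << 20) - 1 = 0x007FFFFF, i.e. an 8MB RM-managed BAR2 aperture.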
3668     //
3669     if (((NV_OK == osReadRegistryDword(pGpu, NV_REG_STR_RM_BAR2_APERTURE_SIZE_MB,
3670                         &data32))) && data32 && data32 <= BUS_BAR2_RM_APERTURE_MB)
3671     {
3672         // Set the BAR2 aperture size based on the override
3673         pKernelBus->bar2[gfid].rmApertureLimit = (data32 << 20) - 1;
3674         //
3675         // This shrinks the bar2 page table and has the side-effect of not
3676         // configuring the upper part of bar2 used for VESA access (because we
        // only apply the override if <= BUS_BAR2_RM_APERTURE_MB).
3678         //
3679         pKernelBus->bar2[gfid].cpuVisibleLimit  = (data32 << 20) - 1;
3680     }
3681     else
3682     {
3683         //
3684         // For simulation mods we limit BAR2 size to decrease PTE init time.
3685         // Backdoor fmodel/RTL could use the standard settings, but want to
        // keep the code path the same for emulation.  With an 8MB BAR2 we do
3687         // not expect instance memory to evict a cached mapping.
3688         //
3689         if ((IS_SIM_MODS(GPU_GET_OS(pGpu)) && IS_SILICON(pGpu) == 0) || (!RMCFG_FEATURE_MODS_FEATURES && IS_SIMULATION(pGpu)))
3690         {
            pKernelBus->bar2[gfid].rmApertureLimit = ((BUS_BAR2_RM_APERTURE_MB >> 1) << 20) - 1;  // 8MB
3692             pKernelBus->bar2[gfid].cpuVisibleLimit = pKernelBus->bar2[gfid].rmApertureLimit;        // No VESA space
3693         }
3694         else
3695         {
3696             pKernelBus->bar2[gfid].cpuVisibleLimit = (BUS_BAR2_APERTURE_MB << 20) - 1;
3697             pKernelBus->bar2[gfid].rmApertureLimit = (BUS_BAR2_RM_APERTURE_MB << 20) - 1;
3698         }
3699     }
3700 
3701     return NV_OK;
3702 }
3703 
3704 /*!
3705  * @brief Calculates the memory needed for allocating a BAR2 Page Dir for a given VA range
3706  *
 * @param[in] vaBase      Base of the VA range covered by the Page Dir.
 * @param[in] vaLimit     Limit of the VA range covered by the Page Dir.
 * @param[in] vaPerEntry  The VA span of one entry within the Page Dir
 *                        whose size is needed.
 * @param[in] entrySize   The size of one PDE within the Page Dir of interest.
3710  *
3711  * @return RM_PAGE_SIZE aligned size in Bytes, needed for the BAR2 Page Dir.
3712  */
3713 static NvU32 _kbusGetSizeOfBar2PageDir_GM107
3714 (
3715     NvU64                vaBase,
3716     NvU64                vaLimit,
3717     NvU64                vaPerEntry,
3718     NvU32                entrySize
3719 )
3720 {
3721     NvU32              size;
3722     NvU32              numEntries;
3723     NvU64              vaBaseAligned;
3724 
3725     NV_ASSERT_OR_RETURN(0 != entrySize, 0);
3726     NV_ASSERT_OR_RETURN(0 != vaPerEntry, 0);
3727 
3728     //
3729     // Calculate number of entries needed within this level to represent
3730     // the entire BAR2 aperture VA range, then align to 4K
3731     //
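    // Worked example with hypothetical values: vaBase = 3MB, vaLimit = 9MB,
    // vaPerEntry = 2MB, entrySize = 8 bytes -> vaBaseAligned = 2MB,
    // numEntries = ceil(7MB / 2MB) = 4, size = 32 bytes, rounded up to 4KB.
    //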
3732     vaBaseAligned = vaBase & ~(vaPerEntry - 1);
3733     numEntries = (NvU32)NV_CEIL(vaLimit - vaBaseAligned, vaPerEntry);
3734     size       = numEntries * entrySize;
3735     size       = NV_ROUNDUP(size, RM_PAGE_SIZE);
3736 
3737     return size;
3738 }
3739 
3740 /*!
3741  * @brief Calculates the memory needed for allocating BAR2 Page Dirs
3742  *
3743  * Size calculation considers all Page Levels defined in @ref GMMU_FMT.
3744  * Assumes Cpu visible region always starts before the invisible region.
 * Assumes the CPU visible region always starts before the CPU invisible region.
3746  * @return RM_PAGE_SIZE aligned size in Bytes, needed for all BAR2 Page Dirs.
3747  */
3748 NvU32 kbusGetSizeOfBar2PageDirs_GM107
3749 (
3750     OBJGPU    *pGpu,
3751     KernelBus *pKernelBus
3752 )
3753 {
3754     KernelGmmu          *pKernelGmmu = GPU_GET_KERNEL_GMMU(pGpu);
3755     const GMMU_FMT      *pFmt   = NULL;
3756     NvU32                size   = 0;
3757     const MMU_FMT_LEVEL *pLevel = NULL;
3758     NvU64                bar2VaLimit = kbusGetVaLimitForBar2_HAL(pGpu, pKernelBus);
3759     NvU16                i;
3760     NvU32                gfid;
3761     NvBool               bContiguous;
3762 
3763     NV_ASSERT_OR_RETURN(vgpuGetCallingContextGfid(pGpu, &gfid) == NV_OK, 0);
3764 
3765     // Return 0 from the guest in the paravirtualization case.
3766     if (IS_VIRTUAL_WITHOUT_SRIOV(pGpu))
3767     {
3768         return 0;
3769     }
3770 
3771     // Get the @ref GMMU_FMT for this chip
3772     pFmt = kgmmuFmtGet(pKernelGmmu, GMMU_FMT_VERSION_DEFAULT, 0);
3773     NV_ASSERT_OR_RETURN(NULL != pFmt, 0);
3774     pLevel = pFmt->pRoot;
3775 
3776     // Cache the size of the root Page Dir, once.
3777     pKernelBus->bar2[gfid].pageDirSize = _kbusGetSizeOfBar2PageDir_GM107(pKernelBus->bar2[gfid].cpuVisibleBase,
3778                                                                          bar2VaLimit,
3779                                                                          mmuFmtEntryVirtAddrMask(pLevel) + 1,
3780                                                                          pLevel->entrySize);
3781 
3782     // Accumulate size for all Page Directories.
3783     pKernelBus->bar2[gfid].numPageDirs = 0;
3784     bContiguous = (pKernelBus->bar2[gfid].cpuVisibleLimit + 1 == pKernelBus->bar2[gfid].cpuInvisibleBase) ||
3785                    pKernelBus->bar2[gfid].cpuInvisibleLimit == 0;
3786 
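    //
    // Walk the levels from the root down to (but not including) the leaf Page
    // Table, accumulating the directory size needed at each level. When the
    // visible and invisible ranges are not contiguous and do not share a
    // page directory entry, each range is sized separately.
    //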
3787     for (i = 0; (i < GMMU_FMT_MAX_LEVELS - 1); i++)
3788     {
3789         NvU32 levelSize = 0;
3790         NvU64 vaPerEntry = mmuFmtEntryVirtAddrMask(pLevel) + 1;
3791 
3792         if (!bContiguous)
3793         {
3794             //
3795             // Avoid double reserving size for page dir when visible and invisible bar2 share the same page directory
3796             // In this case we treat them as contiguous.
3797             //
3798             if ((pKernelBus->bar2[gfid].cpuVisibleLimit  & ~(vaPerEntry - 1)) ==
3799                 (pKernelBus->bar2[gfid].cpuInvisibleBase & ~(vaPerEntry - 1)))
3800             {
3801                 levelSize += _kbusGetSizeOfBar2PageDir_GM107(pKernelBus->bar2[gfid].cpuVisibleBase,
3802                                                              bar2VaLimit,
3803                                                              vaPerEntry,
3804                                                              pLevel->entrySize);
3805             }
3806             else
3807             {
3808                 levelSize += _kbusGetSizeOfBar2PageDir_GM107(pKernelBus->bar2[gfid].cpuInvisibleBase,
3809                                                              pKernelBus->bar2[gfid].cpuInvisibleLimit,
3810                                                              vaPerEntry,
3811                                                              pLevel->entrySize);
3812 
3813                 levelSize += _kbusGetSizeOfBar2PageDir_GM107(pKernelBus->bar2[gfid].cpuVisibleBase,
3814                                                              pKernelBus->bar2[gfid].cpuVisibleLimit,
3815                                                              vaPerEntry,
3816                                                              pLevel->entrySize);
3817             }
3818         }
3819         else
3820         {
3821             levelSize = _kbusGetSizeOfBar2PageDir_GM107(pKernelBus->bar2[gfid].cpuVisibleBase,
3822                                                         bar2VaLimit,
3823                                                         vaPerEntry,
3824                                                         pLevel->entrySize);
3825         }
3826 
3827         // Get the number of directories we need to initialize from the level size.
3828         pKernelBus->bar2[gfid].numPageDirs += levelSize >> RM_PAGE_SHIFT;
3829         size += levelSize;
3830 
3831         // If there's one sublevel choose that.
3832         if (1 == pLevel->numSubLevels)
3833         {
3834             pLevel = &(pLevel->subLevels[0]);
3835         }
3836         else
3837         {
3838             // Choose the 4K page size sublevel.
3839             pLevel = &(pLevel->subLevels[1]);
3840         }
3841         NV_ASSERT_OR_RETURN(NULL != pLevel, 0);
3842 
3843         // Stop accumulating size if we've exhausted all Page Dirs.
3844         if (pLevel->bPageTable && (0 == pLevel->numSubLevels))
3845         {
3846             break;
3847         }
3848     }
3849 
3850     return size;
3851 }
3852 
3853 /*!
3854  * @brief Tunnel bar2 accesses through bar0 window.
3855  *
3856  * This routine is used to re-direct the bar2 accesses which were mapped as
3857  * type BUSBARMAP_TYPE_BAR through the bar0 window. This is a callback
3858  * routine called by osMem[Rd|Wr]*, portMemSet and portMemCopy routines when they
3859  * detect an address is in the bar2 range.
3860  *
3861  *  @param[in]      *pPrivData - Void pointer to callback-user-defined data.
3862  *                               For the purpose here pPrivData just contains
3863  *                               a pointer to pGpu
3864  *  @param[in]       addr      - The address to be tunneled.
3865  *  @param[in/out]  *pData     - Pointer to the data to be read/written.
3866  *  @param[in]       size      - Size of the data to be read/written.
3867  *  @param[in]       bRead     - Read/Write indicator.
3868  *
3869  *  @returns         NV_OK     - if tunneling is successful.
3870  *                   NV_ERR_INVALID_ARGUMENT if the addr argument is not valid
3871  */
3872 static NV_STATUS
3873 _kbusBar0TunnelCb_GM107
3874 (
3875     void           *pPrivData,
3876     NvU64           addr,
3877     void           *pData,
3878     NvU64           size,
3879     NvBool          bRead
3880 )
3881 {
3882     OBJGPU     *pGpu     = reinterpretCast(pPrivData, OBJGPU *);
3883     KernelBus  *pKernelBus  = GPU_GET_KERNEL_BUS(pGpu);
3884     VirtualBar2MapListIter it;
3885     NvU32       offset;
3886 
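    //
    // Walk the list of active virtual BAR2 mappings; if the tunneled address
    // falls within one of them, translate it to an offset within the backing
    // memdesc and service the access through the BAR0 window instead.
    //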
3887     it = listIterAll(&pKernelBus->virtualBar2[GPU_GFID_PF].usedMapList);
3888     while (listIterNext(&it))
3889     {
3890         VirtualBar2MapEntry *pMap = it.pValue;
3891 
3892         // Check if there is a valid mapping for the address passed-in
3893         if (addr >= (NvU64)((NvUPtr)pMap->pRtnPtr) &&
3894            (addr + size - 1) < ((NvU64)((NvUPtr)pMap->pRtnPtr) + pMap->pMemDesc->Size))
3895         {
3896             // Re-direct the access through bar0 window
3897             offset = (NvU32)(addr - (NvU64)((NvUPtr)pMap->pRtnPtr));
3898             return kbusMemAccessBar0Window_HAL(
3899                 pGpu,
3900                 pKernelBus,
3901                 memdescGetPhysAddr(pMap->pMemDesc, FORCE_VMMU_TRANSLATION(pMap->pMemDesc, AT_GPU), offset),
3902                 pData,
3903                 size,
3904                 bRead,
3905                 memdescGetAddressSpace(pMap->pMemDesc));
3906         }
3907     }
3908 
3909     return NV_ERR_INVALID_ARGUMENT;
3910 }
3911 
3912 NvU64
3913 kbusGetBAR0WindowAddress_GM107
3914 (
3915     KernelBus *pKernelBus
3916 )
3917 {
3918     return NV_PRAMIN_DATA008(0);
3919 }
3920 
3921 
3922  /*!
3923  * @brief Returns the first available peer Id
3924  *
3925  * @param[in] pGpu
3926  * @param[in] pKernelBus
3927  *
3928  * @returns NvU32 first free peer Id
3929  */
3930 NvU32
3931 kbusGetUnusedPeerId_GM107
3932 (
3933     OBJGPU    *pGpu,
3934     KernelBus *pKernelBus
3935 )
3936 {
3937     NvU32 peerId;
3938 
3939     for (peerId = 0; peerId < pKernelBus->numPeers; peerId++)
3940     {
3941         if ((pKernelBus->p2pPcie.busPeer[peerId].refCount == 0) &&
3942             (!pKernelBus->p2pPcie.busPeer[peerId].bReserved))
3943         {
3944             return peerId;
3945         }
3946     }
3947 
3948     return BUS_INVALID_PEER;
3949 }
3950 
3951 /*!
3952  * @brief Returns the first available PCIE peer Id
3953  *
3954  * @param[in] pGpu
3955  * @param[in] pKernelBus
3956  *
3957  * @returns NvU32 first free peer Id
3958  */
3959 NvU32
3960 kbusGetUnusedPciePeerId_GM107
3961 (
3962     OBJGPU* pGpu,
3963     KernelBus* pKernelBus
3964 )
3965 {
3966     return kbusGetUnusedPeerId_HAL(pGpu, pKernelBus);
3967 }
3968 
3969 
3970  /*!
3971  * @brief Returns the peer number from pGpu (Local) to pGpuPeer
3972  *
3973  * @param[in] pGpu          Local
3974  * @param[in] pKernelBus    Local
3975  * @param[in] pGpuPeer      Remote
3976  *
3977  * @returns NvU32 bus peer number
3978  */
3979 NvU32
3980 kbusGetPeerId_GM107
3981 (
3982     OBJGPU    *pGpu,
3983     KernelBus *pKernelBus,
3984     OBJGPU    *pGpuPeer
3985 )
3986 {
3987     NvU32 gpuPeerInst = gpuGetInstance(pGpuPeer);
3988     NvU32 peerId;
3989 
3990     if (pKernelBus->p2pPcie.peerNumberMask[gpuPeerInst] == 0)
3991     {
3992         return BUS_INVALID_PEER;
3993     }
3994 
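    //
    // The peer number mask is expected to have exactly one bit set for an SLI
    // peer; take the index of the lowest set bit as the peer ID.
    //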
3995     peerId = pKernelBus->p2pPcie.peerNumberMask[gpuPeerInst];
3996     LOWESTBITIDX_32(peerId);
3997 
3998     return peerId;
3999 }
4000 
4001 /*!
4002  * @brief Returns whether or not the given peerId is valid for the given GPU.
4003  *
4004  * @returns NV_OK if the peerId corresponds to an active peer mapping
4005  *          NV_ERR_INVALID_INDEX otherwise
4006  */
4007 NV_STATUS
4008 kbusIsPeerIdValid_GM107
4009 (
4010     OBJGPU    *pGpu,
4011     KernelBus *pKernelBus,
4012     NvU32      peerId
4013 )
4014 {
4015     NV_ASSERT_OR_RETURN(peerId < P2P_MAX_NUM_PEERS, NV_ERR_INVALID_INDEX);
4016     if (pKernelBus->p2pPcie.peerNumberMask[gpuGetInstance(pGpu)] & NVBIT(peerId))
4017         return NV_OK;
4018     return NV_ERR_INVALID_INDEX;
4019 }
4020 
4021 /*!
4022 * @brief Gets the BAR2 GMMU walker object
4023 *
4024 * @param[in] pKernelBus
4025 *
4026 * @returns MMU_WALK *  Pointer to BAR2 MMU walker
4027 */
4028 MMU_WALK *
4029 kbusGetBar2GmmuWalker_GM107
4030 (
4031     KernelBus *pKernelBus
4032 )
4033 {
4034     OBJGPU*   pGpu = ENG_GET_GPU(pKernelBus);
4035     NvU32     gfid;
4036 
4037     NV_ASSERT_OR_RETURN(vgpuGetCallingContextGfid(pGpu, &gfid) == NV_OK, NULL);
4038 
4039     if (IS_VIRTUAL_WITHOUT_SRIOV(pGpu) ||
4040         (IS_VIRTUAL(pGpu) && gpuIsWarBug200577889SriovHeavyEnabled(pGpu)))
4041     {
4042         return NULL;
4043     }
4044     return pKernelBus->bar2[gfid].pWalk;
4045 }
4046 
4047 /*!
4048 * @brief Gets the BAR2 GMMU format descriptor
4049 *
4050 * @param[in] pKernelBus
4051 *
4052 * @returns const GMMU_FMT *   Pointer to BAR2 GMMU format
4053 */
4054 const GMMU_FMT *
4055 kbusGetBar2GmmuFmt_GM107
4056 (
4057     KernelBus *pKernelBus
4058 )
4059 {
4060     OBJGPU*   pGpu = ENG_GET_GPU(pKernelBus);
4061     NvU32     gfid;
4062 
4063     NV_ASSERT_OR_RETURN(vgpuGetCallingContextGfid(pGpu, &gfid) == NV_OK, NULL);
4064 
4065     if (IS_VIRTUAL_WITHOUT_SRIOV(pGpu) ||
4066         (IS_VIRTUAL(pGpu) && gpuIsWarBug200577889SriovHeavyEnabled(pGpu)))
4067     {
4068         return NULL;
4069     }
4070     return pKernelBus->bar2[gfid].pFmt;
4071 }
4072 
4073 /*!
 * @brief Returns the peer ID corresponding to the peer indexes
4075  *        from the peer ID table
4076  *
4077  * @param[in]  pGpu
4078  * @param[in]  pKernelBus
4079  * @param[in]  locPeerIdx Local peer Index
4080  * @param[in]  remPeerIdx Remote peer Index
4081  *
 * @return NvU32 peerID from the table using the given peer indexes
4083  */
4084 NvU32
4085 kbusGetPeerIdFromTable_GM107
4086 (
4087     OBJGPU    *pGpu,
4088     KernelBus *pKernelBus,
4089     NvU32      locPeerIdx,
4090     NvU32      remPeerIdx
4091 )
4092 {
4093     if (locPeerIdx >= P2P_MAX_NUM_PEERS ||
4094         remPeerIdx >= P2P_MAX_NUM_PEERS)
4095     {
4096         NV_PRINTF(LEVEL_ERROR,
4097                   "Peer number table doesn't support >%u GPUs\n",
4098                   P2P_MAX_NUM_PEERS);
4099 
4100         return BUS_INVALID_PEER;
4101     }
4102 
4103     return peerNumberTable_GM107[locPeerIdx][remPeerIdx];
4104 }
4105 
4106 //
// Description: This function fills in the pciBars[] array with the bus
// addresses of the GPU's PCI BARs.
4109 //
4110 void
4111 kbusInitPciBars_GM107
4112 (
4113     KernelBus *pKernelBus
4114 )
4115 {
4116     OBJGPU *pGpu = ENG_GET_GPU(pKernelBus);
4117 
4118     pKernelBus->pciBars[0] = pGpu->busInfo.gpuPhysAddr;
4119     pKernelBus->pciBars[1] = pGpu->busInfo.gpuPhysFbAddr;
4120     pKernelBus->pciBars[2] = pGpu->busInfo.gpuPhysInstAddr;
4121 
4122     if (! IsAMODEL(pGpu))
4123     {
4124         // Classic dGPUs
4125         pKernelBus->totalPciBars = BUS_NUM_BARS;
4126         pKernelBus->pciBars[3] = pGpu->busInfo.gpuPhysIoAddr;
4127     }
4128     else
4129     {
4130         // AMODEL doesn't have IO BAR
4131         pKernelBus->totalPciBars = 3;
4132     }
4133 }
4134 
4135 NV_STATUS
4136 kbusSetBAR0WindowVidOffset_GM107
4137 (
4138     OBJGPU      *pGpu,
4139     KernelBus   *pKernelBus,
4140     NvU64        vidOffset
4141 )
4142 {
4143     NV_ASSERT( (vidOffset & 0xffff)==0 );
4144     NV_ASSERT( (vidOffset >> 16) <= DRF_MASK(NV_PBUS_BAR0_WINDOW_BASE) );
4145 
4146     // RM initialises cachedBar0WindowVidOffset with 0. Refresh its value with
4147     // current NV_PBUS_BAR0_WINDOW_BASE.
4148     if (pKernelBus->cachedBar0WindowVidOffset == 0)
4149     {
4150         pKernelBus->cachedBar0WindowVidOffset = ((NvU64) GPU_REG_RD_DRF(pGpu, _PBUS, _BAR0_WINDOW, _BASE)) << 16;
4151     }
4152 
4153     // Update only if the new offset is different from the cached value
4154     if (pKernelBus->cachedBar0WindowVidOffset != vidOffset)
4155     {
4156         NV_PRINTF(LEVEL_INFO,
4157                   "mapping BAR0_WINDOW to VID:%x'%08x\n",
4158                   NvU64_HI32(vidOffset), NvU64_LO32(vidOffset));
4159 
4160         GPU_FLD_WR_DRF_NUM(pGpu, _PBUS, _BAR0_WINDOW, _BASE, NvU64_LO32(vidOffset >> 16));
4161         GPU_FLD_WR_DRF_DEF(pGpu, _PBUS, _BAR0_WINDOW, _TARGET, _VID_MEM);
4162 
4163         pKernelBus->cachedBar0WindowVidOffset = vidOffset;
4164     }
4165 
4166     return (NV_OK);
4167 }
4168 
4169 NvU64
4170 kbusGetBAR0WindowVidOffset_GM107
4171 (
4172     OBJGPU      *pGpu,
4173     KernelBus   *pKernelBus
4174 )
4175 {
4176     NvU64 vidOffset;
4177 
4178     // RM initialises cachedBar0WindowVidOffset with 0. Refresh its value with
4179     // current NV_PBUS_BAR0_WINDOW_BASE.
4180     if (pKernelBus->cachedBar0WindowVidOffset == 0)
4181     {
4182         pKernelBus->cachedBar0WindowVidOffset = ((NvU64) GPU_REG_RD_DRF(pGpu, _PBUS, _BAR0_WINDOW, _BASE)) << 16;
4183     }
4184 
4185     vidOffset = pKernelBus->cachedBar0WindowVidOffset;
4186 
4187     return (vidOffset);
4188 }
4189 
4190 /*!
4191  * Mem read/write through the bar0 window.
4192  *
4193  * This routine is used to re-direct the bar2 accesses which were mapped as
4194  * type BUSBARMAP_TYPE_BAR through the bar0 window.
4195  *
4196  *  @param[in]       pGpu
4197  *  @param[in]       pKernelBus
 *  @param[in]       physAddr   - physical address of the accessed memory
 *  @param[in,out]   pData      - pointer to the data to be read/written
 *  @param[in]       accessSize - Size of the data to be read/written
4200  *  @param[in]       bRead      - Read or Write flag
4201  *  @param[in]       addrSpace  - aperture of the accessed memory
4202  *  @returns         NV_STATUS
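 *
 *  Usage sketch (illustrative; not taken from a call site in this file):
 *      NvU32 val;
 *      status = kbusMemAccessBar0Window_HAL(pGpu, pKernelBus, physAddr,
 *                                           &val, sizeof(val), NV_TRUE,
 *                                           ADDR_FBMEM);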
4203  */
4204 NV_STATUS
4205 kbusMemAccessBar0Window_GM107
4206 (
4207     OBJGPU                 *pGpu,
4208     KernelBus              *pKernelBus,
4209     NvU64                   physAddr,
4210     void                   *pData,
4211     NvU64                   accessSize,
4212     NvBool                  bRead,
4213     NV_ADDRESS_SPACE        addrSpace
4214 )
4215 {
4216     NvU64              bar0WindowOffset;
4217     NvU64              bar0WindowOrig;
4218     NvBool             bRestoreWindow = NV_FALSE;
4219 
    // The following code assumes the aperture to be VID_MEM (or that vidmem and sysmem are the same).
4221     NV_ASSERT(gpuIsUnifiedMemorySpaceEnabled(pGpu) || (addrSpace == ADDR_FBMEM));
4222 
4223     bar0WindowOrig   = kbusGetBAR0WindowVidOffset_HAL(pGpu, pKernelBus);
4224     bar0WindowOffset = physAddr - bar0WindowOrig;
4225 
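    //
    // If the access does not fit within the currently programmed BAR0 window,
    // slide the window to a 64KB-aligned base that covers physAddr and note
    // that it may need to be restored afterwards.
    //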
4226     if (bar0WindowOffset + accessSize > DRF_SIZE(NV_PRAMIN))
4227     {
4228         kbusSetBAR0WindowVidOffset_HAL(pGpu, pKernelBus, (physAddr & ~0xffff));
4229         bar0WindowOffset = physAddr - kbusGetBAR0WindowVidOffset_HAL(pGpu, pKernelBus);
4230         bRestoreWindow = NV_TRUE;
4231     }
4232 
4233     if (bRead)
4234     {
4235         // Read access
4236         switch (accessSize)
4237         {
4238             case 1:
4239                 *((NvU8  *)pData) = (NvU8)GPU_REG_RD08(pGpu, NV_PRAMIN_DATA008(bar0WindowOffset));
4240                 break;
4241             case 2:
4242                 *((NvU16 *)pData) = (NvU16)GPU_REG_RD16(pGpu, NV_PRAMIN_DATA008(bar0WindowOffset));
4243                 break;
4244             case 4:
4245                 *((NvU32 *)pData) = (NvU32)GPU_REG_RD32(pGpu, NV_PRAMIN_DATA008(bar0WindowOffset));
4246                 break;
4247             case 8:
4248                 // TO DO: Create GPU_REG_RD64
4249                 *((NvU32 *)pData)     = (NvU32)GPU_REG_RD32(pGpu, NV_PRAMIN_DATA008(bar0WindowOffset));
4250                 *((NvU32 *)pData + 1) = (NvU32)GPU_REG_RD32(pGpu, NV_PRAMIN_DATA008(bar0WindowOffset + 4));
4251                 break;
4252 
4253             default:
4254                 NV_ASSERT(0);
4255                 return NV_ERR_GENERIC;
4256         }
4257     }
4258     else
4259     {
4260         // Write access
4261         switch (accessSize)
4262         {
4263             case 1:
4264                 GPU_REG_WR08(pGpu, NV_PRAMIN_DATA008(bar0WindowOffset), (NvU8)(*((NvU8 *)pData) & 0xff));
4265                 break;
4266             case 2:
4267                 GPU_REG_WR16(pGpu, NV_PRAMIN_DATA008(bar0WindowOffset), (NvU16)(*((NvU16 *)pData) & 0xffff));
4268                 break;
4269             case 4:
4270                 GPU_REG_WR32(pGpu, NV_PRAMIN_DATA008(bar0WindowOffset), *((NvU32 *)pData));
4271                 break;
4272             case 8:
4273                 // TO DO: Create GPU_REG_WR64
4274                 GPU_REG_WR32(pGpu, NV_PRAMIN_DATA008(bar0WindowOffset),     *((NvU32 *)pData));
4275                 GPU_REG_WR32(pGpu, NV_PRAMIN_DATA008(bar0WindowOffset + 4), *((NvU32 *)pData + 1));
4276                 break;
4277 
4278             default:
4279                 NV_ASSERT(0);
4280                 return NV_ERR_GENERIC;
4281         }
4282     }
4283 
4284     //
4285     // The Bar0 window will be restored after Bar2 bootstrap
4286     // so check if we can skip restoring the window to avoid
4287     // these extra register writes to adjust the WINDOW which may
4288     // cause a timeout failure on some GA10X fmodel environment tests.
4289     // By skipping the restore function here we ensure the following
4290     // Bar2 PT writes have the Bar0 window already set up.
4291     //
4292     if (bRestoreWindow && !pKernelBus->bar2[GPU_GFID_PF].bBootstrap)
4293     {
4294         NV_ASSERT_OK_OR_RETURN(kbusSetBAR0WindowVidOffset_HAL(pGpu, pKernelBus, bar0WindowOrig));
4295     }
4296 
4297     return NV_OK;
4298 }
4299 
4300 /*!
4301  * Optimized memcopy through the bar0 window.
4302  *
4303  *  @param[in]       pGpu
4304  *  @param[in]       pKernelBus
4305  *  @param[in]       physAddr   - physical address of the accessed memory
4306  *  @param[in]       pSysmem    - sysmem buffer to read from/write to
4307  *  @param[in]       size       - Size of the data to be read/written
4308  *  @param[in]       bRead      - Read into sysmem buffer or write to it
4310  *  @returns         NV_STATUS
4311  */
4312 NV_STATUS
4313 kbusMemCopyBar0Window_GM107
4314 (
4315     OBJGPU                 *pGpu,
4316     KernelBus              *pKernelBus,
4317     RmPhysAddr              physAddr,
4318     void                   *pSysmem,
4319     NvLength                size,
4320     NvBool                  bRead
4321 )
4322 {
4323     NV_STATUS ret = NV_ERR_NOT_SUPPORTED;
4324     NvLength copied = 0;
4325     NvU8 *pSysmemBuf = pSysmem;
4326     NvU64 fbCopyOffset = physAddr;
4327     const NvLength windowSize = DRF_SIZE(NV_PRAMIN);
4328 
4329     NV_CHECK_OR_RETURN(LEVEL_INFO, size > 0, NV_OK);
4330 
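    //
    // Copy in chunks: each iteration slides the BAR0 window to a 64KB-aligned
    // base covering the current FB offset, then copies at most the remainder
    // of the window (or of the request) through the PRAMIN mapping.
    //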
4331     do
4332     {
4333         NvU64 praminFbBase = NV_ALIGN_DOWN64(fbCopyOffset, 0x10000);
4334         NvLength praminOffset = fbCopyOffset - praminFbBase;
4335         NvU8 *pPramin = ((NvU8 *)pGpu->registerAccess.gpuInstAddr) + praminOffset;
4336         NvLength copySize = NV_MIN(size - copied, windowSize - praminOffset);
4337         NvU8 *pSource = bRead ? pPramin : pSysmemBuf;
4338         NvU8 *pDest = bRead ? pSysmemBuf : pPramin;
4339 
4340         ret = kbusSetBAR0WindowVidOffset_HAL(pGpu, pKernelBus, praminFbBase);
4341         NV_ASSERT_OK(ret);
4342 
4343         // TODO: use MMIO-safe memcopy abstraction if provided
4344         portMemCopy(pDest, copySize, pSource, copySize);
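        // Yield to the OS between chunks (assumed intent of the osSchedule() call below) so a long copy does not starve other work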
4345         osSchedule();
4346 
4347         fbCopyOffset += copySize;
4348         pSysmemBuf += copySize;
4349         copied += copySize;
4350     }
4351     while (copied < size);
4352 
4353     return ret;
4354 }
4355 
/*!
 * @brief Determine whether the memory can be direct mapped or must be BAR mapped
 *
 * @param[in]   pGpu
 * @param[in]   pKernelBus
 * @param[in]   pMemDesc    Memory descriptor pointer
 * @param[in]   mapFlags    Flags used for mapping
 * @param[out]  bDirectSysMappingAllowed  Set to NV_TRUE if a direct sysmem mapping is allowed
 *
 * @returns NV_OK, since HW supports reflected mappings
 */
4367 NV_STATUS
4368 kbusIsDirectMappingAllowed_GM107
4369 (
4370     OBJGPU            *pGpu,
4371     KernelBus         *pKernelBus,
4372     MEMORY_DESCRIPTOR *pMemDesc,
4373     NvU32              mapFlags,
4374     NvBool            *bDirectSysMappingAllowed
4375 )
4376 {
4377     MemoryManager *pMemoryManager = GPU_GET_MEMORY_MANAGER(pGpu);
4378     NvBool bAllowReflectedMapping = NV_FALSE;
4379     NvU32  pteKind = memdescGetPteKind(pMemDesc);
4380 
4381     //
4382     // Bug 2033948: Will remove supporting reflected mapping for Z surfaces in sysmem,
4383     // as soon as MODS implements Z swizzling. Only for MODS.
4384     //
4385     if (pKernelBus->bAllowReflectedMappingAccess &&
4386         memmgrIsKind_HAL(pMemoryManager, FB_IS_KIND_Z, pteKind))
4387     {
4388         bAllowReflectedMapping = NV_TRUE;
4389     }
4390 
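    //
    // Allow a direct sysmem mapping only if the surface does not require a
    // reflected mapping for its PTE kind, is not encrypted, is not GPU cached,
    // and the caller did not explicitly request a reflected mapping.
    //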
4391     *bDirectSysMappingAllowed =
4392          (!(bAllowReflectedMapping) &&
4393          (!memdescGetFlag(pMemDesc, MEMDESC_FLAGS_ENCRYPTED))  &&
4394          (memdescGetGpuCacheAttrib(pMemDesc) != NV_MEMORY_CACHED) &&
4395          (DRF_VAL(OS33, _FLAGS, _MAPPING, mapFlags) != NVOS33_FLAGS_MAPPING_REFLECTED));
4396 
4397     return NV_OK;
4398 }
4399 
/*!
 *  @brief Determine if we should use a direct mapping.
4403  *
 *  RM tries to pick the most efficient mapping possible.  Frame buffer addresses
 *  leave no choice: they must be mapped through BAR2.  For system memory we prefer
 *  direct mappings on dGPU, as reflected transactions can deadlock the PCIe bus.
4408  *
4409  *  The conditions in which we choose to map system memory through BAR2 are:
4410  *    - Running swap endian and we need BAR2 to do byte swapping
4411  *    - Allowed by verification BAR2_SYSMEM_ENABLE property
4412  *    - Memory is GPU cached
4413  *
 *  The Allocated flag is required for a direct system memory map on some platforms,
 *  as extra information is needed to complete the mapping request.
 *  User-allocated system memory must be direct mapped (never reflected), but its
 *  memDesc may have the Allocated flag set to false, so we check for that case
 *  as well.
4419  *
4420  *  RM does not map block linear or compressed buffers.  If those come up
4421  *  we will have to check for them.
4422  *
 *  We now allow direct mapping on allocated memdescs as well as submemdescs.
 *  The parent-descriptor check handles specific cases where the memDesc is not
 *  allocated and has no parent, e.g., when GMMU PTEs are allocated from the
 *  reserved sysmem heap and memdescDescribe() is used to populate the PTE
 *  memdesc. This happens on WinXP and needs a reflected BAR2 mapping.
4428  *
 *  On Tegra we do not want to go through BAR2 (i.e., tunneled via BAR0), since it
 *  is expensive. Bus cache-maintenance code ensures coherency between CPU and GPU
 *  on Tegra. dGPU could use this path in the future as well.
4432  *
4433  * @param[in]     pGpu
4434  * @param[in]     pKernelBus
4435  * @param[in]     pMemDesc           MEMORY_DESCRIPTOR pointer
 * @param[out]    pbAllowDirectMap   Set to NV_TRUE if a direct mapping should be used
4437  *
 * @returns NV_OK if supported,
 *          NV_ERR_NOT_SUPPORTED otherwise
4440  */
4441 NV_STATUS
4442 kbusUseDirectSysmemMap_GM107
4443 (
4444     OBJGPU            *pGpu,
4445     KernelBus         *pKernelBus,
4446     MEMORY_DESCRIPTOR *pMemDesc,
4447     NvBool            *pbAllowDirectMap
4448 )
4449 {
4450     *pbAllowDirectMap = NV_FALSE;
4451 
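    //
    // Direct map only non-FB memory, and only when verification has not forced
    // BAR2 sysmem access, the memdesc carries enough backing information
    // (allocated, has a parent, or has an explicit page array / peer IO pages),
    // and the memory is GPU-uncached (or we are on Tegra).
    //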
4452     if((memdescGetAddressSpace(pMemDesc) != ADDR_FBMEM) &&
4453      (!kbusIsBar2SysmemAccessEnabled(pKernelBus)) &&
4454      (pMemDesc->Allocated || memdescGetParentDescriptor(pMemDesc) ||
4455       memdescGetFlag(pMemDesc, MEMDESC_FLAGS_EXT_PAGE_ARRAY_MEM) ||
4456       memdescGetFlag(pMemDesc, MEMDESC_FLAGS_PEER_IO_MEM)) &&
4457      ((memdescGetGpuCacheAttrib(pMemDesc) == NV_MEMORY_UNCACHED) || IsTEGRA(pGpu)))
4458     {
4459          *pbAllowDirectMap =  NV_TRUE;
4460     }
4461 
4462     return NV_OK;
4463 }
4464 
4465 /*!
4466  * Update BAR1 instance block VAS state and rebind it to HW.
4467  */
4468 NV_STATUS
4469 kbusBar1InstBlkVasUpdate_GM107
4470 (
4471     OBJGPU            *pGpu,
4472     KernelBus         *pKernelBus
4473 )
4474 {
4475     KernelGmmu          *pKernelGmmu = GPU_GET_KERNEL_GMMU(pGpu);
4476     MemoryManager       *pMemoryManager = GPU_GET_MEMORY_MANAGER(pGpu);
4477     OBJVASPACE          *pBar1VAS = kbusGetBar1VASpace_HAL(pGpu, pKernelBus);
4478     INST_BLK_INIT_PARAMS params = {0};
4479     NvU32                gfid;
4480     NV_STATUS            status = NV_OK;
4481 
4482     NV_ASSERT_OK_OR_RETURN(vgpuGetCallingContextGfid(pGpu, &gfid));
4483 
4484     // Nothing to be done in the guest in the paravirtualization case.
4485     if (IS_VIRTUAL_WITHOUT_SRIOV(pGpu) ||
4486         (IS_VIRTUAL(pGpu) && gpuIsWarBug200577889SriovHeavyEnabled(pGpu)))
4487     {
4488         return NV_OK;
4489     }
4490 
4491     if (IS_GFID_VF(gfid) &&
4492         pKernelBus->bar1[gfid].pInstBlkMemDesc == NULL)
4493     {
        //
        // The VF BAR1 instance block cannot be in PF sysmem, as the latter
        // is not mapped into the VF's IOMMU domain.
        //
4498         NV_ASSERT_OR_RETURN(pKernelBus->InstBlkAperture == ADDR_FBMEM, NV_ERR_INVALID_ARGUMENT);
4499 
4500         if ((status = memdescCreate(&pKernelBus->bar1[gfid].pInstBlkMemDesc,
4501                                     pGpu,
4502                                     GF100_BUS_INSTANCEBLOCK_SIZE,
4503                                     GF100_BUS_INSTANCEBLOCK_SIZE,
4504                                     NV_TRUE,
4505                                     pKernelBus->InstBlkAperture,
4506                                     pKernelBus->InstBlkAttr,
4507                                     MEMDESC_FLAGS_OWNED_BY_CURRENT_DEVICE)) != NV_OK)
4508         {
4509             NV_ASSERT_OR_RETURN(status == NV_OK, status);
4510         }
4511 
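        // Back the instance block descriptor with memory and zero it before first use.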
4512         status = memdescAlloc(pKernelBus->bar1[gfid].pInstBlkMemDesc);
4513         NV_ASSERT_OR_RETURN(status == NV_OK, status);
4514 
4515         status = memmgrMemDescMemSet(pMemoryManager,
4516                                      pKernelBus->bar1[gfid].pInstBlkMemDesc,
4517                                      0,
4518                                      TRANSFER_FLAGS_NONE);
4519         NV_ASSERT_OR_RETURN(status == NV_OK, status);
4520 
4521         pKernelBus->bar1[gfid].instBlockBase =
4522                      memdescGetPhysAddr(pKernelBus->bar1[gfid].pInstBlkMemDesc,
4523                                     AT_GPU, 0);
4524     }
4525 
4526     // Initialize the instance block VAS state.
4527     NV_ASSERT_OK_OR_RETURN(
4528         kgmmuInstBlkInit(pKernelGmmu, pKernelBus->bar1[gfid].pInstBlkMemDesc, pBar1VAS,
4529                         FIFO_PDB_IDX_BASE, &params));
4530 
4531     //
4532     // (Re-)bind instance block so host fetches the new VAS state.
4533     // Flush to ensure host sees the latest.
4534     //
4535     kbusFlush_HAL(pGpu, pKernelBus, BUS_FLUSH_VIDEO_MEMORY);
4536 
4537     return NV_OK;
4538 }
4539 
4540