1 /*
2  * SPDX-FileCopyrightText: Copyright (c) 2006-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
3  * SPDX-License-Identifier: MIT
4  *
5  * Permission is hereby granted, free of charge, to any person obtaining a
6  * copy of this software and associated documentation files (the "Software"),
7  * to deal in the Software without restriction, including without limitation
8  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
9  * and/or sell copies of the Software, and to permit persons to whom the
10  * Software is furnished to do so, subject to the following conditions:
11  *
12  * The above copyright notice and this permission notice shall be included in
13  * all copies or substantial portions of the Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
21  * DEALINGS IN THE SOFTWARE.
22  */
23 
24 #include "core/core.h"
25 #include "gpu/gpu.h"
26 
27 #include <class/cl0080.h>
28 #include <class/cl00fc.h>      // FABRIC_VASPACE_A
29 #include "rmapi/rs_utils.h"
30 #include "gpu/mmu/kern_gmmu.h"
31 #include "gpu/device/device.h"
32 #include "gpu/mem_mgr/mem_mgr.h"
33 #include "gpu/bus/p2p_api.h"
34 #include "kernel/gpu/mig_mgr/kernel_mig_manager.h"
35 #include "kernel/gpu/nvlink/kernel_nvlink.h"
36 #include "rmapi/rmapi.h"
37 #include "core/locks.h"
38 #include "vgpu/rpc.h"
39 #include "virtualization/hypervisor/hypervisor.h"
40 #include "mem_mgr/vaspace.h"
41 #include "mem_mgr/fabric_vaspace.h"
42 #include "mem_mgr/virt_mem_mgr.h"
43 #include <os/os.h>
44 
45 #include "published/ampere/ga100/dev_nv_xve.h"
46 #include "published/ampere/ga100/dev_ram.h"  // NV_RAMIN_ALLOC_SIZE
47 #include "ctrl/ctrl2080/ctrl2080fla.h" // NV2080_CTRL_CMD_FLA_SETUP_INSTANCE_MEM_BLOCK
48 
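// NVSwitch mapslots cover 64GB (2^36) each, so fabric/FLA base addresses must be 64GB-aligned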
49 #define NVLNK_FABRIC_ADDR_GRANULARITY                                      36
50 
51 /*!
 * @brief Sets up the legacy FLA state for the GPU. This function allocates an RM
 * client, which owns the FERMI_VASPACE_A object allocated with the FLA flag.
54  *
55  * @param[in]  base       VASpace base
56  * @param[in]  size       VASpace size
57  *
58  * @return NV_OK if successful
59  */
60 NV_STATUS
61 kbusAllocateLegacyFlaVaspace_GA100
62 (
63     OBJGPU    *pGpu,
64     KernelBus *pKernelBus,
65     NvU64      base,
66     NvU64      size
67 )
68 {
69     NV0080_ALLOC_PARAMETERS nv0080AllocParams = {0};
70     NV2080_ALLOC_PARAMETERS nv2080AllocParams = {0};
71     NV_VASPACE_ALLOCATION_PARAMETERS vaParams = {0};
72     RM_API   *pRmApi = rmapiGetInterface(RMAPI_GPU_LOCK_INTERNAL);
73     NvBool    bAcquireLock = NV_FALSE;
74     RsClient *pClient;
75     NV_STATUS    status = NV_OK;
76 
    // Allocate the RM client which owns the FLA VASpace
78     status = pRmApi->AllocWithHandle(pRmApi, NV01_NULL_OBJECT, NV01_NULL_OBJECT, NV01_NULL_OBJECT,
79                                      NV01_ROOT, &pKernelBus->flaInfo.hClient, sizeof(pKernelBus->flaInfo.hClient));
80     NV_ASSERT_OR_RETURN(status == NV_OK, status);
81 
82     status = serverGetClientUnderLock(&g_resServ, pKernelBus->flaInfo.hClient, &pClient);
83     NV_ASSERT_OR_GOTO(status == NV_OK, free_client);
84 
85     status = serverutilGenResourceHandle(pKernelBus->flaInfo.hClient, &pKernelBus->flaInfo.hDevice);
86     NV_ASSERT_OR_GOTO(status == NV_OK, free_client);
87 
88     // Allocate a device handle
89     nv0080AllocParams.deviceId = gpuGetDeviceInstance(pGpu);
90     status = pRmApi->AllocWithHandle(pRmApi, pKernelBus->flaInfo.hClient, pKernelBus->flaInfo.hClient,
91                                      pKernelBus->flaInfo.hDevice, NV01_DEVICE_0,
92                                      &nv0080AllocParams, sizeof(nv0080AllocParams));
93 
94     if (status != NV_OK)
95     {
96         NV_PRINTF(LEVEL_ERROR, "failed creating device, status=0x%x\n", status);
97         goto free_client;
98     }
99 
100 
101     status = serverutilGenResourceHandle(pKernelBus->flaInfo.hClient, &pKernelBus->flaInfo.hSubDevice);
102     NV_ASSERT_OR_GOTO(status == NV_OK, free_client);
103 
    // Allocate a subdevice handle
105     nv2080AllocParams.subDeviceId = gpumgrGetSubDeviceInstanceFromGpu(pGpu);
106 
107     status = pRmApi->AllocWithHandle(pRmApi, pKernelBus->flaInfo.hClient, pKernelBus->flaInfo.hDevice,
108                                      pKernelBus->flaInfo.hSubDevice, NV20_SUBDEVICE_0,
109                                      &nv2080AllocParams, sizeof(nv2080AllocParams));
110 
111     if (status != NV_OK)
112     {
113         NV_PRINTF(LEVEL_ERROR, "failed creating sub-device, status=0x%x\n",
114                   status);
115         goto free_client;
116     }
117 
118     // Allocate the FERMI_VASPACE_A FLA VASpace
119     vaParams.index  = NV_VASPACE_ALLOCATION_INDEX_GPU_NEW;
120     vaParams.vaBase = base;
121     vaParams.vaSize = size;
122     vaParams.flags  |= NV_VASPACE_ALLOCATION_FLAGS_IS_FLA;
123 
124     // Generate a vaspace handle for FERMI_VASPACE_A object allocation
125     status = serverutilGenResourceHandle(pKernelBus->flaInfo.hClient, &pKernelBus->flaInfo.hFlaVASpace);
126     if (status != NV_OK)
127     {
128         NV_PRINTF(LEVEL_ERROR,
129                   "failed generating vaspace handle, status=0x%x\n", status);
130         goto free_client;
131     }
132 
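    //
    // The FERMI_VASPACE_A allocation below may itself need to acquire GPU locks,
    // so if this thread already owns the device GPU lock, drop it and switch to
    // the API-lock-internal RMAPI for the allocation; the GPU lock is re-acquired
    // right after.
    //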
133     if (rmDeviceGpuLockIsOwner(pGpu->gpuInstance))
134     {
135         rmGpuLocksRelease(GPUS_LOCK_FLAGS_NONE, NULL);
136         bAcquireLock = NV_TRUE;
137         pRmApi = rmapiGetInterface(RMAPI_API_LOCK_INTERNAL);
138     }
139 
140     // Allocate a FERMI_VASPACE_A object and associate it with hFlaVASpace
141     status = pRmApi->AllocWithHandle(pRmApi, pKernelBus->flaInfo.hClient, pKernelBus->flaInfo.hDevice,
142                                      pKernelBus->flaInfo.hFlaVASpace, FERMI_VASPACE_A,
143                                      &vaParams, sizeof(vaParams));
144     if (bAcquireLock)
145     {
146         NV_ASSERT_OK_OR_CAPTURE_FIRST_ERROR(status,
147                     rmGpuLocksAcquire(GPUS_LOCK_FLAGS_NONE, RM_LOCK_MODULES_MEM_FLA));
148         bAcquireLock = NV_FALSE;
149         pRmApi = rmapiGetInterface(RMAPI_GPU_LOCK_INTERNAL);
150     }
151 
152     if (status != NV_OK)
153     {
154         NV_PRINTF(LEVEL_ERROR, "failed allocating vaspace, status=0x%x\n",
155                   status);
156         goto free_client;
157     }
158 
159     if (!(IS_VIRTUAL(pGpu) && gpuIsWarBug200577889SriovHeavyEnabled(pGpu)))
160     {
161         // Get the FLA VASpace associated with hFlaVASpace
162         status = vaspaceGetByHandleOrDeviceDefault(pClient,
163                                                    pKernelBus->flaInfo.hDevice,
164                                                    pKernelBus->flaInfo.hFlaVASpace,
165                                                    &pKernelBus->flaInfo.pFlaVAS);
166         if (status != NV_OK)
167         {
168             NV_PRINTF(LEVEL_ERROR,
169                     "failed getting the vaspace from handle, status=0x%x\n",
170                     status);
171             goto free_client;
172         }
173     }
174     return NV_OK;
175 
176 free_client:
177     pRmApi->Free(pRmApi, pKernelBus->flaInfo.hClient, pKernelBus->flaInfo.hClient);
178     portMemSet(&pKernelBus->flaInfo, 0, sizeof(pKernelBus->flaInfo));
179 
180     NV_PRINTF(LEVEL_ERROR, "failed allocating FLA VASpace status=0x%x\n",
181               status);
182     return status;
183 }
184 
185 /*!
 * @brief Sets up the fabric FLA state for the GPU. This function allocates the fabric VASpace,
 *        allocates the PDBs for both the legacy and fabric VAS, allocates the instance block,
 *        initializes it with the legacy VAS, and binds the instance block to HW.
189  *
190  * @param[in]  base       VASpace base
191  * @param[in]  size       VASpace size
192  *
193  * @return NV_OK if successful
194  */
195 NV_STATUS
196 kbusAllocateFlaVaspace_GA100
197 (
198     OBJGPU    *pGpu,
199     KernelBus *pKernelBus,
200     NvU64      base,
201     NvU64      size
202 )
203 {
204     NV_STATUS    status = NV_OK;
205     OBJVMM      *pVmm   = SYS_GET_VMM(SYS_GET_INSTANCE());
206     KernelGmmu  *pKernelGmmu  = GPU_GET_KERNEL_GMMU(pGpu);
207     INST_BLK_INIT_PARAMS pInstblkParams = {0};
208     RM_API   *pRmApi = rmapiGetInterface(RMAPI_GPU_LOCK_INTERNAL);
209 
210     NV_ASSERT_OR_RETURN(pGpu != NULL, NV_ERR_INVALID_ARGUMENT);
211     NV_ASSERT_OR_RETURN(size != 0, NV_ERR_INVALID_ARGUMENT);
212     NV_ASSERT_OR_RETURN(!pKernelBus->flaInfo.bFlaAllocated, NV_ERR_INVALID_ARGUMENT);
213 
214     pKernelBus->flaInfo.base = base;
215     pKernelBus->flaInfo.size = size;
216 
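    //
    // For SR-IOV enabled GPUs connected to an NVSwitch, just record that the FLA
    // range has been registered; the FLA VASpace itself is not allocated on this path.
    //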
217     if (gpuIsSriovEnabled(pGpu))
218     {
219         KernelNvlink *pKernelNvlink = GPU_GET_KERNEL_NVLINK(pGpu);
220 
221         if (pKernelNvlink != NULL &&
222             knvlinkIsGpuConnectedToNvswitch(pGpu, pKernelNvlink))
223         {
224             pKernelBus->flaInfo.bFlaRangeRegistered = NV_TRUE;
225             return status;
226         }
227     }
228 
229     NV_ASSERT_OK_OR_RETURN(kbusAllocateLegacyFlaVaspace_HAL(pGpu, pKernelBus, base, size));
230 
231     // Allocate a FABRIC_VASPACE_A object
232     status = vmmCreateVaspace(pVmm, FABRIC_VASPACE_A, pGpu->gpuId, gpumgrGetGpuMask(pGpu),
233                               base, base + size - 1, 0, 0, NULL, 0,
234                               &pGpu->pFabricVAS);
235 
236     if (status != NV_OK)
237     {
238         NV_PRINTF(LEVEL_ERROR, "failed allocating fabric vaspace, status=0x%x\n",
239                   status);
240         goto free_client;
241     }
242 
243     //
244     // For SRIOV Heavy enabled guests, VAS PTs are managed by host
245     // Enabling the same path for GSP-RM offload, where VAS is managed in GSP-RM
246     //
247     if (IS_VIRTUAL(pGpu) && gpuIsWarBug200577889SriovHeavyEnabled(pGpu))
248     {
249         NV2080_CTRL_FLA_RANGE_PARAMS params = {0};
250         RsClient   *pClient;
251         params.mode = NV2080_CTRL_FLA_RANGE_PARAMS_MODE_HOST_MANAGED_VAS_INITIALIZE;
252         params.base = base;
253         params.size = size;
254         params.hVASpace = pKernelBus->flaInfo.hFlaVASpace;
255         NV_RM_RPC_CONTROL(pGpu, pKernelBus->flaInfo.hClient, pKernelBus->flaInfo.hSubDevice,
256                           NV2080_CTRL_CMD_FLA_RANGE,
257                           &params, sizeof(params), status);
258 
259         if (status != NV_OK)
260         {
261             goto free_client;
262         }
263 
264         serverGetClientUnderLock(&g_resServ, pKernelBus->flaInfo.hClient, &pClient);
265 
266         status = vaspaceGetByHandleOrDeviceDefault(pClient,
267                                                    pKernelBus->flaInfo.hDevice,
268                                                    pKernelBus->flaInfo.hFlaVASpace,
269                                                    &pKernelBus->flaInfo.pFlaVAS);
270         if (status != NV_OK)
271         {
272             NV_PRINTF(LEVEL_ERROR,
273                     "failed getting the vaspace from handle, status=0x%x\n",
274                     status);
275             goto free_rpc;
276         }
277     }
278     else
279     {
280         // Pin the VASPACE page directory for pFlaVAS before writing the instance block
281         status = vaspacePinRootPageDir(pKernelBus->flaInfo.pFlaVAS, pGpu);
282         if (status != NV_OK)
283         {
284             NV_PRINTF(LEVEL_ERROR, "failed pinning down FLAVASpace, status=0x%x\n",
285                     status);
286             goto unpin_rootpagedir;
287         }
288 
289         if (pGpu->pFabricVAS != NULL)
290         {
291             // Pin the VASPACE page directory for pFabricVAS before writing the instance block
292             status = vaspacePinRootPageDir(pGpu->pFabricVAS, pGpu);
293             if (status != NV_OK)
294             {
295                 NV_PRINTF(LEVEL_ERROR, "failed pinning down fabric vaspace, status=0x%x\n",
296                           status);
297                 goto unpin_rootpagedir;
298             }
299         }
300 
301         // Construct instance block
302         status = kbusConstructFlaInstBlk_HAL(pGpu, pKernelBus, GPU_GFID_PF);
303         if (status != NV_OK)
304         {
305             NV_PRINTF(LEVEL_ERROR,
306                     "failed constructing instblk for FLA, status=0x%x\n",
307                     status);
308             goto free_instblk;
309         }
310 
311         // Instantiate Inst Blk for pFlaVAS
312         status = kgmmuInstBlkInit(pKernelGmmu,
313                                  pKernelBus->flaInfo.pInstblkMemDesc,
314                                  pKernelBus->flaInfo.pFlaVAS, FIFO_PDB_IDX_BASE,
315                                  &pInstblkParams);
316         if (status != NV_OK)
317         {
318             NV_PRINTF(LEVEL_ERROR,
319                     "failed instantiating instblk for FLA, status=0x%x\n",
320                     status);
321             goto free_instblk;
322         }
323     }
324     pKernelBus->flaInfo.bFlaAllocated    = NV_TRUE;
325     pKernelBus->flaInfo.bToggleBindPoint = NV_TRUE;
326 
327     if (pGpu->pFabricVAS != NULL)
328     {
329         NV_ASSERT_OK_OR_GOTO(status, fabricvaspaceInitUCRange(
330                                      dynamicCast(pGpu->pFabricVAS, FABRIC_VASPACE), pGpu,
331                                      base, size), free_instblk);
332     }
333 
334     //
335     // For SRIOV PF/VF system, always check for P2P allocation to determine whether
336     // this function is allowed to bind FLA
337     //
338     if (gpuIsSriovEnabled(pGpu) || IS_VIRTUAL(pGpu))
339     {
340         if (gpuCheckIsP2PAllocated_HAL(pGpu))
341         {
342             status = kbusSetupBindFla(pGpu, pKernelBus, pGpu->sriovState.pP2PInfo->gfid);
343         }
344         else
345         {
346             NV_PRINTF(LEVEL_INFO, "Skipping binding FLA, because no P2P GFID is"
347                       " validated yet\n");
348         }
349     }
350     else
351     {
352         status = kbusSetupBindFla(pGpu, pKernelBus, GPU_GFID_PF);
353     }
354 
355     if (status != NV_OK)
356     {
357         NV_PRINTF(LEVEL_ERROR,
358                   "failed binding instblk for FLA, status=0x%x\n", status);
359         goto free_instblk;
360     }
361     return status;
362 
363 free_rpc:
364      {
365         NV2080_CTRL_FLA_RANGE_PARAMS params = {0};
366         params.mode = NV2080_CTRL_FLA_RANGE_PARAMS_MODE_HOST_MANAGED_VAS_DESTROY;
367         NV_RM_RPC_CONTROL(pGpu, pKernelBus->flaInfo.hClient, pKernelBus->flaInfo.hSubDevice,
368                           NV2080_CTRL_CMD_FLA_RANGE,
369                           &params, sizeof(params), status);
370         goto free_client;
371      }
372 
373 free_instblk:
374     kbusDestructFlaInstBlk_HAL(pGpu, pKernelBus);
375 
376 unpin_rootpagedir:
377     if (pKernelBus->flaInfo.pFlaVAS != NULL)
378     {
379         vaspaceUnpinRootPageDir(pKernelBus->flaInfo.pFlaVAS, pGpu);
380     }
381 
382     if (pGpu->pFabricVAS != NULL)
383     {
384         vaspaceUnpinRootPageDir(pGpu->pFabricVAS, pGpu);
385     }
386 
387 free_client:
388     if (pGpu->pFabricVAS != NULL)
389     {
390         vmmDestroyVaspace(pVmm, pGpu->pFabricVAS);
391         pGpu->pFabricVAS = NULL;
392     }
393 
394     pRmApi->Free(pRmApi, pKernelBus->flaInfo.hClient, pKernelBus->flaInfo.hClient);
395     pKernelBus->flaInfo.bFlaAllocated = NV_FALSE;
396 
397     NV_PRINTF(LEVEL_ERROR, "failed allocating FLA VASpace status=0x%x\n",
398               status);
399 
400     return status;
401 }
402 
403 /*!
 * @brief Sets up the host-managed FLA state for the GPU.
 * This function manages the bare-minimum resources in host RM, such as
 * allocating the PDB, constructing the instance memory block in the subheap of
 * the vGPU device, and binding the PDB to the VASpace.
408  *
409  * @param[in]  hClient    Client handle which owns the FLA resources
410  * @param[in]  hDevice    Device handle associated with FLA VAS
411  * @param[in]  hSubdevice SubDevice handle associated with FLA VAS
412  * @param[in]  hVASpace   FLA Vaspace handle
413  * @param[in]  base       VASpace base
414  * @param[in]  size       VASpace size
415  * @param[in]  gfid       Calling Context
416  *
417  * @return NV_OK if successful
418  */
419 NV_STATUS
420 kbusAllocateHostManagedFlaVaspace_GA100
421 (
422     OBJGPU    *pGpu,
423     KernelBus *pKernelBus,
424     NvHandle   hClient,
425     NvHandle   hDevice,
426     NvHandle   hSubdevice,
427     NvHandle   hVASpace,
428     NvU64      base,
429     NvU64      size,
430     NvU32      gfid
431 )
432 {
433     KernelGmmu *pKernelGmmu = GPU_GET_KERNEL_GMMU(pGpu);
434     OBJVMM     *pVmm        = SYS_GET_VMM(SYS_GET_INSTANCE());
435     INST_BLK_INIT_PARAMS pInstblkParams = {0};
436     RsClient   *pClient;
437     NV_STATUS   status;
438 
439     NV_ASSERT_OR_RETURN(pGpu != NULL, NV_ERR_INVALID_ARGUMENT);
440     NV_ASSERT_OR_RETURN(size != 0, NV_ERR_INVALID_ARGUMENT);
441     NV_ASSERT_OR_RETURN(IS_GFID_VF(gfid), NV_ERR_INVALID_ARGUMENT);
442     NV_ASSERT_OR_RETURN(hClient != NV01_NULL_OBJECT, NV_ERR_INVALID_ARGUMENT);
443     NV_ASSERT_OR_RETURN(hDevice != NV01_NULL_OBJECT, NV_ERR_INVALID_ARGUMENT);
444     NV_ASSERT_OR_RETURN(hSubdevice != NV01_NULL_OBJECT, NV_ERR_INVALID_ARGUMENT);
445     NV_ASSERT_OR_RETURN(hVASpace != NV01_NULL_OBJECT, NV_ERR_INVALID_ARGUMENT);
446     NV_ASSERT_OR_RETURN(!pKernelBus->flaInfo.bFlaAllocated, NV_ERR_INVALID_ARGUMENT);
447 
448     pKernelBus->flaInfo.base = base;
449     pKernelBus->flaInfo.size = size;
450     pKernelBus->flaInfo.hClient = hClient;
451     pKernelBus->flaInfo.hDevice = hDevice;
452     pKernelBus->flaInfo.hSubDevice = hSubdevice;
453     pKernelBus->flaInfo.hFlaVASpace = hVASpace;
454 
455     status = serverGetClientUnderLock(&g_resServ, pKernelBus->flaInfo.hClient, &pClient);
456     NV_ASSERT_OR_GOTO(status == NV_OK, cleanup);
457 
    status = vaspaceGetByHandleOrDeviceDefault(pClient,
                                               pKernelBus->flaInfo.hDevice,
                                               pKernelBus->flaInfo.hFlaVASpace,
                                               &pKernelBus->flaInfo.pFlaVAS);
    NV_ASSERT_OR_GOTO(status == NV_OK, cleanup);
462 
463     // Allocate a FABRIC_VASPACE_A object
464     status = vmmCreateVaspace(pVmm, FABRIC_VASPACE_A, pGpu->gpuId, gpumgrGetGpuMask(pGpu),
465                               base, base + size - 1, 0, 0, NULL, 0,
466                               &pGpu->pFabricVAS);
467     if (status != NV_OK)
468     {
469         NV_PRINTF(LEVEL_ERROR, "failed allocating fabric vaspace, status=0x%x\n",
470                   status);
471         goto cleanup;
472     }
473 
474     // Pin the VASPACE page directory for pFabricVAS before writing the instance block
475     status = vaspacePinRootPageDir(pGpu->pFabricVAS, pGpu);
476     if (status != NV_OK)
477     {
478         NV_PRINTF(LEVEL_ERROR, "failed pinning down fabric vaspace, status=0x%x\n",
479                     status);
480         goto cleanup;
481     }
482 
    // Pin the VASPACE page directory for the legacy VAS before writing the instance block
484     status = vaspacePinRootPageDir(pKernelBus->flaInfo.pFlaVAS, pGpu);
485     if (status != NV_OK)
486     {
487         NV_PRINTF(LEVEL_ERROR, "failed pinning down legacy vaspace, status=0x%x\n",
488                     status);
489         goto unpin_fabric_page_dir;
490     }
491 
492     // Construct instance block
493     status = kbusConstructFlaInstBlk_HAL(pGpu, pKernelBus, gfid);
494     if (status != NV_OK)
495     {
496         NV_PRINTF(LEVEL_ERROR,
497                  "failed constructing instblk for FLA, status=0x%x\n",
498                   status);
499         goto unpin_legacy_page_dir;
500     }
501 
502     // Instantiate Inst Blk for FLA
503     status = kgmmuInstBlkInit(pKernelGmmu,
504                               pKernelBus->flaInfo.pInstblkMemDesc,
505                               pKernelBus->flaInfo.pFlaVAS, FIFO_PDB_IDX_BASE,
506                               &pInstblkParams);
507     if (status != NV_OK)
508     {
509         NV_PRINTF(LEVEL_ERROR,
510                   "failed instantiating instblk for FLA, status=0x%x\n",
511                   status);
512         goto free_instblk;
513     }
514 
515     pKernelBus->flaInfo.bFlaAllocated = NV_TRUE;
516     pKernelBus->flaInfo.bToggleBindPoint = NV_TRUE;
517 
518     if (pGpu->pFabricVAS != NULL)
519     {
520         NV_ASSERT_OK_OR_GOTO(status, fabricvaspaceInitUCRange(
521                                      dynamicCast(pGpu->pFabricVAS, FABRIC_VASPACE), pGpu,
522                                      base, size), free_instblk);
523     }
524 
525     return status;
526 
527 free_instblk:
528     kbusDestructFlaInstBlk_HAL(pGpu, pKernelBus);
529 
530 unpin_legacy_page_dir:
531     if (pKernelBus->flaInfo.pFlaVAS != NULL)
532     {
533         vaspaceUnpinRootPageDir(pKernelBus->flaInfo.pFlaVAS, pGpu);
534     }
535 
536 unpin_fabric_page_dir:
537     if (pGpu->pFabricVAS != NULL)
538     {
539         vaspaceUnpinRootPageDir(pGpu->pFabricVAS, pGpu);
540     }
541 
542 cleanup:
543     if (pGpu->pFabricVAS != NULL)
544     {
545         vmmDestroyVaspace(pVmm, pGpu->pFabricVAS);
546         pGpu->pFabricVAS = NULL;
547     }
548 
549     pKernelBus->flaInfo.bFlaAllocated = NV_FALSE;
550     return status;
551 }
552 
553 /*!
 * Top-level function to check whether the platform supports FLA, and to initialize it
 * if supported. This function is called on all platforms where NVLink is enabled.
556  *
557  * @param[in]  base       VASpace base
558  * @param[in]  size       VASpace size
559  */
560 NV_STATUS
561 kbusCheckFlaSupportedAndInit_GA100
562 (
563     OBJGPU    *pGpu,
564     KernelBus *pKernelBus,
565     NvU64      base,
566     NvU64      size
567 )
568 {
569 
570     KernelMIGManager *pKernelMIGManager = GPU_GET_KERNEL_MIG_MANAGER(pGpu);
571 
572     portMemSet(&pKernelBus->flaInfo, 0, sizeof(pKernelBus->flaInfo));
573 
    //
    // Initialize the FLA state info if possible:
    //  1. FLA is enabled by default for GA100.
    //  2. Disable FLA when MIG is enabled.
    //     MIG is currently a persistent state, so a GPU reboot happens whenever MIG is
    //     enabled/disabled; when the GPU reboots with a modified state, don't enable FLA.
    //     This is a decent WAR for bug 2568634.
    //  3. Disable FLA when SLI is enabled (bug 2985556); re-enable once that bug is fixed.
    //
582     if (((NULL != pKernelMIGManager) && !kmigmgrIsMIGNvlinkP2PSupported(pGpu, pKernelMIGManager)) ||
583         (IsSLIEnabled(pGpu)) || pGpu->getProperty(pGpu, PDB_PROP_GPU_BROKEN_FB) ||
584         pGpu->getProperty(pGpu, PDB_PROP_GPU_IS_ALL_INST_IN_SYSMEM))
585     {
586         NV_PRINTF(LEVEL_INFO, "FLA is disabled, gpu %x is in MIG/SLI mode \n", pGpu->gpuInstance);
587         pKernelBus->bFlaSupported = NV_FALSE;
588         return NV_OK;
589     }
590     else // for all non-MIG configs, FLA is supported
591     {
592         NV_PRINTF(LEVEL_INFO, "Enabling FLA_SUPPORTED to TRUE, gpu: %x ...\n", pGpu->gpuInstance);
593         pKernelBus->bFlaSupported = NV_TRUE;
594     }
595 
596     //
597     // FLA VAspace is allocated from CPU, so no need to do anything
598     // in GSP except setting the property
599     //
600     if (RMCFG_FEATURE_PLATFORM_GSP)
601         return NV_OK;
602 
603     NV_ASSERT_OK_OR_RETURN(kbusDetermineFlaRangeAndAllocate_HAL(pGpu, pKernelBus, base, size));
604 
605     return NV_OK;
606 }
607 
608 /*!
 * @brief Determine the FLA base and size for NVSwitch virtualization systems
 *        by reading the scratch registers. This also determines the FLA base and size
 *        for GA100 direct-connected systems, and skips allocation of the FLA VASpace
 *        for the vGPU host.
613  *
614  * @param[in]  base       VASpace base
615  * @param[in]  size       VASpace size
616  *
617  * @return NV_OK if successful
618  */
619 NV_STATUS
620 kbusDetermineFlaRangeAndAllocate_GA100
621 (
622     OBJGPU    *pGpu,
623     KernelBus *pKernelBus,
624     NvU64      base,
625     NvU64      size
626 )
627 {
628     OBJSYS           *pSys              = SYS_GET_INSTANCE();
629     NV_STATUS         status            = NV_OK;
630     KernelNvlink     *pKernelNvlink     = GPU_GET_KERNEL_NVLINK(pGpu);
631 
632     NV2080_CTRL_NVLINK_GET_SET_NVSWITCH_FLA_ADDR_PARAMS params;
633 
634     if (pSys->getProperty(pSys, PDB_PROP_SYS_FABRIC_IS_EXTERNALLY_MANAGED))
635     {
636         // Nvswitch virtualization enabled
637         if (pKernelNvlink != NULL && knvlinkIsNvswitchProxyPresent(pGpu, pKernelNvlink))
638         {
639             portMemSet(&params, 0, sizeof(params));
640             params.bGet = NV_TRUE;
641 
642             status = knvlinkExecGspRmRpc(pGpu, pKernelNvlink,
643                                          NV2080_CTRL_CMD_NVLINK_GET_SET_NVSWITCH_FLA_ADDR,
644                                          (void *)&params, sizeof(params));
645             if (status != NV_OK)
646             {
647                 NV_PRINTF(LEVEL_ERROR,
648                           "Failed to get the NVSwitch FLA address\n");
649                 return status;
650             }
651 
652             size = gpuGetFlaVasSize_HAL(pGpu, NV_TRUE);
653 
654             status = knvlinkSetUniqueFlaBaseAddress(pGpu, pKernelNvlink, params.addr);
655             if (status != NV_OK)
656             {
657                 NV_PRINTF(LEVEL_INFO, "Failed to enable FLA for GPU: %x\n", pGpu->gpuInstance);
658                 return status;
659             }
660 
661             base = params.addr;
662         }
663         else
664         {
            return status;
666         }
667     }
668     else // direct connected systems
669     {
670         if (hypervisorIsVgxHyper())
671         {
672             NV_PRINTF(LEVEL_INFO, "Skipping the FLA initialization in Host vGPU \n");
673             return NV_OK;
674         }
675         if (!size)
676         {
677             size = gpuGetFlaVasSize_HAL(pGpu, NV_FALSE);
678             base = pGpu->gpuInstance * size;
679         }
680     }
681     NV_ASSERT_OK_OR_RETURN(kbusAllocateFlaVaspace_HAL(pGpu, pKernelBus, base, size));
682     return status;
683 }
684 
685 /*!
686  * @brief Destruct the FLA data structure and associated resources.
 *        Since all the resources are associated with the RM client,
 *        they will be destroyed by the resource server.
 *        Note: kbusDestroyFla can be called from different places:
 *              1. For direct-connected systems, RM unload calls this function.
691  */
692 void
693 kbusDestroyFla_GA100
694 (
695     OBJGPU    *pGpu,
696     KernelBus *pKernelBus
697 )
698 {
699     OBJSYS *pSys   = SYS_GET_INSTANCE();
700     OBJVMM *pVmm   = SYS_GET_VMM(pSys);
701     RM_API *pRmApi = rmapiGetInterface(RMAPI_GPU_LOCK_INTERNAL);
702 
703     // TODO: if there are dangling resources, cleanup here
704     if ((pKernelBus->flaInfo.pFlaVAS != NULL) || (pGpu->pFabricVAS != NULL))
705     {
706         if (pKernelBus->flaInfo.bFlaBind)
707         {
708             if (IS_VIRTUAL(pGpu) || IS_GSP_CLIENT(pGpu))
709             {
710                 kbusSetupUnbindFla_HAL(pGpu, pKernelBus);
711             }
712         }
713 
714         if (pKernelBus->flaInfo.bFlaAllocated)
715         {
716             if (IS_VIRTUAL(pGpu) && gpuIsWarBug200577889SriovHeavyEnabled(pGpu))
717             {
718                 NV2080_CTRL_FLA_RANGE_PARAMS params = {0};
719                 NV_STATUS status = NV_OK;
720                 params.mode = NV2080_CTRL_FLA_RANGE_PARAMS_MODE_HOST_MANAGED_VAS_DESTROY;
721                 NV_RM_RPC_CONTROL(pGpu, pKernelBus->flaInfo.hClient, pKernelBus->flaInfo.hSubDevice,
722                                 NV2080_CTRL_CMD_FLA_RANGE,
723                                 &params, sizeof(params), status);
724 
725                 if (status != NV_OK)
726                 {
727                     NV_PRINTF(LEVEL_WARNING, "RPC to host failed with status: 0x%x\n", status);
728                 }
729 
730                 //
                // For SRIOV-heavy, the instance block is allocated in the host, so only
                // the VASpace is destroyed here
733                 //
734                 if (pGpu->pFabricVAS != NULL)
735                 {
736                     vmmDestroyVaspace(pVmm, pGpu->pFabricVAS);
737                     pGpu->pFabricVAS = NULL;
738                 }
739 
740                 pRmApi->Free(pRmApi, pKernelBus->flaInfo.hClient, pKernelBus->flaInfo.hClient);
741                 portMemSet(&pKernelBus->flaInfo, 0, sizeof(pKernelBus->flaInfo));
742             }
743             else
744             {
745                 if (pKernelBus->flaInfo.pFlaVAS != NULL)
746                 {
747                     vaspaceUnpinRootPageDir(pKernelBus->flaInfo.pFlaVAS, pGpu);
748                 }
749 
750                 if (pGpu->pFabricVAS != NULL)
751                 {
752                     vaspaceUnpinRootPageDir(pGpu->pFabricVAS, pGpu);
753                 }
754 
755                 kbusDestructFlaInstBlk_HAL(pGpu, pKernelBus);
756 
757                 if (pGpu->pFabricVAS != NULL)
758                 {
759                     vmmDestroyVaspace(pVmm, pGpu->pFabricVAS);
760                     pGpu->pFabricVAS = NULL;
761                 }
762 
763                 pRmApi->Free(pRmApi, pKernelBus->flaInfo.hClient, pKernelBus->flaInfo.hClient);
764                 portMemSet(&pKernelBus->flaInfo, 0, sizeof(pKernelBus->flaInfo));
765             }
766 
767             pKernelBus->flaInfo.bFlaAllocated = NV_FALSE;
768         }
769     }
770 }
771 
772 void
773 kbusDestroyHostManagedFlaVaspace_GA100
774 (
775     OBJGPU    *pGpu,
776     KernelBus *pKernelBus,
777     NvU32      gfid
778 )
779 {
780     OBJSYS *pSys   = SYS_GET_INSTANCE();
781     OBJVMM *pVmm   = SYS_GET_VMM(pSys);
782 
783     NV_PRINTF(LEVEL_INFO, "Freeing the FLA client: 0x%x FLAVASpace:%x, gpu:%x \n",
784              pKernelBus->flaInfo.hClient, pKernelBus->flaInfo.hFlaVASpace, pGpu->gpuInstance);
785 
786     if (pKernelBus->flaInfo.bFlaAllocated)
787     {
788         KernelNvlink *pKernelNvlink = GPU_GET_KERNEL_NVLINK(pGpu);
789 
790         if (pKernelBus->flaInfo.pFlaVAS != NULL)
791         {
792             vaspaceUnpinRootPageDir(pKernelBus->flaInfo.pFlaVAS, pGpu);
793             pKernelBus->flaInfo.hFlaVASpace = NV01_NULL_OBJECT;
794             pKernelBus->flaInfo.pFlaVAS = NULL;
795         }
796 
797         if (pGpu->pFabricVAS != NULL)
798         {
799             vaspaceUnpinRootPageDir(pGpu->pFabricVAS, pGpu);
800             vmmDestroyVaspace(pVmm, pGpu->pFabricVAS);
801             pGpu->pFabricVAS = NULL;
802         }
803 
804         kbusDestructFlaInstBlk_HAL(pGpu, pKernelBus);
805         pKernelBus->flaInfo.hClient = NV01_NULL_OBJECT;
806         pKernelBus->flaInfo.hDevice = NV01_NULL_OBJECT;
807         pKernelBus->flaInfo.hSubDevice = NV01_NULL_OBJECT;
808         pKernelBus->flaInfo.bFlaAllocated = NV_FALSE;
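        //
        // Clear the registered FLA range only for GPUs that are not connected to an
        // NVSwitch; NVSwitch-connected GPUs keep their registered FLA range.
        //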
809         if (pKernelNvlink == NULL || !knvlinkIsGpuConnectedToNvswitch(pGpu, pKernelNvlink))
810         {
811             pKernelBus->flaInfo.bFlaRangeRegistered = NV_FALSE;
812             pKernelBus->flaInfo.base = 0;
813             pKernelBus->flaInfo.size = 0;
814         }
815     }
816 }
817 
818 /*!
819  * @brief This function will return the OBJVASPACE for the FLA VAS.
820  *
821  * @param[in/out]  ppVAS    OBJVASPACE double pointer
822  *
823  * @return NV_ERR_NOT_SUPPORTED, if FLA is not supported,
824  *         else NV_OK
825  */
826 NV_STATUS
827 kbusGetFlaVaspace_GA100
828 (
829     OBJGPU      *pGpu,
830     KernelBus   *pKernelBus,
831     OBJVASPACE **ppVAS
832 )
833 {
834     NV_STATUS         status  = NV_OK;
835     KernelMIGManager *pKernelMIGManager = GPU_GET_KERNEL_MIG_MANAGER(pGpu);
836     KernelNvlink     *pKernelNvlink     = GPU_GET_KERNEL_NVLINK(pGpu);
837 
838     *ppVAS = NULL;
839 
840     // Return NV_ERR_NOT_SUPPORTED if nvlink is force disabled using cmd line args
841     if (!IS_VIRTUAL(pGpu) && pKernelNvlink == NULL)
842     {
843         NV_PRINTF(LEVEL_WARNING, "Nvlink is not supported in this GPU: %x \n", pGpu->gpuInstance);
844         return  NV_ERR_NOT_SUPPORTED;
845     }
846 
847     // Return NV_ERR_NOT_SUPPORTED when we are in MIG mode
848     if ((pKernelMIGManager != NULL) && !kmigmgrIsMIGNvlinkP2PSupported(pGpu, pKernelMIGManager))
849     {
850         NV_PRINTF(LEVEL_WARNING, "FLA is not supported with MIG enabled, GPU: %x \n", pGpu->gpuInstance);
851         return NV_ERR_NOT_SUPPORTED;
852     }
853 
854     if (!kbusIsFlaSupported(pKernelBus))
855     {
856         NV_PRINTF(LEVEL_WARNING, "FLA is not supported, GPU: %x\n", pGpu->gpuInstance);
857         return NV_ERR_NOT_SUPPORTED;
858     }
859 
860     if (!IS_VIRTUAL(pGpu) && !kbusIsFlaEnabled(pKernelBus))
861     {
862         if (!gpuIsSriovEnabled(pGpu) && !IS_VIRTUAL(pGpu))
863         {
864             NV_PRINTF(LEVEL_WARNING, "FLA is not enabled, GPU: %x\n", pGpu->gpuInstance);
865             return NV_ERR_NOT_SUPPORTED;
866         }
867     }
868     //
    // When FLA init moves to P2P object creation time, any client trying to get an
    // FLA VAS reference needs to be returned NV_ERR_NOT_SUPPORTED. The only way to
    // determine that is to check whether the links are trained in the system. Since
    // there is no easy way to do that check, currently assume that NVLinks will not
    // be disabled outside of MIG.
873     //
874 
875     *ppVAS = pKernelBus->flaInfo.pFlaVAS;
876 
877     NV_PRINTF(LEVEL_INFO, "returning the vas: %p for GPU: %x start: 0x%llx, limit:0x%llx \n",
878               pKernelBus->flaInfo.pFlaVAS, pGpu->gpuInstance, pKernelBus->flaInfo.pFlaVAS->vasStart,
879               pKernelBus->flaInfo.pFlaVAS->vasLimit);
880 
881     return status;
882 }
883 
884 /*!
 * @brief Constructor for the instance memory block for the FLA VASpace. This
 *        allocates the memory descriptor for the IMB.
 *
 * @param[in]  gfid       GFID of the calling context
 *
 * @return NV_OK, if successful
890  */
891 NV_STATUS
892 kbusConstructFlaInstBlk_GA100
893 (
894     OBJGPU    *pGpu,
895     KernelBus *pKernelBus,
896     NvU32      gfid
897 )
898 {
899     MemoryManager *pMemoryManager = GPU_GET_MEMORY_MANAGER(pGpu);
900     NV_STATUS      status = NV_OK;
901     NvU32          aperture;
902     NvU64          size;
903     NvU32          cpuCacheAttrib;
904     NvU32          flags = MEMDESC_FLAGS_NONE;
905 
906     // Inst Blocks are by default in FB
907     aperture = ADDR_FBMEM;
908     cpuCacheAttrib = NV_MEMORY_UNCACHED;
909     size = NV_RAMIN_ALLOC_SIZE;
910 
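    //
    // In SR-IOV heavy mode the FLA instance block is constructed only for VF GFIDs
    // (on behalf of the guest), so reject a PF GFID here.
    //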
911     if (gpuIsWarBug200577889SriovHeavyEnabled(pGpu) && IS_GFID_PF(gfid))
912     {
913         return NV_ERR_INVALID_ARGUMENT;
914     }
915 
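    //
    // For a VF GFID, mark the memdesc as owned by the current (VF) device so the
    // instance block is carved out of the vGPU device's subheap.
    //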
916     if (IS_GFID_VF(gfid))
917         flags |= MEMDESC_FLAGS_OWNED_BY_CURRENT_DEVICE;
918 
919     // create the memdesc for instance block
920     status = memdescCreate(&pKernelBus->flaInfo.pInstblkMemDesc, pGpu,
921                            size, 0, NV_TRUE,
922                            aperture, cpuCacheAttrib, flags);
923 
924     NV_ASSERT(status == NV_OK);
925 
926     memdescTagAlloc(status, NV_FB_ALLOC_RM_INTERNAL_OWNER_UNNAMED_TAG_89,
927                         pKernelBus->flaInfo.pInstblkMemDesc);
928 
929     NV_ASSERT(status == NV_OK);
930 
931     // Initialize the memdesc to zero
932     status = memmgrMemDescMemSet(pMemoryManager,
933                                  pKernelBus->flaInfo.pInstblkMemDesc,
934                                  0,
935                                  TRANSFER_FLAGS_NONE);
936     NV_ASSERT(status == NV_OK);
937 
938     return status;
939 }
940 
941 /*!
942  * @brief Destruct the Instance memory block allocated for FLA VAS
943  *
944  */
945 void
946 kbusDestructFlaInstBlk_GA100
947 (
948     OBJGPU    *pGpu,
949     KernelBus *pKernelBus
950 )
951 {
    // Free the FLA Inst Blk MemDesc
    if (pKernelBus->flaInfo.pInstblkMemDesc != NULL)
    {
        memdescFree(pKernelBus->flaInfo.pInstblkMemDesc);
        memdescDestroy(pKernelBus->flaInfo.pInstblkMemDesc);
        pKernelBus->flaInfo.pInstblkMemDesc = NULL;
    }
959 }
960 
961 /*!
 * @brief Function to determine whether the mapping can be direct mapped or BAR mapped
 *
 * @param[in]   pMemDesc    Memory descriptor pointer
 * @param[in]   mapFlags    Flags used for mapping
 * @param[out]  bDirectSysMappingAllowed Boolean to return the result
 *
 * @return NV_ERR_INVALID_ARGUMENT, if a reflected mapping is requested
969  *         NV_OK, otherwise
970  */
971 NV_STATUS
972 kbusIsDirectMappingAllowed_GA100
973 (
974     OBJGPU            *pGpu,
975     KernelBus         *pKernelBus,
976     MEMORY_DESCRIPTOR *pMemDesc,
977     NvU32              mapFlags,
978     NvBool            *bDirectSysMappingAllowed
979 )
980 {
981     *bDirectSysMappingAllowed = NV_FALSE;
982 
    if (DRF_VAL(OS33, _FLAGS, _MAPPING, mapFlags) == NVOS33_FLAGS_MAPPING_REFLECTED)
    {
        NV_PRINTF(LEVEL_WARNING, "BAR allocation trying to request reflected mapping "
                   "by passing the map flags, failing the request \n");
        return NV_ERR_INVALID_ARGUMENT;
    }
988 
989     if (memdescGetFlag(pMemDesc, MEMDESC_FLAGS_ENCRYPTED))
990     {
991         NV_PRINTF(LEVEL_WARNING, "BAR allocation trying to request reflected mapping, "
992                    "by setting ENCRYPTED flag in memdesc, failing the request \n");
993         return NV_ERR_INVALID_ARGUMENT;
994     }
995 
996     *bDirectSysMappingAllowed = NV_TRUE;
997     return NV_OK;
998 }
999 
1000 /*!
1001  * @brief Returns the Nvlink peer ID from pGpu0 to pGpu1
1002  *
1003  * @param[in]   pGpu0          (local GPU)
1004  * @param[in]   pKernelBus0    (local GPU)
1005  * @param[in]   pGpu1          (remote GPU)
1006  * @param[in]   pKernelBus1    (remote GPU)
 * @param[out]  nvlinkPeer     NvU32 pointer
 * @param[in]   attributes     P2P API attributes (e.g., remote EGM)
1008  *
1009  * return NV_OK on success
1010  */
1011 NV_STATUS
1012 kbusGetNvlinkP2PPeerId_GA100
1013 (
1014     OBJGPU    *pGpu0,
1015     KernelBus *pKernelBus0,
1016     OBJGPU    *pGpu1,
1017     KernelBus *pKernelBus1,
1018     NvU32     *nvlinkPeer,
1019     NvU32      attributes
1020 )
1021 {
1022     KernelNvlink *pKernelNvlink0 = GPU_GET_KERNEL_NVLINK(pGpu0);
1023     NV_STATUS     status         = NV_OK;
1024     NvBool        bEgmPeer = FLD_TEST_DRF(_P2PAPI, _ATTRIBUTES, _REMOTE_EGM, _YES, attributes);
1025 
1026     if (nvlinkPeer == NULL)
1027     {
1028         return NV_ERR_INVALID_ARGUMENT;
1029     }
1030 
1031     *nvlinkPeer = BUS_INVALID_PEER;
1032 
1033     // If the chip does not support NVLink, then return
1034     if (pKernelNvlink0 == NULL)
1035     {
1036         return NV_OK;
1037     }
1038 
1039     //
1040     // If NVLINK topology is forced and the forced configuration has peer links,
1041     // get the peer ID from the table
1042     //
1043     if ((knvlinkIsForcedConfig(pGpu0, pKernelNvlink0) ||
1044         knvlinkAreLinksRegistryOverriden(pGpu0, pKernelNvlink0)) && !bEgmPeer)
1045     {
1046         if (knvlinkGetPeersNvlinkMaskFromHshub(pGpu0, pKernelNvlink0) != 0)
1047         {
1048             *nvlinkPeer = kbusGetPeerIdFromTable_HAL(pGpu0, pKernelBus0,
1049                                                      pGpu0->gpuInstance,
1050                                                      pGpu1->gpuInstance);
1051 
1052             if (*nvlinkPeer == BUS_INVALID_PEER)
1053             {
1054                 return NV_ERR_INVALID_REQUEST;
1055             }
1056         }
1057         return NV_OK;
1058     }
1059 
1060     //
1061     // NVLINK topology is not forced. Get the NVLink P2P peer ID for NVLink
1062     // auto-config.
1063     //
1064 
1065     //
1066     // Return if there are no NVLink connections to the remote GPU
1067     //
1068     // skipping this check for knvlinkIsForcedConfig case since
1069     // peerLinkMasks is not set for knvlinkIsForcedConfig
1070     // this will be skipped only for reserving EGM peerId (bEgmPeer is true)
1071     // when client has not provided EGM peerId mask in forced config
1072     // tests.
1073     //
1074     if (!(knvlinkIsForcedConfig(pGpu0, pKernelNvlink0) ||
1075           knvlinkAreLinksRegistryOverriden(pGpu0, pKernelNvlink0)) &&
1076          (knvlinkGetPeerLinkMask(pGpu0, pKernelNvlink0, gpuGetInstance(pGpu1)) == 0))
1077     {
1078         return NV_OK;
1079     }
1080 
1081     // Return if a peer ID is already allocated for P2P from pGpu0 to pGpu1
1082     if (bEgmPeer)
1083     {
1084         *nvlinkPeer = kbusGetEgmPeerId_HAL(pGpu0, pKernelBus0, pGpu1);
1085     }
1086     else
1087     {
1088         *nvlinkPeer = kbusGetPeerId_HAL(pGpu0, pKernelBus0, pGpu1);
1089     }
1090 
1091     if (*nvlinkPeer != BUS_INVALID_PEER)
1092     {
1093         return NV_OK;
1094     }
1095 
1096     //
1097     // Peer ID 0 is used for the following use-cases:
1098     //     1. If the GPU is connected to itself through NVLink (loopback)
1099     //     2. If the GPU is connected to the other GPU through NVSwitch
1100     //
1101     // On NVSwitch systems, peer ID 0 might not be available only if:
1102     //     1. PCIe P2P is allowed along with NVLink P2P on NVSWitch systems
1103     //     2. Mix of direct NVLink and NVSwitch connections is supported
1104     //   None of the above hold true currently
1105     //
1106     if (((pGpu0 == pGpu1) && !bEgmPeer) || knvlinkIsGpuConnectedToNvswitch(pGpu0, pKernelNvlink0))
1107     {
1108         *nvlinkPeer = 0;
1109 
1110         goto kbusGetNvlinkP2PPeerId_end;
1111     }
1112 
1113     // If no peer ID has been assigned yet, find the first unused peer ID
1114     if (*nvlinkPeer == BUS_INVALID_PEER)
1115     {
1116         *nvlinkPeer = kbusGetUnusedPeerId_HAL(pGpu0, pKernelBus0);
1117 
1118         // If could not find a free peer ID, return error
1119         if (*nvlinkPeer == BUS_INVALID_PEER)
1120         {
1121             NV_PRINTF(LEVEL_WARNING,
1122                       "GPU%d: peerID not available for NVLink P2P\n",
1123                       pGpu0->gpuInstance);
1124             return NV_ERR_GENERIC;
1125         }
1126 
1127         goto kbusGetNvlinkP2PPeerId_end;
1128     }
1129 
1130 kbusGetNvlinkP2PPeerId_end:
1131 
1132     // Reserve the peer ID for NVLink use
1133     status = kbusReserveP2PPeerIds_HAL(pGpu0, pKernelBus0, NVBIT(*nvlinkPeer));
1134 
1135     if ((status == NV_OK) &&
1136         !knvlinkIsGpuConnectedToNvswitch(pGpu0, pKernelNvlink0))
1137     {
1138         pKernelBus0->p2p.bEgmPeer[*nvlinkPeer] = bEgmPeer;
1139     }
1140 
1141     return status;
1142 }
1143 
1144 /**
 * @brief Select whether RM needs to use direct mapping or BAR mapping
 *        This function is a WAR for bug: 2494500, where FB hangs if SW issues
 *        reflected accesses. RM should select direct mapping for any accesses
 *        other than FB
 *
 * @param[in]     pMemDesc           MEMORY_DESCRIPTOR pointer
 * @param[in/out] pbAllowDirectMap   NvBool pointer
 *
 * @returns NV_OK, if supported
 *          NV_ERR_NOT_SUPPORTED, otherwise
1155  */
1156 NV_STATUS
1157 kbusUseDirectSysmemMap_GA100
1158 (
1159     OBJGPU            *pGpu,
1160     KernelBus         *pKernelBus,
1161     MEMORY_DESCRIPTOR *pMemDesc,
1162     NvBool            *pbAllowDirectMap
1163 )
1164 {
1165     *pbAllowDirectMap = NV_FALSE;
1166 
    if (memdescGetAddressSpace(pMemDesc) != ADDR_FBMEM)
1168     {
1169         *pbAllowDirectMap = NV_TRUE;
1170     }
1171 
1172     return NV_OK;
1173 }
1174 
1175 /*!
1176  * @brief   Validates FLA base address.
1177  *
 * @param[in]     flaBaseAddr
1179  *
1180  * @returns On success, NV_OK.
1181  *          On failure, returns NV_ERR_XXX.
1182  */
1183 NV_STATUS
1184 kbusValidateFlaBaseAddress_GA100
1185 (
1186     OBJGPU            *pGpu,
1187     KernelBus         *pKernelBus,
1188     NvU64              flaBaseAddr
1189 )
1190 {
1191     MemoryManager *pMemoryManager = GPU_GET_MEMORY_MANAGER(pGpu);
1192     NvU64 fbSizeBytes;
1193 
1194     fbSizeBytes = pMemoryManager->Ram.fbTotalMemSizeMb << 20;
1195 
1196     //
1197     // Ampere SKUs will be paired with NVSwitches (Limerock) supporting 2K
1198     // mapslots that can cover 64GB each. Make sure that the fabric base
1199     // address being used is valid to cover whole frame buffer.
1200     //
1201 
1202     // Check if fabric address is aligned to mapslot size.
1203     if (flaBaseAddr & (NVBIT64(NVLNK_FABRIC_ADDR_GRANULARITY) - 1))
1204     {
1205         return NV_ERR_INVALID_ARGUMENT;
1206     }
1207 
1208     // Align fbSize to mapslot size.
1209     fbSizeBytes = RM_ALIGN_UP(fbSizeBytes, NVBIT64(NVLNK_FABRIC_ADDR_GRANULARITY));
1210 
1211     // Make sure the address range doesn't go beyond the limit, (2K * 64GB).
1212     if ((flaBaseAddr + fbSizeBytes) > NVBIT64(NV_BUS_FLA_VASPACE_ADDR_HI))
1213     {
1214         return NV_ERR_INVALID_ARGUMENT;
1215     }
1216 
1217     return NV_OK;
1218 }
1219 
1220 /*!
 * @brief Validates the FLA range allocated in the GPU when FM registers itself to RM.
 *        This is useful when FM is killed or crashes during the app runtime and
 *        re-spawns at any point later. We don't do any client validation, since FM is
 *        a privileged process managed by the sysadmin.
 *
 * @param[in]    flaBaseAddr NvU64 address
 * @param[in]    flaSize     NvU64 size
 *
 * @returns NV_TRUE, if flaBaseAddr and flaSize match the existing FLA VAS allocation
 *          NV_FALSE, otherwise
1231  *
1232  */
NvBool
kbusVerifyFlaRange_GA100
(
    OBJGPU    *pGpu,
    KernelBus *pKernelBus,
    NvU64      flaBaseAddr,
    NvU64      flaSize
)
{
    if ((pKernelBus->flaInfo.base != flaBaseAddr) || (pKernelBus->flaInfo.size != flaSize))
        return NV_FALSE;

    NV_PRINTF(LEVEL_INFO, "FLA base: %llx, size: %llx is verified \n", flaBaseAddr, flaSize);
    return NV_TRUE;
}
1248 
1249 /*!
1250  * @brief Returns the NvSwitch peer ID
1251  *
1252  *
1253  * @returns NvU32 bus peer number
1254  */
1255 NvU32
1256 kbusGetNvSwitchPeerId_GA100
1257 (
1258     OBJGPU    *pGpu,
1259     KernelBus *pKernelBus
1260 )
1261 {
1262     KernelNvlink *pKernelNvlink = GPU_GET_KERNEL_NVLINK(pGpu);
1263     NvU32 peerId = BUS_INVALID_PEER;
1264     NvU32 i;
1265 
1266     if (pKernelNvlink == NULL)
1267         return BUS_INVALID_PEER;
1268 
1269     if (!knvlinkIsGpuConnectedToNvswitch(pGpu, pKernelNvlink))
1270         return BUS_INVALID_PEER;
1271 
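    //
    // All NVSwitch-connected peers use a common peer ID, so scan the per-GPU NVLink
    // peer masks and return the lowest peer ID found.
    //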
1272     for (i = 0; i < NV_MAX_DEVICES; i++)
1273     {
1274         peerId = pKernelBus->p2p.busNvlinkPeerNumberMask[i];
1275 
1276         if (peerId == 0)
1277             continue;
1278 
1279         LOWESTBITIDX_32(peerId);
1280 
1281         break;
1282     }
1283 
1284     return peerId;
1285 }
1286 
1287 /*!
1288  * @brief Helper function to extract information from FLA data structure and
1289  *        to trigger RPC to Physical RM to BIND FLA VASpace
1290  *
1291  * @param[in]  gfid     GFID
1292  *
1293  * @return NV_OK if successful
1294  */
1295 NV_STATUS
1296 kbusSetupBindFla_GA100
1297 (
1298     OBJGPU    *pGpu,
1299     KernelBus *pKernelBus,
1300     NvU32      gfid
1301 )
1302 {
1303     NV_STATUS status = NV_OK;
1304     NV2080_CTRL_FLA_SETUP_INSTANCE_MEM_BLOCK_PARAMS params = {0};
1305 
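    //
    // For SR-IOV heavy the instance block is allocated in the host, so only the BIND
    // action is sent; otherwise pass the instance block address and aperture as well.
    //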
1306     if (!gpuIsWarBug200577889SriovHeavyEnabled(pGpu))
1307     {
1308         MEMORY_DESCRIPTOR  *pMemDesc;
1309         RmPhysAddr          imbPhysAddr;
1310         NvU32               addrSpace;
1311 
1312         pMemDesc     = pKernelBus->flaInfo.pInstblkMemDesc;
1313         imbPhysAddr  = memdescGetPhysAddr(pMemDesc, AT_GPU, 0);
1314         addrSpace    = memdescGetAddressSpace(pMemDesc);
1315         NV2080_CTRL_FLA_ADDRSPACE paramAddrSpace = NV2080_CTRL_FLA_ADDRSPACE_FBMEM;
1316 
1317         switch(addrSpace)
1318         {
1319             case ADDR_FBMEM:
1320                 paramAddrSpace = NV2080_CTRL_FLA_ADDRSPACE_FBMEM;
1321                 break;
1322             case ADDR_SYSMEM:
1323                 paramAddrSpace = NV2080_CTRL_FLA_ADDRSPACE_SYSMEM;
1324                 break;
1325         }
1326         params.imbPhysAddr = imbPhysAddr;
1327         params.addrSpace   = paramAddrSpace;
1328     }
1329     params.flaAction   = NV2080_CTRL_FLA_ACTION_BIND;
1330 
1331     NV_RM_RPC_CONTROL(pGpu, pKernelBus->flaInfo.hClient,
1332                         pKernelBus->flaInfo.hSubDevice,
1333                         NV2080_CTRL_CMD_FLA_SETUP_INSTANCE_MEM_BLOCK,
1334                         &params, sizeof(params), status);
1335 
1336     if (status != NV_OK)
1337     {
1338         NV_PRINTF(LEVEL_ERROR, "FLA bind failed, status: %x \n", status);
1339         return status;
1340     }
1341 
1342     // Since FLA state is tracked in the Guest, Guest RM needs to set it here
1343     pKernelBus->flaInfo.bFlaBind = NV_TRUE;
1344     pKernelBus->bFlaEnabled      = NV_TRUE;
1345 
1346     return NV_OK;
1347 }
1348 
1349 /*!
1350  * @brief Helper function to trigger RPC to Physical RM to unbind FLA VASpace
1351  *
1352  * @return NV_OK if successful
1353  */
1354 NV_STATUS
1355 kbusSetupUnbindFla_GA100
1356 (
1357     OBJGPU    *pGpu,
1358     KernelBus *pKernelBus
1359 )
1360 {
1361     NV_STATUS status = NV_OK;
1362     NV2080_CTRL_FLA_SETUP_INSTANCE_MEM_BLOCK_PARAMS params = { 0 };
1363 
1364     if (!pKernelBus->flaInfo.bFlaBind)
1365         return NV_OK;
1366 
1367     params.flaAction = NV2080_CTRL_FLA_ACTION_UNBIND;
1368 
1369     NV_RM_RPC_CONTROL(pGpu, pKernelBus->flaInfo.hClient,
1370                       pKernelBus->flaInfo.hSubDevice,
1371                       NV2080_CTRL_CMD_FLA_SETUP_INSTANCE_MEM_BLOCK,
1372                       &params, sizeof(params), status);
1373 
1374     pKernelBus->flaInfo.bFlaBind = NV_FALSE;
1375     pKernelBus->bFlaEnabled      = NV_FALSE;
1376 
1377     return status;
1378 }
1379 
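/*!
 * @brief Returns the unicast FLA base and size for the GPU.
 *        For SR-IOV enabled GPUs connected to an NVSwitch, the registered FLA range
 *        is returned; for direct-connected systems, the range is derived from the
 *        GPU instance and the default FLA VAS size.
 *
 * @param[out] ucFlaBase               FLA VASpace base
 * @param[out] ucFlaSize               FLA VASpace size
 * @param[in]  bIsConnectedToNvswitch  NV_TRUE if the GPU is connected to an NVSwitch
 *
 * @return NV_OK
 */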
1380 NV_STATUS
1381 kbusGetFlaRange_GA100
1382 (
1383     OBJGPU    *pGpu,
1384     KernelBus *pKernelBus,
1385     NvU64     *ucFlaBase,
1386     NvU64     *ucFlaSize,
    NvBool     bIsConnectedToNvswitch
1388 )
1389 {
    if (gpuIsSriovEnabled(pGpu) && bIsConnectedToNvswitch)
1391     {
1392         if (pKernelBus->flaInfo.bFlaRangeRegistered)
1393         {
1394             *ucFlaBase = pKernelBus->flaInfo.base;
1395             *ucFlaSize = pKernelBus->flaInfo.size;
1396         }
1397     }
1398     else // direct connected system
1399     {
1400         *ucFlaSize = gpuGetFlaVasSize_HAL(pGpu, NV_FALSE);
1401         *ucFlaBase = pGpu->gpuInstance * (*ucFlaSize);
1402     }
1403 
1404     return NV_OK;
1405 }
1406 
1407 /*!
1408  * @brief Returns the Nvlink specific peer number from pGpu (Local) to pGpuPeer.
1409  *        Used only by VF.
1410  *
1411  * @param[in] pGpu          Local
1412  * @param[in] pKernelBus    Local
1413  * @param[in] pGpuPeer      Remote
1414  *
1415  * @returns NvU32 bus peer number
1416  */
1417 NvU32
1418 kbusGetNvlinkPeerId_GA100
1419 (
1420     OBJGPU    *pGpu,
1421     KernelBus *pKernelBus,
1422     OBJGPU    *pGpuPeer
1423 )
1424 {
1425     NvU32 gpuPeerInst = gpuGetInstance(pGpuPeer);
1426     NvU32 peerId = pKernelBus->p2p.busNvlinkPeerNumberMask[gpuPeerInst];
1427 
1428     if (peerId == 0)
1429     {
1430         NV_PRINTF(LEVEL_INFO,
1431                   "NVLINK P2P not set up between GPU%u and GPU%u, checking for PCIe P2P...\n",
1432                   gpuGetInstance(pGpu), gpuPeerInst);
1433         return BUS_INVALID_PEER;
1434     }
1435 
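    // busNvlinkPeerNumberMask holds the assigned peer ID as a bitmask; convert it to an index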
1436     LOWESTBITIDX_32(peerId);
1437     return peerId;
1438 }
1439 
1440 /*!
 * @brief  Cache the value of NV_XVE_RESIZE_BAR1_CTRL_SIZE
 *
 * @param[in] pGpu       OBJGPU pointer
 * @param[in] pKernelBus KernelBus pointer
 */
1447 void
1448 kbusCacheBAR1ResizeSize_WAR_BUG_3249028_GA100
1449 (
1450    OBJGPU    *pGpu,
1451    KernelBus *pKernelBus
1452 )
1453 {
    NvU32 regVal = GPU_REG_RD32(pGpu, DEVICE_BASE(NV_PCFG) + NV_XVE_RESIZE_BAR1_CTRL);
    pKernelBus->bar1ResizeSizeIndex = DRF_VAL(_XVE, _RESIZE_BAR1_CTRL, _BAR_SIZE, regVal);
1456 }
1457 
1458 /*!
 * @brief  Restore the value of NV_XVE_RESIZE_BAR1_CTRL_SIZE if it differs
 *         from the cached value.
 *         Windows has a strict requirement that the PCIe config has to stay
 *         the same across power transitions.
 *         Early SBIOS implementations of resizable BAR do not properly restore
 *         the value of NV_XVE_RESIZE_BAR1_CTRL_SIZE.
 *         The reason for this WAR is to not crash the systems that have
 *         not been updated - yet.
 *
 * @param[in] pGpu       OBJGPU pointer
 * @param[in] pKernelBus KernelBus pointer
 *
 * @returns      NV_STATUS
 */
1474 NV_STATUS
1475 kbusRestoreBAR1ResizeSize_WAR_BUG_3249028_GA100
1476 (
1477     OBJGPU    *pGpu,
1478     KernelBus *pKernelBus
1479 )
1480 {
1481     NvU32 regVal;
1482     NvU32 bar1ResizeSizeIndex;
1483 
1484     if (!pKernelBus->getProperty(pKernelBus, PDB_PROP_KBUS_RESTORE_BAR1_SIZE_BUG_3249028_WAR))
1485     {
1486         return NV_OK;
1487     }
1488 
1489     NV_ASSERT_OR_RETURN(pKernelBus->bar1ResizeSizeIndex >= NV_XVE_RESIZE_BAR1_CTRL_BAR_SIZE_MIN,
1490         NV_ERR_INVALID_DATA);
1491 
1492     NV_ASSERT_OR_RETURN(pKernelBus->bar1ResizeSizeIndex <= NV_XVE_RESIZE_BAR1_CTRL_BAR_SIZE_MAX,
1493         NV_ERR_INVALID_DATA);
1494 
1495     regVal = GPU_REG_RD32(pGpu, DEVICE_BASE(NV_PCFG) + NV_XVE_RESIZE_BAR1_CTRL);
1496     bar1ResizeSizeIndex = DRF_VAL(_XVE, _RESIZE_BAR1_CTRL, _BAR_SIZE, regVal);
1497 
1498     if (bar1ResizeSizeIndex == pKernelBus->bar1ResizeSizeIndex)
1499     {
1500         // BAR1 size match. Nothing to do
1501         return NV_OK;
1502     }
1503 
1504     // BAR1 size changed. Warn and update
1505     NV_PRINTF(LEVEL_WARNING, "BAR1 size mismatch: current: 0x%x, expected: 0x%x\n",
1506         bar1ResizeSizeIndex, pKernelBus->bar1ResizeSizeIndex);
1507     NV_PRINTF(LEVEL_WARNING, "Most likely SBIOS did not restore the BAR1 size\n");
1508     NV_PRINTF(LEVEL_WARNING, "Please update your SBIOS!\n");
1509 
1510     regVal = FLD_SET_DRF_NUM(_XVE, _RESIZE_BAR1_CTRL, _BAR_SIZE, pKernelBus->bar1ResizeSizeIndex, regVal);
1511     GPU_REG_WR32(pGpu, DEVICE_BASE(NV_PCFG) + NV_XVE_RESIZE_BAR1_CTRL, regVal);
1512 
1513     return NV_OK;
1514 }
1515