11739a20eSAndy Ritger /*
2eb5c7665SAndy Ritger  * SPDX-FileCopyrightText: Copyright (c) 2020-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
31739a20eSAndy Ritger  * SPDX-License-Identifier: MIT
41739a20eSAndy Ritger  *
51739a20eSAndy Ritger  * Permission is hereby granted, free of charge, to any person obtaining a
61739a20eSAndy Ritger  * copy of this software and associated documentation files (the "Software"),
71739a20eSAndy Ritger  * to deal in the Software without restriction, including without limitation
81739a20eSAndy Ritger  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
91739a20eSAndy Ritger  * and/or sell copies of the Software, and to permit persons to whom the
101739a20eSAndy Ritger  * Software is furnished to do so, subject to the following conditions:
111739a20eSAndy Ritger  *
121739a20eSAndy Ritger  * The above copyright notice and this permission notice shall be included in
131739a20eSAndy Ritger  * all copies or substantial portions of the Software.
141739a20eSAndy Ritger  *
151739a20eSAndy Ritger  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
161739a20eSAndy Ritger  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
171739a20eSAndy Ritger  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
181739a20eSAndy Ritger  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
191739a20eSAndy Ritger  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
201739a20eSAndy Ritger  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
211739a20eSAndy Ritger  * DEALINGS IN THE SOFTWARE.
221739a20eSAndy Ritger  */
231739a20eSAndy Ritger 
24eb5c7665SAndy Ritger #define NVOC_KERNEL_NVLINK_H_PRIVATE_ACCESS_ALLOWED
25eb5c7665SAndy Ritger 
26eb5c7665SAndy Ritger // FIXME XXX
27eb5c7665SAndy Ritger #define NVOC_KERNEL_IOCTRL_H_PRIVATE_ACCESS_ALLOWED
28eb5c7665SAndy Ritger 
291739a20eSAndy Ritger #include "os/os.h"
301739a20eSAndy Ritger #include "core/hal.h"
311739a20eSAndy Ritger #include "core/info_block.h"
321739a20eSAndy Ritger #include "core/locks.h"
331739a20eSAndy Ritger #include "gpu/gpu.h"
341739a20eSAndy Ritger #include "kernel/gpu/nvlink/kernel_nvlink.h"
351739a20eSAndy Ritger #include "kernel/gpu/nvlink/kernel_ioctrl.h"
361739a20eSAndy Ritger #include "gpu/mem_mgr/mem_mgr.h"
371739a20eSAndy Ritger #include "gpu/mmu/kern_gmmu.h"
381739a20eSAndy Ritger #include "gpu/ce/kernel_ce.h"
391739a20eSAndy Ritger 
401739a20eSAndy Ritger /*!
411739a20eSAndy Ritger  * @brief Is NVLINK topology forced? NVLink topology is considered
421739a20eSAndy Ritger  *        forced for both legacy forced config and chiplib configs
431739a20eSAndy Ritger  *
441739a20eSAndy Ritger  * @param[in] pGpu           OBJGPU
451739a20eSAndy Ritger  * @param[in] pKernelNvlink  KernelNvlink pointer
461739a20eSAndy Ritger  *
471739a20eSAndy Ritger  * @return  NV_TRUE if topology is forced
481739a20eSAndy Ritger  */
491739a20eSAndy Ritger NvBool
501739a20eSAndy Ritger knvlinkIsForcedConfig_IMPL
511739a20eSAndy Ritger (
521739a20eSAndy Ritger     OBJGPU       *pGpu,
531739a20eSAndy Ritger     KernelNvlink *pKernelNvlink
541739a20eSAndy Ritger )
551739a20eSAndy Ritger {
561739a20eSAndy Ritger     return (pKernelNvlink->bChiplibConfig);
571739a20eSAndy Ritger }
581739a20eSAndy Ritger 
591739a20eSAndy Ritger /*!
601739a20eSAndy Ritger  * @brief Determine if NVLink is enabled or disabled by default
611739a20eSAndy Ritger  *
621739a20eSAndy Ritger  * @param[in] pGpu           OBJGPU pointer
631739a20eSAndy Ritger  * @param[in] pKernelNvlink  KernelNvlink pointer
641739a20eSAndy Ritger  *
651739a20eSAndy Ritger  * @return  NV_TRUE if NVLink is enabled on the GPU/platform
661739a20eSAndy Ritger  */
671739a20eSAndy Ritger NvBool
681739a20eSAndy Ritger knvlinkIsNvlinkDefaultEnabled_IMPL
691739a20eSAndy Ritger (
701739a20eSAndy Ritger     OBJGPU       *pGpu,
711739a20eSAndy Ritger     KernelNvlink *pKernelNvlink
721739a20eSAndy Ritger )
731739a20eSAndy Ritger {
741739a20eSAndy Ritger     //
751739a20eSAndy Ritger     // Currently it is critical that the following lib check be present.
761739a20eSAndy Ritger     // Burying this in the hal below it may get lost as the stub is all
771739a20eSAndy Ritger     // thats required for POR (always true from the hals perspective)
781739a20eSAndy Ritger     //
791739a20eSAndy Ritger #if !defined(INCLUDE_NVLINK_LIB)
801739a20eSAndy Ritger 
811739a20eSAndy Ritger     return NV_FALSE;
821739a20eSAndy Ritger 
831739a20eSAndy Ritger #endif
841739a20eSAndy Ritger 
851739a20eSAndy Ritger     // Let the PDB handle the final decision.
861739a20eSAndy Ritger     return pKernelNvlink->getProperty(pKernelNvlink, PDB_PROP_KNVLINK_ENABLED);
871739a20eSAndy Ritger }
881739a20eSAndy Ritger 
891739a20eSAndy Ritger /*!
901739a20eSAndy Ritger  * @brief Determine if P2P loopback over NVLink is supported for
911739a20eSAndy Ritger  *        the given GPU. This function returns true if any link
921739a20eSAndy Ritger  *        is connected in loopback mode.
931739a20eSAndy Ritger  *
941739a20eSAndy Ritger  * @param[in] pGpu           OBJGPU pointer
951739a20eSAndy Ritger  * @param[in] pKernelNvlink  KernelNvlink pointer
961739a20eSAndy Ritger  *
971739a20eSAndy Ritger  * @return  NV_TRUE if any link is in loopback mode
981739a20eSAndy Ritger  */
991739a20eSAndy Ritger NvBool
1001739a20eSAndy Ritger knvlinkIsP2pLoopbackSupported_IMPL
1011739a20eSAndy Ritger (
1021739a20eSAndy Ritger     OBJGPU       *pGpu,
1031739a20eSAndy Ritger     KernelNvlink *pKernelNvlink
1041739a20eSAndy Ritger )
1051739a20eSAndy Ritger {
1061739a20eSAndy Ritger #if defined(INCLUDE_NVLINK_LIB)
1071739a20eSAndy Ritger 
1081739a20eSAndy Ritger     NvU32 i;
1091739a20eSAndy Ritger 
1101739a20eSAndy Ritger     if ((pGpu == NULL) || (pKernelNvlink == NULL))
1111739a20eSAndy Ritger     {
1121739a20eSAndy Ritger         return NV_FALSE;
1131739a20eSAndy Ritger     }
1141739a20eSAndy Ritger 
1151739a20eSAndy Ritger     // Return false if P2P loopback is disabled through regkey
1161739a20eSAndy Ritger     if (pGpu->getProperty(pGpu, PDB_PROP_GPU_NVLINK_P2P_LOOPBACK_DISABLED))
1171739a20eSAndy Ritger     {
1181739a20eSAndy Ritger         return NV_FALSE;
1191739a20eSAndy Ritger     }
1201739a20eSAndy Ritger 
1211739a20eSAndy Ritger     FOR_EACH_INDEX_IN_MASK(32, i, pKernelNvlink->enabledLinks)
1221739a20eSAndy Ritger     {
1231739a20eSAndy Ritger         if (knvlinkIsP2pLoopbackSupportedPerLink_IMPL(pGpu, pKernelNvlink, i))
1241739a20eSAndy Ritger             return NV_TRUE;
1251739a20eSAndy Ritger     }
1261739a20eSAndy Ritger     FOR_EACH_INDEX_IN_MASK_END
1271739a20eSAndy Ritger 
1281739a20eSAndy Ritger #endif
1291739a20eSAndy Ritger 
1301739a20eSAndy Ritger     return NV_FALSE;
1311739a20eSAndy Ritger }
1321739a20eSAndy Ritger 
1331739a20eSAndy Ritger /*!
1341739a20eSAndy Ritger  * @brief Determine if P2P loopback over NVLink is supported for
1351739a20eSAndy Ritger  *        the given link. This function returns true if the link
1361739a20eSAndy Ritger  *        is connected in loopback mode.
1371739a20eSAndy Ritger  *
1381739a20eSAndy Ritger  * @param[in] pGpu           OBJGPU pointer
1391739a20eSAndy Ritger  * @param[in] pKernelNvlink  KernelNvlink pointer
1401739a20eSAndy Ritger  * @param[in] link           Link ID
1411739a20eSAndy Ritger  *
1421739a20eSAndy Ritger  * @return  NV_TRUE if the link is in loopback mode
1431739a20eSAndy Ritger  */
1441739a20eSAndy Ritger NvBool
1451739a20eSAndy Ritger knvlinkIsP2pLoopbackSupportedPerLink_IMPL
1461739a20eSAndy Ritger (
1471739a20eSAndy Ritger     OBJGPU       *pGpu,
1481739a20eSAndy Ritger     KernelNvlink *pKernelNvlink,
1491739a20eSAndy Ritger     NvU32         link
1501739a20eSAndy Ritger )
1511739a20eSAndy Ritger {
1521739a20eSAndy Ritger #if defined(INCLUDE_NVLINK_LIB)
1531739a20eSAndy Ritger 
1541739a20eSAndy Ritger    if ((pGpu == NULL) || (pKernelNvlink == NULL))
1551739a20eSAndy Ritger     {
1561739a20eSAndy Ritger         return NV_FALSE;
1571739a20eSAndy Ritger     }
1581739a20eSAndy Ritger 
1591739a20eSAndy Ritger     // Return false if P2P loopback is disabled through regkey
1601739a20eSAndy Ritger     if (pGpu->getProperty(pGpu, PDB_PROP_GPU_NVLINK_P2P_LOOPBACK_DISABLED))
1611739a20eSAndy Ritger     {
1621739a20eSAndy Ritger         return NV_FALSE;
1631739a20eSAndy Ritger     }
1641739a20eSAndy Ritger 
1651739a20eSAndy Ritger     // Return false if the given link is disabled
1661739a20eSAndy Ritger     if (!(NVBIT(link) & pKernelNvlink->enabledLinks))
1671739a20eSAndy Ritger     {
1681739a20eSAndy Ritger         return NV_FALSE;
1691739a20eSAndy Ritger     }
1701739a20eSAndy Ritger 
1711739a20eSAndy Ritger     // Check the link connected to the same GPU (loopback)
1721739a20eSAndy Ritger     if (pKernelNvlink->nvlinkLinks[link].remoteEndInfo.bConnected)
1731739a20eSAndy Ritger     {
174*b5bf85a8SAndy Ritger         if (((pKernelNvlink->nvlinkLinks[link].remoteEndInfo.domain   == gpuGetDomain(pGpu)) &&
1751739a20eSAndy Ritger             (pKernelNvlink->nvlinkLinks[link].remoteEndInfo.bus      == gpuGetBus(pGpu))    &&
1761739a20eSAndy Ritger             (pKernelNvlink->nvlinkLinks[link].remoteEndInfo.device   == gpuGetDevice(pGpu)) &&
177*b5bf85a8SAndy Ritger             (pKernelNvlink->nvlinkLinks[link].remoteEndInfo.function == 0)) ||
178*b5bf85a8SAndy Ritger                 pKernelNvlink->PDB_PROP_KNVLINK_FORCED_LOOPBACK_ON_SWITCH_MODE_ENABLED)
1791739a20eSAndy Ritger         {
1801739a20eSAndy Ritger             return NV_TRUE;
1811739a20eSAndy Ritger         }
1821739a20eSAndy Ritger     }
1831739a20eSAndy Ritger 
1841739a20eSAndy Ritger #endif
1851739a20eSAndy Ritger 
1861739a20eSAndy Ritger     return NV_FALSE;
1871739a20eSAndy Ritger }
1881739a20eSAndy Ritger 
1891739a20eSAndy Ritger /*!
1901739a20eSAndy Ritger  * @brief Determine if P2P over NVLINK is supported between 2 GPUs
1911739a20eSAndy Ritger  *
1921739a20eSAndy Ritger  * @param[in] pGpu           OBJGPU pointer for local GPU
1931739a20eSAndy Ritger  * @param[in] pKernelNvlink  KernelNvlink pointer
1941739a20eSAndy Ritger  * @param[in] pPeerGpu       OBJGPU pointer for remote GPU
1951739a20eSAndy Ritger  *
1961739a20eSAndy Ritger  * @return  NV_TRUE if P2P is supported between the 2 GPUs
1971739a20eSAndy Ritger  */
1981739a20eSAndy Ritger NvBool
1991739a20eSAndy Ritger knvlinkIsNvlinkP2pSupported_IMPL
2001739a20eSAndy Ritger (
2011739a20eSAndy Ritger     OBJGPU       *pGpu,
2021739a20eSAndy Ritger     KernelNvlink *pKernelNvlink,
2031739a20eSAndy Ritger     OBJGPU       *pPeerGpu
2041739a20eSAndy Ritger )
2051739a20eSAndy Ritger {
2061739a20eSAndy Ritger     NV_STATUS status = NV_OK;
2071739a20eSAndy Ritger 
2081739a20eSAndy Ritger     if (pKernelNvlink == NULL)
2091739a20eSAndy Ritger     {
2101739a20eSAndy Ritger         return NV_FALSE;
2111739a20eSAndy Ritger     }
2121739a20eSAndy Ritger 
2134397463eSAndy Ritger     if (knvlinkIsBandwidthModeOff(pKernelNvlink))
2144397463eSAndy Ritger     {
2154397463eSAndy Ritger         return NV_FALSE;
2164397463eSAndy Ritger     }
2174397463eSAndy Ritger 
2181739a20eSAndy Ritger     // Get the Nvlink P2P connections from the core library
2191739a20eSAndy Ritger     status = knvlinkGetP2pConnectionStatus(pGpu, pKernelNvlink, pPeerGpu);
2201739a20eSAndy Ritger 
2211739a20eSAndy Ritger     if (status == NV_OK)
2221739a20eSAndy Ritger     {
2231739a20eSAndy Ritger         return NV_TRUE;
2241739a20eSAndy Ritger     }
2251739a20eSAndy Ritger 
2261739a20eSAndy Ritger     return NV_FALSE;
2271739a20eSAndy Ritger }
2281739a20eSAndy Ritger 
229*b5bf85a8SAndy Ritger static NvBool
230*b5bf85a8SAndy Ritger _knvlinkCheckFabricCliqueId
231*b5bf85a8SAndy Ritger (
232*b5bf85a8SAndy Ritger     OBJGPU       *pGpu,
233*b5bf85a8SAndy Ritger     OBJGPU       *pPeerGpu
234*b5bf85a8SAndy Ritger )
235*b5bf85a8SAndy Ritger {
236*b5bf85a8SAndy Ritger     NvU32 cliqueId, peerCliqueId;
237*b5bf85a8SAndy Ritger     NV_STATUS status;
238*b5bf85a8SAndy Ritger 
239*b5bf85a8SAndy Ritger     status = gpuFabricProbeGetFabricCliqueId(pGpu->pGpuFabricProbeInfoKernel,
240*b5bf85a8SAndy Ritger                                              &cliqueId);
241*b5bf85a8SAndy Ritger     if (status != NV_OK)
242*b5bf85a8SAndy Ritger     {
243*b5bf85a8SAndy Ritger         NV_PRINTF(LEVEL_ERROR, "GPU %d failed to get fabric clique Id: 0x%x\n",
244*b5bf85a8SAndy Ritger                                 gpuGetInstance(pGpu), status);
245*b5bf85a8SAndy Ritger         return NV_FALSE;
246*b5bf85a8SAndy Ritger     }
247*b5bf85a8SAndy Ritger 
248*b5bf85a8SAndy Ritger     status = gpuFabricProbeGetFabricCliqueId(pPeerGpu->pGpuFabricProbeInfoKernel,
249*b5bf85a8SAndy Ritger                                              &peerCliqueId);
250*b5bf85a8SAndy Ritger     if (status != NV_OK)
251*b5bf85a8SAndy Ritger     {
252*b5bf85a8SAndy Ritger         NV_PRINTF(LEVEL_ERROR, "GPU %d failed to get fabric clique Id 0x%x\n",
253*b5bf85a8SAndy Ritger                                 gpuGetInstance(pPeerGpu), status);
254*b5bf85a8SAndy Ritger         return NV_FALSE;
255*b5bf85a8SAndy Ritger     }
256*b5bf85a8SAndy Ritger 
257*b5bf85a8SAndy Ritger     if (cliqueId != peerCliqueId)
258*b5bf85a8SAndy Ritger     {
259*b5bf85a8SAndy Ritger         NV_PRINTF(LEVEL_ERROR, "GPU %d and Peer GPU %d cliqueId doesn't match\n",
260*b5bf85a8SAndy Ritger                   gpuGetInstance(pGpu), gpuGetInstance(pPeerGpu));
261*b5bf85a8SAndy Ritger         return NV_FALSE;
262*b5bf85a8SAndy Ritger     }
263*b5bf85a8SAndy Ritger 
264*b5bf85a8SAndy Ritger     return NV_TRUE;
265*b5bf85a8SAndy Ritger }
266*b5bf85a8SAndy Ritger 
2671739a20eSAndy Ritger /*!
2681739a20eSAndy Ritger  * @brief Checks whether necessary the config setup is done to
2691739a20eSAndy Ritger  *        support P2P over NVSwitch
2701739a20eSAndy Ritger  *
2711739a20eSAndy Ritger  * @param[in] pGpu           OBJGPU pointer for local GPU
2721739a20eSAndy Ritger  * @param[in] pKernelNvlink  KernelNvlink pointer
2731739a20eSAndy Ritger  * @param[in] pPeerGpu       OBJGPU pointer for remote GPU
2741739a20eSAndy Ritger  *
2751739a20eSAndy Ritger  * @return  NV_TRUE if P2P over NVSwitch
2761739a20eSAndy Ritger  */
2771739a20eSAndy Ritger NvBool
2781739a20eSAndy Ritger knvlinkCheckNvswitchP2pConfig_IMPL
2791739a20eSAndy Ritger (
2801739a20eSAndy Ritger     OBJGPU       *pGpu,
2811739a20eSAndy Ritger     KernelNvlink *pKernelNvlink,
2821739a20eSAndy Ritger     OBJGPU       *pPeerGpu
2831739a20eSAndy Ritger )
2841739a20eSAndy Ritger {
2851739a20eSAndy Ritger     MemoryManager *pMemoryManager = GPU_GET_MEMORY_MANAGER(pGpu);
2861739a20eSAndy Ritger     NvU64          rangeStart     = knvlinkGetUniqueFabricBaseAddress(pGpu, pKernelNvlink);
2871739a20eSAndy Ritger     NvU64          rangeEnd       = rangeStart + (pMemoryManager->Ram.fbTotalMemSizeMb << 20);
2881739a20eSAndy Ritger     NvU64          peerRangeStart = knvlinkGetUniqueFabricBaseAddress(pPeerGpu,
2891739a20eSAndy Ritger                                                              GPU_GET_KERNEL_NVLINK(pPeerGpu));
2901739a20eSAndy Ritger 
2911739a20eSAndy Ritger     if (knvlinkIsGpuConnectedToNvswitch(pGpu, pKernelNvlink))
2921739a20eSAndy Ritger     {
2931739a20eSAndy Ritger         if (gpuIsSriovEnabled(pGpu))
2941739a20eSAndy Ritger         {
2951739a20eSAndy Ritger             // currently vgpu + switch doesn't support GPA addresing.
2961739a20eSAndy Ritger             return NV_TRUE;
2971739a20eSAndy Ritger         }
2981739a20eSAndy Ritger 
299*b5bf85a8SAndy Ritger         if (gpuFabricProbeIsSupported(pGpu) && gpuFabricProbeIsSupported(pPeerGpu))
300*b5bf85a8SAndy Ritger         {
301*b5bf85a8SAndy Ritger             if (!_knvlinkCheckFabricCliqueId(pGpu, pPeerGpu))
302*b5bf85a8SAndy Ritger             {
303*b5bf85a8SAndy Ritger                 return NV_FALSE;
304*b5bf85a8SAndy Ritger             }
305*b5bf85a8SAndy Ritger         }
306*b5bf85a8SAndy Ritger 
3071739a20eSAndy Ritger         if (knvlinkGetUniqueFabricBaseAddress(pGpu, pKernelNvlink) ==
3081739a20eSAndy Ritger             NVLINK_INVALID_FABRIC_ADDR)
3091739a20eSAndy Ritger         {
3101739a20eSAndy Ritger             NV_PRINTF(LEVEL_ERROR, "GPU %d doesn't have a fabric address\n",
3111739a20eSAndy Ritger                       gpuGetInstance(pGpu));
3121739a20eSAndy Ritger 
3131739a20eSAndy Ritger             return NV_FALSE;
3141739a20eSAndy Ritger         }
3151739a20eSAndy Ritger 
3161739a20eSAndy Ritger         if ((pGpu != pPeerGpu) &&
3171739a20eSAndy Ritger             ((peerRangeStart >= rangeStart) && (peerRangeStart < rangeEnd)))
3181739a20eSAndy Ritger         {
3191739a20eSAndy Ritger             NV_PRINTF(LEVEL_ERROR,
3201739a20eSAndy Ritger                       "GPU %d doesn't have a unique fabric address\n",
3211739a20eSAndy Ritger                       gpuGetInstance(pGpu));
3221739a20eSAndy Ritger 
3231739a20eSAndy Ritger             return NV_FALSE;
3241739a20eSAndy Ritger         }
3251739a20eSAndy Ritger     }
3261739a20eSAndy Ritger     else
3271739a20eSAndy Ritger     {
3281739a20eSAndy Ritger         if (knvlinkGetUniqueFabricBaseAddress(pGpu, pKernelNvlink) !=
3291739a20eSAndy Ritger             NVLINK_INVALID_FABRIC_ADDR)
3301739a20eSAndy Ritger         {
3311739a20eSAndy Ritger             NV_PRINTF(LEVEL_ERROR,
3321739a20eSAndy Ritger                       "non-NVSwitch GPU %d has a valid fabric address\n",
3331739a20eSAndy Ritger                       gpuGetInstance(pGpu));
3341739a20eSAndy Ritger 
3351739a20eSAndy Ritger             return NV_FALSE;
3361739a20eSAndy Ritger         }
3371739a20eSAndy Ritger     }
3381739a20eSAndy Ritger 
3391739a20eSAndy Ritger     return NV_TRUE;
3401739a20eSAndy Ritger }
3411739a20eSAndy Ritger 
3421739a20eSAndy Ritger /*!
3431739a20eSAndy Ritger  * @brief Get Nvlink P2P connections between 2 GPUs
3441739a20eSAndy Ritger  *
3451739a20eSAndy Ritger  * @param[in] pGpu           OBJGPU pointer for local GPU
3461739a20eSAndy Ritger  * @param[in] pKernelNvlink  KernelNvlink pointer
3471739a20eSAndy Ritger  * @param[in] pPeerGpu       OBJGPU pointer for remote GPU
3481739a20eSAndy Ritger  *
3491739a20eSAndy Ritger  * @return  NV_OK if P2P connections are present
3501739a20eSAndy Ritger  */
3511739a20eSAndy Ritger NV_STATUS
3521739a20eSAndy Ritger knvlinkGetP2pConnectionStatus_IMPL
3531739a20eSAndy Ritger (
3541739a20eSAndy Ritger     OBJGPU       *pGpu,
3551739a20eSAndy Ritger     KernelNvlink *pKernelNvlink,
3561739a20eSAndy Ritger     OBJGPU       *pPeerGpu
3571739a20eSAndy Ritger )
3581739a20eSAndy Ritger {
3591739a20eSAndy Ritger     NV_STATUS     status         = NV_OK;
3601739a20eSAndy Ritger     OBJGPU       *pGpu0          = pGpu;
3611739a20eSAndy Ritger     OBJGPU       *pGpu1          = pPeerGpu;
3621739a20eSAndy Ritger     KernelNvlink *pKernelNvlink0 = pKernelNvlink;
3631739a20eSAndy Ritger     KernelNvlink *pKernelNvlink1 = NULL;
3641739a20eSAndy Ritger     NvU32         numPeerLinks   = 0;
3651739a20eSAndy Ritger 
3661739a20eSAndy Ritger     if (pGpu1 == NULL)
3671739a20eSAndy Ritger     {
368*b5bf85a8SAndy Ritger         NV_PRINTF(LEVEL_INFO, "Invalid pPeerGpu.\n");
3691739a20eSAndy Ritger 
3701739a20eSAndy Ritger         return NV_ERR_INVALID_ARGUMENT;
3711739a20eSAndy Ritger     }
3721739a20eSAndy Ritger     else if ((pGpu0 == pGpu1) &&
3731739a20eSAndy Ritger              (pGpu0->getProperty(pGpu0, PDB_PROP_GPU_NVLINK_P2P_LOOPBACK_DISABLED)))
3741739a20eSAndy Ritger     {
3751739a20eSAndy Ritger         // P2P over loopback links are disabled through regkey overrides
3761739a20eSAndy Ritger         NV_PRINTF(LEVEL_INFO, "loopback P2P on GPU%u disabled by regkey\n",
3771739a20eSAndy Ritger                   gpuGetInstance(pGpu0));
3781739a20eSAndy Ritger 
3791739a20eSAndy Ritger         return NV_ERR_NOT_SUPPORTED;
3801739a20eSAndy Ritger     }
3811739a20eSAndy Ritger     else
3821739a20eSAndy Ritger     {
3831739a20eSAndy Ritger         pKernelNvlink1 = GPU_GET_KERNEL_NVLINK(pGpu1);
3841739a20eSAndy Ritger     }
3851739a20eSAndy Ritger 
3861739a20eSAndy Ritger     if (pKernelNvlink1 == NULL)
3871739a20eSAndy Ritger     {
388*b5bf85a8SAndy Ritger         NV_PRINTF(LEVEL_INFO,
3891739a20eSAndy Ritger                   "Input mask contains a GPU on which NVLink is disabled.\n");
3901739a20eSAndy Ritger 
3911739a20eSAndy Ritger         return NV_ERR_INVALID_ARGUMENT;
3921739a20eSAndy Ritger     }
3931739a20eSAndy Ritger 
394758b4ee8SAndy Ritger     if(pKernelNvlink0->bIsGpuDegraded)
395758b4ee8SAndy Ritger     {
396758b4ee8SAndy Ritger         NV_PRINTF(LEVEL_INFO,
397758b4ee8SAndy Ritger                   "NVLink P2P is NOT supported between GPU%d and GPU%d\n",
398758b4ee8SAndy Ritger                   gpuGetInstance(pGpu0), gpuGetInstance(pGpu1));
399758b4ee8SAndy Ritger 
400758b4ee8SAndy Ritger         return NV_ERR_NOT_SUPPORTED;
401758b4ee8SAndy Ritger     }
402758b4ee8SAndy Ritger 
403758b4ee8SAndy Ritger     if(pKernelNvlink1->bIsGpuDegraded)
404758b4ee8SAndy Ritger     {
405758b4ee8SAndy Ritger         NV_PRINTF(LEVEL_INFO,
406758b4ee8SAndy Ritger                   "NVLink P2P is NOT supported between GPU%d and GPU%d\n",
407758b4ee8SAndy Ritger                   gpuGetInstance(pGpu0), gpuGetInstance(pGpu1));
408758b4ee8SAndy Ritger 
409758b4ee8SAndy Ritger         return NV_ERR_NOT_SUPPORTED;
410758b4ee8SAndy Ritger     }
411758b4ee8SAndy Ritger 
4121739a20eSAndy Ritger     if ((IS_RTLSIM(pGpu0) && !pKernelNvlink0->bForceEnableCoreLibRtlsims) ||
4131739a20eSAndy Ritger         knvlinkIsForcedConfig(pGpu0, pKernelNvlink0))
4141739a20eSAndy Ritger     {
4151739a20eSAndy Ritger         // For non-legacy configs.
4161739a20eSAndy Ritger         if (pKernelNvlink0->bChiplibConfig)
4171739a20eSAndy Ritger         {
4181739a20eSAndy Ritger             NV_PRINTF(LEVEL_INFO,
4191739a20eSAndy Ritger                       "NVLink P2P is supported between GPU%d and GPU%d\n",
4201739a20eSAndy Ritger                       gpuGetInstance(pGpu0), gpuGetInstance(pGpu1));
4211739a20eSAndy Ritger 
4221739a20eSAndy Ritger             return NV_OK;
4231739a20eSAndy Ritger         }
4241739a20eSAndy Ritger     }
4251739a20eSAndy Ritger 
4261739a20eSAndy Ritger     // Get the remote ends of the links of local GPU from the nvlink core
4274397463eSAndy Ritger     status = knvlinkCoreGetRemoteDeviceInfo(pGpu0, pKernelNvlink0);
4284397463eSAndy Ritger     if (status != NV_OK)
4294397463eSAndy Ritger     {
4304397463eSAndy Ritger         return status;
4314397463eSAndy Ritger     }
4321739a20eSAndy Ritger 
4331739a20eSAndy Ritger     // Post topology link enable on links of local GPU
4341739a20eSAndy Ritger     status = knvlinkEnableLinksPostTopology_HAL(pGpu0, pKernelNvlink0,
4351739a20eSAndy Ritger                                                 pKernelNvlink0->enabledLinks);
4361739a20eSAndy Ritger     if (status != NV_OK)
4371739a20eSAndy Ritger     {
4381739a20eSAndy Ritger         return status;
4391739a20eSAndy Ritger     }
4401739a20eSAndy Ritger 
4411739a20eSAndy Ritger     numPeerLinks = knvlinkGetNumLinksToPeer(pGpu0, pKernelNvlink0, pGpu1);
442758b4ee8SAndy Ritger 
443758b4ee8SAndy Ritger     //
444758b4ee8SAndy Ritger     // Maybe knvlinkCoreGetRemoteDeviceInfo was never called on pGpu1.
445758b4ee8SAndy Ritger     // This can happen on systems where FM doesn't configure GPUs
446758b4ee8SAndy Ritger     // using RM control calls explicitly.
447758b4ee8SAndy Ritger     //
448758b4ee8SAndy Ritger     if ((numPeerLinks == 0) && gpuFabricProbeIsSupported(pGpu1))
449758b4ee8SAndy Ritger     {
450758b4ee8SAndy Ritger         knvlinkCoreGetRemoteDeviceInfo(pGpu1, pKernelNvlink1);
451758b4ee8SAndy Ritger 
452758b4ee8SAndy Ritger         // Post topology link enable on links of remote GPU
453758b4ee8SAndy Ritger         status = knvlinkEnableLinksPostTopology_HAL(pGpu1, pKernelNvlink1,
454758b4ee8SAndy Ritger                                                     pKernelNvlink1->enabledLinks);
455758b4ee8SAndy Ritger         if (status != NV_OK)
456758b4ee8SAndy Ritger         {
457758b4ee8SAndy Ritger             return status;
458758b4ee8SAndy Ritger         }
459758b4ee8SAndy Ritger 
460758b4ee8SAndy Ritger         numPeerLinks = knvlinkGetNumLinksToPeer(pGpu0, pKernelNvlink0, pGpu1);
461758b4ee8SAndy Ritger     }
462758b4ee8SAndy Ritger 
4631739a20eSAndy Ritger     if (numPeerLinks > 0)
4641739a20eSAndy Ritger     {
4651739a20eSAndy Ritger         if (knvlinkGetNumLinksToPeer(pGpu1, pKernelNvlink1, pGpu0) != numPeerLinks)
4661739a20eSAndy Ritger         {
4671739a20eSAndy Ritger             // Get the remote ends of the links of remote GPU from the nvlink core
4684397463eSAndy Ritger             status = knvlinkCoreGetRemoteDeviceInfo(pGpu1, pKernelNvlink1);
4694397463eSAndy Ritger             if (status != NV_OK)
4704397463eSAndy Ritger             {
4714397463eSAndy Ritger                 return status;
4724397463eSAndy Ritger             }
4731739a20eSAndy Ritger 
4741739a20eSAndy Ritger             // Post topology link enable on links of remote GPU
4751739a20eSAndy Ritger             status = knvlinkEnableLinksPostTopology_HAL(pGpu1, pKernelNvlink1,
4761739a20eSAndy Ritger                                                         pKernelNvlink1->enabledLinks);
4771739a20eSAndy Ritger             if (status != NV_OK)
4781739a20eSAndy Ritger             {
4791739a20eSAndy Ritger                 return status;
4801739a20eSAndy Ritger             }
4811739a20eSAndy Ritger         }
4821739a20eSAndy Ritger 
4831739a20eSAndy Ritger         // Peers should have the same number of links pointing back at us
484*b5bf85a8SAndy Ritger         NV_CHECK_OR_RETURN(LEVEL_INFO,
485*b5bf85a8SAndy Ritger             (knvlinkGetNumLinksToPeer(pGpu1, pKernelNvlink1, pGpu0) == numPeerLinks),
4861739a20eSAndy Ritger             NV_ERR_INVALID_STATE);
4871739a20eSAndy Ritger 
488*b5bf85a8SAndy Ritger         NV_CHECK_OR_RETURN(LEVEL_INFO,
489*b5bf85a8SAndy Ritger                 knvlinkCheckNvswitchP2pConfig(pGpu0, pKernelNvlink0, pGpu1),
490*b5bf85a8SAndy Ritger                 NV_ERR_INVALID_STATE);
491*b5bf85a8SAndy Ritger 
492*b5bf85a8SAndy Ritger         NV_CHECK_OR_RETURN(LEVEL_INFO,
493*b5bf85a8SAndy Ritger                 knvlinkCheckNvswitchP2pConfig(pGpu1, pKernelNvlink1, pGpu0),
4941739a20eSAndy Ritger                 NV_ERR_INVALID_STATE);
4951739a20eSAndy Ritger 
4961739a20eSAndy Ritger         NV_PRINTF(LEVEL_INFO,
4971739a20eSAndy Ritger                   "NVLink P2P is supported between GPU%d and GPU%d\n",
4981739a20eSAndy Ritger                   gpuGetInstance(pGpu0), gpuGetInstance(pGpu1));
4991739a20eSAndy Ritger 
5001739a20eSAndy Ritger         return NV_OK;
5011739a20eSAndy Ritger     }
5021739a20eSAndy Ritger 
5031739a20eSAndy Ritger     NV_PRINTF(LEVEL_INFO,
5041739a20eSAndy Ritger               "NVLink P2P is NOT supported between between GPU%d and GPU%d\n",
5051739a20eSAndy Ritger               pGpu->gpuInstance, pGpu1->gpuInstance);
5061739a20eSAndy Ritger 
5071739a20eSAndy Ritger     return NV_ERR_NOT_SUPPORTED;
5081739a20eSAndy Ritger }
5091739a20eSAndy Ritger 
5101739a20eSAndy Ritger /*!
5111739a20eSAndy Ritger  * @brief Update the settings for the current established NVLink
5121739a20eSAndy Ritger  *        topology. This is the top level function that should be
5131739a20eSAndy Ritger  *        called, instead of applying the settings individually,
5141739a20eSAndy Ritger  *        since it grabs the required locks
5151739a20eSAndy Ritger  *
5161739a20eSAndy Ritger  * @param[in] pGpu           OBJGPU pointer
5171739a20eSAndy Ritger  * @param[in] pKernelNvlink  KernelNvlink pointer
5181739a20eSAndy Ritger  *
5191739a20eSAndy Ritger  * @return  NV_OK on success
5201739a20eSAndy Ritger  */
5211739a20eSAndy Ritger NV_STATUS
5221739a20eSAndy Ritger knvlinkUpdateCurrentConfig_IMPL
5231739a20eSAndy Ritger (
5241739a20eSAndy Ritger     OBJGPU       *pGpu,
5251739a20eSAndy Ritger     KernelNvlink *pKernelNvlink
5261739a20eSAndy Ritger )
5271739a20eSAndy Ritger {
5281739a20eSAndy Ritger     OBJSYS    *pSys      = SYS_GET_INSTANCE();
5291739a20eSAndy Ritger     KernelCE  *pKCe      = NULL;
5301739a20eSAndy Ritger     NvBool     bOwnsLock = NV_FALSE;
5311739a20eSAndy Ritger     NV_STATUS  status    = NV_OK;
5321739a20eSAndy Ritger 
5331739a20eSAndy Ritger     if (osAcquireRmSema(pSys->pSema) == NV_OK)
5341739a20eSAndy Ritger     {
5351739a20eSAndy Ritger         //
5361739a20eSAndy Ritger         // XXX Bug 1795328: Fix P2P path to acquire locks for the GPU
5371739a20eSAndy Ritger         //  Due to platform differences in the P2P path, the GPU lock is not
5381739a20eSAndy Ritger         //  consistently held at this point in the call stack. This function
5391739a20eSAndy Ritger         //  requires exclusive access to RM/PMU data structures to update HSHUB,
5401739a20eSAndy Ritger         //  and therefore requires the GPU lock to be held at this point.
5411739a20eSAndy Ritger         //  This check should be removed once the P2P paths have been updated to
5421739a20eSAndy Ritger         //  acquire the GPU locks consistently for all platforms.
5431739a20eSAndy Ritger         //
5441739a20eSAndy Ritger         if (!rmDeviceGpuLockIsOwner(pGpu->gpuInstance))
5451739a20eSAndy Ritger         {
5461739a20eSAndy Ritger             status = rmDeviceGpuLocksAcquire(pGpu, GPUS_LOCK_FLAGS_NONE,
5471739a20eSAndy Ritger                                              RM_LOCK_MODULES_NVLINK);
5481739a20eSAndy Ritger             if (status != NV_OK)
5491739a20eSAndy Ritger             {
5501739a20eSAndy Ritger                 NV_ASSERT(0);
5511739a20eSAndy Ritger                 goto fail;
5521739a20eSAndy Ritger             }
5531739a20eSAndy Ritger 
5541739a20eSAndy Ritger             bOwnsLock = NV_TRUE;
5551739a20eSAndy Ritger         }
5561739a20eSAndy Ritger 
5571739a20eSAndy Ritger         //
5581739a20eSAndy Ritger         // Links that have remote end detected should have passed RXDET
5591739a20eSAndy Ritger         // Update the mask of connected links and bridged links
5601739a20eSAndy Ritger         //
5611739a20eSAndy Ritger         knvlinkFilterBridgeLinks_HAL(pGpu, pKernelNvlink);
5621739a20eSAndy Ritger 
5631739a20eSAndy Ritger         NV2080_CTRL_NVLINK_UPDATE_CURRENT_CONFIG_PARAMS params;
5641739a20eSAndy Ritger         portMemSet(&params, 0, sizeof(params));
5651739a20eSAndy Ritger 
5661739a20eSAndy Ritger         // Reset timeout to clear any accumulated timeouts from link init
5671739a20eSAndy Ritger         if (IS_GSP_CLIENT(pGpu))
5681739a20eSAndy Ritger         {
5691739a20eSAndy Ritger             threadStateResetTimeout(pGpu);
5701739a20eSAndy Ritger         }
5711739a20eSAndy Ritger 
5721739a20eSAndy Ritger         //
5731739a20eSAndy Ritger         // RPC into GSP-RM for programming the HSHUB, CONNECTION_CFG and LTCS
5741739a20eSAndy Ritger         // registers.
5751739a20eSAndy Ritger         //
5761739a20eSAndy Ritger         status = knvlinkExecGspRmRpc(pGpu, pKernelNvlink,
5771739a20eSAndy Ritger                                      NV2080_CTRL_CMD_NVLINK_UPDATE_CURRENT_CONFIG,
5781739a20eSAndy Ritger                                      (void *)&params, sizeof(params));
5791739a20eSAndy Ritger         if (status != NV_OK)
5801739a20eSAndy Ritger         {
5811739a20eSAndy Ritger             NV_PRINTF(LEVEL_ERROR, "Updating current NVLink config failed\n");
5821739a20eSAndy Ritger             goto fail;
5831739a20eSAndy Ritger         }
5841739a20eSAndy Ritger 
5851739a20eSAndy Ritger         // Sync the GPU property for NVLINK over SYSMEM with GSP-RM
5861739a20eSAndy Ritger         pGpu->setProperty(pGpu, PDB_PROP_GPU_NVLINK_SYSMEM, params.bNvlinkSysmemEnabled);
5871739a20eSAndy Ritger 
5881739a20eSAndy Ritger         // Update the PCE-LCE mappings
589758b4ee8SAndy Ritger         status = kceFindFirstInstance(pGpu, &pKCe);
590758b4ee8SAndy Ritger         if (status == NV_OK)
5911739a20eSAndy Ritger         {
5921739a20eSAndy Ritger             status = kceTopLevelPceLceMappingsUpdate(pGpu, pKCe);
5931739a20eSAndy Ritger             if (status != NV_OK)
5941739a20eSAndy Ritger             {
5951739a20eSAndy Ritger                 NV_PRINTF(LEVEL_ERROR, "Failed to update PCE-LCE mappings\n");
5961739a20eSAndy Ritger             }
5971739a20eSAndy Ritger         }
5981739a20eSAndy Ritger 
5991739a20eSAndy Ritger fail:
6001739a20eSAndy Ritger         if (bOwnsLock)
6011739a20eSAndy Ritger         {
6021739a20eSAndy Ritger             rmDeviceGpuLocksRelease(pGpu, GPUS_LOCK_FLAGS_NONE, NULL);
6031739a20eSAndy Ritger         }
6041739a20eSAndy Ritger 
6051739a20eSAndy Ritger         osReleaseRmSema(pSys->pSema, NULL);
6061739a20eSAndy Ritger     }
6071739a20eSAndy Ritger 
6081739a20eSAndy Ritger     return status;
6091739a20eSAndy Ritger }
6101739a20eSAndy Ritger 
6111739a20eSAndy Ritger /*!
612758b4ee8SAndy Ritger  * @brief Clients to register their callback functions for inband data
613758b4ee8SAndy Ritger  *
614758b4ee8SAndy Ritger  * @param[in] pGpu           OBJGPU pointer
615758b4ee8SAndy Ritger  * @param[in] pKernelNvlink  KernelNvlink pointer
616758b4ee8SAndy Ritger  * @param[in] params         callback functions
617758b4ee8SAndy Ritger  */
618758b4ee8SAndy Ritger NV_STATUS
619758b4ee8SAndy Ritger knvlinkRegisterInbandCallback_IMPL
620758b4ee8SAndy Ritger (
621758b4ee8SAndy Ritger     OBJGPU *pGpu,
622758b4ee8SAndy Ritger     KernelNvlink *pKernelNvlink,
623758b4ee8SAndy Ritger     NVLINK_INBAND_MSG_CALLBACK *params
624758b4ee8SAndy Ritger )
625758b4ee8SAndy Ritger {
626758b4ee8SAndy Ritger     if (params->messageType >= NVLINK_INBAND_MSG_TYPE_MAX)
627758b4ee8SAndy Ritger     {
628758b4ee8SAndy Ritger         NV_PRINTF(LEVEL_ERROR, "Wrong msgType. Not registering\n");
629758b4ee8SAndy Ritger         return NV_ERR_INVALID_PARAMETER;
630758b4ee8SAndy Ritger     }
631758b4ee8SAndy Ritger 
632758b4ee8SAndy Ritger     if (pKernelNvlink->inbandCallback[params->messageType].pCallback != NULL)
633758b4ee8SAndy Ritger     {
634758b4ee8SAndy Ritger         NV_PRINTF(LEVEL_ERROR, "Callback has been already registered"
635758b4ee8SAndy Ritger                                          "for msgType %d\n", params->messageType);
636758b4ee8SAndy Ritger         return NV_ERR_IN_USE;
637758b4ee8SAndy Ritger     }
638758b4ee8SAndy Ritger 
639758b4ee8SAndy Ritger     pKernelNvlink->inbandCallback[params->messageType].pCallback = params->pCallback;
640758b4ee8SAndy Ritger     pKernelNvlink->inbandCallback[params->messageType].wqItemFlags = params->wqItemFlags;
641758b4ee8SAndy Ritger 
642758b4ee8SAndy Ritger     return NV_OK;
643758b4ee8SAndy Ritger }
644758b4ee8SAndy Ritger 
645758b4ee8SAndy Ritger /*!
646758b4ee8SAndy Ritger  * @brief Clients to unregister their callback functions for inband data
647758b4ee8SAndy Ritger  *
648758b4ee8SAndy Ritger  * @param[in] pGpu           OBJGPU pointer
649758b4ee8SAndy Ritger  * @param[in] pKernelNvlink  KernelNvlink pointer
650758b4ee8SAndy Ritger  * @param[in] msgType        Inband Message type
651758b4ee8SAndy Ritger  */
652758b4ee8SAndy Ritger NV_STATUS
653758b4ee8SAndy Ritger knvlinkUnregisterInbandCallback_IMPL
654758b4ee8SAndy Ritger (
655758b4ee8SAndy Ritger     OBJGPU *pGpu,
656758b4ee8SAndy Ritger     KernelNvlink *pKernelNvlink,
657758b4ee8SAndy Ritger     NvU16 msgType
658758b4ee8SAndy Ritger )
659758b4ee8SAndy Ritger {
660758b4ee8SAndy Ritger     if (msgType >= NVLINK_INBAND_MSG_TYPE_MAX)
661758b4ee8SAndy Ritger     {
662758b4ee8SAndy Ritger         NV_PRINTF(LEVEL_ERROR, "Wrong msgType. Not unregistering\n");
663758b4ee8SAndy Ritger         return NV_ERR_INVALID_PARAMETER;
664758b4ee8SAndy Ritger     }
665758b4ee8SAndy Ritger 
666758b4ee8SAndy Ritger     pKernelNvlink->inbandCallback[msgType].pCallback = NULL;
667758b4ee8SAndy Ritger     pKernelNvlink->inbandCallback[msgType].wqItemFlags = 0;
668758b4ee8SAndy Ritger 
669758b4ee8SAndy Ritger     return NV_OK;
670758b4ee8SAndy Ritger }
671758b4ee8SAndy Ritger 
672758b4ee8SAndy Ritger void
673758b4ee8SAndy Ritger knvlinkInbandMsgCallbackDispatcher_WORKITEM
674758b4ee8SAndy Ritger (
675758b4ee8SAndy Ritger     NvU32 gpuInstance,
676758b4ee8SAndy Ritger     void *pData
677758b4ee8SAndy Ritger )
678758b4ee8SAndy Ritger {
679758b4ee8SAndy Ritger     OBJGPU *pGpu    = NULL;
680758b4ee8SAndy Ritger     nvlink_inband_msg_header_t *pHeader;
681758b4ee8SAndy Ritger     KernelNvlink *pKernelNvlink;
682758b4ee8SAndy Ritger     NV2080_CTRL_NVLINK_INBAND_RECEIVED_DATA_PARAMS *pMessage = pData;
683758b4ee8SAndy Ritger     NvU8 *pRsvd = NULL;
684758b4ee8SAndy Ritger 
685758b4ee8SAndy Ritger     pGpu =  gpumgrGetGpu(gpuInstance);
686758b4ee8SAndy Ritger     if (pGpu == NULL)
687758b4ee8SAndy Ritger     {
688758b4ee8SAndy Ritger         NV_PRINTF(LEVEL_ERROR, "Invalid GPU\n");
689758b4ee8SAndy Ritger         return;
690758b4ee8SAndy Ritger     }
691758b4ee8SAndy Ritger 
692758b4ee8SAndy Ritger     pKernelNvlink = GPU_GET_KERNEL_NVLINK(pGpu);
693758b4ee8SAndy Ritger     if (pKernelNvlink == NULL)
694758b4ee8SAndy Ritger     {
695758b4ee8SAndy Ritger         NV_PRINTF(LEVEL_ERROR, "Invalid NVLink state\n");
696758b4ee8SAndy Ritger         return;
697758b4ee8SAndy Ritger     }
698758b4ee8SAndy Ritger 
699758b4ee8SAndy Ritger     pHeader = (nvlink_inband_msg_header_t *)pMessage->data;
700758b4ee8SAndy Ritger 
701eb5c7665SAndy Ritger     if (pKernelNvlink->inbandCallback[pHeader->type].pCallback == NULL)
702eb5c7665SAndy Ritger     {
703eb5c7665SAndy Ritger         NV_PRINTF(LEVEL_ERROR,
704eb5c7665SAndy Ritger                   "No Callback Registered for type %d. Dropping the msg\n",
705eb5c7665SAndy Ritger                   pHeader->type);
706eb5c7665SAndy Ritger         return;
707eb5c7665SAndy Ritger     }
708eb5c7665SAndy Ritger 
709758b4ee8SAndy Ritger     // Assert reserved in msgHdr are zero
710758b4ee8SAndy Ritger     pRsvd = &pHeader->reserved[0];
711758b4ee8SAndy Ritger     NV_ASSERT((pRsvd[0] == 0) && portMemCmp(pRsvd, pRsvd + 1,
712758b4ee8SAndy Ritger               sizeof(pHeader->reserved) - 1) == 0);
713758b4ee8SAndy Ritger 
714eb5c7665SAndy Ritger     (void)pKernelNvlink->inbandCallback[pHeader->type].pCallback(gpuInstance, pData);
715758b4ee8SAndy Ritger }
716758b4ee8SAndy Ritger 
717758b4ee8SAndy Ritger NV_STATUS
718758b4ee8SAndy Ritger knvlinkInbandMsgCallbackDispatcher_IMPL
719758b4ee8SAndy Ritger (
720758b4ee8SAndy Ritger     OBJGPU *pGpu,
721758b4ee8SAndy Ritger     KernelNvlink *pKernelNvlink,
722758b4ee8SAndy Ritger     NvU32 dataSize,
723758b4ee8SAndy Ritger     NvU8  *pMessage
724758b4ee8SAndy Ritger )
725758b4ee8SAndy Ritger {
726758b4ee8SAndy Ritger     NV_STATUS status;
727758b4ee8SAndy Ritger     nvlink_inband_msg_header_t *pHeader;
728758b4ee8SAndy Ritger     NVLINK_INBAND_MSG_CALLBACK *pParams;
729758b4ee8SAndy Ritger     NV2080_CTRL_NVLINK_INBAND_RECEIVED_DATA_PARAMS *pData = NULL;
730758b4ee8SAndy Ritger 
731758b4ee8SAndy Ritger     pHeader = (nvlink_inband_msg_header_t *)pMessage;
732758b4ee8SAndy Ritger 
733758b4ee8SAndy Ritger     if (pHeader->type >= NVLINK_INBAND_MSG_TYPE_MAX)
734758b4ee8SAndy Ritger     {
735758b4ee8SAndy Ritger         NV_PRINTF(LEVEL_ERROR, "Message type received is Out of Bounds. Dropping  the msg\n");
736758b4ee8SAndy Ritger         return NV_ERR_INVALID_REQUEST;
737758b4ee8SAndy Ritger     }
738758b4ee8SAndy Ritger 
739758b4ee8SAndy Ritger     pParams = &pKernelNvlink->inbandCallback[pHeader->type];
740758b4ee8SAndy Ritger     if (pParams->pCallback == NULL)
741758b4ee8SAndy Ritger     {
742758b4ee8SAndy Ritger         NV_PRINTF(LEVEL_ERROR, "Callback not registered for the message type %d\n", pHeader->type);
743758b4ee8SAndy Ritger         return NV_ERR_INVALID_REQUEST;
744758b4ee8SAndy Ritger     }
745758b4ee8SAndy Ritger 
746758b4ee8SAndy Ritger     pData = portMemAllocNonPaged(sizeof(NV2080_CTRL_NVLINK_INBAND_RECEIVED_DATA_PARAMS));
747758b4ee8SAndy Ritger     if (pData == NULL)
748758b4ee8SAndy Ritger     {
749758b4ee8SAndy Ritger         NV_PRINTF(LEVEL_ERROR, "Out of memory, Dropping message\n");
750758b4ee8SAndy Ritger         return NV_ERR_NO_MEMORY;
751758b4ee8SAndy Ritger     }
752758b4ee8SAndy Ritger 
753758b4ee8SAndy Ritger     pData->dataSize = dataSize;
754758b4ee8SAndy Ritger     portMemCopy(pData->data, pData->dataSize, pMessage, dataSize);
755758b4ee8SAndy Ritger 
756*b5bf85a8SAndy Ritger     status = osQueueWorkItemWithFlags(pGpu, knvlinkInbandMsgCallbackDispatcher_WORKITEM, pData,
757758b4ee8SAndy Ritger                                       pParams->wqItemFlags);
758758b4ee8SAndy Ritger      if (status != NV_OK)
759758b4ee8SAndy Ritger      {
760758b4ee8SAndy Ritger         portMemFree(pData);
761758b4ee8SAndy Ritger         return status;
762758b4ee8SAndy Ritger      }
763758b4ee8SAndy Ritger 
764758b4ee8SAndy Ritger      return NV_OK;
765758b4ee8SAndy Ritger }
766758b4ee8SAndy Ritger 
767758b4ee8SAndy Ritger NV_STATUS
768758b4ee8SAndy Ritger knvlinkSendInbandData_IMPL
769758b4ee8SAndy Ritger (
770758b4ee8SAndy Ritger     OBJGPU       *pGpu,
771758b4ee8SAndy Ritger     KernelNvlink *pKernelNvlink,
772758b4ee8SAndy Ritger     NV2080_CTRL_NVLINK_INBAND_SEND_DATA_PARAMS *pParams
773758b4ee8SAndy Ritger )
774758b4ee8SAndy Ritger {
775758b4ee8SAndy Ritger     NV_STATUS status;
776758b4ee8SAndy Ritger 
777758b4ee8SAndy Ritger     status = knvlinkExecGspRmRpc(pGpu, pKernelNvlink,
778758b4ee8SAndy Ritger                                  NV2080_CTRL_CMD_NVLINK_INBAND_SEND_DATA,
779758b4ee8SAndy Ritger                                  (void *)pParams,
780758b4ee8SAndy Ritger                                  sizeof(*pParams));
781758b4ee8SAndy Ritger 
782758b4ee8SAndy Ritger     return status;
783758b4ee8SAndy Ritger }
784758b4ee8SAndy Ritger /*!
7851739a20eSAndy Ritger  * @brief Return the mask of links enabled on the system
7861739a20eSAndy Ritger  *
7871739a20eSAndy Ritger  * @param[in] pGpu           OBJGPU pointer
7881739a20eSAndy Ritger  * @param[in] pKernelNvlink  KernelNvlink pointer
7891739a20eSAndy Ritger  */
7901739a20eSAndy Ritger NvU32
7911739a20eSAndy Ritger knvlinkGetEnabledLinkMask_IMPL
7921739a20eSAndy Ritger (
7931739a20eSAndy Ritger     OBJGPU       *pGpu,
7941739a20eSAndy Ritger     KernelNvlink *pKernelNvlink
7951739a20eSAndy Ritger )
7961739a20eSAndy Ritger {
7971739a20eSAndy Ritger     return pKernelNvlink->enabledLinks;
7981739a20eSAndy Ritger }
7991739a20eSAndy Ritger 
8001739a20eSAndy Ritger /*!
8011739a20eSAndy Ritger  * @brief Return the mask of links discovered on the system
8021739a20eSAndy Ritger  *
8031739a20eSAndy Ritger  * @param[in] pGpu           OBJGPU pointer
8041739a20eSAndy Ritger  * @param[in] pKernelNvlink  KernelNvlink pointer
8051739a20eSAndy Ritger  */
8061739a20eSAndy Ritger NvU32
8071739a20eSAndy Ritger knvlinkGetDiscoveredLinkMask_IMPL
8081739a20eSAndy Ritger (
8091739a20eSAndy Ritger     OBJGPU       *pGpu,
8101739a20eSAndy Ritger     KernelNvlink *pKernelNvlink
8111739a20eSAndy Ritger )
8121739a20eSAndy Ritger {
8131739a20eSAndy Ritger     return pKernelNvlink->discoveredLinks;
8141739a20eSAndy Ritger }
8151739a20eSAndy Ritger 
8161739a20eSAndy Ritger /*!
8171739a20eSAndy Ritger  * @brief Returns the number of sysmem links
8181739a20eSAndy Ritger  *
8191739a20eSAndy Ritger  * @param[in] pGpu           OBJGPU pointer
8201739a20eSAndy Ritger  * @param[in] pKernelNvlink  KernelNvlink pointer
8211739a20eSAndy Ritger  *
8221739a20eSAndy Ritger  * @return  The #sysmem NVLinks
8231739a20eSAndy Ritger  */
8241739a20eSAndy Ritger NvU32
8251739a20eSAndy Ritger knvlinkGetNumLinksToSystem_IMPL
8261739a20eSAndy Ritger (
8271739a20eSAndy Ritger     OBJGPU       *pGpu,
8281739a20eSAndy Ritger     KernelNvlink *pKernelNvlink
8291739a20eSAndy Ritger )
8301739a20eSAndy Ritger {
8311739a20eSAndy Ritger     NvU32 numSysmemLinks = pKernelNvlink->sysmemLinkMask;
8321739a20eSAndy Ritger 
8331739a20eSAndy Ritger     if (numSysmemLinks != 0)
8341739a20eSAndy Ritger     {
8351739a20eSAndy Ritger         NUMSETBITS_32(numSysmemLinks);
8361739a20eSAndy Ritger     }
8371739a20eSAndy Ritger 
8381739a20eSAndy Ritger     return numSysmemLinks;
8391739a20eSAndy Ritger }
8401739a20eSAndy Ritger 
8411739a20eSAndy Ritger /*!
8421739a20eSAndy Ritger  * @brief Returns number of peer links to a remote GPU
8431739a20eSAndy Ritger  *
8441739a20eSAndy Ritger  * @param[in] pGpu             OBJGPU pointer of local GPU
8451739a20eSAndy Ritger  * @param[in] pKernelNvlink    KernelNvlink pointer
8461739a20eSAndy Ritger  * @param[in] pRemoteGpu       OBJGPU pointer of remote GPU
8471739a20eSAndy Ritger  *
8481739a20eSAndy Ritger  * @return  The #peer NVLinks to the remote GPU
8491739a20eSAndy Ritger  */
8501739a20eSAndy Ritger NvU32
8511739a20eSAndy Ritger knvlinkGetNumLinksToPeer_IMPL
8521739a20eSAndy Ritger (
8531739a20eSAndy Ritger     OBJGPU       *pGpu,
8541739a20eSAndy Ritger     KernelNvlink *pKernelNvlink,
8551739a20eSAndy Ritger     OBJGPU       *pRemoteGpu
8561739a20eSAndy Ritger )
8571739a20eSAndy Ritger {
8581739a20eSAndy Ritger     NvU32 numPeerLinks =
8591739a20eSAndy Ritger         knvlinkGetLinkMaskToPeer(pGpu, pKernelNvlink, pRemoteGpu);
8601739a20eSAndy Ritger 
8611739a20eSAndy Ritger     if (numPeerLinks != 0)
8621739a20eSAndy Ritger     {
8631739a20eSAndy Ritger         NUMSETBITS_32(numPeerLinks);
8641739a20eSAndy Ritger     }
8651739a20eSAndy Ritger 
8661739a20eSAndy Ritger     return numPeerLinks;
8671739a20eSAndy Ritger }
8681739a20eSAndy Ritger 
8691739a20eSAndy Ritger /*!
8701739a20eSAndy Ritger  * @brief Gets the mask of peer links between the GPUs
8711739a20eSAndy Ritger  *
8721739a20eSAndy Ritger  * @param[in] pGpu0           OBJGPU pointer
8731739a20eSAndy Ritger  * @param[in] pKernelNvlink0  Nvlink pointer
8741739a20eSAndy Ritger  * @param[in] pGpu1           Remote OBJGPU pointer
8751739a20eSAndy Ritger  *
8761739a20eSAndy Ritger  * @return    Returns the mask of peer links between the GPUs
8771739a20eSAndy Ritger  */
8781739a20eSAndy Ritger NvU32
8791739a20eSAndy Ritger knvlinkGetLinkMaskToPeer_IMPL
8801739a20eSAndy Ritger (
8811739a20eSAndy Ritger     OBJGPU       *pGpu0,
8821739a20eSAndy Ritger     KernelNvlink *pKernelNvlink0,
8831739a20eSAndy Ritger     OBJGPU       *pGpu1
8841739a20eSAndy Ritger )
8851739a20eSAndy Ritger {
8861739a20eSAndy Ritger     NvU32 peerLinkMask = 0;
887758b4ee8SAndy Ritger     KernelNvlink *pKernelNvlink1 = NULL;
888758b4ee8SAndy Ritger 
889758b4ee8SAndy Ritger     pKernelNvlink1 = GPU_GET_KERNEL_NVLINK(pGpu1);
890758b4ee8SAndy Ritger 
891758b4ee8SAndy Ritger     if (pKernelNvlink1 == NULL)
892758b4ee8SAndy Ritger     {
893*b5bf85a8SAndy Ritger         NV_PRINTF(LEVEL_INFO,
894758b4ee8SAndy Ritger                   "on GPU%d NVLink is disabled.\n", gpuGetInstance(pGpu1));
895758b4ee8SAndy Ritger 
896758b4ee8SAndy Ritger         return 0;
897758b4ee8SAndy Ritger     }
898758b4ee8SAndy Ritger 
899758b4ee8SAndy Ritger     if(pKernelNvlink0->bIsGpuDegraded)
900758b4ee8SAndy Ritger     {
901758b4ee8SAndy Ritger         return peerLinkMask;
902758b4ee8SAndy Ritger     }
903758b4ee8SAndy Ritger 
904758b4ee8SAndy Ritger     if(pKernelNvlink1->bIsGpuDegraded)
905758b4ee8SAndy Ritger     {
906758b4ee8SAndy Ritger         return peerLinkMask;
907758b4ee8SAndy Ritger     }
9081739a20eSAndy Ritger 
9091739a20eSAndy Ritger     if (!knvlinkIsForcedConfig(pGpu0, pKernelNvlink0))
9101739a20eSAndy Ritger     {
9111739a20eSAndy Ritger         //
9121739a20eSAndy Ritger         // If nvlink topology is not forced, then the hshub registers
9131739a20eSAndy Ritger         // are updated only when a P2P object is allocated. So, return
9141739a20eSAndy Ritger         // the cached value of mask of links connected to a GPU
9151739a20eSAndy Ritger         //
9161739a20eSAndy Ritger         peerLinkMask = pKernelNvlink0->peerLinkMasks[gpuGetInstance(pGpu1)];
9171739a20eSAndy Ritger     }
9181739a20eSAndy Ritger 
9191739a20eSAndy Ritger     return peerLinkMask;
9201739a20eSAndy Ritger }
9211739a20eSAndy Ritger 
9221739a20eSAndy Ritger /*!
9231739a20eSAndy Ritger  * @brief Sets the mask of peer links between the GPUs
9241739a20eSAndy Ritger  *
9251739a20eSAndy Ritger  * @param[in] pGpu0           OBJGPU pointer
9261739a20eSAndy Ritger  * @param[in] pKernelNvlink0  Nvlink pointer
9271739a20eSAndy Ritger  * @param[in] pGpu1           Remote OBJGPU pointer
9281739a20eSAndy Ritger  * @param[in] peerLinkMask    Mask of links to the peer GPU
9291739a20eSAndy Ritger  *
9301739a20eSAndy Ritger  * @return    NV_OK on success
9311739a20eSAndy Ritger  */
9321739a20eSAndy Ritger NV_STATUS
9331739a20eSAndy Ritger knvlinkSetLinkMaskToPeer_IMPL
9341739a20eSAndy Ritger (
9351739a20eSAndy Ritger     OBJGPU       *pGpu0,
9361739a20eSAndy Ritger     KernelNvlink *pKernelNvlink0,
9371739a20eSAndy Ritger     OBJGPU       *pGpu1,
9381739a20eSAndy Ritger     NvU32         peerLinkMask
9391739a20eSAndy Ritger )
9401739a20eSAndy Ritger {
9411739a20eSAndy Ritger     NV_STATUS status = NV_OK;
9421739a20eSAndy Ritger 
9431739a20eSAndy Ritger     // Return early if no update needed to the peer link mask
9441739a20eSAndy Ritger     if (pKernelNvlink0->peerLinkMasks[gpuGetInstance(pGpu1)] == peerLinkMask)
9451739a20eSAndy Ritger         return NV_OK;
9461739a20eSAndy Ritger 
9471739a20eSAndy Ritger     pKernelNvlink0->peerLinkMasks[gpuGetInstance(pGpu1)] = peerLinkMask;
9481739a20eSAndy Ritger 
9491739a20eSAndy Ritger     NV2080_CTRL_NVLINK_UPDATE_PEER_LINK_MASK_PARAMS params;
9501739a20eSAndy Ritger 
9511739a20eSAndy Ritger     portMemSet(&params, 0, sizeof(params));
9521739a20eSAndy Ritger     params.gpuInst      = gpuGetInstance(pGpu1);
9531739a20eSAndy Ritger     params.peerLinkMask = peerLinkMask;
9541739a20eSAndy Ritger 
9551739a20eSAndy Ritger     // Reset timeout to clear any accumulated timeouts from link init
9561739a20eSAndy Ritger     if (IS_GSP_CLIENT(pGpu0))
9571739a20eSAndy Ritger     {
9581739a20eSAndy Ritger         threadStateResetTimeout(pGpu0);
9591739a20eSAndy Ritger     }
9601739a20eSAndy Ritger 
9611739a20eSAndy Ritger     // Sync the peerLinkMask with GSP-RM
9621739a20eSAndy Ritger     status = knvlinkExecGspRmRpc(pGpu0, pKernelNvlink0,
9631739a20eSAndy Ritger                                  NV2080_CTRL_CMD_NVLINK_UPDATE_PEER_LINK_MASK,
9641739a20eSAndy Ritger                                  (void *)&params, sizeof(params));
9651739a20eSAndy Ritger     if (status != NV_OK)
9661739a20eSAndy Ritger     {
9671739a20eSAndy Ritger         NV_PRINTF(LEVEL_ERROR,
9681739a20eSAndy Ritger                   "Failed to sync peerLinksMask from GPU%d to GPU%d\n",
9691739a20eSAndy Ritger                   gpuGetInstance(pGpu0), gpuGetInstance(pGpu1));
9701739a20eSAndy Ritger         return status;
9711739a20eSAndy Ritger     }
9721739a20eSAndy Ritger 
9731739a20eSAndy Ritger     return NV_OK;
9741739a20eSAndy Ritger }
9751739a20eSAndy Ritger 
9761739a20eSAndy Ritger /*!
9771739a20eSAndy Ritger  * @brief Get the mask of links that are peer links
9781739a20eSAndy Ritger  *
9791739a20eSAndy Ritger  * @param[in] pGpu           OBJGPU pointer
9801739a20eSAndy Ritger  * @param[in] pKernelNvlink  KernelNvlink pointer
9811739a20eSAndy Ritger  */
9821739a20eSAndy Ritger NvU32
9831739a20eSAndy Ritger knvlinkGetPeersNvlinkMaskFromHshub_IMPL
9841739a20eSAndy Ritger (
9851739a20eSAndy Ritger     OBJGPU       *pGpu,
9861739a20eSAndy Ritger     KernelNvlink *pKernelNvlink
9871739a20eSAndy Ritger )
9881739a20eSAndy Ritger {
9891739a20eSAndy Ritger     NV_STATUS status       = NV_OK;
9901739a20eSAndy Ritger     NvU32     peerLinkMask = 0;
9911739a20eSAndy Ritger     NvU32     i;
9921739a20eSAndy Ritger 
9931739a20eSAndy Ritger     NV2080_CTRL_NVLINK_GET_LINK_AND_CLOCK_INFO_PARAMS params;
9941739a20eSAndy Ritger 
9951739a20eSAndy Ritger     portMemSet(&params, 0, sizeof(params));
9961739a20eSAndy Ritger     params.linkMask = pKernelNvlink->enabledLinks;
9971739a20eSAndy Ritger 
9981739a20eSAndy Ritger     status = knvlinkExecGspRmRpc(pGpu, pKernelNvlink,
9991739a20eSAndy Ritger                                  NV2080_CTRL_CMD_NVLINK_GET_LINK_AND_CLOCK_INFO,
10001739a20eSAndy Ritger                                  (void *)&params, sizeof(params));
10011739a20eSAndy Ritger     if (status != NV_OK)
10021739a20eSAndy Ritger         return 0;
10031739a20eSAndy Ritger 
10041739a20eSAndy Ritger     // Scan enabled links for peer connections
10051739a20eSAndy Ritger     FOR_EACH_INDEX_IN_MASK(32, i, pKernelNvlink->enabledLinks)
10061739a20eSAndy Ritger     {
10071739a20eSAndy Ritger         if (params.linkInfo[i].bLinkConnectedToPeer)
10081739a20eSAndy Ritger             peerLinkMask |= NVBIT(i);
10091739a20eSAndy Ritger     }
10101739a20eSAndy Ritger     FOR_EACH_INDEX_IN_MASK_END;
10111739a20eSAndy Ritger 
10121739a20eSAndy Ritger     return peerLinkMask;
10131739a20eSAndy Ritger }
10141739a20eSAndy Ritger 
10151739a20eSAndy Ritger /*!
10161739a20eSAndy Ritger  * @brief Prepare a GPU's NVLink engine for reset by removing mappings
10171739a20eSAndy Ritger  *        to it from other GPUs.
10181739a20eSAndy Ritger  *
10191739a20eSAndy Ritger  * @param[in] pGpu          OBJGPU pointer
10201739a20eSAndy Ritger  * @param[in] pKernelNvlink KernelNvlink pointer
10211739a20eSAndy Ritger  *
10221739a20eSAndy Ritger  * return  NV_OK on success
10231739a20eSAndy Ritger  */
10241739a20eSAndy Ritger NV_STATUS
10251739a20eSAndy Ritger knvlinkPrepareForXVEReset_IMPL
10261739a20eSAndy Ritger (
10271739a20eSAndy Ritger     OBJGPU       *pGpu,
102890eb1077SAndy Ritger     KernelNvlink *pKernelNvlink,
102990eb1077SAndy Ritger     NvBool        bForceShutdown
10301739a20eSAndy Ritger )
10311739a20eSAndy Ritger {
10321739a20eSAndy Ritger     OBJSYS    *pSys      = SYS_GET_INSTANCE();
10331739a20eSAndy Ritger     NV_STATUS  retStatus = NV_OK;
10341739a20eSAndy Ritger     OBJGPU    *pRemoteGpu;
10351739a20eSAndy Ritger     NV_STATUS  status;
10361739a20eSAndy Ritger     NvU32      gpuInstance;
10371739a20eSAndy Ritger     NvU32      gpuMask;
10381739a20eSAndy Ritger 
10391739a20eSAndy Ritger     // This is not supported on forced configs
10401739a20eSAndy Ritger     if (knvlinkIsForcedConfig(pGpu, pKernelNvlink))
10411739a20eSAndy Ritger     {
10421739a20eSAndy Ritger         return NV_OK;
10431739a20eSAndy Ritger     }
10441739a20eSAndy Ritger 
10451739a20eSAndy Ritger     //
10461739a20eSAndy Ritger     // Let fabric manager handle link shutdown/reset if the fabric is managed
10471739a20eSAndy Ritger     // externally.
10481739a20eSAndy Ritger     //
10495f40a5aeSAndy Ritger     if (pKernelNvlink->ipVerNvlink < NVLINK_VERSION_40 &&
10505f40a5aeSAndy Ritger         pSys->getProperty(pSys, PDB_PROP_SYS_FABRIC_IS_EXTERNALLY_MANAGED))
10511739a20eSAndy Ritger     {
10521739a20eSAndy Ritger         NV_PRINTF(LEVEL_INFO,
10531739a20eSAndy Ritger                   "NVLink fabric is externally managed, skipping\n");
10541739a20eSAndy Ritger         return NV_OK;
10551739a20eSAndy Ritger     }
10561739a20eSAndy Ritger 
10571739a20eSAndy Ritger     status = gpumgrGetGpuAttachInfo(NULL, &gpuMask);
10581739a20eSAndy Ritger     NV_ASSERT_OR_RETURN(status == NV_OK, status);
10591739a20eSAndy Ritger 
10601739a20eSAndy Ritger     gpuInstance = 0;
10611739a20eSAndy Ritger     while ((pRemoteGpu = gpumgrGetNextGpu(gpuMask, &gpuInstance)) != NULL)
10621739a20eSAndy Ritger     {
10631739a20eSAndy Ritger         KernelNvlink *pRemoteKernelNvlink = GPU_GET_KERNEL_NVLINK(pRemoteGpu);
10641739a20eSAndy Ritger 
10651739a20eSAndy Ritger         if ((pRemoteGpu == pGpu) || (pRemoteKernelNvlink == NULL) ||
10661739a20eSAndy Ritger             (knvlinkGetNumLinksToPeer(pRemoteGpu, pRemoteKernelNvlink, pGpu) == 0) ||
106790eb1077SAndy Ritger             API_GPU_IN_RESET_SANITY_CHECK(pRemoteGpu) ||
106890eb1077SAndy Ritger             pRemoteGpu->getProperty(pRemoteGpu, PDB_PROP_GPU_IS_LOST))
10691739a20eSAndy Ritger         {
10701739a20eSAndy Ritger             continue;
10711739a20eSAndy Ritger         }
10721739a20eSAndy Ritger 
10731739a20eSAndy Ritger         //
10741739a20eSAndy Ritger         // Reset the peer masks in HSHUB of the remote GPU. Partial resets
10751739a20eSAndy Ritger         // (only removing the links connected to the GPU being reset) don't
10761739a20eSAndy Ritger         // appear to be sufficient. The reset will work fine, but the next
10771739a20eSAndy Ritger         // time we attempt to initialize this GPU, the copy engines will time
10781739a20eSAndy Ritger         // out while scrubbing FB and a GPU sysmembar (NV_UFLUSH_FB_FLUSH) will
10791739a20eSAndy Ritger         // fail to complete.
10801739a20eSAndy Ritger         //
10811739a20eSAndy Ritger         // The above symptoms haven't been root-caused (yet), but the current
10821739a20eSAndy Ritger         // POR for GPU reset is that once one GPU is reset, the others
10831739a20eSAndy Ritger         // connected to it over NVLink must also be reset before using NVLink
10841739a20eSAndy Ritger         // for peer traffic, so just use the big hammer and squash all HSHUB
10851739a20eSAndy Ritger         // configs on GPU reset.
10861739a20eSAndy Ritger         //
10871739a20eSAndy Ritger         // This allows us to reset the GPUs one by one, with GPU
10881739a20eSAndy Ritger         // initializations in between, without hanging up the GPU trying to
10891739a20eSAndy Ritger         // flush data over links that aren't available anymore.
10901739a20eSAndy Ritger         //
109112c07393SBernhard Stoeckner         // Starting from Ampere single GPU reset is supported and hence remove
109212c07393SBernhard Stoeckner         // only the nvlink's of the remote GPU's which are connected to the
109312c07393SBernhard Stoeckner         // current GPU.
109412c07393SBernhard Stoeckner         //
109512c07393SBernhard Stoeckner 
109612c07393SBernhard Stoeckner         if (IsAMPEREorBetter(pGpu))
109712c07393SBernhard Stoeckner         {
109812c07393SBernhard Stoeckner             NvU32 remPeerId = kbusGetPeerId_HAL(pRemoteGpu, GPU_GET_KERNEL_BUS(pRemoteGpu), pGpu);
109912c07393SBernhard Stoeckner             if (remPeerId != BUS_INVALID_PEER)
110012c07393SBernhard Stoeckner                 status = knvlinkRemoveMapping_HAL(pRemoteGpu, pRemoteKernelNvlink, NV_FALSE,
110112c07393SBernhard Stoeckner                                                   NVBIT(remPeerId),
110212c07393SBernhard Stoeckner                                                   NV_FALSE /* bL2Entry */);
110312c07393SBernhard Stoeckner         }
110412c07393SBernhard Stoeckner         else
110512c07393SBernhard Stoeckner         {
11061739a20eSAndy Ritger             status = knvlinkRemoveMapping_HAL(pRemoteGpu, pRemoteKernelNvlink, NV_FALSE,
11071739a20eSAndy Ritger                                               ((1 << NVLINK_MAX_PEERS_SW) - 1),
11081739a20eSAndy Ritger                                               NV_FALSE /* bL2Entry */);
110912c07393SBernhard Stoeckner         }
11101739a20eSAndy Ritger         if (status != NV_OK)
11111739a20eSAndy Ritger         {
11121739a20eSAndy Ritger             NV_PRINTF(LEVEL_ERROR,
11131739a20eSAndy Ritger                       "failed to reset HSHUB on GPU%u while preparing for GPU%u XVE reset (0x%x)\n",
11141739a20eSAndy Ritger                       gpuGetInstance(pRemoteGpu), gpuGetInstance(pGpu),
11151739a20eSAndy Ritger                       status);
11161739a20eSAndy Ritger 
11171739a20eSAndy Ritger             retStatus = (retStatus == NV_OK) ? status : retStatus;
11181739a20eSAndy Ritger         }
11191739a20eSAndy Ritger     }
11201739a20eSAndy Ritger 
11211739a20eSAndy Ritger     // Remove all NVLink mappings in HSHUB config registers to init values
112290eb1077SAndy Ritger     if (!API_GPU_IN_RESET_SANITY_CHECK(pGpu) && !pGpu->getProperty(pGpu, PDB_PROP_GPU_IS_LOST))
11231739a20eSAndy Ritger     status = knvlinkRemoveMapping_HAL(pGpu, pKernelNvlink, NV_TRUE, ((1 << NVLINK_MAX_PEERS_SW) - 1),
11241739a20eSAndy Ritger                                       NV_FALSE /* bL2Entry */);
11251739a20eSAndy Ritger     if (status != NV_OK)
11261739a20eSAndy Ritger     {
11271739a20eSAndy Ritger         NV_PRINTF(LEVEL_ERROR,
11281739a20eSAndy Ritger                   "failed to reset HSHUB on GPU%u while preparing XVE reset: %s (0x%x)\n",
11291739a20eSAndy Ritger                   gpuGetInstance(pGpu), nvstatusToString(status), status);
11301739a20eSAndy Ritger 
11311739a20eSAndy Ritger         retStatus = (retStatus == NV_OK) ? status : retStatus;
11321739a20eSAndy Ritger     }
11331739a20eSAndy Ritger 
113490eb1077SAndy Ritger     //
113590eb1077SAndy Ritger     // If GFW is booted and running through link-training, then no need to tear-down the
113690eb1077SAndy Ritger     // links to reset. Exit out early from the function
113790eb1077SAndy Ritger     //
1138eb5c7665SAndy Ritger     if (!bForceShutdown && pKernelNvlink->getProperty(pKernelNvlink, PDB_PROP_KNVLINK_MINION_GFW_BOOT))
113990eb1077SAndy Ritger     {
114090eb1077SAndy Ritger         return NV_OK;
114190eb1077SAndy Ritger     }
114290eb1077SAndy Ritger 
11431739a20eSAndy Ritger     // Pseudo-clean  shutdown the links from this GPU
114490eb1077SAndy Ritger     status = knvlinkCoreShutdownDeviceLinks(pGpu, pKernelNvlink, bForceShutdown);
11451739a20eSAndy Ritger     if (status != NV_OK)
11461739a20eSAndy Ritger     {
11471739a20eSAndy Ritger         NV_PRINTF(LEVEL_ERROR,
11481739a20eSAndy Ritger                   "failed to shutdown links on GPU%u while preparing XVE reset: %s (0x%x)\n",
11491739a20eSAndy Ritger                   gpuGetInstance(pGpu), nvstatusToString(status), status);
11501739a20eSAndy Ritger 
11511739a20eSAndy Ritger         retStatus = (retStatus == NV_OK) ? status : retStatus;
11521739a20eSAndy Ritger     }
11531739a20eSAndy Ritger 
11541739a20eSAndy Ritger     //
11551739a20eSAndy Ritger     // Reset links related to this device and its peers (see Bug 2346447)
11561739a20eSAndy Ritger     // The property is disabled on Pascal, since the path hasn't been verified
11571739a20eSAndy Ritger     // and link reset after pseudo-clean shutdown results in DL and TL errors.
11581739a20eSAndy Ritger     //
11591739a20eSAndy Ritger     if (pKernelNvlink->getProperty(pKernelNvlink, PDB_PROP_KNVLINK_LINKRESET_AFTER_SHUTDOWN))
11601739a20eSAndy Ritger     {
11611739a20eSAndy Ritger         status = knvlinkCoreResetDeviceLinks(pGpu, pKernelNvlink);
11621739a20eSAndy Ritger         if (status != NV_OK)
11631739a20eSAndy Ritger         {
11641739a20eSAndy Ritger             NV_PRINTF(LEVEL_ERROR,
11651739a20eSAndy Ritger                       "failed to reset links on GPU%u while preparing XVE reset: %s (0x%x)\n",
11661739a20eSAndy Ritger                       gpuGetInstance(pGpu), nvstatusToString(status), status);
11671739a20eSAndy Ritger 
11681739a20eSAndy Ritger             retStatus = (retStatus == NV_OK) ? status : retStatus;
11691739a20eSAndy Ritger         }
1170dac2350cSAndy Ritger #if defined(INCLUDE_NVLINK_LIB)
1171dac2350cSAndy Ritger         else
1172dac2350cSAndy Ritger         {
1173dac2350cSAndy Ritger             NvU32 linkId;
1174dac2350cSAndy Ritger 
1175dac2350cSAndy Ritger             //
1176dac2350cSAndy Ritger             // The connections have been successfully reset, update connected and disconnected
1177dac2350cSAndy Ritger             // links masks on both the devices
1178dac2350cSAndy Ritger             //
1179dac2350cSAndy Ritger             FOR_EACH_INDEX_IN_MASK(32, linkId, pKernelNvlink->enabledLinks)
1180dac2350cSAndy Ritger             {
1181dac2350cSAndy Ritger                 pKernelNvlink->disconnectedLinkMask |=  NVBIT(linkId);
1182dac2350cSAndy Ritger                 pKernelNvlink->connectedLinksMask   &= ~NVBIT(linkId);
1183dac2350cSAndy Ritger 
1184dac2350cSAndy Ritger                 if (pKernelNvlink->nvlinkLinks[linkId].remoteEndInfo.deviceType !=
1185dac2350cSAndy Ritger                                               NV2080_CTRL_NVLINK_DEVICE_INFO_DEVICE_TYPE_GPU)
1186dac2350cSAndy Ritger                 {
1187dac2350cSAndy Ritger                     continue;
1188dac2350cSAndy Ritger                 }
1189dac2350cSAndy Ritger 
1190dac2350cSAndy Ritger                 OBJGPU *pRemoteGpu = gpumgrGetGpuFromBusInfo(
1191dac2350cSAndy Ritger                                             pKernelNvlink->nvlinkLinks[linkId].remoteEndInfo.domain,
1192dac2350cSAndy Ritger                                             pKernelNvlink->nvlinkLinks[linkId].remoteEndInfo.bus,
1193dac2350cSAndy Ritger                                             pKernelNvlink->nvlinkLinks[linkId].remoteEndInfo.device);
1194dac2350cSAndy Ritger 
1195dac2350cSAndy Ritger                 if (!API_GPU_IN_RESET_SANITY_CHECK(pRemoteGpu))
1196dac2350cSAndy Ritger                 {
1197dac2350cSAndy Ritger                     KernelNvlink *pRemoteKernelNvlink = GPU_GET_KERNEL_NVLINK(pRemoteGpu);
1198dac2350cSAndy Ritger                     NvU32 remoteLinkId = pKernelNvlink->nvlinkLinks[linkId].remoteEndInfo.linkNumber;
1199dac2350cSAndy Ritger 
1200dac2350cSAndy Ritger                     pRemoteKernelNvlink->disconnectedLinkMask |=  NVBIT(remoteLinkId);
1201dac2350cSAndy Ritger                     pRemoteKernelNvlink->connectedLinksMask   &= ~NVBIT(remoteLinkId);
1202dac2350cSAndy Ritger                 }
1203dac2350cSAndy Ritger             }
1204dac2350cSAndy Ritger             FOR_EACH_INDEX_IN_MASK_END;
1205dac2350cSAndy Ritger         }
1206dac2350cSAndy Ritger #endif
12071739a20eSAndy Ritger 
12081739a20eSAndy Ritger         //
12091739a20eSAndy Ritger         // knvlinkCoreResetDeviceLinks() only resets the links which have
12101739a20eSAndy Ritger         // connectivity.
12111739a20eSAndy Ritger         // Pre-Ampere, we may run into a situation where the PLL
12121739a20eSAndy Ritger         // sharing partner links (both) may not be reset due to no connectivity.
12131739a20eSAndy Ritger         //
12141739a20eSAndy Ritger         // Hence, (re-)reset all the links to recover them after shutdown (pre-Ampere)
12151739a20eSAndy Ritger         //
12161739a20eSAndy Ritger         NV2080_CTRL_NVLINK_RESET_LINKS_PARAMS resetLinksparams;
12171739a20eSAndy Ritger 
12181739a20eSAndy Ritger         portMemSet(&resetLinksparams, 0, sizeof(resetLinksparams));
12191739a20eSAndy Ritger         resetLinksparams.linkMask = pKernelNvlink->enabledLinks;
12201739a20eSAndy Ritger         resetLinksparams.flags    = NV2080_CTRL_NVLINK_RESET_FLAGS_TOGGLE;
12211739a20eSAndy Ritger 
12221739a20eSAndy Ritger         status = knvlinkExecGspRmRpc(pGpu, pKernelNvlink,
12231739a20eSAndy Ritger                                      NV2080_CTRL_CMD_NVLINK_RESET_LINKS,
12241739a20eSAndy Ritger                                      (void *)&resetLinksparams, sizeof(resetLinksparams));
12251739a20eSAndy Ritger 
12261739a20eSAndy Ritger         retStatus = (retStatus == NV_OK) ? status : retStatus;
12271739a20eSAndy Ritger     }
12281739a20eSAndy Ritger 
12291739a20eSAndy Ritger     return retStatus;
12301739a20eSAndy Ritger }
12311739a20eSAndy Ritger 
12321739a20eSAndy Ritger /*!
12331739a20eSAndy Ritger  * @brief Set the power features supported on this NVLink IP
12341739a20eSAndy Ritger  *
12351739a20eSAndy Ritger  * @param[in] pGpu          OBJGPU pointer
12361739a20eSAndy Ritger  * @param[in] pKernelNvlink KernelNvlink pointer
12371739a20eSAndy Ritger  */
12381739a20eSAndy Ritger void
12391739a20eSAndy Ritger knvlinkSetPowerFeatures_IMPL
12401739a20eSAndy Ritger (
12411739a20eSAndy Ritger     OBJGPU       *pGpu,
12421739a20eSAndy Ritger     KernelNvlink *pKernelNvlink
12431739a20eSAndy Ritger )
12441739a20eSAndy Ritger {
12451739a20eSAndy Ritger     // Get the Ip Verion from the First available IOCTRL.
12461739a20eSAndy Ritger     switch (pKernelNvlink->ipVerNvlink)
12471739a20eSAndy Ritger     {
12481739a20eSAndy Ritger         case NVLINK_VERSION_22:
12491739a20eSAndy Ritger         {
12501739a20eSAndy Ritger             // NVLink L2 is supported only on MODS and Windows LDDM
1251758b4ee8SAndy Ritger             if (RMCFG_FEATURE_PLATFORM_WINDOWS_LDDM || RMCFG_FEATURE_MODS_FEATURES)
12521739a20eSAndy Ritger             {
12531739a20eSAndy Ritger                 pKernelNvlink->setProperty(pKernelNvlink, PDB_PROP_KNVLINK_L2_POWER_STATE_ENABLED,
12541739a20eSAndy Ritger                                            (pKernelNvlink->bDisableL2Mode ? NV_FALSE : NV_TRUE));
12551739a20eSAndy Ritger             }
12561739a20eSAndy Ritger 
12571739a20eSAndy Ritger             break;
12581739a20eSAndy Ritger         }
12591739a20eSAndy Ritger         default:
12601739a20eSAndy Ritger             break;
12611739a20eSAndy Ritger     }
12621739a20eSAndy Ritger }
12631739a20eSAndy Ritger 
12641739a20eSAndy Ritger /*!
12651739a20eSAndy Ritger  * @brief Checks if NVSWITCH_FABRIC_ADDR field is valid.
12661739a20eSAndy Ritger  *
12671739a20eSAndy Ritger  * @param[in] pGpu          OBJGPU pointer
12681739a20eSAndy Ritger  * @param[in] pKernelNvlink KernelNvlink pointer
12691739a20eSAndy Ritger  */
12701739a20eSAndy Ritger void
12711739a20eSAndy Ritger knvlinkDetectNvswitchProxy_IMPL
12721739a20eSAndy Ritger (
12731739a20eSAndy Ritger     OBJGPU       *pGpu,
12741739a20eSAndy Ritger     KernelNvlink *pKernelNvlink
12751739a20eSAndy Ritger )
12761739a20eSAndy Ritger {
12771739a20eSAndy Ritger     OBJSYS    *pSys   = SYS_GET_INSTANCE();
12781739a20eSAndy Ritger     NV_STATUS  status = NV_OK;
12791739a20eSAndy Ritger     NvU32      i;
12801739a20eSAndy Ritger 
12811739a20eSAndy Ritger     // Initialize fabricBaseAddr to NVLINK_INVALID_FABRIC_ADDR
12821739a20eSAndy Ritger     pKernelNvlink->fabricBaseAddr = NVLINK_INVALID_FABRIC_ADDR;
12831739a20eSAndy Ritger 
12841739a20eSAndy Ritger     if (pSys->getProperty(pSys, PDB_PROP_SYS_NVSWITCH_IS_PRESENT) ||
1285758b4ee8SAndy Ritger         pSys->getProperty(pSys, PDB_PROP_SYS_FABRIC_MANAGER_IS_REGISTERED) ||
1286758b4ee8SAndy Ritger         GPU_IS_NVSWITCH_DETECTED(pGpu))
12871739a20eSAndy Ritger     {
12881739a20eSAndy Ritger         return;
12891739a20eSAndy Ritger     }
12901739a20eSAndy Ritger 
12911739a20eSAndy Ritger     if (pKernelNvlink->discoveredLinks == 0)
12921739a20eSAndy Ritger     {
12931739a20eSAndy Ritger         return;
12941739a20eSAndy Ritger     }
12951739a20eSAndy Ritger 
12961739a20eSAndy Ritger     // Get the link train status for the enabled link masks
12971739a20eSAndy Ritger     NV2080_CTRL_NVLINK_ARE_LINKS_TRAINED_PARAMS linkTrainedParams;
12981739a20eSAndy Ritger 
12991739a20eSAndy Ritger     portMemSet(&linkTrainedParams, 0, sizeof(linkTrainedParams));
13001739a20eSAndy Ritger     linkTrainedParams.linkMask    = pKernelNvlink->enabledLinks;
13011739a20eSAndy Ritger     linkTrainedParams.bActiveOnly = NV_FALSE;
13021739a20eSAndy Ritger 
13031739a20eSAndy Ritger     // Reset timeout to clear any accumulated timeouts from link init
13041739a20eSAndy Ritger     if (IS_GSP_CLIENT(pGpu))
13051739a20eSAndy Ritger     {
13061739a20eSAndy Ritger         threadStateResetTimeout(pGpu);
13071739a20eSAndy Ritger     }
13081739a20eSAndy Ritger 
13091739a20eSAndy Ritger     status = knvlinkExecGspRmRpc(pGpu, pKernelNvlink,
13101739a20eSAndy Ritger                                  NV2080_CTRL_CMD_NVLINK_ARE_LINKS_TRAINED,
13111739a20eSAndy Ritger                                  (void *)&linkTrainedParams, sizeof(linkTrainedParams));
13121739a20eSAndy Ritger     if (status != NV_OK)
13131739a20eSAndy Ritger     {
13141739a20eSAndy Ritger         NV_PRINTF(LEVEL_ERROR, "Failed to get the link train status for links\n");
13151739a20eSAndy Ritger         return;
13161739a20eSAndy Ritger     }
13171739a20eSAndy Ritger 
13181739a20eSAndy Ritger     FOR_EACH_INDEX_IN_MASK(32, i, pKernelNvlink->enabledLinks)
13191739a20eSAndy Ritger     {
13201739a20eSAndy Ritger         if (!linkTrainedParams.bIsLinkActive[i])
13211739a20eSAndy Ritger         {
13221739a20eSAndy Ritger             return;
13231739a20eSAndy Ritger         }
13241739a20eSAndy Ritger     }
13251739a20eSAndy Ritger     FOR_EACH_INDEX_IN_MASK_END;
13261739a20eSAndy Ritger 
13271739a20eSAndy Ritger     NV2080_CTRL_INTERNAL_NVLINK_GET_SET_NVSWITCH_FABRIC_ADDR_PARAMS params;
13281739a20eSAndy Ritger 
13291739a20eSAndy Ritger     portMemSet(&params, 0, sizeof(params));
13301739a20eSAndy Ritger     params.bGet = NV_TRUE;
13311739a20eSAndy Ritger     params.addr = NVLINK_INVALID_FABRIC_ADDR;
13321739a20eSAndy Ritger 
13331739a20eSAndy Ritger     status = knvlinkExecGspRmRpc(pGpu, pKernelNvlink,
13341739a20eSAndy Ritger                                  NV2080_CTRL_CMD_INTERNAL_NVLINK_GET_SET_NVSWITCH_FABRIC_ADDR,
13351739a20eSAndy Ritger                                  (void *)&params, sizeof(params));
13361739a20eSAndy Ritger     if (status != NV_OK)
13371739a20eSAndy Ritger     {
13381739a20eSAndy Ritger         NV_PRINTF(LEVEL_ERROR, "Failed to get fabric address for GPU %x\n",
13391739a20eSAndy Ritger                   pGpu->gpuInstance);
13401739a20eSAndy Ritger         return;
13411739a20eSAndy Ritger     }
13421739a20eSAndy Ritger 
13431739a20eSAndy Ritger     if (params.addr != NVLINK_INVALID_FABRIC_ADDR)
13441739a20eSAndy Ritger     {
13451739a20eSAndy Ritger         pKernelNvlink->fabricBaseAddr = params.addr;
13461739a20eSAndy Ritger         pKernelNvlink->bNvswitchProxy = NV_TRUE;
13471739a20eSAndy Ritger     }
13481739a20eSAndy Ritger }
13491739a20eSAndy Ritger 
13501739a20eSAndy Ritger /*!
13511739a20eSAndy Ritger  * @brief Sets NVSWITCH_FLA_ADDR field in the scratch register.
13521739a20eSAndy Ritger  *
13531739a20eSAndy Ritger  * @param[in] pGpu           OBJGPU pointer
13541739a20eSAndy Ritger  * @param[in] pKernelNvlink  KernelNvlink pointer
13551739a20eSAndy Ritger  * @param[in] addr           FLA addr
13561739a20eSAndy Ritger  *
13571739a20eSAndy Ritger  * @return  Returns NV_OK upon success.
13581739a20eSAndy Ritger  *          Otherwise, returns NV_ERR_XXX.
13591739a20eSAndy Ritger  */
13601739a20eSAndy Ritger NV_STATUS
13611739a20eSAndy Ritger knvlinkSetNvswitchFlaAddr_IMPL
13621739a20eSAndy Ritger (
13631739a20eSAndy Ritger     OBJGPU       *pGpu,
13641739a20eSAndy Ritger     KernelNvlink *pKernelNvlink,
13651739a20eSAndy Ritger     NvU64         addr
13661739a20eSAndy Ritger )
13671739a20eSAndy Ritger {
13681739a20eSAndy Ritger     return NV_OK;
13691739a20eSAndy Ritger }
13701739a20eSAndy Ritger 
13711739a20eSAndy Ritger /*!
13721739a20eSAndy Ritger  * @brief Gets NVSWITCH_FLA_ADDR field from the scratch register.
13731739a20eSAndy Ritger  *
13741739a20eSAndy Ritger  * @param[in] pGpu           OBJGPU pointer
13751739a20eSAndy Ritger  * @param[in] pKernelNvlink  KernelNvlink pointer
13761739a20eSAndy Ritger  *
13771739a20eSAndy Ritger  * @return  Returns the stashed FLA starting address.
13781739a20eSAndy Ritger  */
13791739a20eSAndy Ritger NvU64
13801739a20eSAndy Ritger knvlinkGetNvswitchFlaAddr_IMPL
13811739a20eSAndy Ritger (
13821739a20eSAndy Ritger     OBJGPU       *pGpu,
13831739a20eSAndy Ritger     KernelNvlink *pKernelNvlink
13841739a20eSAndy Ritger )
13851739a20eSAndy Ritger {
13861739a20eSAndy Ritger     return 0;
13871739a20eSAndy Ritger }
13881739a20eSAndy Ritger 
13891739a20eSAndy Ritger /*!
13901739a20eSAndy Ritger  * @brief Checks if fabricBaseAddr is valid.
13911739a20eSAndy Ritger  *
13921739a20eSAndy Ritger  * @param[in] pGpu          OBJGPU pointer
13931739a20eSAndy Ritger  * @param[in] pKernelNvlink KernelNvlink pointer
13941739a20eSAndy Ritger  *
13951739a20eSAndy Ritger  * @return  Returns true if the fabricBaseAddr is valid.
13961739a20eSAndy Ritger  */
13971739a20eSAndy Ritger NvBool
13981739a20eSAndy Ritger knvlinkIsNvswitchProxyPresent_IMPL
13991739a20eSAndy Ritger (
14001739a20eSAndy Ritger     OBJGPU       *pGpu,
14011739a20eSAndy Ritger     KernelNvlink *pKernelNvlink
14021739a20eSAndy Ritger )
14031739a20eSAndy Ritger {
14041739a20eSAndy Ritger     return pKernelNvlink->bNvswitchProxy;
14051739a20eSAndy Ritger }
14061739a20eSAndy Ritger 
14071739a20eSAndy Ritger 
14081739a20eSAndy Ritger /*!
14091739a20eSAndy Ritger  * @brief   Set unique FLA base address for NVSwitch enabled systems.
14101739a20eSAndy Ritger  *          Validates FLA base address and programs the base address
14111739a20eSAndy Ritger  *          in switch scratch registers for guest VM to pick it up.
14121739a20eSAndy Ritger  *
14131739a20eSAndy Ritger  * @param[in]   pGpu               OBJGPU pointer
14141739a20eSAndy Ritger  * @param[in]   pKernelNvlink      KernelNvlink pointer
14151739a20eSAndy Ritger  * @param[in]   flaBaseAddr        NvU64  base address
14161739a20eSAndy Ritger  *
14171739a20eSAndy Ritger  * @returns On success, sets unique FLA base address and returns NV_OK.
14181739a20eSAndy Ritger  *          On failure, returns NV_ERR_XXX.
14191739a20eSAndy Ritger  */
14201739a20eSAndy Ritger NV_STATUS
14211739a20eSAndy Ritger knvlinkSetUniqueFlaBaseAddress_IMPL
14221739a20eSAndy Ritger (
14231739a20eSAndy Ritger     OBJGPU       *pGpu,
14241739a20eSAndy Ritger     KernelNvlink *pKernelNvlink,
14251739a20eSAndy Ritger     NvU64         flaBaseAddr
14261739a20eSAndy Ritger )
14271739a20eSAndy Ritger {
14281739a20eSAndy Ritger     NV_STATUS  status     = NV_OK;
14291739a20eSAndy Ritger     KernelBus *pKernelBus = GPU_GET_KERNEL_BUS(pGpu);
14301739a20eSAndy Ritger 
14311739a20eSAndy Ritger     NV2080_CTRL_NVLINK_GET_SET_NVSWITCH_FLA_ADDR_PARAMS params;
14321739a20eSAndy Ritger 
14331739a20eSAndy Ritger     if (!knvlinkIsForcedConfig(pGpu, pKernelNvlink))
14341739a20eSAndy Ritger     {
14351739a20eSAndy Ritger         knvlinkCoreGetRemoteDeviceInfo(pGpu, pKernelNvlink);
14361739a20eSAndy Ritger 
14371739a20eSAndy Ritger         status = knvlinkEnableLinksPostTopology_HAL(pGpu, pKernelNvlink,
14381739a20eSAndy Ritger                                                     pKernelNvlink->enabledLinks);
14391739a20eSAndy Ritger         if (status != NV_OK)
14401739a20eSAndy Ritger         {
14411739a20eSAndy Ritger             return status;
14421739a20eSAndy Ritger         }
14431739a20eSAndy Ritger     }
14441739a20eSAndy Ritger 
14451739a20eSAndy Ritger     status = kbusValidateFlaBaseAddress_HAL(pGpu, pKernelBus, flaBaseAddr);
14461739a20eSAndy Ritger     if (status != NV_OK)
14471739a20eSAndy Ritger     {
14481739a20eSAndy Ritger         NV_PRINTF(LEVEL_ERROR, "FLA base addr validation failed for GPU %x\n",
14491739a20eSAndy Ritger                   pGpu->gpuInstance);
14501739a20eSAndy Ritger         return status;
14511739a20eSAndy Ritger     }
14521739a20eSAndy Ritger 
14531739a20eSAndy Ritger     if (IsSLIEnabled(pGpu))
14541739a20eSAndy Ritger     {
14551739a20eSAndy Ritger         NV_PRINTF(LEVEL_ERROR,
14561739a20eSAndy Ritger                   "Operation is unsupported on SLI enabled GPU %x\n",
14571739a20eSAndy Ritger                   pGpu->gpuInstance);
14581739a20eSAndy Ritger         return NV_ERR_NOT_SUPPORTED;
14591739a20eSAndy Ritger     }
14601739a20eSAndy Ritger 
14611739a20eSAndy Ritger     portMemSet(&params, 0, sizeof(params));
14621739a20eSAndy Ritger     params.bGet = NV_FALSE;
14631739a20eSAndy Ritger     params.addr = flaBaseAddr;
14641739a20eSAndy Ritger 
14651739a20eSAndy Ritger     status = knvlinkExecGspRmRpc(pGpu, pKernelNvlink,
14661739a20eSAndy Ritger                                  NV2080_CTRL_CMD_NVLINK_GET_SET_NVSWITCH_FLA_ADDR,
14671739a20eSAndy Ritger                                  (void *)&params, sizeof(params));
14681739a20eSAndy Ritger     if (status != NV_OK)
14691739a20eSAndy Ritger     {
14701739a20eSAndy Ritger         NV_PRINTF(LEVEL_ERROR, "Failed to stash fla base address for GPU %x\n",
14711739a20eSAndy Ritger                   pGpu->gpuInstance);
14721739a20eSAndy Ritger         return status;
14731739a20eSAndy Ritger     }
14741739a20eSAndy Ritger 
14751739a20eSAndy Ritger     NV_PRINTF(LEVEL_INFO, "FLA base addr %llx is assigned to GPU %x\n",
14761739a20eSAndy Ritger               flaBaseAddr, pGpu->gpuInstance);
14771739a20eSAndy Ritger 
14781739a20eSAndy Ritger     return NV_OK;
14791739a20eSAndy Ritger }
14801739a20eSAndy Ritger 
14811739a20eSAndy Ritger /*!
14821739a20eSAndy Ritger  * @brief Synchronize the link masks and vbios defined properties
14831739a20eSAndy Ritger  *        between CPU and GSP-RMs
14841739a20eSAndy Ritger  *
14851739a20eSAndy Ritger  * @param[in]   pGpu           OBJGPU pointer
14861739a20eSAndy Ritger  * @param[in]   pKernelNvlink  KernelNvlink pointer
14871739a20eSAndy Ritger  */
14881739a20eSAndy Ritger NV_STATUS
14891739a20eSAndy Ritger knvlinkSyncLinkMasksAndVbiosInfo_IMPL
14901739a20eSAndy Ritger (
14911739a20eSAndy Ritger     OBJGPU       *pGpu,
14921739a20eSAndy Ritger     KernelNvlink *pKernelNvlink
14931739a20eSAndy Ritger )
14941739a20eSAndy Ritger {
14951739a20eSAndy Ritger     NV_STATUS status = NV_OK;
14961739a20eSAndy Ritger 
14971739a20eSAndy Ritger     NV2080_CTRL_NVLINK_SYNC_LINK_MASKS_AND_VBIOS_INFO_PARAMS params;
14981739a20eSAndy Ritger 
14991739a20eSAndy Ritger     portMemSet(&params, 0, sizeof(params));
15001739a20eSAndy Ritger 
15011739a20eSAndy Ritger     params.discoveredLinks     = pKernelNvlink->discoveredLinks;
15021739a20eSAndy Ritger     params.connectedLinksMask  = pKernelNvlink->connectedLinksMask;
15031739a20eSAndy Ritger     params.bridgeSensableLinks = pKernelNvlink->bridgeSensableLinks;
15041739a20eSAndy Ritger     params.bridgedLinks        = pKernelNvlink->bridgedLinks;
15051739a20eSAndy Ritger 
15061739a20eSAndy Ritger     // Reset timeout to clear any accumulated timeouts from link init
15071739a20eSAndy Ritger     if (IS_GSP_CLIENT(pGpu))
15081739a20eSAndy Ritger     {
15091739a20eSAndy Ritger         threadStateResetTimeout(pGpu);
15101739a20eSAndy Ritger     }
15111739a20eSAndy Ritger 
15121739a20eSAndy Ritger     status = knvlinkExecGspRmRpc(pGpu, pKernelNvlink,
15131739a20eSAndy Ritger                                  NV2080_CTRL_CMD_NVLINK_SYNC_LINK_MASKS_AND_VBIOS_INFO,
15141739a20eSAndy Ritger                                  (void *)&params, sizeof(params));
15151739a20eSAndy Ritger 
15161739a20eSAndy Ritger     pKernelNvlink->vbiosDisabledLinkMask = params.vbiosDisabledLinkMask;
15171739a20eSAndy Ritger     pKernelNvlink->initializedLinks      = params.initializedLinks;
15181739a20eSAndy Ritger     pKernelNvlink->initDisabledLinksMask = params.initDisabledLinksMask;
15191739a20eSAndy Ritger     pKernelNvlink->bEnableSafeModeAtLoad = params.bEnableSafeModeAtLoad;
15201739a20eSAndy Ritger     pKernelNvlink->bEnableTrainingAtLoad = params.bEnableTrainingAtLoad;
15211739a20eSAndy Ritger 
15221739a20eSAndy Ritger     return status;
15231739a20eSAndy Ritger }
15241739a20eSAndy Ritger 
15251739a20eSAndy Ritger /*!
15261739a20eSAndy Ritger  * @brief Update link connection status.
15271739a20eSAndy Ritger  *
15281739a20eSAndy Ritger  * @param[in]   pGpu           OBJGPU pointer
15291739a20eSAndy Ritger  * @param[in]   pKernelNvlink  KernelNvlink pointer
15301739a20eSAndy Ritger  * @param[in]   linkId         Target link Id
15311739a20eSAndy Ritger  */
15321739a20eSAndy Ritger NV_STATUS
15331739a20eSAndy Ritger knvlinkUpdateLinkConnectionStatus_IMPL
15341739a20eSAndy Ritger (
15351739a20eSAndy Ritger     OBJGPU       *pGpu,
15361739a20eSAndy Ritger     KernelNvlink *pKernelNvlink,
15371739a20eSAndy Ritger     NvU32         linkId
15381739a20eSAndy Ritger )
15391739a20eSAndy Ritger {
15401739a20eSAndy Ritger     NV_STATUS status = NV_OK;
15411739a20eSAndy Ritger 
15421739a20eSAndy Ritger     NV2080_CTRL_NVLINK_UPDATE_LINK_CONNECTION_PARAMS params;
15431739a20eSAndy Ritger 
15441739a20eSAndy Ritger     portMemSet(&params, 0, sizeof(params));
15451739a20eSAndy Ritger 
15461739a20eSAndy Ritger     params.linkId = linkId;
15471739a20eSAndy Ritger 
15481739a20eSAndy Ritger #if defined(INCLUDE_NVLINK_LIB)
15491739a20eSAndy Ritger 
15501739a20eSAndy Ritger     params.bConnected = pKernelNvlink->nvlinkLinks[linkId].remoteEndInfo.bConnected;
15511739a20eSAndy Ritger     params.remoteDeviceType = pKernelNvlink->nvlinkLinks[linkId].remoteEndInfo.deviceType;
15521739a20eSAndy Ritger     params.remoteLinkNumber = pKernelNvlink->nvlinkLinks[linkId].remoteEndInfo.linkNumber;
1553758b4ee8SAndy Ritger     params.remoteChipSid = pKernelNvlink->nvlinkLinks[linkId].remoteEndInfo.chipSid;
1554758b4ee8SAndy Ritger     params.remoteDomain = pKernelNvlink->nvlinkLinks[linkId].remoteEndInfo.domain;
1555758b4ee8SAndy Ritger     params.remoteBus = pKernelNvlink->nvlinkLinks[linkId].remoteEndInfo.bus;
1556758b4ee8SAndy Ritger     params.remoteDevice = pKernelNvlink->nvlinkLinks[linkId].remoteEndInfo.device;
1557758b4ee8SAndy Ritger     params.remoteFunction = pKernelNvlink->nvlinkLinks[linkId].remoteEndInfo.function;
1558758b4ee8SAndy Ritger     params.remotePciDeviceId = pKernelNvlink->nvlinkLinks[linkId].remoteEndInfo.pciDeviceId;
1559758b4ee8SAndy Ritger     params.laneRxdetStatusMask = pKernelNvlink->nvlinkLinks[linkId].laneRxdetStatusMask;
15601739a20eSAndy Ritger 
15611739a20eSAndy Ritger #endif
15621739a20eSAndy Ritger 
15631739a20eSAndy Ritger     // Reset timeout to clear any accumulated timeouts from link init
15641739a20eSAndy Ritger     if (IS_GSP_CLIENT(pGpu))
15651739a20eSAndy Ritger     {
15661739a20eSAndy Ritger         threadStateResetTimeout(pGpu);
15671739a20eSAndy Ritger     }
15681739a20eSAndy Ritger 
15691739a20eSAndy Ritger     status = knvlinkExecGspRmRpc(pGpu, pKernelNvlink,
15701739a20eSAndy Ritger                                  NV2080_CTRL_CMD_NVLINK_UPDATE_LINK_CONNECTION,
15711739a20eSAndy Ritger                                  (void *)&params, sizeof(params));
15721739a20eSAndy Ritger     if (status != NV_OK)
15731739a20eSAndy Ritger     {
15741739a20eSAndy Ritger         NV_PRINTF(LEVEL_ERROR, "Failed to update Link connection status!\n");
15751739a20eSAndy Ritger         return status;
15761739a20eSAndy Ritger     }
15771739a20eSAndy Ritger 
15781739a20eSAndy Ritger     return NV_OK;
15791739a20eSAndy Ritger }
15801739a20eSAndy Ritger 
15811739a20eSAndy Ritger /*!
158290eb1077SAndy Ritger  * @brief Execute initial steps to Train links for ALI.
158390eb1077SAndy Ritger  *
158490eb1077SAndy Ritger  * @param[in] pGpu           OBJGPU pointer for local GPU
158590eb1077SAndy Ritger  * @param[in] pKernelNvlink  KernelNvlink pointer
158690eb1077SAndy Ritger  * @param[in] linkMask       Masks of links to enable
158790eb1077SAndy Ritger  * @param[in] bSync          Input sync boolean
158890eb1077SAndy Ritger  *
158990eb1077SAndy Ritger  */
159090eb1077SAndy Ritger NV_STATUS
159190eb1077SAndy Ritger knvlinkPreTrainLinksToActiveAli_IMPL
159290eb1077SAndy Ritger (
159390eb1077SAndy Ritger     OBJGPU       *pGpu,
159490eb1077SAndy Ritger     KernelNvlink *pKernelNvlink,
159590eb1077SAndy Ritger     NvU32         linkMask,
159690eb1077SAndy Ritger     NvBool        bSync
159790eb1077SAndy Ritger )
159890eb1077SAndy Ritger {
159990eb1077SAndy Ritger     NV_STATUS status = NV_OK;
160090eb1077SAndy Ritger 
160190eb1077SAndy Ritger     NV2080_CTRL_NVLINK_PRE_LINK_TRAIN_ALI_PARAMS params;
160290eb1077SAndy Ritger 
160390eb1077SAndy Ritger     portMemSet(&params, 0, sizeof(params));
160490eb1077SAndy Ritger 
160590eb1077SAndy Ritger     params.linkMask = linkMask;
160690eb1077SAndy Ritger     params.bSync    = bSync;
160790eb1077SAndy Ritger 
160890eb1077SAndy Ritger     // Reset timeout to clear any accumulated timeouts from link init
160990eb1077SAndy Ritger     if (IS_GSP_CLIENT(pGpu))
161090eb1077SAndy Ritger     {
161190eb1077SAndy Ritger         threadStateResetTimeout(pGpu);
161290eb1077SAndy Ritger     }
161390eb1077SAndy Ritger 
161490eb1077SAndy Ritger     status = knvlinkExecGspRmRpc(pGpu, pKernelNvlink,
161590eb1077SAndy Ritger                                  NV2080_CTRL_CMD_NVLINK_PRE_LINK_TRAIN_ALI,
161690eb1077SAndy Ritger                                  (void *)&params, sizeof(params));
161790eb1077SAndy Ritger     if (status != NV_OK)
161890eb1077SAndy Ritger     {
161990eb1077SAndy Ritger         NV_PRINTF(LEVEL_ERROR, "Failed to execute Pre Link Training ALI steps!\n");
162090eb1077SAndy Ritger         return status;
162190eb1077SAndy Ritger     }
162290eb1077SAndy Ritger 
162390eb1077SAndy Ritger     return NV_OK;
162490eb1077SAndy Ritger }
162590eb1077SAndy Ritger 
162690eb1077SAndy Ritger /*!
162790eb1077SAndy Ritger  * @brief Train links to active for ALI.
162890eb1077SAndy Ritger  *
162990eb1077SAndy Ritger  * @param[in] pGpu           OBJGPU pointer for local GPU
163090eb1077SAndy Ritger  * @param[in] pKernelNvlink  KernelNvlink pointer
163190eb1077SAndy Ritger  * @param[in] linkMask       Masks of links to enable
163290eb1077SAndy Ritger  * @param[in] bSync          Input sync boolean
163390eb1077SAndy Ritger  *
163490eb1077SAndy Ritger  */
163590eb1077SAndy Ritger NV_STATUS
163690eb1077SAndy Ritger knvlinkTrainLinksToActiveAli_IMPL
163790eb1077SAndy Ritger (
163890eb1077SAndy Ritger     OBJGPU       *pGpu,
163990eb1077SAndy Ritger     KernelNvlink *pKernelNvlink,
164090eb1077SAndy Ritger     NvU32         linkMask,
164190eb1077SAndy Ritger     NvBool        bSync
164290eb1077SAndy Ritger )
164390eb1077SAndy Ritger {
164490eb1077SAndy Ritger     NV_STATUS status = NV_OK;
164590eb1077SAndy Ritger 
164690eb1077SAndy Ritger     NV2080_CTRL_NVLINK_PRE_LINK_TRAIN_ALI_PARAMS params;
164790eb1077SAndy Ritger 
164890eb1077SAndy Ritger     portMemSet(&params, 0, sizeof(params));
164990eb1077SAndy Ritger 
165090eb1077SAndy Ritger     params.linkMask = linkMask;
165190eb1077SAndy Ritger     params.bSync    = bSync;
165290eb1077SAndy Ritger 
165390eb1077SAndy Ritger     // Reset timeout to clear any accumulated timeouts from link init
165490eb1077SAndy Ritger     if (IS_GSP_CLIENT(pGpu))
165590eb1077SAndy Ritger     {
165690eb1077SAndy Ritger         threadStateResetTimeout(pGpu);
165790eb1077SAndy Ritger     }
165890eb1077SAndy Ritger 
165990eb1077SAndy Ritger     status = knvlinkExecGspRmRpc(pGpu, pKernelNvlink,
166090eb1077SAndy Ritger                                  NV2080_CTRL_CMD_NVLINK_LINK_TRAIN_ALI,
166190eb1077SAndy Ritger                                  (void *)&params, sizeof(params));
166290eb1077SAndy Ritger     if (status != NV_OK)
166390eb1077SAndy Ritger     {
166490eb1077SAndy Ritger         NV_PRINTF(LEVEL_ERROR, "Failed to change ALI Links to active!\n");
166590eb1077SAndy Ritger         return status;
166690eb1077SAndy Ritger     }
166790eb1077SAndy Ritger 
166890eb1077SAndy Ritger     return NV_OK;
166990eb1077SAndy Ritger }
167090eb1077SAndy Ritger 
167190eb1077SAndy Ritger /*!
16721739a20eSAndy Ritger  * @brief Update the post Rx Detect link mask.
16731739a20eSAndy Ritger  *
16741739a20eSAndy Ritger  * @param[in] pGpu           OBJGPU pointer for local GPU
16751739a20eSAndy Ritger  * @param[in] pKernelNvlink  KernelNvlink pointer
16761739a20eSAndy Ritger  *
16771739a20eSAndy Ritger  */
16781739a20eSAndy Ritger NV_STATUS
16791739a20eSAndy Ritger knvlinkUpdatePostRxDetectLinkMask_IMPL
16801739a20eSAndy Ritger (
16811739a20eSAndy Ritger     OBJGPU       *pGpu,
16821739a20eSAndy Ritger     KernelNvlink *pKernelNvlink
16831739a20eSAndy Ritger )
16841739a20eSAndy Ritger {
16851739a20eSAndy Ritger     NV_STATUS status = NV_OK;
16861739a20eSAndy Ritger 
16871739a20eSAndy Ritger     NV2080_CTRL_NVLINK_GET_LINK_MASK_POST_RX_DET_PARAMS params;
16881739a20eSAndy Ritger 
16891739a20eSAndy Ritger     portMemSet(&params, 0, sizeof(params));
16901739a20eSAndy Ritger 
16911739a20eSAndy Ritger     status = knvlinkExecGspRmRpc(pGpu, pKernelNvlink,
16921739a20eSAndy Ritger                                  NV2080_CTRL_CMD_NVLINK_GET_LINK_MASK_POST_RX_DET,
16931739a20eSAndy Ritger                                  (void *)&params, sizeof(params));
16941739a20eSAndy Ritger     if (status != NV_OK)
16951739a20eSAndy Ritger     {
16961739a20eSAndy Ritger         NV_PRINTF(LEVEL_ERROR, "Failed to update Rx Detect Link mask!\n");
16971739a20eSAndy Ritger         return status;
16981739a20eSAndy Ritger     }
16991739a20eSAndy Ritger 
17001739a20eSAndy Ritger     pKernelNvlink->postRxDetLinkMask = params.postRxDetLinkMask;
17011739a20eSAndy Ritger 
17021739a20eSAndy Ritger     return NV_OK;
17031739a20eSAndy Ritger }
17041739a20eSAndy Ritger 
17051739a20eSAndy Ritger /*!
17061739a20eSAndy Ritger  * @brief Copy over the NVLink devices information from GSP-RM.
17071739a20eSAndy Ritger  *
17081739a20eSAndy Ritger  * @param[in] pGpu          OBJGPU pointer for local GPU
17091739a20eSAndy Ritger  * @param[in] pKernelNvlink KernelNvlink pointer
17101739a20eSAndy Ritger  */
17111739a20eSAndy Ritger NV_STATUS
17121739a20eSAndy Ritger knvlinkCopyNvlinkDeviceInfo_IMPL
17131739a20eSAndy Ritger (
17141739a20eSAndy Ritger     OBJGPU       *pGpu,
17151739a20eSAndy Ritger     KernelNvlink *pKernelNvlink
17161739a20eSAndy Ritger )
17171739a20eSAndy Ritger {
17181739a20eSAndy Ritger     NV_STATUS status = NV_OK;
17191739a20eSAndy Ritger     NvU32     i;
17201739a20eSAndy Ritger 
17211739a20eSAndy Ritger     NV2080_CTRL_NVLINK_GET_NVLINK_DEVICE_INFO_PARAMS nvlinkInfoParams;
17221739a20eSAndy Ritger 
17231739a20eSAndy Ritger     portMemSet(&nvlinkInfoParams, 0, sizeof(nvlinkInfoParams));
17241739a20eSAndy Ritger 
17251739a20eSAndy Ritger     status = knvlinkExecGspRmRpc(pGpu, pKernelNvlink,
17261739a20eSAndy Ritger                                  NV2080_CTRL_CMD_NVLINK_GET_NVLINK_DEVICE_INFO,
17271739a20eSAndy Ritger                                  (void *)&nvlinkInfoParams, sizeof(nvlinkInfoParams));
17281739a20eSAndy Ritger 
17291739a20eSAndy Ritger     if (status == NV_ERR_NOT_SUPPORTED)
17301739a20eSAndy Ritger     {
17311739a20eSAndy Ritger         NV_PRINTF(LEVEL_WARNING, "NVLink is unavailable\n");
17321739a20eSAndy Ritger         return status;
17331739a20eSAndy Ritger     }
17341739a20eSAndy Ritger     else if (status != NV_OK)
17351739a20eSAndy Ritger     {
17361739a20eSAndy Ritger         NV_PRINTF(LEVEL_ERROR, "Failed to retrieve all nvlink device info!\n");
17371739a20eSAndy Ritger         return status;
17381739a20eSAndy Ritger     }
17391739a20eSAndy Ritger 
17401739a20eSAndy Ritger     // Update CPU-RM's NVLink state with the information received from GSP-RM RPC
17411739a20eSAndy Ritger     pKernelNvlink->ioctrlMask       = nvlinkInfoParams.ioctrlMask;
17421739a20eSAndy Ritger     pKernelNvlink->ioctrlNumEntries = nvlinkInfoParams.ioctrlNumEntries;
17431739a20eSAndy Ritger     pKernelNvlink->ioctrlSize       = nvlinkInfoParams.ioctrlSize;
17441739a20eSAndy Ritger     pKernelNvlink->discoveredLinks  = nvlinkInfoParams.discoveredLinks;
17451739a20eSAndy Ritger     pKernelNvlink->ipVerNvlink      = nvlinkInfoParams.ipVerNvlink;
17461739a20eSAndy Ritger 
17471739a20eSAndy Ritger     for (i = 0; i < NVLINK_MAX_LINKS_SW; i++)
17481739a20eSAndy Ritger     {
17491739a20eSAndy Ritger         pKernelNvlink->nvlinkLinks[i].pGpu     = pGpu;
17501739a20eSAndy Ritger         pKernelNvlink->nvlinkLinks[i].bValid   = nvlinkInfoParams.linkInfo[i].bValid;
17511739a20eSAndy Ritger         pKernelNvlink->nvlinkLinks[i].linkId   = nvlinkInfoParams.linkInfo[i].linkId;
17521739a20eSAndy Ritger         pKernelNvlink->nvlinkLinks[i].ioctrlId = nvlinkInfoParams.linkInfo[i].ioctrlId;
17531739a20eSAndy Ritger 
17541739a20eSAndy Ritger         // Copy over the link PLL master and slave relationship for each link
17551739a20eSAndy Ritger         pKernelNvlink->nvlinkLinks[i].pllMasterLinkId = nvlinkInfoParams.linkInfo[i].pllMasterLinkId;
17561739a20eSAndy Ritger         pKernelNvlink->nvlinkLinks[i].pllSlaveLinkId  = nvlinkInfoParams.linkInfo[i].pllSlaveLinkId;
17571739a20eSAndy Ritger 
17581739a20eSAndy Ritger         // Copy over the ip versions for DLPL devices discovered
17591739a20eSAndy Ritger         pKernelNvlink->nvlinkLinks[i].ipVerDlPl = nvlinkInfoParams.linkInfo[i].ipVerDlPl;
17601739a20eSAndy Ritger     }
17611739a20eSAndy Ritger 
17621739a20eSAndy Ritger     return NV_OK;
17631739a20eSAndy Ritger }
17641739a20eSAndy Ritger 
17651739a20eSAndy Ritger /*!
17661739a20eSAndy Ritger  * @brief Copy over the Ioctrl devices information from GSP-RM.
17671739a20eSAndy Ritger  *
17681739a20eSAndy Ritger  * @param[in] pGpu          OBJGPU pointer for local GPU
17691739a20eSAndy Ritger  * @param[in] pKernelNvlink KernelNvlink pointer
17701739a20eSAndy Ritger  */
17711739a20eSAndy Ritger NV_STATUS
17721739a20eSAndy Ritger knvlinkCopyIoctrlDeviceInfo_IMPL
17731739a20eSAndy Ritger (
17741739a20eSAndy Ritger     OBJGPU       *pGpu,
17751739a20eSAndy Ritger     KernelNvlink *pKernelNvlink
17761739a20eSAndy Ritger )
17771739a20eSAndy Ritger {
17781739a20eSAndy Ritger     KernelIoctrl *pKernelIoctrl = NULL;
17791739a20eSAndy Ritger     NV_STATUS     status        = NV_OK;
17801739a20eSAndy Ritger     NvU32         ioctrlIdx;
17811739a20eSAndy Ritger 
17821739a20eSAndy Ritger     NV2080_CTRL_NVLINK_GET_IOCTRL_DEVICE_INFO_PARAMS ioctrlInfoParams;
17831739a20eSAndy Ritger 
17841739a20eSAndy Ritger     // Query the IOCTRL information for each of the IOCTRLs discovered
17851739a20eSAndy Ritger     FOR_EACH_INDEX_IN_MASK(32, ioctrlIdx, pKernelNvlink->ioctrlMask)
17861739a20eSAndy Ritger     {
17871739a20eSAndy Ritger         portMemSet(&ioctrlInfoParams, 0, sizeof(ioctrlInfoParams));
17881739a20eSAndy Ritger 
17891739a20eSAndy Ritger         ioctrlInfoParams.ioctrlIdx = ioctrlIdx;
17901739a20eSAndy Ritger 
17911739a20eSAndy Ritger         status = knvlinkExecGspRmRpc(pGpu, pKernelNvlink,
17921739a20eSAndy Ritger                                      NV2080_CTRL_CMD_NVLINK_GET_IOCTRL_DEVICE_INFO,
17931739a20eSAndy Ritger                                      (void *)&ioctrlInfoParams, sizeof(ioctrlInfoParams));
17941739a20eSAndy Ritger 
17951739a20eSAndy Ritger         if (status == NV_ERR_NOT_SUPPORTED)
17961739a20eSAndy Ritger         {
17971739a20eSAndy Ritger             NV_PRINTF(LEVEL_WARNING, "NVLink is unavailable\n");
17981739a20eSAndy Ritger             return status;
17991739a20eSAndy Ritger         }
18001739a20eSAndy Ritger         else if (status != NV_OK)
18011739a20eSAndy Ritger         {
18021739a20eSAndy Ritger             NV_PRINTF(LEVEL_ERROR, "Failed to retrieve device info for IOCTRL %d!\n", ioctrlIdx);
18031739a20eSAndy Ritger             return status;
18041739a20eSAndy Ritger         }
18051739a20eSAndy Ritger 
18061739a20eSAndy Ritger         pKernelIoctrl = KNVLINK_GET_IOCTRL(pKernelNvlink, ioctrlIdx);
18071739a20eSAndy Ritger 
18081739a20eSAndy Ritger         // Update CPU-RM's NVLink state with the information received from GSP-RM RPC
18091739a20eSAndy Ritger         pKernelIoctrl->PublicId              = ioctrlInfoParams.PublicId;
18101739a20eSAndy Ritger         pKernelIoctrl->localDiscoveredLinks  = ioctrlInfoParams.localDiscoveredLinks;
18111739a20eSAndy Ritger         pKernelIoctrl->localGlobalLinkOffset = ioctrlInfoParams.localGlobalLinkOffset;
18121739a20eSAndy Ritger         pKernelIoctrl->ioctrlDiscoverySize   = ioctrlInfoParams.ioctrlDiscoverySize;
18131739a20eSAndy Ritger         pKernelIoctrl->numDevices            = ioctrlInfoParams.numDevices;
18141739a20eSAndy Ritger 
18151739a20eSAndy Ritger         // Copy over the ip versions for the ioctrl and minion devices discovered
18161739a20eSAndy Ritger         pKernelIoctrl->ipVerIoctrl = ioctrlInfoParams.ipRevisions.ipVerIoctrl;
18171739a20eSAndy Ritger         pKernelIoctrl->ipVerMinion = ioctrlInfoParams.ipRevisions.ipVerMinion;
18181739a20eSAndy Ritger 
18191739a20eSAndy Ritger         if (pKernelIoctrl->ipVerMinion == 0)
18201739a20eSAndy Ritger         {
18211739a20eSAndy Ritger             pKernelIoctrl->setProperty(pKernelIoctrl, PDB_PROP_KIOCTRL_MINION_AVAILABLE, NV_FALSE);
18221739a20eSAndy Ritger         }
18231739a20eSAndy Ritger     }
18241739a20eSAndy Ritger     FOR_EACH_INDEX_IN_MASK_END;
18251739a20eSAndy Ritger 
18261739a20eSAndy Ritger     return NV_OK;
18271739a20eSAndy Ritger }
18281739a20eSAndy Ritger 
18291739a20eSAndy Ritger /**
18301739a20eSAndy Ritger  * @brief Setup topology information for the forced nvlink configurations
18311739a20eSAndy Ritger  *
18321739a20eSAndy Ritger  * @param[in] pGpu          OBJGPU pointer for local GPU
18331739a20eSAndy Ritger  * @param[in] pKernelNvlink KernelNvlink pointer
18341739a20eSAndy Ritger  */
18351739a20eSAndy Ritger NV_STATUS
18361739a20eSAndy Ritger knvlinkSetupTopologyForForcedConfig_IMPL
18371739a20eSAndy Ritger (
18381739a20eSAndy Ritger     OBJGPU       *pGpu,
18391739a20eSAndy Ritger     KernelNvlink *pKernelNvlink
18401739a20eSAndy Ritger )
18411739a20eSAndy Ritger {
18421739a20eSAndy Ritger     NV_STATUS status  = NV_OK;
18431739a20eSAndy Ritger     NvU32     i, physLink;
18441739a20eSAndy Ritger 
18451739a20eSAndy Ritger     // Start with all links disabled and no forced config in effect
18461739a20eSAndy Ritger     pKernelNvlink->bRegistryLinkOverride = NV_TRUE;
18471739a20eSAndy Ritger     pKernelNvlink->registryLinkMask      = 0;
18481739a20eSAndy Ritger     pKernelNvlink->bChiplibConfig        = NV_FALSE;
18491739a20eSAndy Ritger 
18501739a20eSAndy Ritger     for (i = 0; i < NVLINK_MAX_LINKS_SW; i++)
18511739a20eSAndy Ritger     {
18521739a20eSAndy Ritger         // Filter against the links discovered from IOCTRL
18531739a20eSAndy Ritger         if (!(pKernelNvlink->discoveredLinks & NVBIT(i)))
18541739a20eSAndy Ritger             continue;
18551739a20eSAndy Ritger 
18561739a20eSAndy Ritger         // The physical link is guaranteed valid in all cases
18571739a20eSAndy Ritger         physLink = DRF_VAL(_NVLINK, _ARCH_CONNECTION, _PHYSICAL_LINK, pKernelNvlink->pLinkConnection[i]);
18581739a20eSAndy Ritger 
18591739a20eSAndy Ritger         // Update link tracking
18601739a20eSAndy Ritger         if (DRF_VAL(_NVLINK, _ARCH_CONNECTION, _ENABLED, pKernelNvlink->pLinkConnection[i]))
18611739a20eSAndy Ritger         {
18621739a20eSAndy Ritger             NV_PRINTF(LEVEL_INFO,
18631739a20eSAndy Ritger                       "ARCH_CONNECTION info from chiplib: ENABLED Logical link %d (Physical "
18641739a20eSAndy Ritger                       "link %d) = 0x%X\n", i, physLink,
18651739a20eSAndy Ritger                       pKernelNvlink->pLinkConnection[i]);
18661739a20eSAndy Ritger 
18671739a20eSAndy Ritger             //
18681739a20eSAndy Ritger             // This "link" should be ENABLED. We use the physical link since RM only deals with
18691739a20eSAndy Ritger             // physical links.
18701739a20eSAndy Ritger             //
18711739a20eSAndy Ritger             pKernelNvlink->registryLinkMask |= NVBIT(physLink);
18721739a20eSAndy Ritger 
18731739a20eSAndy Ritger             // Config is forced (at least one link requested)
18741739a20eSAndy Ritger             pKernelNvlink->bChiplibConfig = NV_TRUE;
18751739a20eSAndy Ritger         }
18761739a20eSAndy Ritger         else
18771739a20eSAndy Ritger         {
18781739a20eSAndy Ritger             NV_PRINTF(LEVEL_INFO,
18791739a20eSAndy Ritger                       "ARCH_CONNECTION info from chiplib: DISABLED Logical link %d (Physical "
18801739a20eSAndy Ritger                       "link %d) = 0x%X\n", i, physLink,
18811739a20eSAndy Ritger                       pKernelNvlink->pLinkConnection[i]);
18821739a20eSAndy Ritger         }
18831739a20eSAndy Ritger 
18841739a20eSAndy Ritger         // Accumulate any PEER links
18851739a20eSAndy Ritger         if (DRF_VAL(_NVLINK, _ARCH_CONNECTION, _PEER_MASK, pKernelNvlink->pLinkConnection[i]))
18861739a20eSAndy Ritger         {
18871739a20eSAndy Ritger #if defined(INCLUDE_NVLINK_LIB)
18881739a20eSAndy Ritger             // Ensure reginit has the info it needs for the remote side
18891739a20eSAndy Ritger             pKernelNvlink->nvlinkLinks[i].remoteEndInfo.bConnected = NV_TRUE;
18901739a20eSAndy Ritger             pKernelNvlink->nvlinkLinks[i].remoteEndInfo.deviceType =
18911739a20eSAndy Ritger                                                     NV2080_CTRL_NVLINK_DEVICE_INFO_DEVICE_TYPE_GPU;
18921739a20eSAndy Ritger 
18931739a20eSAndy Ritger #endif
18941739a20eSAndy Ritger         }
18951739a20eSAndy Ritger 
18961739a20eSAndy Ritger         // Accumulate any CPU links
18971739a20eSAndy Ritger         if (DRF_VAL(_NVLINK, _ARCH_CONNECTION, _CPU, pKernelNvlink->pLinkConnection[i]))
18981739a20eSAndy Ritger         {
18991739a20eSAndy Ritger #if defined(INCLUDE_NVLINK_LIB)
19001739a20eSAndy Ritger             // Ensure reginit has the info it needs for the remote side
19011739a20eSAndy Ritger             pKernelNvlink->nvlinkLinks[i].remoteEndInfo.bConnected = NV_TRUE;
19021739a20eSAndy Ritger             pKernelNvlink->nvlinkLinks[i].remoteEndInfo.deviceType = pKernelNvlink->forcedSysmemDeviceType;
19031739a20eSAndy Ritger #endif
19041739a20eSAndy Ritger         }
19051739a20eSAndy Ritger 
19061739a20eSAndy Ritger         // RPC into GSP-RM to update the link remote connection status
19071739a20eSAndy Ritger         status = knvlinkUpdateLinkConnectionStatus(pGpu, pKernelNvlink, i);
19081739a20eSAndy Ritger         if (status != NV_OK)
19091739a20eSAndy Ritger         {
19101739a20eSAndy Ritger             return status;
19111739a20eSAndy Ritger         }
19121739a20eSAndy Ritger     }
19131739a20eSAndy Ritger 
19141739a20eSAndy Ritger     // Update enabledLinks mask with the mask of forced link configurations
19151739a20eSAndy Ritger     pKernelNvlink->enabledLinks = pKernelNvlink->discoveredLinks & pKernelNvlink->registryLinkMask;
19161739a20eSAndy Ritger 
19171739a20eSAndy Ritger     return NV_OK;
19181739a20eSAndy Ritger }
19191739a20eSAndy Ritger 
19201739a20eSAndy Ritger /*!
19211739a20eSAndy Ritger  * @brief Sync the lane shutdown properties with GSP-RM
19221739a20eSAndy Ritger  *
19231739a20eSAndy Ritger  * @param[in] pGpu          OBJGPU pointer
19241739a20eSAndy Ritger  * @param[in] pKernelNvlink KernelNvlink pointer
19251739a20eSAndy Ritger  */
19261739a20eSAndy Ritger NV_STATUS
19271739a20eSAndy Ritger knvlinkSyncLaneShutdownProps_IMPL
19281739a20eSAndy Ritger (
19291739a20eSAndy Ritger     OBJGPU       *pGpu,
19301739a20eSAndy Ritger     KernelNvlink *pKernelNvlink
19311739a20eSAndy Ritger )
19321739a20eSAndy Ritger {
19331739a20eSAndy Ritger     NV_STATUS status = NV_OK;
19341739a20eSAndy Ritger 
19351739a20eSAndy Ritger     NV2080_CTRL_NVLINK_SYNC_NVLINK_SHUTDOWN_PROPS_PARAMS params;
19361739a20eSAndy Ritger 
19371739a20eSAndy Ritger     portMemSet(&params, 0, sizeof(params));
19381739a20eSAndy Ritger 
19391739a20eSAndy Ritger     params.bLaneShutdownEnabled  =
19401739a20eSAndy Ritger         pKernelNvlink->getProperty(pKernelNvlink, PDB_PROP_KNVLINK_LANE_SHUTDOWN_ENABLED);
19411739a20eSAndy Ritger     params.bLaneShutdownOnUnload =
19421739a20eSAndy Ritger         pKernelNvlink->getProperty(pKernelNvlink, PDB_PROP_KNVLINK_LANE_SHUTDOWN_ON_UNLOAD);
19431739a20eSAndy Ritger 
19441739a20eSAndy Ritger     status = knvlinkExecGspRmRpc(pGpu, pKernelNvlink,
19451739a20eSAndy Ritger                                  NV2080_CTRL_CMD_NVLINK_SYNC_NVLINK_SHUTDOWN_PROPS,
19461739a20eSAndy Ritger                                  (void *)&params, sizeof(params));
19471739a20eSAndy Ritger     if (status != NV_OK)
19481739a20eSAndy Ritger     {
19491739a20eSAndy Ritger         NV_PRINTF(LEVEL_ERROR, "Failed to sync NVLink shutdown properties with GSP!\n");
19501739a20eSAndy Ritger         return status;
19511739a20eSAndy Ritger     }
19521739a20eSAndy Ritger 
19531739a20eSAndy Ritger     return NV_OK;
19541739a20eSAndy Ritger }
19551739a20eSAndy Ritger 
19561739a20eSAndy Ritger /*!
195790eb1077SAndy Ritger  * @brief   Get the number of active links allowed per IOCTRL
195890eb1077SAndy Ritger  *
195990eb1077SAndy Ritger  * @param[in] pGpu           OBJGPU pointer
196090eb1077SAndy Ritger  * @param[in] pKernelNvlink  KernelNvlink pointer
196190eb1077SAndy Ritger  *
196290eb1077SAndy Ritger  * @returns On success, returns the number of active links per IOCTRL.
196390eb1077SAndy Ritger  *          On failure, returns 0.
196490eb1077SAndy Ritger  */
196590eb1077SAndy Ritger NvU32
196690eb1077SAndy Ritger knvlinkGetNumActiveLinksPerIoctrl_IMPL
196790eb1077SAndy Ritger (
196890eb1077SAndy Ritger     OBJGPU       *pGpu,
196990eb1077SAndy Ritger     KernelNvlink *pKernelNvlink
197090eb1077SAndy Ritger )
197190eb1077SAndy Ritger {
197290eb1077SAndy Ritger     NV_STATUS status;
197390eb1077SAndy Ritger     NV2080_CTRL_INTERNAL_NVLINK_GET_NUM_ACTIVE_LINK_PER_IOCTRL_PARAMS params;
197490eb1077SAndy Ritger     portMemSet(&params, 0, sizeof(params));
197590eb1077SAndy Ritger     status = knvlinkExecGspRmRpc(pGpu, pKernelNvlink,
197690eb1077SAndy Ritger                                  NV2080_CTRL_INTERNAL_NVLINK_GET_NUM_ACTIVE_LINK_PER_IOCTRL,
197790eb1077SAndy Ritger                                  (void *)&params, sizeof(params));
197890eb1077SAndy Ritger     if (status != NV_OK)
197990eb1077SAndy Ritger     {
198090eb1077SAndy Ritger         NV_PRINTF(LEVEL_ERROR, "Failed to get the number of active links per IOCTRL\n");
198190eb1077SAndy Ritger         return 0;
198290eb1077SAndy Ritger     }
198390eb1077SAndy Ritger     return params.numActiveLinksPerIoctrl;
198490eb1077SAndy Ritger }
198590eb1077SAndy Ritger 
198690eb1077SAndy Ritger /*!
198790eb1077SAndy Ritger  * @brief   Get the number of total links  per IOCTRL
198890eb1077SAndy Ritger  *
198990eb1077SAndy Ritger  * @param[in] pGpu           OBJGPU pointer
199090eb1077SAndy Ritger  * @param[in] pKernelNvlink  KernelNvlink pointer
199190eb1077SAndy Ritger  *
199290eb1077SAndy Ritger  * @returns On success, returns the number of total links per IOCTRL.
199390eb1077SAndy Ritger  *          On failure, returns 0.
199490eb1077SAndy Ritger  */
199590eb1077SAndy Ritger NvU32
199690eb1077SAndy Ritger knvlinkGetTotalNumLinksPerIoctrl_IMPL
199790eb1077SAndy Ritger (
199890eb1077SAndy Ritger     OBJGPU       *pGpu,
199990eb1077SAndy Ritger     KernelNvlink *pKernelNvlink
200090eb1077SAndy Ritger )
200190eb1077SAndy Ritger {
200290eb1077SAndy Ritger     NV_STATUS status;
200390eb1077SAndy Ritger     NV2080_CTRL_INTERNAL_NVLINK_GET_TOTAL_NUM_LINK_PER_IOCTRL_PARAMS params;
200490eb1077SAndy Ritger     portMemSet(&params, 0, sizeof(params));
200590eb1077SAndy Ritger     status = knvlinkExecGspRmRpc(pGpu, pKernelNvlink,
200690eb1077SAndy Ritger                                  NV2080_CTRL_INTERNAL_NVLINK_GET_TOTAL_NUM_LINK_PER_IOCTRL,
200790eb1077SAndy Ritger                                  (void *)&params, sizeof(params));
200890eb1077SAndy Ritger     if (status != NV_OK)
200990eb1077SAndy Ritger     {
201090eb1077SAndy Ritger         NV_PRINTF(LEVEL_ERROR, "Failed to get the total number of links per IOCTRL\n");
201190eb1077SAndy Ritger         return 0;
201290eb1077SAndy Ritger     }
201390eb1077SAndy Ritger     return params.numLinksPerIoctrl;
201490eb1077SAndy Ritger }
201590eb1077SAndy Ritger 
20161739a20eSAndy Ritger /**
20171739a20eSAndy Ritger  * @brief Process the mask of init disabled links
20181739a20eSAndy Ritger  *
20191739a20eSAndy Ritger  * @param[in] pGpu          OBJGPU pointer
20201739a20eSAndy Ritger  * @param[in] pKernelNvlink KernelNvlink pointer
20211739a20eSAndy Ritger  */
20221739a20eSAndy Ritger NV_STATUS
20231739a20eSAndy Ritger knvlinkProcessInitDisabledLinks_IMPL
20241739a20eSAndy Ritger (
20251739a20eSAndy Ritger     OBJGPU       *pGpu,
20261739a20eSAndy Ritger     KernelNvlink *pKernelNvlink
20271739a20eSAndy Ritger )
20281739a20eSAndy Ritger {
20291739a20eSAndy Ritger     NvU32     mask                 = 0;
20301739a20eSAndy Ritger     NvBool    bSkipHwNvlinkDisable = 0;
20311739a20eSAndy Ritger     NV_STATUS status               = NV_OK;
20321739a20eSAndy Ritger 
20331739a20eSAndy Ritger     NV2080_CTRL_NVLINK_PROCESS_INIT_DISABLED_LINKS_PARAMS params;
20341739a20eSAndy Ritger 
20351739a20eSAndy Ritger     status = gpumgrGetGpuInitDisabledNvlinks(pGpu->gpuId, &mask, &bSkipHwNvlinkDisable);
20361739a20eSAndy Ritger     if (status != NV_OK)
20371739a20eSAndy Ritger     {
20381739a20eSAndy Ritger         NV_PRINTF(LEVEL_ERROR, "Failed to get init disabled links from gpumgr\n");
20391739a20eSAndy Ritger         return status;
20401739a20eSAndy Ritger     }
20411739a20eSAndy Ritger 
20421739a20eSAndy Ritger     portMemSet(&params, 0, sizeof(params));
20431739a20eSAndy Ritger 
20441739a20eSAndy Ritger     params.initDisabledLinksMask = mask;
20451739a20eSAndy Ritger     params.bSkipHwNvlinkDisable  = bSkipHwNvlinkDisable;
20461739a20eSAndy Ritger 
20471739a20eSAndy Ritger     status = knvlinkExecGspRmRpc(pGpu, pKernelNvlink,
20481739a20eSAndy Ritger                                  NV2080_CTRL_CMD_NVLINK_PROCESS_INIT_DISABLED_LINKS,
20491739a20eSAndy Ritger                                  (void *)&params, sizeof(params));
20501739a20eSAndy Ritger     if (status != NV_OK)
20511739a20eSAndy Ritger     {
20521739a20eSAndy Ritger         NV_PRINTF(LEVEL_ERROR, "Failed to process init disabled links in GSP\n");
20531739a20eSAndy Ritger         return status;
20541739a20eSAndy Ritger     }
20551739a20eSAndy Ritger 
20561739a20eSAndy Ritger     pKernelNvlink->initDisabledLinksMask = params.initDisabledLinksMask;
20571739a20eSAndy Ritger 
20581739a20eSAndy Ritger     return NV_OK;
20591739a20eSAndy Ritger }
20601739a20eSAndy Ritger 
20611739a20eSAndy Ritger // Grab GPU locks before RPCing into GSP-RM for NVLink RPCs
20621739a20eSAndy Ritger NV_STATUS
20631739a20eSAndy Ritger knvlinkExecGspRmRpc_IMPL
20641739a20eSAndy Ritger (
20651739a20eSAndy Ritger     OBJGPU       *pGpu,
20661739a20eSAndy Ritger     KernelNvlink *pKernelNvlink,
20671739a20eSAndy Ritger     NvU32         cmd,
20681739a20eSAndy Ritger     void         *paramAddr,
20691739a20eSAndy Ritger     NvU32         paramSize
20701739a20eSAndy Ritger )
20711739a20eSAndy Ritger {
20721739a20eSAndy Ritger     NvU32     gpuMaskRelease = 0;
20731739a20eSAndy Ritger     NvU32     gpuMaskInitial = rmGpuLocksGetOwnedMask();
20741739a20eSAndy Ritger     NvU32     gpuMask        = gpuMaskInitial | NVBIT(pGpu->gpuInstance);
20751739a20eSAndy Ritger     NV_STATUS status         = NV_OK;
20761739a20eSAndy Ritger 
20771739a20eSAndy Ritger     if (IS_GSP_CLIENT(pGpu))
20781739a20eSAndy Ritger     {
20791739a20eSAndy Ritger         if (!rmGpuGroupLockIsOwner(pGpu->gpuInstance, GPU_LOCK_GRP_MASK, &gpuMask))
20801739a20eSAndy Ritger         {
20811739a20eSAndy Ritger             status = rmGpuGroupLockAcquire(pGpu->gpuInstance,
20821739a20eSAndy Ritger                                            GPU_LOCK_GRP_MASK,
20831739a20eSAndy Ritger                                            GPU_LOCK_FLAGS_SAFE_LOCK_UPGRADE,
20841739a20eSAndy Ritger                                            RM_LOCK_MODULES_NVLINK,
20851739a20eSAndy Ritger                                            &gpuMask);
20861739a20eSAndy Ritger             if (status != NV_OK)
20871739a20eSAndy Ritger             {
20881739a20eSAndy Ritger                 NV_PRINTF(LEVEL_ERROR, "Failed to acquire locks for gpumask 0x%x\n", gpuMask);
20891739a20eSAndy Ritger                 return status;
20901739a20eSAndy Ritger             }
20911739a20eSAndy Ritger 
20921739a20eSAndy Ritger             gpuMaskRelease = (gpuMask & (~gpuMaskInitial));
20931739a20eSAndy Ritger         }
20941739a20eSAndy Ritger     }
20951739a20eSAndy Ritger 
20961739a20eSAndy Ritger     RM_API *pRmApi = GPU_GET_PHYSICAL_RMAPI(pGpu);
20971739a20eSAndy Ritger     status = pRmApi->Control(pRmApi,
20981739a20eSAndy Ritger                              pGpu->hInternalClient,
20991739a20eSAndy Ritger                              pGpu->hInternalSubdevice,
21001739a20eSAndy Ritger                              cmd, paramAddr, paramSize);
21011739a20eSAndy Ritger     if (gpuMaskRelease)
21021739a20eSAndy Ritger     {
21031739a20eSAndy Ritger         rmGpuGroupLockRelease(gpuMaskRelease, GPUS_LOCK_FLAGS_NONE);
21041739a20eSAndy Ritger     }
21051739a20eSAndy Ritger 
21061739a20eSAndy Ritger     return status;
21071739a20eSAndy Ritger }
21081739a20eSAndy Ritger 
21091739a20eSAndy Ritger void
21101739a20eSAndy Ritger knvlinkUtoa(NvU8 *str, NvU64 length, NvU64 val)
21111739a20eSAndy Ritger {
21121739a20eSAndy Ritger     NvU8  temp[NV2080_GPU_MAX_NAME_STRING_LENGTH];
21131739a20eSAndy Ritger     NvU8 *ptr = temp;
21141739a20eSAndy Ritger     NvU64 i = 0;
21151739a20eSAndy Ritger 
21161739a20eSAndy Ritger     NV_ASSERT(str != NULL);
21171739a20eSAndy Ritger 
21181739a20eSAndy Ritger     do
21191739a20eSAndy Ritger     {
21201739a20eSAndy Ritger         i   = val % 10;
21211739a20eSAndy Ritger         val = val / 10;
21221739a20eSAndy Ritger         *ptr++ = (NvU8)(i + '0');
21231739a20eSAndy Ritger     } while(val);
21241739a20eSAndy Ritger 
21251739a20eSAndy Ritger     NV_ASSERT(length > (NvU64) (ptr - temp));
21261739a20eSAndy Ritger 
21271739a20eSAndy Ritger     while (ptr > temp)
21281739a20eSAndy Ritger         *str++ = *--ptr;
21291739a20eSAndy Ritger 
21301739a20eSAndy Ritger     *str = '\0';
21311739a20eSAndy Ritger }
2132