1 /*
2  * SPDX-FileCopyrightText: Copyright (c) 2020-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
3  * SPDX-License-Identifier: MIT
4  *
5  * Permission is hereby granted, free of charge, to any person obtaining a
6  * copy of this software and associated documentation files (the "Software"),
7  * to deal in the Software without restriction, including without limitation
8  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
9  * and/or sell copies of the Software, and to permit persons to whom the
10  * Software is furnished to do so, subject to the following conditions:
11  *
12  * The above copyright notice and this permission notice shall be included in
13  * all copies or substantial portions of the Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
21  * DEALINGS IN THE SOFTWARE.
22  */
23 
24 #define NVOC_KERNEL_NVLINK_H_PRIVATE_ACCESS_ALLOWED
25 
26 #include "os/os.h"
27 #include "core/hal.h"
28 #include "core/info_block.h"
29 #include "core/locks.h"
30 #include "core/thread_state.h"
31 #include "gpu/gpu.h"
32 
33 #include "kernel/gpu/nvlink/kernel_nvlink.h"
34 #include "kernel/gpu/nvlink/kernel_ioctrl.h"
35 
36 #include "nverror.h"
37 
38 #if defined(INCLUDE_NVLINK_LIB)
39 #include "nvlink_os.h"
40 #endif
41 
42 static void  _knvlinkRetrainLinkPrologue(OBJGPU *, KernelNvlink *, NvU32);
43 
44 #if defined(INCLUDE_NVLINK_LIB)
45 
46 static NV_STATUS _knvlinkActivateDiscoveredConns(OBJGPU *, KernelNvlink *, NvBool);
47 static NV_STATUS _knvlinkActivateDiscoveredP2pConn(OBJGPU *, KernelNvlink *, NvU32);
48 static NV_STATUS _knvlinkActivateDiscoveredSwitchConn(OBJGPU *, KernelNvlink *, NvU32);
49 static NV_STATUS _knvlinkActivateDiscoveredSysmemConn(OBJGPU *, KernelNvlink *, NvU32);
50 static NV_STATUS _knvlinkEnterSleep(OBJGPU *, KernelNvlink *, NvU32);
51 static NV_STATUS _knvlinkExitSleep(OBJGPU *, KernelNvlink *, NvU32);
52 static NvBool    _knvlinkUpdateSwitchLinkMasks(OBJGPU *, KernelNvlink *, NvU32);
53 static NvBool    _knvlinkUpdateSwitchLinkMasksGpuDegraded(OBJGPU *, KernelNvlink *);
54 static void      _knvlinkUpdatePeerConfigs(OBJGPU *, KernelNvlink *);
55 static void      _knvlinkPrintTopologySummary(OBJGPU *, KernelNvlink *);
56 
57 #endif
58 
59 /*!
60  * @brief Get the nvlink connections for the GPU.
61  *        This function calls into the core library to trigger topology discovery
62  *        on the set of links that have been registered with the core library.
63  *
64  * @param[in]  pGpu          OBJGPU pointer
65  * @param[in]  pKernelNvlink KernelNvlink pointer
66  *
67  * @return  NV_OK on success
68  */
69 NV_STATUS
70 knvlinkCoreGetRemoteDeviceInfo_IMPL
71 (
72     OBJGPU       *pGpu,
73     KernelNvlink *pKernelNvlink
74 )
75 {
76     NV_STATUS status = NV_OK;
77 
78 #if defined(INCLUDE_NVLINK_LIB)
79 
80     OBJSYS *pSys                  = SYS_GET_INSTANCE();
81     NvU32   flags                 = NVLINK_STATE_CHANGE_SYNC;
82     NvBool  bNvswitchProxyPresent = NV_FALSE;
83     NvBool  bUpdateConnStatus     = NV_FALSE;
84     NvBool  bCheckDegradedMode    = NV_FALSE;
85     nvlink_conn_info conn_info    = {0};
86     NvU32   linkId;
    NvU32   numActiveLinksPerIoctrl = 0;
    NvU32   numLinksPerIoctrl       = 0;
89 
90     //
91     // Topology discovery should NOT be triggered in RTD3/FGC6 exit path if L2 is
92     // supported. The remote information will be restored when RM state is restored
93     //
94     if (!knvlinkPoweredUpForD3_HAL(pGpu, pKernelNvlink))
95     {
96         //
97         // Optimization: Check for nvlink proxy only when system fabric is externally
98         // managed. This would avoid RPCs in non-nvswitch cases.
99         //
100         if (pSys->getProperty(pSys, PDB_PROP_SYS_FABRIC_IS_EXTERNALLY_MANAGED))
101         {
102             bNvswitchProxyPresent = knvlinkIsNvswitchProxyPresent(pGpu, pKernelNvlink);
103         }
104 
105         //
106         //  UpdatePostRxDetect has to happen only if there is a disconnected link
107         //
108         if (pKernelNvlink->disconnectedLinkMask && pKernelNvlink->bEnableAli)
109         {
110             // Update the post Rx Det link Mask for the GPU
111             knvlinkUpdatePostRxDetectLinkMask(pGpu, pKernelNvlink);
112         }
113 
114         if (pKernelNvlink->ipVerNvlink >= NVLINK_VERSION_40                     &&
115             !bNvswitchProxyPresent                                              &&
116             !pSys->getProperty(pSys, PDB_PROP_SYS_FABRIC_IS_EXTERNALLY_MANAGED) &&
117             pKernelNvlink->pNvlinkDev != NULL                                   &&
118             !pKernelNvlink->bFloorSwept)
119         {
120             numLinksPerIoctrl = knvlinkGetTotalNumLinksPerIoctrl(pGpu, pKernelNvlink);
121             status = knvlinkFloorSweep(pGpu, pKernelNvlink,
122                                     numLinksPerIoctrl, &numActiveLinksPerIoctrl);
123 
124             if (status != NV_OK)
125             {
126                 NV_PRINTF(LEVEL_ERROR, "Failed to floorsweep valid nvlink config!\n");
127                 return NV_ERR_NOT_READY;
128             }
129         }
130 
131         // We only need to look at links that are still considered disconnected
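        // (FOR_EACH_INDEX_IN_MASK iterates linkId over the set bit positions of
        // the 32-bit disconnectedLinkMask)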
132         FOR_EACH_INDEX_IN_MASK(32, linkId, pKernelNvlink->disconnectedLinkMask)
133         {
134             //
            // If we are using ALI training, make sure the
            // disconnected link is a valid link that has progressed
            // past RxDet
138             //
139             if (pKernelNvlink->bEnableAli &&
140                 !(pKernelNvlink->postRxDetLinkMask & NVBIT(linkId)))
141             {
142                 continue;
143             }
144 
145             bUpdateConnStatus = NV_FALSE;
146 
147             if (pKernelNvlink->nvlinkLinks[linkId].core_link)
148             {
149                 // Call the core library to get the remote end information
150                 if (pSys->getProperty(pSys, PDB_PROP_SYS_FABRIC_IS_EXTERNALLY_MANAGED))
151                 {
152                     if (gpuFabricProbeIsSupported(pGpu))
153                     {
154                         //
                        // If FM doesn't talk to the NVLink driver using control calls
                        // such as IOCTL CTRL_NVLINK_DISCOVER_INTRANODE_CONNS (i.e. it
                        // uses NVLink inband communication instead), discover the
                        // remote information explicitly.
159                         //
160                         nvlink_lib_discover_and_get_remote_conn_info(
161                             pKernelNvlink->nvlinkLinks[linkId].core_link, &conn_info, flags);
162                     }
163                     else
164                     {
165                         nvlink_lib_get_remote_conn_info(
166                             pKernelNvlink->nvlinkLinks[linkId].core_link, &conn_info);
167                     }
168 
169                     //
170                     // nvlink_lib_get_remote_conn_info could fail to return connection info if
171                     // it runs on a shared-NVSwitch virtualization model (HGX-2) where GPU nodes
172                     // can't see NVSwitches. In that case, examine the NVLink scratch register
173                     // for connectivity information.
174                     //
175                     if (!conn_info.bConnected &&
176                         (bNvswitchProxyPresent ||
177                         GPU_IS_NVSWITCH_DETECTED(pGpu)))
178                     {
179                         conn_info.bConnected  = NV_TRUE;
180                         conn_info.deviceType  = NVLINK_DEVICE_TYPE_NVSWITCH;
181                         conn_info.pciDeviceId = NV_U32_MAX;
182                         conn_info.domain      = NV_U32_MAX;
183                         conn_info.bus         = NV_U16_MAX;
184                         conn_info.device      = NV_U16_MAX;
185                         conn_info.function    = NV_U16_MAX;
186                     }
187 
188                     //
                    // A new connection has been discovered from the core library. But we don't
                    // know if this connection was shut down or reset by the fabric manager while
                    // enabling degraded mode, so we have to check for degraded mode.
192                     //
193                     if (conn_info.bConnected)
194                     {
195                         bCheckDegradedMode = NV_TRUE;
196                     }
197                 }
198                 else
199                 {
                    // Asynchronous link initialization for NVLink IP 2.2
201                     if (pKernelNvlink->ipVerNvlink == NVLINK_VERSION_22)
202                     {
203                         flags = NVLINK_STATE_CHANGE_ASYNC;
204                     }
205 
206                     nvlink_lib_discover_and_get_remote_conn_info(
207                             pKernelNvlink->nvlinkLinks[linkId].core_link, &conn_info, flags);
208                 }
209 
                // RPC into GSP-RM to update the link connected status only if it's required
211                 if (pKernelNvlink->nvlinkLinks[linkId].remoteEndInfo.bConnected != conn_info.bConnected)
212                     bUpdateConnStatus = NV_TRUE;
213 
214                 pKernelNvlink->nvlinkLinks[linkId].remoteEndInfo.bConnected = conn_info.bConnected;
215 
216                 if (pKernelNvlink->nvlinkLinks[linkId].remoteEndInfo.bConnected)
217                 {
218                     // Update the RM cache for the remote device information for the link
219                     pKernelNvlink->nvlinkLinks[linkId].remoteEndInfo.domain      = conn_info.domain;
220                     pKernelNvlink->nvlinkLinks[linkId].remoteEndInfo.bus         = conn_info.bus;
221                     pKernelNvlink->nvlinkLinks[linkId].remoteEndInfo.device      = conn_info.device;
222                     pKernelNvlink->nvlinkLinks[linkId].remoteEndInfo.function    = conn_info.function;
223                     pKernelNvlink->nvlinkLinks[linkId].remoteEndInfo.pciDeviceId = conn_info.pciDeviceId;
224                     pKernelNvlink->nvlinkLinks[linkId].remoteEndInfo.deviceType  = conn_info.deviceType;
225                     pKernelNvlink->nvlinkLinks[linkId].remoteEndInfo.linkNumber  = conn_info.linkNumber;
226                     pKernelNvlink->nvlinkLinks[linkId].remoteEndInfo.chipSid     = conn_info.chipSid;
227 
228                     nvlink_memcpy(pKernelNvlink->nvlinkLinks[linkId].remoteEndInfo.devUuid,
229                                   conn_info.devUuid,
230                                   NV_UUID_LEN);
231                 }
232 
233                 if (bUpdateConnStatus)
234                 {
235                     // RPC into GSP-RM to update the link remote connection status for pGpu
236                     status = knvlinkUpdateLinkConnectionStatus(pGpu, pKernelNvlink, linkId);
237                     if (status != NV_OK)
238                     {
239                         return status;
240                     }
241                 }
242             }
243         }
244         FOR_EACH_INDEX_IN_MASK_END;
245     }
246     else
247     {
248         NV_PRINTF(LEVEL_INFO,
249                   "L2 supported. Skip topology discovery on GPU%d in RTD3/FGC6 exit\n",
250                   pGpu->gpuInstance);
251     }
252 
253     //
254     // Update the RM cache for the discovered connections and then activate
255     // those connections. This includes all the post-topology settings like
256     // buffer-ready and interrupt enables
257     //
258     status = _knvlinkActivateDiscoveredConns(pGpu, pKernelNvlink, bCheckDegradedMode);
259     if (status != NV_OK)
260     {
261         NV_PRINTF(LEVEL_INFO,
262                   "Failed to activate the discovered connections on GPU%d\n",
263                   pGpu->gpuInstance);
264     }
265 
266 #endif
267 
268     return status;
269 }
270 
271 /*!
272  * @brief Train all the connected sysmem links associated with the device
273  *        to active through the nvlink core library.
274  *
275  * @param[in]  pGpu          OBJGPU pointer
276  * @param[in]  pKernelNvlink KernelNvlink pointer
277  *
278  * @return  NV_OK on success
279  */
280 NV_STATUS
281 knvlinkTrainSysmemLinksToActive_IMPL
282 (
283     OBJGPU       *pGpu,
284     KernelNvlink *pKernelNvlink
285 )
286 {
287 #if defined(INCLUDE_NVLINK_LIB)
288 
289     OBJSYS *pSys = SYS_GET_INSTANCE();
290     NvU32   i;
291 
292     // On Fmodel, sysmem link training is not supported
293     if (IS_FMODEL(pGpu))
294     {
295         NV_PRINTF(LEVEL_INFO,
296                   "Skipping unsupported sysmem link training on GPU%d\n",
297                   pGpu->gpuInstance);
298 
299         return NV_OK;
300     }
301 
302     // Return if link training is force disabled through regkey
303     if (pKernelNvlink->bSkipLinkTraining)
304     {
305         NV_PRINTF(LEVEL_INFO,
306                   "Skipping link training due to regkey on GPU%d\n",
307                   pGpu->gpuInstance);
308 
309         return NV_OK;
310     }
311 
312     // If fabric is managed by FM, return
313     if (pSys->getProperty(pSys, PDB_PROP_SYS_FABRIC_IS_EXTERNALLY_MANAGED))
314     {
315         NV_PRINTF(LEVEL_INFO,
316                   "Fabric is externally managed, skip link training\n");
317 
318         return NV_OK;
319     }
320 
321     NV_PRINTF(LEVEL_INFO, "Training sysmem links for GPU%d\n",
322               pGpu->gpuInstance);
323 
324     FOR_EACH_INDEX_IN_MASK(32, i, pKernelNvlink->enabledLinks)
325     {
326         if (pKernelNvlink->nvlinkLinks[i].remoteEndInfo.bConnected &&
327             ((pKernelNvlink->nvlinkLinks[i].remoteEndInfo.deviceType == NVLINK_DEVICE_TYPE_IBMNPU)    ||
328              (pKernelNvlink->nvlinkLinks[i].remoteEndInfo.deviceType == NVLINK_DEVICE_TYPE_TEGRASHIM) ||
329              (pKernelNvlink->nvlinkLinks[i].remoteEndInfo.deviceType == NVLINK_DEVICE_TYPE_EBRIDGE)))
330         {
331             if (nvlink_lib_train_links_from_swcfg_to_active(
332                     &pKernelNvlink->nvlinkLinks[i].core_link, 1, NVLINK_STATE_CHANGE_SYNC)
333                     != NVL_SUCCESS)
334             {
335                 nvErrorLog_va((void *)pGpu, NVLINK_ERROR,
336                     "NVLink: failed to train link %d to remote PCI:%04x:%02x:%02x",
337                     i,
338                     pKernelNvlink->nvlinkLinks[i].remoteEndInfo.domain,
339                     pKernelNvlink->nvlinkLinks[i].remoteEndInfo.bus,
340                     pKernelNvlink->nvlinkLinks[i].remoteEndInfo.device);
341 
342                 return NV_ERR_NOT_SUPPORTED;
343             }
344         }
345     }
346     FOR_EACH_INDEX_IN_MASK_END;
347 
348     //
349     // After training links, we may have used up most of the available 4s
350     // timeout during GPU state load. As a WAR in lieu of improving the
351     // performance of link training SW, reset the timeout for now.
352     //
353     NV_PRINTF(LEVEL_INFO, "resetting timeout after link training\n");
354 
355     threadStateResetTimeout(pGpu);
356 
357 #endif
358 
359     return NV_OK;
360 }
361 
362 /*!
363  * @brief Ensure links are trained and put into active.
364  *
365  * @param[in]  pGpu0          OBJGPU pointer
366  * @param[in]  pGpu1          OBJGPU pointer
367  * @param[in]  pKernelNvlink0 KernelNvlink pointer
368  *
369  * @return  NV_OK on success
370  */
371 NV_STATUS
372 knvlinkCheckTrainingIsComplete_IMPL
373 (
374     OBJGPU       *pGpu0,
375     OBJGPU       *pGpu1,
376     KernelNvlink *pKernelNvlink0
377 )
378 {
379     NV_STATUS status = NV_OK;
380 
381 #if defined(INCLUDE_NVLINK_LIB)
382 
383     OBJSYS       *pSys           = SYS_GET_INSTANCE();
384     NvU32         version        = pKernelNvlink0->ipVerNvlink;
385     KernelNvlink *pKernelNvlink1 = GPU_GET_KERNEL_NVLINK(pGpu1);
386     NvU32         count          = 0;
387     NvU32         i;
388 
389     if (pKernelNvlink1 == NULL)
390     {
391         NV_PRINTF(LEVEL_ERROR,
392                   "Input mask contains a GPU on which NVLink is disabled.\n");
393 
394         return NV_ERR_INVALID_ARGUMENT;
395     }
396 
397     nvlink_link *pLinks[NVLINK_MAX_LINKS_SW] = { 0 };
398 
399     // Link training will be triggered from KMD in L2 exit path
400     if (knvlinkPoweredUpForD3_HAL(pGpu0, pKernelNvlink0))
401     {
402         NV_PRINTF(LEVEL_INFO,
403                   "Skip link training on GPU%d in RTD3/FGC6 exit. Links will train to "
404                   "ACTIVE in L2 exit path\n", pGpu0->gpuInstance);
405         return NV_OK;
406     }
407 
    // Minion and SW training are disabled by default on RTL
409     if (IS_RTLSIM(pGpu0) && !pKernelNvlink0->bForceEnableCoreLibRtlsims)
410     {
411         return NV_OK;
412     }
413 
414     // Return if link training is force disabled through regkey
415     if (pKernelNvlink0->bSkipLinkTraining)
416     {
417         NV_PRINTF(LEVEL_INFO,
418                   "Skipping link training due to regkey on GPU%d\n",
419                   pGpu0->gpuInstance);
420         return NV_OK;
421     }
422 
423     // Return if forced config, since SW training is not supported
424     if (knvlinkIsForcedConfig(pGpu0, pKernelNvlink0))
425     {
426         NV_PRINTF(LEVEL_INFO, "Skipping link due to forced configuration\n");
427         return NV_OK;
428     }
429 
430     // If fabric is managed by FM, return
431     if (pSys->getProperty(pSys, PDB_PROP_SYS_FABRIC_IS_EXTERNALLY_MANAGED))
432     {
433         NV_PRINTF(LEVEL_INFO,
434                   "Fabric is externally managed, skip link training\n");
435         return NV_OK;
436     }
437 
438     //
439     // If ALI then ensure it has completed
440     // Else run through training for legacy nvlink versions
441     //
442     if (pKernelNvlink0->bEnableAli || pKernelNvlink1->bEnableAli)
443     {
        // Polling for training completion is only allowed for NVLink 4.0+
445         NV_ASSERT(version >= NVLINK_VERSION_40);
446 
447         //
448         // Check to make sure that the links for the first GPU have
449         // all completed training
450         //
451         FOR_EACH_INDEX_IN_MASK(32, i, pKernelNvlink0->postRxDetLinkMask)
452         {
453             pLinks[count] = pKernelNvlink0->nvlinkLinks[i].core_link;
454             count++;
455         }
456         FOR_EACH_INDEX_IN_MASK_END;
457 
458         // If the return code is non-zero, links are still training
459         if (nvlink_lib_check_training_complete(pLinks, count) != 0)
460         {
461             NV_PRINTF(LEVEL_INFO, "Links aren't fully trained yet!\n");
462             knvlinkLogAliDebugMessages(pGpu0, pKernelNvlink0);
463             return NV_ERR_GENERIC;
464         }
465 
466         //
        // For all links in the postRxDetLinkMask, get the peer
        // link information
469         //
470         FOR_EACH_INDEX_IN_MASK(32, i, pKernelNvlink0->postRxDetLinkMask)
471         {
472             NV2080_CTRL_NVLINK_UPDATE_REMOTE_LOCAL_SID_PARAMS params;
473             portMemSet(&params, 0, sizeof(params));
474 
475             params.linkId = i;
476 
477             status = knvlinkExecGspRmRpc(pGpu0, pKernelNvlink0,
478                                          NV2080_CTRL_CMD_NVLINK_UPDATE_REMOTE_LOCAL_SID,
479                                          (void *)&params, sizeof(params));
480             if (status != NV_OK)
481             {
482                 NV_PRINTF(LEVEL_ERROR, "Error updating Local/Remote Sid Info!\n");
483                 return status;
484             }
485 
486             pKernelNvlink0->nvlinkLinks[i].core_link->remoteSid =
487                 params.remoteLocalSidInfo.remoteSid;
488             pKernelNvlink0->nvlinkLinks[i].core_link->remoteDeviceType =
489                 params.remoteLocalSidInfo.remoteDeviceType;
490             pKernelNvlink0->nvlinkLinks[i].core_link->remoteLinkId =
491                 params.remoteLocalSidInfo.remoteLinkId;
492             pKernelNvlink0->nvlinkLinks[i].core_link->localSid =
493                 params.remoteLocalSidInfo.localSid;
494         }
495         FOR_EACH_INDEX_IN_MASK_END;
496 
        // Only query the second GPU if this is not a loopback connection
498         if (pKernelNvlink0 != pKernelNvlink1)
499         {
500             //
501             // Check to make sure that the links for the second GPU have
502             // all completed training. Reset count for this GPU prior
503             // to querying for the links
504             //
505             count = 0;
506             FOR_EACH_INDEX_IN_MASK(32, i, pKernelNvlink1->postRxDetLinkMask)
507             {
508                 pLinks[count] = pKernelNvlink1->nvlinkLinks[i].core_link;
509                 count++;
510             }
511             FOR_EACH_INDEX_IN_MASK_END;
512 
513             // If the return code is non-zero, links are still training
514             if (nvlink_lib_check_training_complete(pLinks, count) != 0)
515             {
516                 NV_PRINTF(LEVEL_INFO, "Links aren't fully trained yet!\n");
517                 knvlinkLogAliDebugMessages(pGpu1, pKernelNvlink1);
518                 return NV_ERR_GENERIC;
519             }
520 
521             //
            // For all links in the postRxDetLinkMask, get the peer
            // link information
524             //
525             FOR_EACH_INDEX_IN_MASK(32, i, pKernelNvlink1->postRxDetLinkMask)
526             {
527                 NV2080_CTRL_NVLINK_UPDATE_REMOTE_LOCAL_SID_PARAMS params;
528                 portMemSet(&params, 0, sizeof(params));
529 
530                 params.linkId = i;
531 
532                 status = knvlinkExecGspRmRpc(pGpu1, pKernelNvlink1,
533                                              NV2080_CTRL_CMD_NVLINK_UPDATE_REMOTE_LOCAL_SID,
534                                              (void *)&params, sizeof(params));
535                 if (status != NV_OK)
536                 {
537                     NV_PRINTF(LEVEL_ERROR, "Error updating Local/Remote Sid Info!\n");
538                     return status;
539                 }
540 
541                 pKernelNvlink1->nvlinkLinks[i].core_link->remoteSid =
542                     params.remoteLocalSidInfo.remoteSid;
543                 pKernelNvlink1->nvlinkLinks[i].core_link->remoteDeviceType =
544                     params.remoteLocalSidInfo.remoteDeviceType;
545                 pKernelNvlink1->nvlinkLinks[i].core_link->remoteLinkId =
546                     params.remoteLocalSidInfo.remoteLinkId;
547                 pKernelNvlink1->nvlinkLinks[i].core_link->localSid =
548                     params.remoteLocalSidInfo.localSid;
549             }
550             FOR_EACH_INDEX_IN_MASK_END;
551         }
552     }
553 
554 #endif
555 
556     return status;
557 }
558 
559 /*!
560  * @brief Train all the connected links between the two given devices
561  *        to active through the nvlink core library.
562  *
563  * @param[in]  pGpu0          OBJGPU pointer
564  * @param[in]  pGpu1          OBJGPU pointer
565  * @param[in]  pKernelNvlink0 KernelNvlink pointer
566  *
567  * @return  NV_OK on success
568  */
569 NV_STATUS
570 knvlinkTrainP2pLinksToActive_IMPL
571 (
572     OBJGPU       *pGpu0,
573     OBJGPU       *pGpu1,
574     KernelNvlink *pKernelNvlink0
575 )
576 {
577     NV_STATUS status = NV_OK;
578 
579 #if defined(INCLUDE_NVLINK_LIB)
580 
581     OBJSYS       *pSys           = SYS_GET_INSTANCE();
582     KernelNvlink *pKernelNvlink1 = GPU_GET_KERNEL_NVLINK(pGpu1);
583     NvU32         version        = pKernelNvlink0->ipVerNvlink;
584     NvBool        bTrainLinks    = NV_FALSE;
585     NvU32         count          = 0;
586     NvU32         remoteLink;
587     NvU32         i;
588 
589     nvlink_link *pLinks[NVLINK_MAX_LINKS_SW] = { 0 };
590 
591     // Link training will be triggered from KMD in L2 exit path
592     if (knvlinkPoweredUpForD3_HAL(pGpu0, pKernelNvlink0))
593     {
594         NV_PRINTF(LEVEL_INFO,
595                   "Skip link training on GPU%d in RTD3/FGC6 exit. Links will train to "
596                   "ACTIVE in L2 exit path\n", pGpu0->gpuInstance);
597 
598         return NV_OK;
599     }
600 
    // Minion and SW training are disabled by default on RTL
602     if (IS_RTLSIM(pGpu0) && !pKernelNvlink0->bForceEnableCoreLibRtlsims)
603     {
604         return NV_OK;
605     }
606 
607     // Return if link training is force disabled through regkey
608     if (pKernelNvlink0->bSkipLinkTraining)
609     {
610         NV_PRINTF(LEVEL_INFO,
611                   "Skipping link training due to regkey on GPU%d\n",
612                   pGpu0->gpuInstance);
613 
614         return NV_OK;
615     }
616 
617     // Return if forced config, since SW training is not supported
618     if (knvlinkIsForcedConfig(pGpu0, pKernelNvlink0))
619     {
620         NV_PRINTF(LEVEL_INFO, "Skipping link due to forced configuration\n");
621 
622         return NV_OK;
623     }
624 
625     // If fabric is managed by FM, return
626     if (pSys->getProperty(pSys, PDB_PROP_SYS_FABRIC_IS_EXTERNALLY_MANAGED))
627     {
628         NV_PRINTF(LEVEL_INFO,
629                   "Fabric is externally managed, skip link training\n");
630 
631         return NV_OK;
632     }
633 
634     //
635     // Bug# 3601144: On Ampere+ systems, return if links are already initialized,
636     // since that implies links are already trained.
637     //
638     if (IsAMPEREorBetter(pGpu0))
639     {
640         NvU32 localMask  = 0;
641         NvU32 remoteMask = 0;
642 
643         FOR_EACH_INDEX_IN_MASK(32, i, pKernelNvlink0->enabledLinks)
644         {
645             if (KNVLINK_IS_LINK_CONNECTED_TO_GPU(pKernelNvlink0, i, pGpu1))
646             {
647                 remoteLink = pKernelNvlink0->nvlinkLinks[i].remoteEndInfo.linkNumber;
648 
649                 localMask  |= BIT(i);
650                 remoteMask |= BIT(remoteLink);
651             }
652         }
653         FOR_EACH_INDEX_IN_MASK_END;
654 
655         if (((pKernelNvlink0->initializedLinks & localMask)  == localMask) &&
656             ((pKernelNvlink1->initializedLinks & remoteMask) == remoteMask))
657         {
658             NV_PRINTF(LEVEL_INFO, "P2P links are all trained already, return\n");
659             return NV_OK;
660         }
661     }
662 
663     // Get the link train status for the enabled link masks
664     NV2080_CTRL_NVLINK_ARE_LINKS_TRAINED_PARAMS linkTrainedParams;
665 
666     portMemSet(&linkTrainedParams, 0, sizeof(linkTrainedParams));
667     linkTrainedParams.linkMask    = pKernelNvlink0->enabledLinks;
668     linkTrainedParams.bActiveOnly = NV_TRUE;
669 
670     // Reset timeout to clear any accumulated timeouts from link init
671     if (IS_GSP_CLIENT(pGpu0))
672     {
673         threadStateResetTimeout(pGpu0);
674     }
675 
676     status = knvlinkExecGspRmRpc(pGpu0, pKernelNvlink0,
677                                  NV2080_CTRL_CMD_NVLINK_ARE_LINKS_TRAINED,
678                                  (void *)&linkTrainedParams,
679                                  sizeof(linkTrainedParams));
680     if (status != NV_OK)
681     {
682         NV_PRINTF(LEVEL_ERROR, "Failed to get the link train status for links\n");
683         return status;
684     }
685 
686     //
687     // Bug# 3424466: Optimization - Return if all enabled links for this GPU are
    // already trained. The core library makes several callbacks to check link
    // state, which results in numerous RPCs on GSP-RM platforms and hurts perf
    // on chips that have low link training latency and a low link count.
691     //
692     FOR_EACH_INDEX_IN_MASK(32, i, pKernelNvlink0->enabledLinks)
693     {
694         if (!KNVLINK_IS_LINK_CONNECTED_TO_GPU(pKernelNvlink0, i, pGpu1))
695         {
696             continue;
697         }
698 
699         if (!linkTrainedParams.bIsLinkActive[i])
700         {
701             bTrainLinks = NV_TRUE;
702             break;
703         }
704     }
705     FOR_EACH_INDEX_IN_MASK_END;
706 
707     if (!bTrainLinks)
708     {
709         NV_PRINTF(LEVEL_INFO, "Enabled links are all trained already, return\n");
710         return NV_OK;
711     }
712 
713     // Train the mask of enabled links to ACTIVE state
714     FOR_EACH_INDEX_IN_MASK(32, i, pKernelNvlink0->enabledLinks)
715     {
716         if (!KNVLINK_IS_LINK_CONNECTED_TO_GPU(pKernelNvlink0, i, pGpu1))
717         {
718             continue;
719         }
720 
721         if (version >= NVLINK_VERSION_22)
722         {
723             // Capture links for parallel link training
724             pLinks[count] = pKernelNvlink0->nvlinkLinks[i].core_link;
725             count++;
726         }
727         else
728         {
729             // Invoke link training for NVLINK <= 2.0
730             (void)nvlink_lib_train_links_from_swcfg_to_active(
731                 &pKernelNvlink0->nvlinkLinks[i].core_link, 1, NVLINK_STATE_CHANGE_SYNC);
732         }
733     }
734     FOR_EACH_INDEX_IN_MASK_END;
735 
736     // Invoke link training for NVLINK >= 2.2
737     if (count > 0)
738     {
739         //
740         // nvlink_lib_train_links_from_swcfg_to_active with
741         // NVLINK_STATE_CHANGE_ASYNC flag invokes link training asynchronously,
742         // but the call itself is synchronous i.e. it will poll for link
743         // training to complete.
744         //
745         NV_ASSERT(version >= NVLINK_VERSION_22);
746         (void)nvlink_lib_train_links_from_swcfg_to_active(
747             pLinks, count, NVLINK_STATE_CHANGE_ASYNC);
748     }
749 
750     // Get the link train status for the enabled link masks
751     portMemSet(&linkTrainedParams, 0, sizeof(linkTrainedParams));
752     linkTrainedParams.linkMask    = pKernelNvlink0->enabledLinks;
753     linkTrainedParams.bActiveOnly = NV_TRUE;
754 
755     // Reset timeout to clear any accumulated timeouts from link init
756     if (IS_GSP_CLIENT(pGpu0))
757     {
758         threadStateResetTimeout(pGpu0);
759     }
760 
761     status = knvlinkExecGspRmRpc(pGpu0, pKernelNvlink0,
762                                  NV2080_CTRL_CMD_NVLINK_ARE_LINKS_TRAINED,
763                                  (void *)&linkTrainedParams,
764                                  sizeof(linkTrainedParams));
765     if (status != NV_OK)
766     {
767         NV_PRINTF(LEVEL_ERROR, "Failed to get the link train status for links\n");
768         return status;
769     }
770 
771     // Check if the links are trained to "active" state.
772     FOR_EACH_INDEX_IN_MASK(32, i, pKernelNvlink0->enabledLinks)
773     {
774         if (!KNVLINK_IS_LINK_CONNECTED_TO_GPU(pKernelNvlink0, i, pGpu1))
775         {
776             continue;
777         }
778 
779         if (linkTrainedParams.bIsLinkActive[i])
780         {
781             continue;
782         }
783 
784         nvErrorLog_va((void *)pGpu0, NVLINK_ERROR,
785             "NVLink: Failed to train link %d to remote PCI:%04x:%02x:%02x",
786             i,
787             pKernelNvlink0->nvlinkLinks[i].remoteEndInfo.domain,
788             pKernelNvlink0->nvlinkLinks[i].remoteEndInfo.bus,
789             pKernelNvlink0->nvlinkLinks[i].remoteEndInfo.device);
790 
791         status = NV_ERR_INVALID_STATE;
792     }
793     FOR_EACH_INDEX_IN_MASK_END;
794 
795 #endif
796 
797     return status;
798 }
799 
800 /*!
801  * knvlinkTrainFabricLinksToActive_IMPL
802  *     Setup NVLinks between 2 peers connected to switch. Train the links to
803  *     High Speed.
804  *
805  * Note: Desired sequence to setup NvLink P2P is:
806  *       1. A client queries P2P capability among GPUs.
807  *       2. If the GPUs are P2P compatible, create NV50_P2P object which invokes
808  *          link training.
809  * However, existing GPU<->GPU link training happens during step 1 through
810  * gpumgrGetP2PCaps - which gets called on RmInitAdapter and may lead to timeout
811  * based upon the time consumed by costly link training operations.
812  *
813  * For now, we are fixing this for nvswitch systems by adding this helper
814  * function which should just get invoked during NV50_P2P object creation.
815  *
816  * This issue needs to be fixed for non-nvswitch systems as well. Bug:200285708.
817  * Once the bug is fixed, knvlinkTrainFabricLinksToActive can be called from
818  * knvlinkTrainP2pLinksToActive.
819  *
820  * @param[in]  pGpu           OBJGPU pointer
821  * @param[in]  pKernelNvlink  KernelNvlink pointer
822  *
823  * @return  NV_OK on success
824  */
825 NV_STATUS
826 knvlinkTrainFabricLinksToActive_IMPL
827 (
828     OBJGPU       *pGpu,
829     KernelNvlink *pKernelNvlink
830 )
831 {
832 #if defined(INCLUDE_NVLINK_LIB)
833 
834     OBJSYS *pSys = SYS_GET_INSTANCE();
835     NvU32   i;
836 
    // Minion and SW training are disabled by default on RTL
838     if (IS_RTLSIM(pGpu) && !pKernelNvlink->bForceEnableCoreLibRtlsims)
839     {
840         return NV_OK;
841     }
842 
843     // Return if link training is force disabled through regkey
844     if (pKernelNvlink->bSkipLinkTraining)
845     {
846         NV_PRINTF(LEVEL_INFO,
847                   "Skipping link training due to regkey on GPU%d\n",
848                   pGpu->gpuInstance);
849 
850         return NV_OK;
851     }
852 
853     // If fabric is managed by FM, return
854     if (pSys->getProperty(pSys, PDB_PROP_SYS_FABRIC_IS_EXTERNALLY_MANAGED))
855     {
856         NV_PRINTF(LEVEL_INFO,
857                   "Fabric is externally managed, skip link training\n");
858 
859         return NV_OK;
860     }
861 
862     if (knvlinkIsForcedConfig(pGpu, pKernelNvlink))
863     {
864         NV_PRINTF(LEVEL_INFO,
865                   "Nvlink in Forced Config - skip link training.\n");
866 
867         return NV_OK;
868     }
869 
870     FOR_EACH_INDEX_IN_MASK(32, i, pKernelNvlink->enabledLinks)
871     {
872         if ( pKernelNvlink->nvlinkLinks[i].remoteEndInfo.bConnected &&
873             (pKernelNvlink->nvlinkLinks[i].remoteEndInfo.deviceType ==
874                 NVLINK_DEVICE_TYPE_NVSWITCH))
875         {
876             if (nvlink_lib_train_links_from_swcfg_to_active(
877                 &pKernelNvlink->nvlinkLinks[i].core_link, 1, NVLINK_STATE_CHANGE_SYNC)
878                 != NVL_SUCCESS)
879             {
880                 nvErrorLog_va((void *)pGpu, NVLINK_ERROR,
881                     "NVLink: failed to train link %d to remote PCI:%04x:%02x:%02x",
882                     i,
883                     pKernelNvlink->nvlinkLinks[i].remoteEndInfo.domain,
884                     pKernelNvlink->nvlinkLinks[i].remoteEndInfo.bus,
885                     pKernelNvlink->nvlinkLinks[i].remoteEndInfo.device);
886 
887                 return NV_ERR_INVALID_STATE;
888             }
889         }
890     }
891     FOR_EACH_INDEX_IN_MASK_END;
892 
893 #endif
894 
895     return NV_OK;
896 }
897 
898 /*!
899  * @brief Transition/Wakeup the links into/from sleep (L2) state
900  *
901  * @param[in]  pGpu           OBJGPU pointer
902  * @param[in]  pKernelNvlink  KernelNvlink pointer
903  * @param[in]  linkMask       Mask of links
904  * @param[in]  bEntry         Enter/Exit sleep (L2)
905  *
906  * @return  NV_OK on success
907  */
908 NV_STATUS
909 knvlinkEnterExitSleep_IMPL
910 (
911     OBJGPU       *pGpu,
912     KernelNvlink *pKernelNvlink,
913     NvU32         linkMask,
914     NvBool        bEntry
915 )
916 {
917 #if defined(INCLUDE_NVLINK_LIB)
918 
919     OBJSYS *pSys = SYS_GET_INSTANCE();
920     NvU32   linkId;
921 
922     // NVLink L2 as a feature should be enabled
923     if (!pKernelNvlink->getProperty(pKernelNvlink,
924                                     PDB_PROP_KNVLINK_L2_POWER_STATE_ENABLED))
925     {
926         NV_PRINTF(LEVEL_ERROR, "NVLink L2 is not supported. Returning\n");
927 
928         return NV_ERR_NOT_SUPPORTED;
929     }
930 
931     // Return error if NVLink fabric is managed by FM
932     if (pSys->getProperty(pSys, PDB_PROP_SYS_FABRIC_IS_EXTERNALLY_MANAGED))
933     {
934         NV_PRINTF(LEVEL_ERROR,
935                   "Skipping L2 entry/exit since fabric is externally managed\n");
936 
937         return NV_ERR_NOT_SUPPORTED;
938     }
939 
940     // Check if all the links in the mask are connected
941     FOR_EACH_INDEX_IN_MASK(32, linkId, linkMask)
942     {
943         if (!pKernelNvlink->nvlinkLinks[linkId].remoteEndInfo.bConnected)
944         {
945             NV_PRINTF(LEVEL_ERROR,
946                       "GPU%d: Link%d is not connected. Returning\n",
947                       pGpu->gpuInstance, linkId);
948 
949             return NV_ERR_NOT_SUPPORTED;
950         }
951     }
952     FOR_EACH_INDEX_IN_MASK_END;
953 
954     // Links that share a PLL must enter/exit L2 together
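    //
    // For example, if link 0 is the PLL master and link 1 its slave, a request
    // that includes link 0 must also include link 1 (and vice versa), unless
    // the partner link is not enabled.
    //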
955     FOR_EACH_INDEX_IN_MASK(32, linkId, linkMask)
956     {
957         // If the link is a PLL master, consider the slave link
958         if (pKernelNvlink->nvlinkLinks[linkId].pllMasterLinkId == linkId)
959         {
960             // If the slave link exists and is not init-disabled, it should be included
961             if ( (pKernelNvlink->nvlinkLinks[linkId].pllSlaveLinkId != NVLINK_MAX_LINKS_SW)               &&
962                  (NVBIT(pKernelNvlink->nvlinkLinks[linkId].pllSlaveLinkId) & pKernelNvlink->enabledLinks) &&
963                 !(NVBIT(pKernelNvlink->nvlinkLinks[linkId].pllSlaveLinkId) & linkMask) )
964             {
965                 NV_PRINTF(LEVEL_ERROR,
966                           "GPU%d: Links sharing PLL should enter/exit L2 together. Returning\n",
967                           pGpu->gpuInstance);
968 
969                 return NV_ERR_NOT_SUPPORTED;
970             }
971         }
972         else
973         {
974             // For a slave link, its PLL master should be included if not init-disabled
975             if ( (NVBIT(pKernelNvlink->nvlinkLinks[linkId].pllMasterLinkId) & pKernelNvlink->enabledLinks) &&
976                 !(NVBIT(pKernelNvlink->nvlinkLinks[linkId].pllMasterLinkId) & linkMask) )
977             {
978                 NV_PRINTF(LEVEL_ERROR,
979                           "GPU%d: Links sharing PLL should enter/exit L2 together. Returning\n",
980                           pGpu->gpuInstance);
981 
982                 return NV_ERR_NOT_SUPPORTED;
983             }
984         }
985     }
986     FOR_EACH_INDEX_IN_MASK_END;
987 
988     // Device must be registered in the nvlink core library
989     if (!pKernelNvlink->pNvlinkDev)
990     {
991         NV_PRINTF(LEVEL_ERROR,
992                   "GPU%d: not registered in core lib. Returning\n",
993                   pGpu->gpuInstance);
994 
995         return NV_ERR_NOT_SUPPORTED;
996     }
997 
998     if (bEntry)
999     {
1000         // Remove the peer mapping in HSHUB and transition links to sleep (L2)
1001         return _knvlinkEnterSleep(pGpu, pKernelNvlink, linkMask);
1002     }
1003     else
1004     {
1005         // Wakeup the links from sleep (L2) and setup the peer mapping in HSHUB
1006         return _knvlinkExitSleep(pGpu, pKernelNvlink, linkMask);
1007     }
1008 #endif
1009 
1010     return NV_OK;
1011 }
1012 
1013 /*!
1014  * @brief Shutdown all the connected links associated with the device
1015  *        through the nvlink core library.
1016  *
1017  * @param[in]  pGpu           OBJGPU pointer
1018  * @param[in]  pKernelNvlink  KernelNvlink pointer
1019  *
1020  * @return  NV_OK on success
1021  */
1022 NV_STATUS
1023 knvlinkCoreShutdownDeviceLinks_IMPL
1024 (
1025     OBJGPU       *pGpu,
1026     KernelNvlink *pKernelNvlink,
1027     NvBool        bForceShutdown
1028 )
1029 {
1030 #if defined(INCLUDE_NVLINK_LIB)
1031 
1032     nvlink_link *pLinks[NVLINK_MAX_LINKS_SW] = {0};
1033     OBJSYS      *pSys  = SYS_GET_INSTANCE();
1034     NvU32        count = 0;
1035     NvU32        linkId;
1036 
    // Skip link shutdown when the fabric manager is present, for NVLink versions below 4.0
1038     if ((pKernelNvlink->ipVerNvlink < NVLINK_VERSION_40 &&
1039          pSys->getProperty(pSys, PDB_PROP_SYS_FABRIC_IS_EXTERNALLY_MANAGED)) ||
1040         (pKernelNvlink->pNvlinkDev == NULL))
1041     {
1042         NV_PRINTF(LEVEL_INFO,
1043                   "core lib device is either externally managed or not present, skipping\n");
1044 
1045         return NV_OK;
1046     }
1047 
1048     // return early if there are no enabled links
1049     if (pKernelNvlink->enabledLinks == 0)
1050     {
1051         NV_PRINTF(LEVEL_INFO, "No links to shutdown for the GPU%d\n",
1052                   pGpu->gpuInstance);
1053 
1054         return NV_OK;
1055     }
1056 
1057     if (!bForceShutdown && pKernelNvlink->getProperty(pKernelNvlink, PDB_PROP_KNVLINK_MINION_GFW_BOOT))
1058     {
1059         NV_PRINTF(LEVEL_INFO,
1060                 "GFW boot is enabled. Link shutdown is not required, skipping\n");
1061 
1062         return NV_OK;
1063     }
1064 
1065     FOR_EACH_INDEX_IN_MASK(32, linkId, pKernelNvlink->enabledLinks)
1066     {
1067         // Capture the links for lane shutdown through core lib if supported
1068         if (pKernelNvlink->getProperty(pKernelNvlink, PDB_PROP_KNVLINK_LANE_SHUTDOWN_ENABLED))
1069         {
1070             // Skip GPU in reset
1071             if (pKernelNvlink->nvlinkLinks[linkId].remoteEndInfo.deviceType ==
1072                                                            NV2080_CTRL_NVLINK_DEVICE_INFO_DEVICE_TYPE_GPU)
1073             {
1074                 OBJGPU* pRemoteGpu = gpumgrGetGpuFromBusInfo(
1075                     pKernelNvlink->nvlinkLinks[linkId].remoteEndInfo.domain,
1076                     pKernelNvlink->nvlinkLinks[linkId].remoteEndInfo.bus,
1077                     pKernelNvlink->nvlinkLinks[linkId].remoteEndInfo.device);
1078                 if (API_GPU_IN_RESET_SANITY_CHECK(pRemoteGpu))
1079                 {
1080                     continue;
1081                 }
1082             }
1083             pLinks[count] = pKernelNvlink->nvlinkLinks[linkId].core_link;
1084             count++;
1085         }
1086         else
1087         {
1088             nvlink_lib_powerdown_links_from_active_to_swcfg(
1089                         &pKernelNvlink->nvlinkLinks[linkId].core_link,
1090                         1, NVLINK_STATE_CHANGE_SYNC);
1091         }
1092     }
1093     FOR_EACH_INDEX_IN_MASK_END;
1094 
    // Trigger lane shutdown through the core lib if shutdown is supported
1096     if (pKernelNvlink->getProperty(pKernelNvlink, PDB_PROP_KNVLINK_LANE_SHUTDOWN_ENABLED) && (count > 0))
1097     {
1098         if (nvlink_lib_powerdown_links_from_active_to_off(
1099                         pLinks, count, NVLINK_STATE_CHANGE_SYNC))
1100         {
1101             NV_PRINTF(LEVEL_ERROR, "Unable to turn off links for the GPU%d\n",
1102                       pGpu->gpuInstance);
1103 
1104             return NV_ERR_INVALID_STATE;
1105         }
1106     }
1107 
1108 #endif
1109 
1110     return NV_OK;
1111 }
1112 
1113 /*!
1114  * @brief Reset all the connected links associated with the device
1115  *        through the nvlink core library.
1116  *
1117  * @param[in]  pGpu           OBJGPU pointer
1118  * @param[in]  pKernelNvlink  KernelNvlink pointer
1119  *
1120  * @return  NV_OK on success
1121  */
1122 NV_STATUS
1123 knvlinkCoreResetDeviceLinks_IMPL
1124 (
1125     OBJGPU       *pGpu,
1126     KernelNvlink *pKernelNvlink
1127 )
1128 {
1129 #if defined(INCLUDE_NVLINK_LIB)
1130 
1131     nvlink_link *pLinks[NVLINK_MAX_LINKS_SW] = {0};
1132     OBJSYS      *pSys  = SYS_GET_INSTANCE();
1133     NvU32        count = 0;
1134     NvU32        linkId;
1135 
    // Skip link reset when the fabric manager is present, for NVLink versions below 4.0
1137     if ((pKernelNvlink->ipVerNvlink < NVLINK_VERSION_40 &&
1138          pSys->getProperty(pSys, PDB_PROP_SYS_FABRIC_IS_EXTERNALLY_MANAGED)) ||
1139         (pKernelNvlink->pNvlinkDev == NULL))
1140     {
1141         NV_PRINTF(LEVEL_INFO,
1142                   "core lib device is either externally managed or not present, skipping\n");
1143 
1144         return NV_OK;
1145     }
1146 
1147     // return early if there are no enabled links
1148     if (pKernelNvlink->enabledLinks == 0)
1149     {
1150         NV_PRINTF(LEVEL_INFO, "No links to reset for the GPU%d\n",
1151                   pGpu->gpuInstance);
1152 
1153         return NV_OK;
1154     }
1155 
1156     // We only perform the link reset if lane shutdown is enabled
1157     if (pKernelNvlink->getProperty(pKernelNvlink, PDB_PROP_KNVLINK_LANE_SHUTDOWN_ENABLED))
1158     {
1159         FOR_EACH_INDEX_IN_MASK(32, linkId, pKernelNvlink->enabledLinks)
1160         {
1161             // Skip GPU in reset
1162             if (pKernelNvlink->nvlinkLinks[linkId].remoteEndInfo.deviceType ==
1163                                              NV2080_CTRL_NVLINK_DEVICE_INFO_DEVICE_TYPE_GPU)
1164             {
1165                 OBJGPU* pRemoteGpu = gpumgrGetGpuFromBusInfo(
1166                     pKernelNvlink->nvlinkLinks[linkId].remoteEndInfo.domain,
1167                     pKernelNvlink->nvlinkLinks[linkId].remoteEndInfo.bus,
1168                     pKernelNvlink->nvlinkLinks[linkId].remoteEndInfo.device);
1169                 if (API_GPU_IN_RESET_SANITY_CHECK(pRemoteGpu))
1170                 {
1171                     continue;
1172                 }
1173             }
1174             pLinks[count] = pKernelNvlink->nvlinkLinks[linkId].core_link;
1175             count++;
1176         }
1177         FOR_EACH_INDEX_IN_MASK_END;
1178 
1179         if (nvlink_lib_reset_links(pLinks, count, NVLINK_STATE_CHANGE_SYNC) && (count > 0))
1180         {
1181             NV_PRINTF(LEVEL_ERROR, "Unable to reset link(s) for GPU%d\n",
1182                       pGpu->gpuInstance);
1183 
1184             return NV_ERR_INVALID_STATE;
1185         }
1186     }
1187     else
1188     {
1189         NV_PRINTF(LEVEL_INFO,
1190                   "Lane shutdown not enabled, skipping link(s) reset for GPU%d\n",
1191                   pGpu->gpuInstance);
1192 
1193         return NV_ERR_INVALID_STATE;
1194     }
1195 
1196 #endif
1197 
1198     return NV_OK;
1199 }
1200 
1201 /*!
1202  * @brief Retrain a link from either safe mode or off.
1203  *
1204  * @param[in]  pGpu          OBJGPU pointer
1205  * @param[in]  pKernelNvlink KernelNvlink pointer
1206  * @param[in]  linkId        Link ID of the link in question
1207  * @param[in]  bFromOff      Whether link should be retrained from SAFE/OFF
1208  *
1209  * @returns NV_OK if link retraining was successful
1210  */
1211 NV_STATUS
1212 knvlinkRetrainLink_IMPL
1213 (
1214     OBJGPU       *pGpu,
1215     KernelNvlink *pKernelNvlink,
1216     NvU32         linkId,
1217     NvBool        bFromOff
1218 )
1219 {
1220     NV_STATUS status = NV_OK;
1221 
1222     // If NVLINK_LIB isn't enabled, we just execute prologue and return.
1223     _knvlinkRetrainLinkPrologue(pGpu, pKernelNvlink, linkId);
1224 
1225     OBJSYS *pSys    = SYS_GET_INSTANCE();
1226 
1227     // If fabric is managed by FM
1228     if (pSys->getProperty(pSys, PDB_PROP_SYS_FABRIC_IS_EXTERNALLY_MANAGED))
1229     {
1230 #if defined(INCLUDE_NVLINK_LIB)
1231 
1232         //
1233         // Notify FM for link re-training.
1234         //
1235         // Note, at this point all DL interrupts should be disabled. The interrupts
1236         // will be enabled through nvlinkCoreReenableLinkInterruptsCallback only if
1237         // links can be successfully re-trained.
1238         //
        // It is the responsibility of FM to surface link re-training failures to the
        // system admin. Hence, we shouldn't be logging an Xid in this case.
        //
        // It is worth noting that there is no race between the interrupt
        // enable/disable register updates, as we notify FM only after disabling
        // the interrupts.
1245         //
1246         gpuNotifySubDeviceEvent(pGpu,
1247                                 NV2080_NOTIFIERS_NVLINK_ERROR_RECOVERY_REQUIRED,
1248                                 NULL, 0, 0, (NvV16)NV2080_CTRL_NVLINK_UNIT_DL);
1249 
1250         return NV_OK;
1251 #endif
1252     }
1253 
1254 #if defined(INCLUDE_NVLINK_LIB)
1255     //
1256     // If this is a slave endpoint requesting the retrain, kick off a request
1257     // to the master instead. There is no need to (and indeed, we should not)
1258     // hold the master endpoint lock here.
1259     //
1260     if (!pKernelNvlink->nvlinkLinks[linkId].core_link->master)
1261     {
1262         nvlink_link_change *link_change;
1263         nvlink_link *slave, *master;
1264 
1265         slave = pKernelNvlink->nvlinkLinks[linkId].core_link;
1266         if (nvlink_lib_get_link_master(slave, &master) != NVL_SUCCESS)
1267         {
1268             NV_PRINTF(LEVEL_ERROR,
1269                       "link master could not be found from GPU%u link %u\n",
1270                       gpuGetInstance(pGpu), linkId);
1271 
1272             return NV_ERR_INVALID_STATE;
1273         }
1274 
1275         NV_ASSERT_OR_RETURN(master != slave, NV_ERR_INVALID_STATE);
1276 
1277         link_change         = &slave->link_change;
1278         link_change->slave  = slave;
1279         link_change->master = master;
1280         link_change->change_type = bFromOff ? nvlink_retrain_from_off :
1281                                                   nvlink_retrain_from_safe;
1282 
1283         if (master->link_handlers->queue_link_change(link_change) != NVL_SUCCESS)
1284         {
1285             return NV_ERR_GENERIC;
1286         }
1287 
1288         //
1289         // Because the link retrain request to the master is asynchronous,
1290         // tell the caller they'll need to wait.
1291         //
1292         return NV_WARN_MORE_PROCESSING_REQUIRED;
1293     }
1294 #endif
1295 
1296     if (bFromOff)
1297     {
1298         status = knvlinkRetrainLinkFromOff(pGpu, pKernelNvlink, linkId);
1299     }
1300     else
1301     {
1302         status = knvlinkRetrainLinkFromSafe(pGpu, pKernelNvlink, linkId);
1303     }
1304 
1305     return status;
1306 }
1307 
1308 /*!
1309  * @brief Floorsweep the nvlink config for the chip
1310  *
1311  * @param[in]  pGpu            OBJGPU pointer
1312  * @param[in]  pKernelNvlink   KernelNvlink pointer
 * @param[in]  numLinksPerIoctrl         Number of total links per IOCTRL found in discovery
 * @param[out] pNumActiveLinksPerIoctrl  Number of links per IOCTRL needed to be active
1315  *
 * @returns On success, returns NV_OK.
1317  *          On failure, returns NV_ERR_XXX.
1318  */
1319 NV_STATUS
1320 knvlinkFloorSweep_IMPL
1321 (
1322     OBJGPU *pGpu,
1323     KernelNvlink *pKernelNvlink,
1324     NvU32         numLinksPerIoctrl,
1325     NvU32        *pNumActiveLinksPerIoctrl
1326 )
1327 {
1328 
1329 #if defined(INCLUDE_NVLINK_LIB)
1330     NV_STATUS status = NV_OK;
1331     NvU32   linkId;
1332     NvU32   tmpDisabledLinkMask    = 0;
1333     NvU32   tmpEnabledLinkMask     = 0;
1334     nvlink_conn_info conn_info;
1335 
1336     *pNumActiveLinksPerIoctrl = knvlinkGetNumActiveLinksPerIoctrl(pGpu, pKernelNvlink);
1337     if (!knvlinkIsFloorSweepingNeeded_HAL(pGpu, pKernelNvlink, *pNumActiveLinksPerIoctrl, numLinksPerIoctrl))
1338     {
1339         return NV_OK;
1340     }
1341 
    // What matters here is exercising the discovery path, not the returned connection info
1343     FOR_EACH_INDEX_IN_MASK(32, linkId, pKernelNvlink->enabledLinks)
1344     {
1345         nvlink_lib_discover_and_get_remote_conn_info(
1346                     pKernelNvlink->nvlinkLinks[linkId].core_link, &conn_info, 0);
1347     }
1348     FOR_EACH_INDEX_IN_MASK_END;
1349 
1350     //
    // This call must happen before floorsweeping in order to cache the NVLink
    // bridge information in physical RM.
1353     //
1354     knvlinkDirectConnectCheck_HAL(pGpu, pKernelNvlink);
1355 
    // Floorsweeping in the core lib will update the connection info that RM will query below
1357     (void)nvlink_lib_powerdown_floorswept_links_to_off(pKernelNvlink->pNvlinkDev);
1358 
1359     //
    // If a link in the enabledLinkMask is not trained after floorsweeping,
    // then add it to a temporary disabled link mask
1362     //
1363 
1364     // Get the link train status for the enabled link masks
1365     NV2080_CTRL_NVLINK_ARE_LINKS_TRAINED_PARAMS linkTrainedParams;
1366 
1367     portMemSet(&linkTrainedParams, 0, sizeof(linkTrainedParams));
1368     linkTrainedParams.linkMask    = pKernelNvlink->enabledLinks;
1369     linkTrainedParams.bActiveOnly = NV_TRUE;
1370 
1371     // Reset timeout to clear any accumulated timeouts from link init
1372     if (IS_GSP_CLIENT(pGpu))
1373     {
1374         threadStateResetTimeout(pGpu);
1375     }
1376 
1377     status = knvlinkExecGspRmRpc(pGpu, pKernelNvlink,
1378                                  NV2080_CTRL_CMD_NVLINK_ARE_LINKS_TRAINED,
1379                                  (void *)&linkTrainedParams,
1380                                  sizeof(linkTrainedParams));
1381     if (status != NV_OK)
1382     {
1383         NV_PRINTF(LEVEL_ERROR, "Failed to get the link train status for links\n");
1384         return status;
1385     }
1386 
1387     //
    // Create a temporary mask of all links that are now enabled,
    // i.e. links that trained to ACTIVE
1390     //
1391     FOR_EACH_INDEX_IN_MASK(32, linkId, pKernelNvlink->enabledLinks)
1392     {
1393         if (linkTrainedParams.bIsLinkActive[linkId])
1394         {
1395             tmpEnabledLinkMask |= BIT(linkId);
1396         }
1397         else
1398         {
1399             tmpDisabledLinkMask |= BIT(linkId);
1400         }
1401     }
1402     FOR_EACH_INDEX_IN_MASK_END;
1403 
    // Rebuild the link masks, treating the query above as the ground truth
1405     pKernelNvlink->enabledLinks          = tmpEnabledLinkMask;
1406 
1407     //
    // Remove any links that are not in ACTIVE from all other link masks,
    // as these links have been floorswept by the core lib
1410     //
1411     pKernelNvlink->disconnectedLinkMask    = tmpEnabledLinkMask;
1412     pKernelNvlink->initDisabledLinksMask   = tmpDisabledLinkMask;
1413 
1414 
1415     status = knvlinkProcessInitDisabledLinks(pGpu, pKernelNvlink);
1416     if (status != NV_OK)
1417     {
1418         NV_ASSERT(status == NV_OK);
1419         return status;
1420     }
1421 
1422     // Re-sync the link masks with GSP
1423     status = knvlinkSyncLinkMasksAndVbiosInfo(pGpu, pKernelNvlink);
1424     if (status != NV_OK)
1425     {
1426         NV_ASSERT(status == NV_OK);
1427         return status;
1428     }
1429 
1430     //
    // Assert that the number of links in ACTIVE is always less than
    // or equal to the number of active links supported on the chip
1433     //
1434     if(!(nvPopCount32(tmpEnabledLinkMask) <= *pNumActiveLinksPerIoctrl * nvPopCount32(pKernelNvlink->ioctrlMask)))
1435     {
1436         NV_PRINTF(LEVEL_INFO,
1437               "Floorsweeping didn't work! enabledMaskCount: 0x%x and numActiveLinksTotal: 0x%x. Current link info cached in SW: discoveredLinks: 0x%x; enabledLinks:0x%x; disconnectedLinks:0x%x; initDisabledLinksMask:0x%x\n",
1438               nvPopCount32(tmpEnabledLinkMask), *pNumActiveLinksPerIoctrl * nvPopCount32(pKernelNvlink->ioctrlMask), pKernelNvlink->discoveredLinks, pKernelNvlink->enabledLinks, pKernelNvlink->disconnectedLinkMask, pKernelNvlink->initDisabledLinksMask);
1439 
1440         return NV_ERR_NOT_READY;
1441     }
1442 
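    //
    // Mark floorsweeping as complete so that subsequent topology discovery
    // passes (see knvlinkCoreGetRemoteDeviceInfo) do not repeat it.
    //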
1443     pKernelNvlink->bFloorSwept = NV_TRUE;
1444 #endif //INCLUDE_NVLINK_LIB
1445     return NV_OK;
1446 }
1447 
1448 /*!
1449  * @brief Retrain the link from OFF state
1450  *
1451  * @param[in]  pGpu           OBJGPU pointer
1452  * @param[in]  pKernelNvlink  KernelNvlink pointer
1453  * @param[in]  linkId         Link ID of the link in question
1454  *
1455  * @returns NV_OK if link retraining was successful
1456  */
1457 NV_STATUS
1458 knvlinkRetrainLinkFromOff
1459 (
1460     OBJGPU       *pGpu,
1461     KernelNvlink *pKernelNvlink,
1462     NvU32         linkId
1463 )
1464 {
1465 
1466     return NV_OK;
1467 }
1468 
1469 /*!
1470  * @brief Retrain the link from SAFE state
1471  *
1472  * @param[in]  pGpu           OBJGPU pointer
1473  * @param[in]  pKernelNvlink  KernelNvlink pointer
1474  * @param[in]  linkId         Link ID of the link in question
1475  *
1476  * @returns NV_OK if link retraining was successful
1477  */
1478 NV_STATUS
1479 knvlinkRetrainLinkFromSafe
1480 (
1481     OBJGPU       *pGpu,
1482     KernelNvlink *pKernelNvlink,
1483     NvU32         linkId
1484 )
1485 {
1486 
1487     return NV_OK;
1488 }
1489 
1490 /*!
1491  * @brief _knvlinkRetrainLinkPrologue currently disables DL interrupts
1492  *
1493  * @param[in]  pGpu           OBJGPU pointer
1494  * @param[in]  pKernelNvlink  KernelNvlink pointer
1495  * @param[in]  linkId         Link ID of the link in question
1496  */
1497 static void
1498 _knvlinkRetrainLinkPrologue
1499 (
1500     OBJGPU       *pGpu,
1501     KernelNvlink *pKernelNvlink,
1502     NvU32         linkId
1503 )
1504 {
1505 
1506     return;
1507 }
1508 
1509 #if defined(INCLUDE_NVLINK_LIB)
1510 
1511 /*!
1512  * @brief Activate the connections discovered in topology discovery
1513  *
1514  * @param[in]  pGpu               OBJGPU pointer
1515  * @param[in]  pKernelNvlink      KernelNvlink pointer
1516  * @param[in]  bCheckDegradedMode Whether to check for degraded mode
1517  *
1518  * @return  NV_OK on success
1519  */
1520 static NV_STATUS
1521 _knvlinkActivateDiscoveredConns
1522 (
1523     OBJGPU       *pGpu,
1524     KernelNvlink *pKernelNvlink,
1525     NvBool        bCheckDegradedMode
1526 )
1527 {
1528     NvU32      initDisconnectedLinkMask = pKernelNvlink->disconnectedLinkMask;
1529     NvU32      switchLinkMasks          = 0;
1530     NvBool     bPeerUpdated             = NV_FALSE;
1531     NV_STATUS  status                   = NV_OK;
1532     NvU32      linkId;
1533 
1534     //
1535     // Degraded Mode on LR10+ systems. Check for degraded mode if this was not done before
1536     // and if new connections were discovered from the core library.
1537     //
1538     if (bCheckDegradedMode)
1539     {
1540         status = knvlinkApplyNvswitchDegradedModeSettings_HAL(pGpu, pKernelNvlink,
1541                                                               &switchLinkMasks);
1542     }
1543 
1544     // We only need to look at links that are considered disconnected
1545     FOR_EACH_INDEX_IN_MASK(32, linkId, initDisconnectedLinkMask)
1546     {
1547         if (pKernelNvlink->nvlinkLinks[linkId].remoteEndInfo.bConnected)
1548         {
1549             // This link is now marked connected
1550             pKernelNvlink->disconnectedLinkMask &= ~NVBIT(linkId);
1551 
1552             if (pKernelNvlink->nvlinkLinks[linkId].remoteEndInfo.deviceType
1553                     == NVLINK_DEVICE_TYPE_GPU)
1554             {
1555                 bPeerUpdated = NV_TRUE;
1556 
1557                 //
1558                 // Activate the p2p link. This includes copying the remote device
1559                 // information for the remote link and enabling the post topology
1560                 // steps on both the ends of the link.
1561                 //
                // NOTE: HSHUB will not be set up for the discovered peer link here
1563                 //       and will only be configured when a P2P object is created
1564                 //
1565                 status = _knvlinkActivateDiscoveredP2pConn(pGpu, pKernelNvlink, linkId);
1566             }
1567             else if (pKernelNvlink->nvlinkLinks[linkId].remoteEndInfo.deviceType
1568                     == NVLINK_DEVICE_TYPE_NVSWITCH)
1569             {
1570                 status = _knvlinkActivateDiscoveredSwitchConn(pGpu, pKernelNvlink, linkId);
1571 
1572                 //
1573                 // There is no need to mark link as a master. On NVSwitch systems,
1574                 // External Fabric Management should be enabled by default.
1575                 //
1576                 switchLinkMasks |= NVBIT(linkId);
1577             }
1578             else
1579             {
1580                 //
                // Activate the sysmem link. This also trains the link to
                // ACTIVE, since for sysmem links the post-topology steps can
                // be set up only after the link is ACTIVE
1584                 //
1585                 status = _knvlinkActivateDiscoveredSysmemConn(pGpu, pKernelNvlink, linkId);
1586             }
1587 
1588             // If any of the above failed, return failure
1589             if (status != NV_OK)
1590             {
1591                 NV_PRINTF(LEVEL_ERROR,
1592                           "Failed to activate link%d on GPU%d!!!\n", linkId,
1593                           pGpu->gpuInstance);
1594 
1595                 return status;
1596             }
1597         }
1598     }
1599     FOR_EACH_INDEX_IN_MASK_END;
1600 
1601 #if defined(NVCPU_PPC64LE) || defined(NVCPU_AARCH64)
1602     if (pKernelNvlink->getProperty(pKernelNvlink, PDB_PROP_KNVLINK_SYSMEM_SUPPORT_ENABLED))
1603     {
1604         // Credits should be released after Active for sysmem
        status = knvlinkEnableLinksPostTopology_HAL(pGpu, pKernelNvlink, pKernelNvlink->enabledLinks);
1606         if (status != NV_OK)
1607         {
1608             return status;
1609         }
1610 
1611         // Enable SYSMEM links in HSHUB.  On P9 this must happen after Active
1612         knvlinkUpdateCurrentConfig(pGpu, pKernelNvlink);
1613     }
1614 #endif
1615 
1616     // If any new connection was discovered in this call
1617     if (initDisconnectedLinkMask != pKernelNvlink->disconnectedLinkMask)
1618     {
        if (pKernelNvlink->disconnectedLinkMask == pKernelNvlink->enabledLinks) // GPU degraded case
1620         {
1621             bPeerUpdated |= _knvlinkUpdateSwitchLinkMasksGpuDegraded(pGpu, pKernelNvlink);
1622         }
1623         else // other cases
1624         {
1625             bPeerUpdated |= _knvlinkUpdateSwitchLinkMasks(pGpu, pKernelNvlink,
1626                                                           switchLinkMasks);
1627         }
1628 
1629         _knvlinkPrintTopologySummary(pGpu, pKernelNvlink);
1630 
1631         //
1632         // Make sure we update the CE mappings for this GPU, if the known set
1633         // of peers has changed.
1634         //
1635         knvlinkUpdateCurrentConfig(pGpu, pKernelNvlink);
1636         if (bPeerUpdated)
1637         {
1638             //
1639             // Request that any peers updated also update their CE mappings,
1640             // since they now have a new peer.
1641             //
1642             _knvlinkUpdatePeerConfigs(pGpu, pKernelNvlink);
1643         }
1644     }
1645 
1646     return status;
1647 }
1648 
1649 /*!
1650  * @brief Activate the given P2P connection
1651  *        This function updates the RM state for the discovered P2P connection
1652  *        and enables post-topology steps on both ends of the connection. But,
1653  *        it does not configure HSHUB on any end of the connection. HSHUB will
1654  *        be configured only when a P2P object is created
1655  *
1656  * @param[in]  pGpu           OBJGPU pointer
1657  * @param[in]  pKernelNvlink  KernelNvlink pointer
1658  * @param[in]  linkId         Link ID
1659  *
1660  * @return  NV_OK on success
1661  */
1662 static NV_STATUS
1663 _knvlinkActivateDiscoveredP2pConn
1664 (
1665     OBJGPU       *pGpu,
1666     KernelNvlink *pKernelNvlink,
1667     NvU32         linkId
1668 )
1669 {
1670     OBJGPU       *pGpu0             = pGpu;
1671     OBJGPU       *pGpu1             = NULL;
1672     KernelNvlink *pKernelNvlink0    = GPU_GET_KERNEL_NVLINK(pGpu0);
1673     NV_STATUS     status            = NV_OK;
1674     NvBool        bUpdateConnStatus = NV_FALSE;
1675     NvU32         remoteLinkId;
1676     NvU32         gpuInst;
1677 
1678     // Get the remote OBJGPU and Nvlink
1679     for (gpuInst = 0; gpuInst < NV_MAX_DEVICES; gpuInst++)
1680     {
1681         pGpu1 = gpumgrGetGpu(gpuInst);
1682 
1683         if (pGpu1 &&
1684             // Just rely on PCIe DBDF values for detecting the remote
1685             (pKernelNvlink0->nvlinkLinks[linkId].remoteEndInfo.domain   == gpuGetDomain(pGpu1)) &&
1686             (pKernelNvlink0->nvlinkLinks[linkId].remoteEndInfo.bus      == gpuGetBus(pGpu1))    &&
1687             (pKernelNvlink0->nvlinkLinks[linkId].remoteEndInfo.device   == gpuGetDevice(pGpu1)) &&
1688             (pKernelNvlink0->nvlinkLinks[linkId].remoteEndInfo.function == 0))
1689         {
1690             KernelNvlink *pKernelNvlink1 = GPU_GET_KERNEL_NVLINK(pGpu1);
1691 
1692             // Map the remote GPU's instance number to the associated links on this GPU.
1693             status = knvlinkSetLinkMaskToPeer(pGpu0, pKernelNvlink0, pGpu1,
1694                                              (pKernelNvlink0->peerLinkMasks[gpuInst] | NVBIT(linkId)));
1695             if (status != NV_OK)
1696                 return status;
1697 
1698             //
1699             // Post Topology enable on the local end of the link.
1700             // Needs to happen before HSHUB is setup for this link on any end.
1701             //
1702             status = knvlinkEnableLinksPostTopology_HAL(pGpu0, pKernelNvlink0, NVBIT(linkId));
1703             if (status != NV_OK)
1704             {
1705                 return status;
1706             }
1707 
            // Populate the remote end's connection info so it points back at this GPU
1709             if (pKernelNvlink1)
1710             {
1711                 remoteLinkId = pKernelNvlink0->nvlinkLinks[linkId].remoteEndInfo.linkNumber;
1712 
                // RPC into GSP-RM to update the link remote connection status only if it is required
1714                 if (pKernelNvlink1->nvlinkLinks[remoteLinkId].remoteEndInfo.bConnected == NV_FALSE)
1715                     bUpdateConnStatus = NV_TRUE;
1716 
1717                 // Set the PCI information for remote end
1718                 pKernelNvlink1->nvlinkLinks[remoteLinkId].remoteEndInfo.bConnected  = NV_TRUE;
1719                 pKernelNvlink1->nvlinkLinks[remoteLinkId].remoteEndInfo.domain      = pKernelNvlink0->pNvlinkDev->pciInfo.domain;
1720                 pKernelNvlink1->nvlinkLinks[remoteLinkId].remoteEndInfo.bus         = pKernelNvlink0->pNvlinkDev->pciInfo.bus;
1721                 pKernelNvlink1->nvlinkLinks[remoteLinkId].remoteEndInfo.device      = pKernelNvlink0->pNvlinkDev->pciInfo.device;
1722                 pKernelNvlink1->nvlinkLinks[remoteLinkId].remoteEndInfo.function    = pKernelNvlink0->pNvlinkDev->pciInfo.function;
1723                 pKernelNvlink1->nvlinkLinks[remoteLinkId].remoteEndInfo.pciDeviceId = pKernelNvlink0->pNvlinkDev->pciInfo.pciDeviceId;
1724                 pKernelNvlink1->nvlinkLinks[remoteLinkId].remoteEndInfo.deviceType  = pKernelNvlink0->pNvlinkDev->type;
1725                 pKernelNvlink1->nvlinkLinks[remoteLinkId].remoteEndInfo.chipSid     = pKernelNvlink0->nvlinkLinks[linkId].core_link->localSid;
1726                 pKernelNvlink1->nvlinkLinks[remoteLinkId].remoteEndInfo.linkNumber  = linkId;
1727 
1728                 // Update the DLPL revision in the connection information
1729                 pKernelNvlink0->nvlinkLinks[linkId].remoteEndInfo.ipVerDlPl = pKernelNvlink1->nvlinkLinks[remoteLinkId].ipVerDlPl;
1730                 pKernelNvlink1->nvlinkLinks[remoteLinkId].remoteEndInfo.ipVerDlPl = pKernelNvlink0->nvlinkLinks[linkId].ipVerDlPl;
1731 
1732                 if (bUpdateConnStatus)
1733                 {
1734                     // RPC into GSP-RM to update the link remote connection status for pGpu1 for the given link
1735                     status = knvlinkUpdateLinkConnectionStatus(pGpu1, pKernelNvlink1, remoteLinkId);
1736                     if (status != NV_OK)
1737                     {
1738                         return status;
1739                     }
1740                 }
1741 
1742                 pKernelNvlink1->disconnectedLinkMask &= ~NVBIT(remoteLinkId);
1743 
1744                 // Map this GPU's instance number to the associated link on the remote end.
1745                 status = knvlinkSetLinkMaskToPeer(pGpu1, pKernelNvlink1, pGpu0,
1746                                                   (pKernelNvlink1->peerLinkMasks[gpuGetInstance(pGpu0)] | NVBIT(remoteLinkId)));
1747                 if (status != NV_OK)
1748                     return status;
1749 
1750                 //
1751                 // Post Topology enable on the remote end of the link.
1752                 // Needs to happen before HSHUB is setup for this link on any end.
1753                 //
1754                 status = knvlinkEnableLinksPostTopology_HAL(pGpu1, pKernelNvlink1, NVBIT(remoteLinkId));
1755                 if (status != NV_OK)
1756                 {
1757                     return status;
1758                 }
1759 
1760                 // Set the deviceUUID
1761                 portMemCopy(pKernelNvlink1->nvlinkLinks[remoteLinkId].remoteEndInfo.devUuid,
1762                             NV_UUID_LEN,
1763                             pGpu0->gpuUuid.uuid,
1764                             NV_UUID_LEN);
1765 
1766                 //
1767                 // The master of a GPU <-> GPU link depends on instance number. This is so that when locking
1768                 // (which requires the master to be locked before the slave), the lower GPU instance number
1769                 // will always be locked first, which is how rmGpuLocksAcquire acquires them. For loopback,
1770                 // fall back to link ID instead.
1771                 //
1772                 if ((gpuGetInstance(pGpu0) < gpuGetInstance(pGpu1)) ||
1773                     ((gpuGetInstance(pGpu0) == gpuGetInstance(pGpu1)) &&
1774                      (linkId < remoteLinkId)))
1775                 {
1776                     NV_ASSERT(NVL_SUCCESS == nvlink_lib_set_link_master(
1777                             pKernelNvlink0->nvlinkLinks[linkId].core_link));
1778                 }
1779                 else if ((gpuGetInstance(pGpu1) < gpuGetInstance(pGpu0)) ||
1780                          ((gpuGetInstance(pGpu1) == gpuGetInstance(pGpu0)) &&
1781                           (remoteLinkId < linkId)))
1782                 {
1783                     NV_ASSERT(NVL_SUCCESS == nvlink_lib_set_link_master(
1784                             pKernelNvlink1->nvlinkLinks[remoteLinkId].core_link));
1785                 }
1786 
1787                 break;
1788             }
1789         }
1790     }
1791 
1792     return status;
1793 }
1794 
1795 /*!
1796  * @brief Activate the given switch connection
1797  *
1798  * @param[in]  pGpu          OBJGPU pointer
1799  * @param[in]  pKernelNvlink KernelNvlink pointer
1800  * @param[in]  linkId        Link ID
1801  *
1802  * @return  NV_OK on success
1803  */
1804 static NV_STATUS
1805 _knvlinkActivateDiscoveredSwitchConn
1806 (
1807     OBJGPU       *pGpu,
1808     KernelNvlink *pKernelNvlink,
1809     NvU32         linkId
1810 )
1811 {
1812     NV_STATUS status = NV_OK;
1813 
1814     // Post Topology enablement for switch links
1815     status = knvlinkEnableLinksPostTopology_HAL(pGpu, pKernelNvlink, NVBIT(linkId));
1816     if (status != NV_OK)
1817     {
1818         return status;
1819     }
1820 
1821     return NV_OK;
1822 }
1823 
1824 /*!
 * @brief Activate the given sysmem connection
 *        This function updates the RM state for the discovered sysmem
 *        connection and trains the connection to ACTIVE, because for
 *        sysmem links the post-topology steps can be configured only
 *        after ACTIVE. HSHUB is also configured for the sysmem link here.
1830  *
1831  * @param[in]  pGpu          OBJGPU pointer
1832  * @param[in]  pKernelNvlink KernelNvlink pointer
1833  * @param[in]  linkId        Link ID
1834  *
1835  * @return  NV_OK on success
1836  */
1837 static NV_STATUS
1838 _knvlinkActivateDiscoveredSysmemConn
1839 (
1840     OBJGPU       *pGpu,
1841     KernelNvlink *pKernelNvlink,
1842     NvU32         linkId
1843 )
1844 {
1845     NV_STATUS status = NV_OK;
1846 
1847     NV2080_CTRL_NVLINK_UPDATE_HSHUB_MUX_PARAMS    updateHshubMuxParams;
1848     NV2080_CTRL_NVLINK_SETUP_NVLINK_SYSMEM_PARAMS nvlinkSysmemParams;
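
    //
    // Sysmem activation sequence: add the link to sysmemLinkMask, program the
    // HSHUB sysmem state via GSP-RM, make the GPU side the link master, train
    // the sysmem links to ACTIVE, and finally program the HSHUB MUX.
    //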
1849 
1850     pKernelNvlink->sysmemLinkMask |= NVBIT(linkId);
1851 
1852     portMemSet(&nvlinkSysmemParams, 0, sizeof(nvlinkSysmemParams));
1853     nvlinkSysmemParams.sysmemLinkMask = pKernelNvlink->sysmemLinkMask;
1854 
1855     status = knvlinkExecGspRmRpc(pGpu, pKernelNvlink,
1856                                  NV2080_CTRL_CMD_NVLINK_SETUP_NVLINK_SYSMEM,
1857                                  (void *)&nvlinkSysmemParams,
1858                                  sizeof(nvlinkSysmemParams));
1859     if (status != NV_OK)
1860     {
1861         NV_PRINTF(LEVEL_ERROR, "Failed to setup HSHUB NVLink sysmem links state\n");
1862         return status;
1863     }
1864 
1865     // Always make the GPU side the master for NPU connections
1866     NV_ASSERT(NVL_SUCCESS == nvlink_lib_set_link_master(
1867         pKernelNvlink->nvlinkLinks[linkId].core_link));
1868 
1869     // Train SYSMEM links to Active, and only then enable traffic
1870     status = knvlinkTrainSysmemLinksToActive(pGpu, pKernelNvlink);
1871     if (status != NV_OK)
1872     {
1873         NV_PRINTF(LEVEL_ERROR,
1874                   "FAILED TO TRAIN CPU/SYSMEM LINKS TO ACTIVE on GPU%d!!!\n",
1875                   pGpu->gpuInstance);
1876 
1877         NV_ASSERT(0);
1878     }
1879 
1880     portMemSet(&updateHshubMuxParams, 0, sizeof(updateHshubMuxParams));
1881     updateHshubMuxParams.updateType = NV2080_CTRL_NVLINK_UPDATE_HSHUB_MUX_TYPE_PROGRAM;
1882     updateHshubMuxParams.bSysMem    = NV_TRUE;
1883 
1884     status = knvlinkExecGspRmRpc(pGpu, pKernelNvlink,
1885                                  NV2080_CTRL_CMD_NVLINK_UPDATE_HSHUB_MUX,
1886                                  (void *)&updateHshubMuxParams,
1887                                  sizeof(updateHshubMuxParams));
1888     return status;
1889 }
1890 
1891 /*!
1892  * @brief Transition the mask of links into sleep (L2) state
1893  *
1894  * @param[in]  pGpu          OBJGPU pointer
1895  * @param[in]  pKernelNvlink KernelNvlink pointer
1896  * @param[in]  linkMask      Mask of links
1897  *
1898  * @return  NV_OK on success
1899  */
1900 static NV_STATUS
1901 _knvlinkEnterSleep
1902 (
1903     OBJGPU       *pGpu,
1904     KernelNvlink *pKernelNvlink,
1905     NvU32         linkMask
1906 )
1907 {
1908     NV_STATUS retStatus = NV_OK;
1909     NvlStatus status    = NVL_SUCCESS;
1910 
1911     NV2080_CTRL_NVLINK_PROGRAM_BUFFERREADY_PARAMS      programBufferRdyParams;
1912     NV2080_CTRL_NVLINK_SAVE_RESTORE_HSHUB_STATE_PARAMS saveRestoreHshubStateParams;
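
    //
    // L2 entry sequence: save the buffer-ready state and the HSHUB state for
    // the given links via GSP-RM RPCs, then request the core library to power
    // the links down from ACTIVE to L2.
    //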
1913 
1914     portMemSet(&programBufferRdyParams, 0, sizeof(programBufferRdyParams));
1915     programBufferRdyParams.flags        = NV2080_CTRL_NVLINK_PROGRAM_BUFFERREADY_FLAGS_SAVE;
1916     programBufferRdyParams.bSysmem      = NV_FALSE;
1917     programBufferRdyParams.peerLinkMask = linkMask;
1918 
    // Save Bufferready state for the mask of links entering L2
1920     status = knvlinkExecGspRmRpc(pGpu, pKernelNvlink,
1921                                  NV2080_CTRL_CMD_NVLINK_PROGRAM_BUFFERREADY,
1922                                  (void *)&programBufferRdyParams,
1923                                  sizeof(programBufferRdyParams));
1924     if (status != NV_OK)
1925         return status;
1926 
1927     portMemSet(&saveRestoreHshubStateParams, 0, sizeof(saveRestoreHshubStateParams));
1928     saveRestoreHshubStateParams.linkMask = linkMask;
1929     saveRestoreHshubStateParams.bSave    = NV_TRUE;
1930 
1931     // Save HSHUB SW state for the links which will need to be restored later
1932     status = knvlinkExecGspRmRpc(pGpu, pKernelNvlink,
1933                                  NV2080_CTRL_CMD_NVLINK_SAVE_RESTORE_HSHUB_STATE,
1934                                  (void *)&saveRestoreHshubStateParams,
1935                                  sizeof(saveRestoreHshubStateParams));
1936     if (status != NV_OK)
1937         return status;
1938 
1939     // In L2 Entry path
1940     pKernelNvlink->bL2Entry = NV_TRUE;
1941 
1942     // Put the mask of links of the device to sleep
1943     status = nvlink_lib_powerdown_links_from_active_to_L2(pKernelNvlink->pNvlinkDev,
1944                                                           linkMask,
1945                                                           NVLINK_STATE_CHANGE_ASYNC);
1946     if (status == NVL_MORE_PROCESSING_REQUIRED)
1947     {
1948         NV_PRINTF(LEVEL_INFO,
1949                   "Transition to L2 for GPU%d: linkMask 0x%x in progress... Waiting for "
1950                   "remote endpoints to request L2 entry\n", pGpu->gpuInstance,
1951                   linkMask);
1952 
1953         return NV_WARN_MORE_PROCESSING_REQUIRED;
1954     }
1955 
1956     if (status != NVL_SUCCESS)
1957     {
1958         NV_PRINTF(LEVEL_ERROR,
1959                   "Unable to put the linkmask 0x%x of GPU%d to SLEEP\n",
1960                   linkMask, pGpu->gpuInstance);
1961 
1962         return NV_ERR_GENERIC;
1963     }
1964 
1965     return retStatus;
1966 }
1967 
1968 /*!
 * @brief Wake up the mask of links from sleep (L2) state
1970  *
1971  * @param[in]  pGpu          OBJGPU pointer
1972  * @param[in]  pKernelNvlink KernelNvlink pointer
1973  * @param[in]  linkMask      Mask of links
1974  *
1975  * @return  NV_OK on success
1976  */
1977 static NV_STATUS
1978 _knvlinkExitSleep
1979 (
1980     OBJGPU       *pGpu,
1981     KernelNvlink *pKernelNvlink,
1982     NvU32         linkMask
1983 )
1984 {
1985     NvlStatus  status         = NVL_SUCCESS;
1986     NvlStatus  trainingStatus = NVL_SUCCESS;
1987     NvU32      linkId;
1988     NvU32      remoteLinkId;
1989     NvU32      gpuInst;
1990     RMTIMEOUT  timeout;
1991     NvU32 linkTrainingTimeout = 10000000;
1992 
1993     NV2080_CTRL_NVLINK_PROGRAM_BUFFERREADY_PARAMS      programBufferRdyParams;
1994     NV2080_CTRL_NVLINK_SAVE_RESTORE_HSHUB_STATE_PARAMS saveRestoreHshubStateParams;
1995 
1996     pKernelNvlink->bL2Entry = NV_FALSE;
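
    //
    // L2 exit sequence: train the links back to ACTIVE (via ALI when enabled,
    // otherwise through the legacy core-library path), run the post-topology
    // steps on both ends of each link, and only then restore the saved HSHUB
    // and buffer-ready state.
    //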
1997 
1998     // Kick-off ALI if it is enabled
1999     if (pKernelNvlink->bEnableAli)
2000     {
2001         //
2002         // For each link, request a change to active.
        // We don't have to wait for the request to finish, as the links
        // will be queried via DLSTAT to determine their status and training
        // progression.
2006         //
2007         FOR_EACH_INDEX_IN_MASK(32, linkId, linkMask)
2008         {
2009             status = knvlinkTrainLinksToActiveAli(pGpu, pKernelNvlink, NVBIT(linkId), NV_FALSE);
2010             if (status != NV_OK)
2011             {
2012                 NV_PRINTF(LEVEL_ERROR,
2013                           "Failed to request Link %d to transition to active\n", linkId);
2014             }
            pKernelNvlink->nvlinkLinks[linkId].core_link->bStateSaved = NV_FALSE;
2018         }
2019         FOR_EACH_INDEX_IN_MASK_END;
2020 
2021         //
        // Get all links that have passed RxDet after L2 exit and poll on those
        // links until they reach ACTIVE
2024         //
2025         if (knvlinkDiscoverPostRxDetLinks_HAL(pGpu, pKernelNvlink, pGpu) == NV_OK)
2026         {
2027             gpuSetTimeout(pGpu, linkTrainingTimeout, &timeout, IS_SILICON(pGpu) ?
2028                 (GPU_TIMEOUT_FLAGS_BYPASS_THREAD_STATE | GPU_TIMEOUT_FLAGS_DEFAULT) : 0);
2029             do
2030             {
2031 
2032                 status = gpuCheckTimeout(pGpu, &timeout);
2033                 trainingStatus = knvlinkCheckTrainingIsComplete(pGpu, pGpu, pKernelNvlink);
2034                 if (trainingStatus == NV_OK)
2035                 {
2036                     break;
2037                 }
2038                 osSpinLoop();
2039             }
2040             while (status != NV_ERR_TIMEOUT);
2041 
2042             if (status == NV_ERR_TIMEOUT)
2043             {
2044                 NV_PRINTF(LEVEL_ERROR,"Timedout while checking to see if training complete!\n");
2045             }
2046         }
2047     }
2048     else
2049     {
        // Wake up the mask of links of the device from sleep using the legacy L2 exit path
2051         status = nvlink_lib_train_links_from_L2_to_active(pKernelNvlink->pNvlinkDev,
2052                                                           linkMask,
2053                                                           NVLINK_STATE_CHANGE_ASYNC);
2054     }
2055 
2056     if (status == NVL_SUCCESS)
2057     {
2058         // Perform post-initialization setup for links that exited L2
2059         FOR_EACH_INDEX_IN_MASK(32, linkId, linkMask)
2060         {
2061             // Post topology link enable for pre-Ampere. This sets up buffer ready
2062             status = knvlinkEnableLinksPostTopology_HAL(pGpu, pKernelNvlink, NVBIT(linkId));
2063             if (status != NV_OK)
2064             {
2065                 return status;
2066             }
2067 
2068             // Update the current NVLink configuration
2069             knvlinkUpdateCurrentConfig(pGpu, pKernelNvlink);
2070 
2071             // Perform post-topology initialization steps on the remote endpoint
2072             if (pKernelNvlink->nvlinkLinks[linkId].remoteEndInfo.deviceType == NVLINK_DEVICE_TYPE_GPU)
2073             {
2074                 OBJGPU       *pGpu1          = NULL;
2075                 KernelNvlink *pKernelNvlink1 = NULL;
2076 
2077                 // Get the remote OBJGPU and Nvlink
2078                 for (gpuInst = 0; gpuInst < NV_MAX_DEVICES; gpuInst++)
2079                 {
2080                     pGpu1 = gpumgrGetGpu(gpuInst);
2081 
2082                     if (pGpu1 &&
2083                         (pKernelNvlink->nvlinkLinks[linkId].remoteEndInfo.domain   == gpuGetDomain(pGpu1) &&
2084                          pKernelNvlink->nvlinkLinks[linkId].remoteEndInfo.bus      == gpuGetBus(pGpu1)    &&
2085                          pKernelNvlink->nvlinkLinks[linkId].remoteEndInfo.device   == gpuGetDevice(pGpu1) &&
2086                          pKernelNvlink->nvlinkLinks[linkId].remoteEndInfo.function == 0))
2087                     {
2088                         pKernelNvlink1 = GPU_GET_KERNEL_NVLINK(pGpu1);
2089                         remoteLinkId   = pKernelNvlink->nvlinkLinks[linkId].remoteEndInfo.linkNumber;
2090 
2091                         // Post topology link enable for pre-Ampere. This sets up buffer ready
2092                         status = knvlinkEnableLinksPostTopology_HAL(pGpu1, pKernelNvlink1, NVBIT(remoteLinkId));
2093                         if (status != NV_OK)
2094                         {
2095                             return status;
2096                         }
2097 
2098                         // Update the current NVLink configuration
2099                         knvlinkUpdateCurrentConfig(pGpu1, pKernelNvlink1);
2100 
2101                         break;
2102                     }
2103                 }
2104             }
2105         }
2106         FOR_EACH_INDEX_IN_MASK_END;
2107     }
2108 
2109     //
    // Restore HSHUB *ONLY AFTER* links have been trained and post-topology setup is complete
    // on both ends of the link. Only then can HSHUB be configured for P2P on either side of the link
2112     //
2113     if (status == NVL_SUCCESS)
2114     {
2115         portMemSet(&saveRestoreHshubStateParams, 0, sizeof(saveRestoreHshubStateParams));
2116         saveRestoreHshubStateParams.linkMask = linkMask;
2117         saveRestoreHshubStateParams.bSave    = NV_FALSE;
2118 
2119         // Restore HSHUB SW state for the links which exited L2 state
2120         status = knvlinkExecGspRmRpc(pGpu, pKernelNvlink,
2121                                      NV2080_CTRL_CMD_NVLINK_SAVE_RESTORE_HSHUB_STATE,
2122                                      (void *)&saveRestoreHshubStateParams,
2123                                      sizeof(saveRestoreHshubStateParams));
2124         if (status != NV_OK)
2125             return status;
2126 
2127         knvlinkUpdateCurrentConfig(pGpu, pKernelNvlink);
2128 
2129         portMemSet(&programBufferRdyParams, 0, sizeof(programBufferRdyParams));
2130         programBufferRdyParams.flags        = NV2080_CTRL_NVLINK_PROGRAM_BUFFERREADY_FLAGS_RESTORE;
2131         programBufferRdyParams.bSysmem      = NV_FALSE;
2132         programBufferRdyParams.peerLinkMask = linkMask;
2133 
2134         // Restore Bufferready state for the links which exited L2 state
2135         status = knvlinkExecGspRmRpc(pGpu, pKernelNvlink,
2136                                      NV2080_CTRL_CMD_NVLINK_PROGRAM_BUFFERREADY,
2137                                      (void *)&programBufferRdyParams,
2138                                      sizeof(programBufferRdyParams));
2139         if (status != NV_OK)
2140             return status;
2141 
2142         FOR_EACH_INDEX_IN_MASK(32, linkId, linkMask)
2143         {
2144             if (pKernelNvlink->nvlinkLinks[linkId].remoteEndInfo.deviceType == NVLINK_DEVICE_TYPE_GPU)
2145             {
2146                 OBJGPU       *pGpu1          = NULL;
2147                 KernelNvlink *pKernelNvlink1 = NULL;
2148 
2149                 // Get the remote OBJGPU and Nvlink
2150                 for (gpuInst = 0; gpuInst < NV_MAX_DEVICES; gpuInst++)
2151                 {
2152                     pGpu1 = gpumgrGetGpu(gpuInst);
2153 
2154                     if (pGpu1 &&
2155                         (pKernelNvlink->nvlinkLinks[linkId].remoteEndInfo.domain   == gpuGetDomain(pGpu1) &&
2156                          pKernelNvlink->nvlinkLinks[linkId].remoteEndInfo.bus      == gpuGetBus(pGpu1)    &&
2157                          pKernelNvlink->nvlinkLinks[linkId].remoteEndInfo.device   == gpuGetDevice(pGpu1) &&
2158                          pKernelNvlink->nvlinkLinks[linkId].remoteEndInfo.function == 0))
2159                     {
2160                         pKernelNvlink1 = GPU_GET_KERNEL_NVLINK(pGpu1);
2161                         remoteLinkId   = pKernelNvlink->nvlinkLinks[linkId].remoteEndInfo.linkNumber;
2162 
2163                         portMemSet(&saveRestoreHshubStateParams, 0, sizeof(saveRestoreHshubStateParams));
2164                         saveRestoreHshubStateParams.linkMask = NVBIT(remoteLinkId);
2165                         saveRestoreHshubStateParams.bSave    = NV_FALSE;
2166 
2167                         // Restore HSHUB SW state for the links which exited L2 state
2168                         status = knvlinkExecGspRmRpc(pGpu1, pKernelNvlink1,
2169                                                      NV2080_CTRL_CMD_NVLINK_SAVE_RESTORE_HSHUB_STATE,
2170                                                      (void *)&saveRestoreHshubStateParams,
2171                                                      sizeof(saveRestoreHshubStateParams));
2172                         if (status != NV_OK)
2173                             return status;
2174 
2175                         knvlinkUpdateCurrentConfig(pGpu1, pKernelNvlink1);
2176 
2177                         portMemSet(&programBufferRdyParams, 0, sizeof(programBufferRdyParams));
2178                         programBufferRdyParams.flags        = NV2080_CTRL_NVLINK_PROGRAM_BUFFERREADY_FLAGS_RESTORE;
2179                         programBufferRdyParams.bSysmem      = NV_FALSE;
2180                         programBufferRdyParams.peerLinkMask = NVBIT(remoteLinkId);
2181 
2182                         //
2183                         // Restore Buffer Ready state for the links from cached SW state after HSHUB
2184                         // settings have been restored
2185                         //
2186                         status = knvlinkExecGspRmRpc(pGpu1, pKernelNvlink1,
2187                                                      NV2080_CTRL_CMD_NVLINK_PROGRAM_BUFFERREADY,
2188                                                      (void *)&programBufferRdyParams,
2189                                                      sizeof(programBufferRdyParams));
2190                         if (status != NV_OK)
2191                             return status;
2192 
2193                         break;
2194                     }
2195                 }
2196             }
2197         }
2198         FOR_EACH_INDEX_IN_MASK_END;
2199     }
2200 
2201     if (status == NVL_MORE_PROCESSING_REQUIRED)
2202     {
2203         NV_PRINTF(LEVEL_INFO,
2204                   "Transition to L0 for GPU%d: linkMask 0x%x in progress... Waiting for "
2205                   "remote endpoints to request L2 exit\n", pGpu->gpuInstance,
2206                   linkMask);
2207 
2208         return NV_WARN_MORE_PROCESSING_REQUIRED;
2209     }
2210 
2211     if (status != NVL_SUCCESS)
2212     {
2213         NV_PRINTF(LEVEL_ERROR,
2214                   "Unable to wakeup the linkmask 0x%x of GPU%d from SLEEP\n",
2215                   linkMask, pGpu->gpuInstance);
2216 
2217         return NV_ERR_GENERIC;
2218     }
2219 
2220     return NV_OK;
2221 }
2222 
2223 /*!
2224  * @brief Updates GPU peer info (peerMask) based on switchLinkMasks
2225  *
2226  * @param[in]  pGpu             OBJGPU pointer
2227  * @param[in]  pKernelNvlink    KernelNvlink pointer
2228  * @param[in]  switchLinkMasks  Mask of switch links
2229  *
2230  * @return  Returns NV_TRUE if peerMask is updated
2231  */
2232 static NvBool
2233 _knvlinkUpdateSwitchLinkMasks
2234 (
2235     OBJGPU       *pGpu,
2236     KernelNvlink *pKernelNvlink,
2237     NvU32         switchLinkMasks
2238 )
2239 {
2240     KernelNvlink *pKernelNvlink1 = NULL;
2241     OBJGPU       *pGpu1          = NULL;
2242     NvBool        bPeerUpdated   = NV_FALSE;
2243     NV_STATUS     status         = NV_OK;
2244     NvU32         gpuInst;
2245 
2246     //
2247     // On NvSwitch systems, all the enabled and connected GPU links should
2248     // go through NvSwitch. We don't support GPU<->GPU or GPU<->NPU direct
2249     // connections on NvSwitch systems.
2250     //
2251     if (!knvlinkIsGpuConnectedToNvswitch(pGpu, pKernelNvlink))
2252     {
2253         return bPeerUpdated;
2254     }
2255 
2256     for (gpuInst = 0; gpuInst < NV_MAX_DEVICES; gpuInst++)
2257     {
2258         pGpu1 = gpumgrGetGpu(gpuInst);
2259         if (!pGpu1)
2260         {
2261             continue;
2262         }
2263 
2264         // No support for SLI P2P on nvswitch systems.
2265         if (IsSLIEnabled(pGpu1))
2266         {
2267             continue;
2268         }
2269 
2270         pKernelNvlink1 = GPU_GET_KERNEL_NVLINK(pGpu1);
2271 
2272         if (!pKernelNvlink1)
2273         {
2274             continue;
2275         }
2276 
2277         if (!pKernelNvlink1->discoveredLinks)
2278         {
2279             continue;
2280         }
2281 
2282         if (!knvlinkIsGpuConnectedToNvswitch(pGpu1, pKernelNvlink1))
2283         {
2284             continue;
2285         }
2286 
2287         // Update local peerLinkMasks.
2288         status = knvlinkSetLinkMaskToPeer(pGpu, pKernelNvlink, pGpu1, switchLinkMasks);
2289         if (status != NV_OK)
2290             return NV_FALSE;
2291 
2292         //
2293         // Update remote peerLinkMasks only if a remote endpoint is connected.
2294         //
2295         // We are deliberately picking up loopback peerLinkMask, because it
2296         // represents the actual nvswitch connection mask for that GPU and
    // guarantees that the endpoint is connected to nvswitch.
2298         //
2299         status = knvlinkSetLinkMaskToPeer(pGpu1, pKernelNvlink1, pGpu,
2300                                 pKernelNvlink1->peerLinkMasks[gpuGetInstance(pGpu1)]);
2301         if (status != NV_OK)
2302             return NV_FALSE;
2303 
2304         bPeerUpdated = NV_TRUE;
2305     }
2306 
2307     return bPeerUpdated;
2308 }
2309 
2310 /*!
2311  * @brief Updates GPU peer info (peerMask) when a GPU is degraded
2312  *
2313  * @param[in]  pGpu           OBJGPU pointer
2314  * @param[in]  pKernelNvlink  KernelNvlink pointer
2315  *
2316  */
2317 static NvBool
2318 _knvlinkUpdateSwitchLinkMasksGpuDegraded
2319 (
2320     OBJGPU       *pGpu,
2321     KernelNvlink *pKernelNvlink
2322 )
2323 {
2324     KernelNvlink *pKernelNvlink1 = NULL;
2325     OBJGPU       *pGpu1          = NULL;
2326     NvBool        bPeerUpdated   = NV_FALSE;
2327     NV_STATUS     status         = NV_OK;
2328     NvU32         gpuInst;
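
    //
    // The local GPU has been degraded: clear the peer link masks on both ends
    // for every NvSwitch-connected peer, removing this GPU's NVLink peer
    // connectivity.
    //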
2329 
2330     for (gpuInst = 0; gpuInst < NV_MAX_DEVICES; gpuInst++)
2331     {
2332         pGpu1 = gpumgrGetGpu(gpuInst);
2333         if (!pGpu1)
2334         {
2335             continue;
2336         }
2337 
2338         // No support for SLI P2P on nvswitch systems.
2339         if (IsSLIEnabled(pGpu1))
2340         {
2341             continue;
2342         }
2343 
2344         pKernelNvlink1 = GPU_GET_KERNEL_NVLINK(pGpu1);
2345 
2346         if (!pKernelNvlink1)
2347         {
2348             continue;
2349         }
2350 
2351         if (!pKernelNvlink1->discoveredLinks)
2352         {
2353             continue;
2354         }
2355 
2356         if (!knvlinkIsGpuConnectedToNvswitch(pGpu1, pKernelNvlink1))
2357         {
2358             continue;
2359         }
2360 
2361         // Update local peerLinkMasks.
2362         status = knvlinkSetLinkMaskToPeer(pGpu, pKernelNvlink, pGpu1, 0);
2363         if (status != NV_OK)
2364             return NV_FALSE;
2365 
2366         // Update remote peerLinkMasks
2367         status = knvlinkSetLinkMaskToPeer(pGpu1, pKernelNvlink1, pGpu, 0);
2368         if (status != NV_OK)
2369             return NV_FALSE;
2370 
2371         bPeerUpdated = NV_TRUE;
2372     }
2373 
2374     return bPeerUpdated;
2375 }
2376 
2377 /*!
2378  * For each known peer, update their configurations, now that another
2379  * one of their peers (this GPU) has been initialized.
2380  *
2381  * This will update the PCE-LCE mappings, but it will not trigger any
2382  * HSHUB updates since peer IDs shouldn't have been allocated at this
2383  * point.
2384  *
2385  * @param[in]  pGpu           OBJGPU pointer
2386  * @param[in]  pKernelNvlink  KernelNvlink pointer
2387  */
2388 static void
2389 _knvlinkUpdatePeerConfigs
2390 (
2391     OBJGPU       *pGpu,
2392     KernelNvlink *pKernelNvlink
2393 )
2394 {
2395     NvU32 gpuInst;
2396 
2397     for (gpuInst = 0; gpuInst < NV_ARRAY_ELEMENTS(pKernelNvlink->peerLinkMasks); gpuInst++)
2398     {
2399         if (pKernelNvlink->peerLinkMasks[gpuInst] != 0)
2400         {
2401             OBJGPU *pRemoteGpu = gpumgrGetGpu(gpuInst);
2402 
2403             if (pRemoteGpu != NULL)
2404             {
2405                 KernelNvlink *pRemoteKernelNvlink = GPU_GET_KERNEL_NVLINK(pRemoteGpu);
2406 
2407                 if (pRemoteKernelNvlink != NULL)
2408                 {
2409                     NV_PRINTF(LEVEL_INFO,
2410                               "GPU%u requesting GPU%u NVLINK config update\n",
2411                               gpuGetInstance(pGpu),
2412                               gpuGetInstance(pRemoteGpu));
2413 
2414                     _knvlinkPrintTopologySummary(pRemoteGpu, pRemoteKernelNvlink);
2415 
2416                     // Update CE mappings on remote GPUs since we have new connections
2417                     knvlinkUpdateCurrentConfig(pRemoteGpu, pRemoteKernelNvlink);
2418                 }
2419             }
2420         }
2421     }
2422 }
2423 
2424 /*!
2425  * Print the nvlink topology for this GPU
2426  *
2427  * @param[in]  pGpu           OBJGPU pointer
2428  * @param[in]  pKernelNvlink  KernelNvlink pointer
2429  */
2430 static void
2431 _knvlinkPrintTopologySummary
2432 (
2433     OBJGPU       *pGpu,
2434     KernelNvlink *pKernelNvlink
2435 )
2436 {
2437 #if NV_PRINTF_ENABLED
2438 
2439     NvU32     i;
2440     NV_STATUS status;
2441 
2442     if (DBG_RMMSG_CHECK(LEVEL_INFO) == 0)
2443     {
2444         return;
2445     }
2446 
2447     NV_PRINTF(LEVEL_INFO, "GPU%02u cached topology:\n", gpuGetInstance(pGpu));
2448 
2449     NV2080_CTRL_NVLINK_HSHUB_GET_SYSMEM_NVLINK_MASK_PARAMS params;
2450     portMemSet(&params, 0, sizeof(params));
2451 
2452     status = knvlinkExecGspRmRpc(pGpu, pKernelNvlink,
2453                                  NV2080_CTRL_CMD_NVLINK_HSHUB_GET_SYSMEM_NVLINK_MASK,
2454                                  (void *)&params, sizeof(params));
2455     if (status != NV_OK)
2456     {
2457         NV_PRINTF(LEVEL_ERROR, "Unable to determine sysmem link mask\n");
2458         return;
2459     }
2460 
2461     // Print the discovered sysmem links
2462     if (params.sysmemLinkMask != 0)
2463     {
2464         NV_PRINTF(LEVEL_INFO, "    sysmem link mask : 0x%x\n", params.sysmemLinkMask);
2465     }
2466 
2467     // Print the discovered p2p links
2468     for (i = 0; i < NV_ARRAY_ELEMENTS(pKernelNvlink->peerLinkMasks); i++)
2469     {
2470         if (pKernelNvlink->peerLinkMasks[i] != 0)
2471         {
2472             NV_PRINTF(LEVEL_INFO, "    GPU%02u link mask  : 0x%x\n", i,
2473                       pKernelNvlink->peerLinkMasks[i]);
2474         }
2475     }
2476 
2477     // Print the links which do not have a connection yet
2478     if (pKernelNvlink->disconnectedLinkMask != 0)
2479     {
2480         NV_PRINTF(LEVEL_INFO, "    unknown link mask: 0x%x\n",
2481                   pKernelNvlink->disconnectedLinkMask);
2482     }
2483 
2484 #endif
2485 }
2486 
2487 #endif
2488