1 /*
2  * SPDX-FileCopyrightText: Copyright (c) 2020-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
3  * SPDX-License-Identifier: MIT
4  *
5  * Permission is hereby granted, free of charge, to any person obtaining a
6  * copy of this software and associated documentation files (the "Software"),
7  * to deal in the Software without restriction, including without limitation
8  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
9  * and/or sell copies of the Software, and to permit persons to whom the
10  * Software is furnished to do so, subject to the following conditions:
11  *
12  * The above copyright notice and this permission notice shall be included in
13  * all copies or substantial portions of the Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
21  * DEALINGS IN THE SOFTWARE.
22  */
23 
24 #define NVOC_KERNEL_NVLINK_H_PRIVATE_ACCESS_ALLOWED
25 
26 #include "os/os.h"
27 #include "core/hal.h"
28 #include "core/locks.h"
29 #include "gpu_mgr/gpu_mgr.h"
30 #include "gpu/gpu.h"
31 
32 #include "kernel/gpu/nvlink/kernel_nvlink.h"
33 #include "kernel/gpu/nvlink/kernel_ioctrl.h"
34 
35 #if defined(INCLUDE_NVLINK_LIB)
36 static void _knvlinkUpdateRemoteEndUuidInfo(OBJGPU *, KernelNvlink *);
37 #endif
38 
39 /*!
40  * @brief Initializes NVLink lib - WAR only.
41  *
42  * @param[in]  pGpu           OBJGPU pointer
43  * @param[in]  pKernelNvlink  KernelNvlink  pointer
44  */
45 void
knvlinkCoreDriverLoadWar_IMPL(OBJGPU * pGpu,KernelNvlink * pKernelNvlink)46 knvlinkCoreDriverLoadWar_IMPL
47 (
48     OBJGPU        *pGpu,
49     KernelNvlink  *pKernelNvlink
50 )
51 {
52     //
53     // All platforms which support NVLINK_CORE should call nvlink_lib_initialize
54     // explicitly, if NVLink support is needed. It is not RM's responsibility to
55     // initialize NVLink driver. Currently, only windows fails to do the same.
56     // Hence, adding this WAR to keep NVLink alive on windows. Also, see
57     // nvlinkCoreDriverUnloadWar_IMPL.
58     //
59     // See Bug 1962411 in order to nuke this WAR for Windows.
60     //
61 #if defined(INCLUDE_NVLINK_LIB)
62 
63     if (RMCFG_FEATURE_PLATFORM_WINDOWS)
64     {
65         if (!nvlink_lib_is_initialized())
66         {
67             nvlink_lib_initialize();
68         }
69     }
70 
71 #endif
72 }
73 
74 /*!
75  * @brief Un-initializes NVLink lib - WAR only.
76  *
77  * @param[in]  pGpu           OBJGPU pointer
78  * @param[in]  pKernelNvlink  KernelNvlink  pointer
79  */
80 void
knvlinkCoreDriverUnloadWar_IMPL(OBJGPU * pGpu,KernelNvlink * pKernelNvlink)81 knvlinkCoreDriverUnloadWar_IMPL
82 (
83     OBJGPU        *pGpu,
84     KernelNvlink  *pKernelNvlink
85 )
86 {
87 #if defined(INCLUDE_NVLINK_LIB)
88 
89     if (RMCFG_FEATURE_PLATFORM_WINDOWS)
90     {
91         nvlink_lib_unload();
92     }
93 
94 #endif
95 }
96 
97 /*!
98  * @brief Checks whether NVLink driver is supported.
99  *
100  * @param[in]  pGpu           OBJGPU pointer
101  * @param[in]  pKernelNvlink  KernelNvlink  pointer
102  *
103  * @return  NV_OK on success
104  */
105 NV_STATUS
knvlinkCoreIsDriverSupported_IMPL(OBJGPU * pGpu,KernelNvlink * pKernelNvlink)106 knvlinkCoreIsDriverSupported_IMPL
107 (
108     OBJGPU        *pGpu,
109     KernelNvlink  *pKernelNvlink
110 )
111 {
112 #if defined(INCLUDE_NVLINK_LIB)
113 
114     if (nvlink_lib_is_initialized())
115     {
116         return NV_OK;
117     }
118 
119 #endif
120 
121     NV_PRINTF(LEVEL_INFO, "NVLink core lib isn't initialized yet!\n");
122 
123     return NV_ERR_NOT_SUPPORTED;
124 }
125 
126 /*!
127  * @brief Add GPU device to nvlink core
128  *
129  * @param[in]  pGpu           OBJGPU pointer
130  * @param[in]  pKernelNvlink  KernelNvlink  pointer
131  *
132  * @return  NV_OK on success
133  */
134 NV_STATUS
knvlinkCoreAddDevice_IMPL(OBJGPU * pGpu,KernelNvlink * pKernelNvlink)135 knvlinkCoreAddDevice_IMPL
136 (
137     OBJGPU        *pGpu,
138     KernelNvlink  *pKernelNvlink
139 )
140 {
141     NV_STATUS status = NV_OK;
142 
143 #if defined(INCLUDE_NVLINK_LIB)
144 
145     nvlink_device *dev     = NULL;
146     char          *devIdx  = NULL;
147 
148     // Return if the device is already registered
149     if (pKernelNvlink->pNvlinkDev)
150     {
151         NV_PRINTF(LEVEL_INFO, "GPU already registered in NVLINK core!\n");
152 
153         return status;
154     }
155 
156     // Set the driver name
157     pKernelNvlink->driverName = portMemAllocNonPaged(NVLINK_DRIVER_NAME_LENGTH);
158     if (pKernelNvlink->driverName == NULL)
159     {
160         return NV_ERR_NO_MEMORY;
161     }
162     portMemSet((void *)pKernelNvlink->driverName, 0, NVLINK_DRIVER_NAME_LENGTH);
163     portMemCopy(pKernelNvlink->driverName, sizeof(NVLINK_NVIDIA_DRIVER), NVLINK_NVIDIA_DRIVER,
164                 sizeof(NVLINK_NVIDIA_DRIVER));
165 
166     //
167     // Set the temporary device name. The actual device name will be updated
168     // after PMU state load completes
169     //
170     pKernelNvlink->deviceName = portMemAllocNonPaged(NVLINK_DEVICE_NAME_LENGTH);
171     if (pKernelNvlink->deviceName == NULL)
172     {
173         portMemFree(pKernelNvlink->driverName);
174 
175         NV_PRINTF(LEVEL_ERROR, "Failed to allocate memory for device name\n");
176         return NV_ERR_NO_MEMORY;
177     }
178     portMemSet((void *)pKernelNvlink->deviceName, 0, NVLINK_DEVICE_NAME_LENGTH);
179 
180     portMemCopy(pKernelNvlink->deviceName, sizeof("GPU"), "GPU", sizeof("GPU"));
181     devIdx = pKernelNvlink->deviceName;
182     while (*devIdx != '\0') devIdx++;
183     knvlinkUtoa((NvU8 *)devIdx,
184                 NVLINK_DEVICE_NAME_LENGTH - (devIdx - pKernelNvlink->deviceName),
185                 gpuGetInstance(pGpu));
186 
187 
188     // Allocate memory for the nvlink_device struct
189     dev = portMemAllocNonPaged(sizeof(nvlink_device));
190     if (dev == NULL)
191     {
192         portMemFree(pKernelNvlink->driverName);
193 
194         portMemFree(pKernelNvlink->deviceName);
195 
196         NV_PRINTF(LEVEL_ERROR,
197                   "Failed to create nvlink_device struct for GPU\n");
198         return NV_ERR_NO_MEMORY;
199     }
200     portMemSet((void *)dev, 0, sizeof(nvlink_device));
201 
202     // Initialize values for the nvlink_device struct
203     dev->driverName               = pKernelNvlink->driverName;
204     dev->deviceName               = pKernelNvlink->deviceName;
205     dev->type                     = NVLINK_DEVICE_TYPE_GPU;
206     dev->pciInfo.domain           = gpuGetDomain(pGpu);
207     dev->pciInfo.bus              = gpuGetBus(pGpu);
208     dev->pciInfo.device           = gpuGetDevice(pGpu);
209     dev->pciInfo.function         = 0;
210     dev->pciInfo.pciDeviceId      = pGpu->idInfo.PCIDeviceID;
211     dev->pciInfo.bars[0].baseAddr = pGpu->pKernelBus->pciBars[0];
212     dev->pciInfo.bars[0].barSize  = pGpu->pKernelBus->pciBarSizes[0];
213     dev->initialized              = 1;
214     dev->enableALI                = pKernelNvlink->bEnableAli;
215     dev->numIoctrls               = nvPopCount32(pKernelNvlink->ioctrlMask);
216     dev->numActiveLinksPerIoctrl  = knvlinkGetNumActiveLinksPerIoctrl(pGpu, pKernelNvlink);
217     dev->numLinksPerIoctrl        = knvlinkGetTotalNumLinksPerIoctrl(pGpu, pKernelNvlink);
218     dev->bReducedNvlinkConfig     = knvlinkIsGpuReducedNvlinkConfig_HAL(pGpu, pKernelNvlink);
219 
220     // Register the GPU in nvlink core
221     if (nvlink_lib_register_device(dev) != 0)
222     {
223         NV_PRINTF(LEVEL_ERROR, "Failed to register GPU in NVLINK core!\n");
224 
225         goto knvlinkCoreAddDevice_exit;
226     }
227 
228     NV_PRINTF(LEVEL_INFO, "GPU registered successfully in NVLINK core\n");
229 
230     pKernelNvlink->pNvlinkDev = dev;
231 
232     return status;
233 
234 knvlinkCoreAddDevice_exit:
235 
236     portMemFree(pKernelNvlink->driverName);
237 
238     portMemFree(pKernelNvlink->deviceName);
239 
240     portMemFree(dev);
241 
242 #endif
243 
244     return status;
245 }
246 
247 /*!
248  * @brief Update GPU UUID in nvlink core
249  *
250  * @param[in]  pGpu           OBJGPU pointer
251  * @param[in]  pKernelNvlink  KernelNvlink  pointer
252  *
253  * @return  NV_OK on success
254  */
255 NV_STATUS
knvlinkCoreUpdateDeviceUUID_IMPL(OBJGPU * pGpu,KernelNvlink * pKernelNvlink)256 knvlinkCoreUpdateDeviceUUID_IMPL
257 (
258     OBJGPU        *pGpu,
259     KernelNvlink  *pKernelNvlink
260 )
261 {
262     NV_STATUS status = NV_OK;
263 
264 #if defined(INCLUDE_NVLINK_LIB)
265 
266     NvU8   *pGidString = NULL;
267     char   *devIdx     = NULL;
268     NvU32   flags      = 0;
269     NvU32   gidStrLen;
270     nvlink_device_info devInfo;
271 
272     if (pKernelNvlink->pNvlinkDev)
273     {
274         //
275         // SHA1 uuid format is 16 bytes long. Hence, make sure NVLINK_UUID_LEN
276         // is sufficient to store SHA1 uuid format.
277         //
278         ct_assert(NVLINK_UUID_LEN == RM_SHA1_GID_SIZE);
279         ct_assert(NVLINK_UUID_LEN == NV_UUID_LEN);
280 
281         flags = FLD_SET_DRF_NUM(2080_GPU_CMD, _GPU_GET_GID_FLAGS, _TYPE,
282                         NV2080_GPU_CMD_GPU_GET_GID_FLAGS_TYPE_SHA1, flags);
283         flags = FLD_SET_DRF_NUM(2080_GPU_CMD, _GPU_GET_GID_FLAGS, _FORMAT,
284                         NV2080_GPU_CMD_GPU_GET_GID_FLAGS_FORMAT_BINARY, flags);
285 
286         if (!pGpu->gpuUuid.isInitialized)
287         {
288             status = gpuGetGidInfo(pGpu, &pGidString, &gidStrLen, flags);
289             if (status != NV_OK)
290             {
291                 NV_PRINTF(LEVEL_ERROR, "Failed to update GPU UUID\n");
292 
293                 return status;
294             }
295 
296             _knvlinkUpdateRemoteEndUuidInfo(pGpu, pKernelNvlink);
297         }
298 
299         //
300         // PMU state load has completed. Update the device name in RM and then in Core library
301         //
302 
303         portMemSet((void *)pKernelNvlink->deviceName, 0, NVLINK_DEVICE_NAME_LENGTH);
304         gpuGetNameString(pGpu,
305                          NV2080_CTRL_GPU_GET_NAME_STRING_FLAGS_TYPE_ASCII,
306                          pKernelNvlink->deviceName);
307 
308         devIdx = pKernelNvlink->deviceName;
309         while (*devIdx != '\0') devIdx++;
310 
311         NV_ASSERT((devIdx - pKernelNvlink->deviceName) < NVLINK_DEVICE_NAME_LENGTH);
312 
313         knvlinkUtoa((NvU8 *)devIdx,
314                    NVLINK_DEVICE_NAME_LENGTH - (devIdx - pKernelNvlink->deviceName),
315                    gpuGetInstance(pGpu));
316 
317         //
318         // Any Core library Data structure should be updated only within a Core Library function
319         // after aquiring the appropriate locks. Hence the UUID and deviceName are updated
320         // as part of nvlink_lib_update_uuid_and_device_name.
321         // To identify the device in Core Library we pass its DBDF values.
322         //
323 
324         knvlinkCoreGetDevicePciInfo_HAL(pGpu, pKernelNvlink, &devInfo);
325 
326         status = nvlink_lib_update_uuid_and_device_name(&devInfo, pGidString, pKernelNvlink->deviceName);
327 
328         // Freeing pGidString here as it is malloc'd as part of gpuGetGidInfo_IMPL
329         if (pGidString != NULL)
330             portMemFree(pGidString);
331     }
332 
333 #endif
334 
335     return status;
336 }
337 
338 /*!
339  * @brief Add link to nvlink core lib
340  *
341  * @param[in]  pGpu           OBJGPU pointer
342  * @param[in]  pKernelNvlink  KernelNvlink  pointer
343  * @param[in]  linkId         Link Number
344  *
345  * @return  NV_OK on success
346  */
347 NV_STATUS
knvlinkCoreAddLink_IMPL(OBJGPU * pGpu,KernelNvlink * pKernelNvlink,NvU32 linkId)348 knvlinkCoreAddLink_IMPL
349 (
350     OBJGPU        *pGpu,
351     KernelNvlink  *pKernelNvlink,
352     NvU32          linkId
353 )
354 {
355     NV_STATUS status = NV_OK;
356 
357 #if defined(INCLUDE_NVLINK_LIB)
358 
359     nvlink_link *link     = NULL;
360     char        *linkName = NULL;
361     char        *linkIdx  = NULL;
362 
363     // GPU device must be registered
364     if (pKernelNvlink->pNvlinkDev == NULL)
365     {
366         NV_PRINTF(LEVEL_INFO, "NVLink device isn't available.\n");
367 
368         return NV_ERR_INVALID_STATE;
369     }
370 
371     // On RTL, by default minion and SW train is disabled
372     if (IS_RTLSIM(pGpu) && !pKernelNvlink->bForceEnableCoreLibRtlsims)
373     {
374         NV_PRINTF(LEVEL_INFO,
375                   "Skipping registration of link %d on simulation.\n", linkId);
376 
377         return status;
378     }
379 
380     // Return if the link is already registered
381     if (pKernelNvlink->nvlinkLinks[linkId].core_link)
382     {
383         NV_PRINTF(LEVEL_INFO, "Link %d already registered in NVLINK core!\n",
384                   linkId);
385 
386         return status;
387     }
388 
389     // Set the link name
390     linkName = portMemAllocNonPaged(NVLINK_LINK_NAME_LENGTH);
391     if (linkName == NULL)
392     {
393         return NV_ERR_NO_MEMORY;
394     }
395     portMemSet((void *)linkName, 0, NVLINK_LINK_NAME_LENGTH);
396     portMemCopy(linkName, sizeof("Link"), "Link", sizeof("Link"));
397     linkIdx = linkName;
398     while (*linkIdx != '\0') linkIdx++;
399     knvlinkUtoa((NvU8 *)linkIdx,
400                 NVLINK_LINK_NAME_LENGTH - (linkIdx - linkName),
401                 linkId);
402 
403     // Allocate memory for the nvlink_link struct
404     link = portMemAllocNonPaged(sizeof(nvlink_link));
405     if (link == NULL)
406     {
407         status = NV_ERR_NO_MEMORY;
408         NV_PRINTF(LEVEL_ERROR, "Failed to create nvlink_link struct\n");
409 
410         goto knvlinkCoreAddLink_exit;
411     }
412     portMemSet((void *)link, 0, sizeof(nvlink_link));
413 
414     // Initialize values for the nvlink_link struct
415     link->linkName         = linkName;
416     link->linkNumber       = linkId;
417     link->state            = NVLINK_LINKSTATE_OFF;
418     link->tx_sublink_state = NVLINK_SUBLINK_STATE_TX_OFF;
419     link->rx_sublink_state = NVLINK_SUBLINK_STATE_RX_OFF;
420     link->bRxDetected      = NV_FALSE;
421     link->bInitphase5Fails = NV_FALSE;
422     link->version          = pKernelNvlink->ipVerNvlink;
423     link->dev              = pKernelNvlink->pNvlinkDev;
424     link->link_info        = &(pKernelNvlink->nvlinkLinks[linkId]);
425     link->link_handlers    = osGetNvlinkLinkCallbacks();
426 
427     if (link->link_handlers == NULL)
428     {
429 
430         NV_PRINTF(LEVEL_ERROR, "link handlers not found\n");
431 
432         goto knvlinkCoreAddLink_exit;
433     }
434 
435     // Register the link in nvlink core
436     if (nvlink_lib_register_link(link->dev, link) != 0)
437     {
438         NV_PRINTF(LEVEL_ERROR, "Failed to register link %d in NVLINK core!\n",
439                   linkId);
440 
441         goto knvlinkCoreAddLink_exit;
442     }
443 
444     pKernelNvlink->nvlinkLinks[linkId].core_link = link;
445 
446     NV_PRINTF(LEVEL_INFO,
447               "LINK%d: %s registered successfully in NVLINK core\n", linkId,
448               linkName);
449 
450     return status;
451 
452 knvlinkCoreAddLink_exit:
453 
454     portMemFree(linkName);
455 
456     portMemFree(link);
457 
458 #endif // defined(INCLUDE_NVLINK_LIB)
459 
460     return status;
461 }
462 
463 /*!
464  * @brief Remove link from nvlink core
465  *
466  * @param[in]  pGpu           OBJGPU pointer
467  * @param[in]  pKernelNvlink  KernelNvlink  pointer
468  * @param[in]  linkId         Link Number
469  *
470  * @return  NV_OK on success
471  */
472 NV_STATUS
knvlinkCoreRemoveLink_IMPL(OBJGPU * pGpu,KernelNvlink * pKernelNvlink,NvU32 linkId)473 knvlinkCoreRemoveLink_IMPL
474 (
475     OBJGPU        *pGpu,
476     KernelNvlink  *pKernelNvlink,
477     NvU32          linkId
478 )
479 {
480     NV_STATUS status = NV_OK;
481 
482 #if defined(INCLUDE_NVLINK_LIB)
483 
484     // GPU device must be registered
485     NV_ASSERT(pKernelNvlink->pNvlinkDev != NULL);
486 
487     if (pKernelNvlink->nvlinkLinks[linkId].core_link)
488     {
489         nvlink_lib_unregister_link(pKernelNvlink->nvlinkLinks[linkId].core_link);
490 
491         // Update the RM state now that the link is de-registered
492         pKernelNvlink->nvlinkLinks[linkId].core_link = NULL;
493         pKernelNvlink->nvlinkLinks[linkId].remoteEndInfo.bConnected = NV_FALSE;
494     }
495 
496 #endif
497 
498     return status;
499 }
500 
501 /*!
502  * @brief Remove GPU device from nvlink core
503  *
504  * @param[in]  pGpu           OBJGPU pointer
505  * @param[in]  pKernelNvlink  KernelNvlink  pointer
506  *
507  * @return  NV_OK on success
508  */
509 NV_STATUS
knvlinkCoreRemoveDevice_IMPL(OBJGPU * pGpu,KernelNvlink * pKernelNvlink)510 knvlinkCoreRemoveDevice_IMPL
511 (
512     OBJGPU        *pGpu,
513     KernelNvlink  *pKernelNvlink
514 )
515 {
516 #if defined(INCLUDE_NVLINK_LIB)
517 
518     OBJGPU       *pGpu1          = NULL;
519     KernelNvlink *pKernelNvlink1 = NULL;
520     NvU32         gpuMask        = 0;
521     NvU32         gpuInstance    = 0;
522     NV_STATUS     status         = NV_OK;
523 
524     gpumgrGetGpuAttachInfo(NULL, &gpuMask);
525 
526     // Clear peer link masks
527     while ((pGpu1 = gpumgrGetNextGpu(gpuMask, &gpuInstance)) != NULL)
528     {
529         if (pGpu1 == pGpu)
530             continue;
531 
532         pKernelNvlink1 = GPU_GET_KERNEL_NVLINK(pGpu1);
533         if (pKernelNvlink1 == NULL)
534         {
535             continue;
536         }
537 
538         // Set peerLinkMask from pGpu1 to pGpu to 0
539         status = knvlinkSetLinkMaskToPeer(pGpu1, pKernelNvlink1, pGpu, 0);
540         if (status != NV_OK)
541             return status;
542     }
543 
544     if (pKernelNvlink->pNvlinkDev)
545     {
546         nvlink_device *dev = pKernelNvlink->pNvlinkDev;
547         nvlink_lib_unregister_device(dev);
548         portMemFree(dev);
549     }
550 
551     // Update the RM cache of the core lib device
552     pKernelNvlink->pNvlinkDev = NULL;
553 
554     if (pKernelNvlink->driverName)
555         portMemFree(pKernelNvlink->driverName);
556 
557     if (pKernelNvlink->deviceName)
558         portMemFree(pKernelNvlink->deviceName);
559 
560 #endif
561 
562     return NV_OK;
563 }
564 
565 /*!
566  * @brief Return true if a GPU is connected to an NVSwitch. For now, to keep SW
567  * simple we make sure that all enabled links are connected to NVSwitch. Note,
568  * on NVSwitch systems currently there is no POR for GPU<->GPU direct peers,
569  * everything gets routed through NVSwitch.
570  *
571  * @param[in]  pGpu          OBJGPU pointer
572  * @param[in]  pKernelNvlink KernelNvlink  pointer
573  */
574 NvBool
knvlinkIsGpuConnectedToNvswitch_IMPL(OBJGPU * pGpu,KernelNvlink * pKernelNvlink)575 knvlinkIsGpuConnectedToNvswitch_IMPL
576 (
577     OBJGPU        *pGpu,
578     KernelNvlink  *pKernelNvlink
579 )
580 {
581     NvBool bConnected = NV_FALSE;
582 
583 #if defined(INCLUDE_NVLINK_LIB)
584 
585     NvU32 i;
586     KNVLINK_CONN_INFO remoteEndInfo;
587 
588     FOR_EACH_INDEX_IN_MASK(32, i, pKernelNvlink->enabledLinks)
589     {
590         remoteEndInfo = pKernelNvlink->nvlinkLinks[i].remoteEndInfo;
591 
592         if (remoteEndInfo.bConnected &&
593             remoteEndInfo.deviceType == NVLINK_DEVICE_TYPE_NVSWITCH)
594         {
595             bConnected = NV_TRUE;
596         }
597         else if (remoteEndInfo.bConnected)
598         {
599             // We don't support a mix of direct connect and switch links
600             return NV_FALSE;
601         }
602     }
603     FOR_EACH_INDEX_IN_MASK_END;
604 
605 #endif
606 
607     return bConnected;
608 }
609 
610 /*!
611  * @brief Return true if a link is connected.
612  *
613  * @param[in]  pGpu          OBJGPU pointer
614  * @param[in]  pKernelNvlink KernelNvlink  pointer
615  * @param[in]  linkId        Link identifier
616  */
617 NvBool
knvlinkIsLinkConnected_IMPL(OBJGPU * pGpu,KernelNvlink * pKernelNvlink,NvU32 linkId)618 knvlinkIsLinkConnected_IMPL
619 (
620     OBJGPU        *pGpu,
621     KernelNvlink  *pKernelNvlink,
622     NvU32          linkId
623 )
624 {
625 #if defined(INCLUDE_NVLINK_LIB)
626 
627     //
628     // For forced configs, we might not have connectivity information,
629     // return true
630     //
631     if (knvlinkIsForcedConfig(pGpu, pKernelNvlink) ||
632         pKernelNvlink->pLinkConnection)
633     {
634         return NV_TRUE;
635     }
636 
637     if (pKernelNvlink->nvlinkLinks[linkId].remoteEndInfo.bConnected)
638     {
639         return NV_TRUE;
640     }
641 
642 #endif
643 
644     return NV_FALSE;
645 }
646 
647 #if defined(INCLUDE_NVLINK_LIB)
648 
649 /*!
650  * @brief Update the UUID for the remote device
651  *
652  * @param[in]  pGpu          OBJGPU pointer
653  * @param[in]  pKernelNvlink KernelNvlink  pointer
654  */
655 static void
_knvlinkUpdateRemoteEndUuidInfo(OBJGPU * pGpu,KernelNvlink * pKernelNvlink)656 _knvlinkUpdateRemoteEndUuidInfo
657 (
658     OBJGPU        *pGpu,
659     KernelNvlink  *pKernelNvlink
660 )
661 {
662     OBJGPU       *pGpu1          = NULL;
663     KernelNvlink *pKernelNvlink1 = NULL;
664     unsigned      remoteLinkId;
665     unsigned      i, j;
666 
667     FOR_EACH_INDEX_IN_MASK(32, i, pKernelNvlink->enabledLinks)
668     {
669         if (pKernelNvlink->nvlinkLinks[i].remoteEndInfo.bConnected &&
670             (pKernelNvlink->nvlinkLinks[i].remoteEndInfo.deviceType == NVLINK_DEVICE_TYPE_GPU))
671         {
672             pGpu1 = NULL;
673 
674             // Get the remote OBJGPU and Nvlink
675             for (j = 0; j < NV_MAX_DEVICES; j++)
676             {
677                 pGpu1 = gpumgrGetGpu(j);
678 
679                 if (pGpu1 &&
680                     (pKernelNvlink->nvlinkLinks[i].remoteEndInfo.domain   == gpuGetDomain(pGpu1) &&
681                      pKernelNvlink->nvlinkLinks[i].remoteEndInfo.bus      == gpuGetBus(pGpu1)    &&
682                      pKernelNvlink->nvlinkLinks[i].remoteEndInfo.device   == gpuGetDevice(pGpu1) &&
683                      pKernelNvlink->nvlinkLinks[i].remoteEndInfo.function == 0))
684                 {
685                     pKernelNvlink1 = GPU_GET_KERNEL_NVLINK(pGpu1);
686 
687                     if (pKernelNvlink1)
688                     {
689                         remoteLinkId = pKernelNvlink->nvlinkLinks[i].remoteEndInfo.linkNumber;
690 
691                         // Set the PCI information for remote end
692                         portMemCopy(pKernelNvlink1->nvlinkLinks[remoteLinkId].remoteEndInfo.devUuid,
693                                     NV_UUID_LEN,
694                                     &pGpu->gpuUuid.uuid[0],
695                                     NV_UUID_LEN);
696                     }
697                     break;
698                 }
699             }
700         }
701     }
702     FOR_EACH_INDEX_IN_MASK_END;
703 }
704 
705 #endif
706