1 /* 2 * SPDX-FileCopyrightText: Copyright (c) 1993-2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 3 * SPDX-License-Identifier: MIT 4 * 5 * Permission is hereby granted, free of charge, to any person obtaining a 6 * copy of this software and associated documentation files (the "Software"), 7 * to deal in the Software without restriction, including without limitation 8 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 9 * and/or sell copies of the Software, and to permit persons to whom the 10 * Software is furnished to do so, subject to the following conditions: 11 * 12 * The above copyright notice and this permission notice shall be included in 13 * all copies or substantial portions of the Software. 14 * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 21 * DEALINGS IN THE SOFTWARE. 22 */ 23 24 #include "gpu/gpu.h" 25 #include "gpu/subdevice/subdevice.h" 26 #include "vgpu/rpc.h" 27 28 #include "kernel/gpu/nvlink/kernel_nvlink.h" 29 #include "kernel/gpu/nvlink/kernel_ioctrl.h" 30 #include "kernel/gpu/nvlink/common_nvlink.h" 31 32 typedef struct 33 { 34 NvU32 laneRxdetStatusMask; 35 NvBool bConnected; 36 NvU32 remoteLinkNumber; 37 NvU64 remoteDeviceType; 38 NvU64 remoteChipSid; 39 NvU32 remoteDomain; 40 NvU8 remoteBus; 41 NvU8 remoteDevice; 42 NvU8 remoteFunction; 43 NvU32 remotePciDeviceId; 44 NvBool bLoopbackSupported; 45 } NvlinkLinkStatus; 46 47 // 48 // subdeviceCtrlCmdBusGetNvlinkCaps 49 // Get the Nvlink global capabilities 50 // 51 NV_STATUS 52 subdeviceCtrlCmdBusGetNvlinkCaps_IMPL 53 ( 54 Subdevice *pSubdevice, 55 NV2080_CTRL_CMD_NVLINK_GET_NVLINK_CAPS_PARAMS *pParams 56 ) 57 { 58 OBJGPU *pGpu = GPU_RES_GET_GPU(pSubdevice); 59 60 return nvlinkCtrlCmdBusGetNvlinkCaps(pGpu, pParams); 61 } 62 63 static void _calculateNvlinkCaps 64 ( 65 OBJGPU *pGpu, 66 NvU32 bridgeSensableLinks, 67 NvU32 bridgedLinks, 68 NvU32 ipVerNvlink, 69 NvBool bMIGNvLinkP2PSupported, 70 NV2080_CTRL_CMD_NVLINK_GET_NVLINK_CAPS_PARAMS *pParams 71 ) 72 { 73 NvU8 tempCaps[NV2080_CTRL_NVLINK_CAPS_TBL_SIZE]; 74 portMemSet(tempCaps, 0, NV2080_CTRL_NVLINK_CAPS_TBL_SIZE); 75 76 // With MIG memory partitioning, NvLink P2P or sysmem accesses are not allowed 77 if (bMIGNvLinkP2PSupported) 78 { 79 RMCTRL_SET_CAP(tempCaps, NV2080_CTRL_NVLINK_CAPS, _SUPPORTED); 80 RMCTRL_SET_CAP(tempCaps, NV2080_CTRL_NVLINK_CAPS, _SYSMEM_ACCESS); 81 RMCTRL_SET_CAP(tempCaps, NV2080_CTRL_NVLINK_CAPS, _P2P_SUPPORTED); 82 RMCTRL_SET_CAP(tempCaps, NV2080_CTRL_NVLINK_CAPS, _P2P_ATOMICS); 83 84 // 85 // This GPU supports SLI bridge sensing if any of the links 86 // support bridge sensing. 87 // 88 if (bridgeSensableLinks != 0) 89 { 90 RMCTRL_SET_CAP(tempCaps, NV2080_CTRL_NVLINK_CAPS, _SLI_BRIDGE_SENSABLE); 91 } 92 93 // This GPU has an SLI bridge if any of the links are bridged 94 if (bridgedLinks != 0) 95 { 96 RMCTRL_SET_CAP(tempCaps, NV2080_CTRL_NVLINK_CAPS, _SLI_BRIDGE); 97 } 98 99 // NVLink versions beyond the first support sysmem atomics 100 if (ipVerNvlink != NV2080_CTRL_NVLINK_CAPS_NVLINK_VERSION_1_0) 101 { 102 RMCTRL_SET_CAP(tempCaps, NV2080_CTRL_NVLINK_CAPS, _SYSMEM_ATOMICS); 103 } 104 } 105 106 switch (ipVerNvlink) 107 { 108 case NV2080_CTRL_NVLINK_CAPS_NVLINK_VERSION_4_0: 109 { 110 pParams->lowestNvlinkVersion = NV2080_CTRL_NVLINK_CAPS_NVLINK_VERSION_4_0; 111 pParams->highestNvlinkVersion = NV2080_CTRL_NVLINK_CAPS_NVLINK_VERSION_4_0; 112 pParams->lowestNciVersion = NV2080_CTRL_NVLINK_CAPS_NCI_VERSION_4_0; 113 pParams->highestNciVersion = NV2080_CTRL_NVLINK_CAPS_NCI_VERSION_4_0; 114 115 // Supported power states 116 RMCTRL_SET_CAP(tempCaps, NV2080_CTRL_NVLINK_CAPS, _POWER_STATE_L0); 117 RMCTRL_SET_CAP(tempCaps, NV2080_CTRL_NVLINK_CAPS, _POWER_STATE_L1); 118 break; 119 } 120 case NV2080_CTRL_NVLINK_CAPS_NVLINK_VERSION_3_1: 121 { 122 pParams->lowestNvlinkVersion = NV2080_CTRL_NVLINK_CAPS_NVLINK_VERSION_3_1; 123 pParams->highestNvlinkVersion = NV2080_CTRL_NVLINK_CAPS_NVLINK_VERSION_3_1; 124 pParams->lowestNciVersion = NV2080_CTRL_NVLINK_CAPS_NCI_VERSION_3_1; 125 pParams->highestNciVersion = NV2080_CTRL_NVLINK_CAPS_NCI_VERSION_3_1; 126 127 // Supported power states 128 RMCTRL_SET_CAP(tempCaps, NV2080_CTRL_NVLINK_CAPS, _POWER_STATE_L0); 129 RMCTRL_SET_CAP(tempCaps, NV2080_CTRL_NVLINK_CAPS, _POWER_STATE_L2); 130 break; 131 } 132 case NV2080_CTRL_NVLINK_CAPS_NVLINK_VERSION_3_0: 133 { 134 pParams->lowestNvlinkVersion = NV2080_CTRL_NVLINK_CAPS_NVLINK_VERSION_3_0; 135 pParams->highestNvlinkVersion = NV2080_CTRL_NVLINK_CAPS_NVLINK_VERSION_3_0; 136 pParams->lowestNciVersion = NV2080_CTRL_NVLINK_CAPS_NCI_VERSION_3_0; 137 pParams->highestNciVersion = NV2080_CTRL_NVLINK_CAPS_NCI_VERSION_3_0; 138 139 // Supported power states 140 RMCTRL_SET_CAP(tempCaps, NV2080_CTRL_NVLINK_CAPS, _POWER_STATE_L0); 141 RMCTRL_SET_CAP(tempCaps, NV2080_CTRL_NVLINK_CAPS, _POWER_STATE_L2); 142 break; 143 } 144 case NV2080_CTRL_NVLINK_CAPS_NVLINK_VERSION_2_2: 145 { 146 pParams->lowestNvlinkVersion = NV2080_CTRL_NVLINK_CAPS_NVLINK_VERSION_2_2; 147 pParams->highestNvlinkVersion = NV2080_CTRL_NVLINK_CAPS_NVLINK_VERSION_2_2; 148 pParams->lowestNciVersion = NV2080_CTRL_NVLINK_CAPS_NCI_VERSION_2_2; 149 pParams->highestNciVersion = NV2080_CTRL_NVLINK_CAPS_NCI_VERSION_2_2; 150 151 // Supported power states 152 RMCTRL_SET_CAP(tempCaps, NV2080_CTRL_NVLINK_CAPS, _POWER_STATE_L0); 153 RMCTRL_SET_CAP(tempCaps, NV2080_CTRL_NVLINK_CAPS, _POWER_STATE_L2); 154 break; 155 } 156 case NV2080_CTRL_NVLINK_CAPS_NVLINK_VERSION_2_0: 157 { 158 pParams->lowestNvlinkVersion = NV2080_CTRL_NVLINK_CAPS_NVLINK_VERSION_2_0; 159 pParams->highestNvlinkVersion = NV2080_CTRL_NVLINK_CAPS_NVLINK_VERSION_2_0; 160 pParams->lowestNciVersion = NV2080_CTRL_NVLINK_CAPS_NCI_VERSION_2_0; 161 pParams->highestNciVersion = NV2080_CTRL_NVLINK_CAPS_NCI_VERSION_2_0; 162 163 // Supported power states 164 RMCTRL_SET_CAP(tempCaps, NV2080_CTRL_NVLINK_CAPS, _POWER_STATE_L0); 165 break; 166 } 167 default: 168 { 169 pParams->lowestNvlinkVersion = NV2080_CTRL_NVLINK_CAPS_NVLINK_VERSION_1_0; 170 pParams->highestNvlinkVersion = NV2080_CTRL_NVLINK_CAPS_NVLINK_VERSION_1_0; 171 pParams->lowestNciVersion = NV2080_CTRL_NVLINK_CAPS_NCI_VERSION_1_0; 172 pParams->highestNciVersion = NV2080_CTRL_NVLINK_CAPS_NCI_VERSION_1_0; 173 174 // Supported power states 175 RMCTRL_SET_CAP(tempCaps, NV2080_CTRL_NVLINK_CAPS, _POWER_STATE_L0); 176 break; 177 } 178 } 179 180 portMemCopy(&pParams->capsTbl, NV2080_CTRL_NVLINK_CAPS_TBL_SIZE, tempCaps, NV2080_CTRL_NVLINK_CAPS_TBL_SIZE); 181 } 182 183 // 184 // knvlinkCtrlCmdBusGetNvlinkCaps 185 // Inner function of subdeviceCtrlCmdBusGetNvlinkCaps for internal RM direct function call 186 // Get the Nvlink global capabilities 187 // 188 NV_STATUS 189 nvlinkCtrlCmdBusGetNvlinkCaps 190 ( 191 OBJGPU *pGpu, 192 NV2080_CTRL_CMD_NVLINK_GET_NVLINK_CAPS_PARAMS *pParams 193 ) 194 { 195 KernelMIGManager *pKernelMIGManager = GPU_GET_KERNEL_MIG_MANAGER(pGpu); 196 NvBool bMIGNvLinkP2PSupported = ((pKernelMIGManager != NULL) && 197 kmigmgrIsMIGNvlinkP2PSupported(pGpu, pKernelMIGManager)); 198 // 199 // vGPU: 200 // 201 // Since vGPU does all real hardware management in the 202 // host, if we are in guest OS (where IS_VIRTUAL(pGpu) is true), 203 // do an RPC to the host to get blacklist information from host RM 204 // 205 if (IS_VIRTUAL(pGpu)) 206 { 207 CALL_CONTEXT *pCallContext = resservGetTlsCallContext(); 208 RmCtrlParams *pRmCtrlParams = pCallContext->pControlParams; 209 NV_STATUS status = NV_OK; 210 211 NV_RM_RPC_CONTROL(pGpu, pRmCtrlParams->hClient, pRmCtrlParams->hObject, pRmCtrlParams->cmd, 212 pRmCtrlParams->pParams, pRmCtrlParams->paramsSize, status); 213 return status; 214 } 215 216 // Initialize link masks to 0 217 pParams->enabledLinkMask = 0; 218 pParams->discoveredLinkMask = 0; 219 220 if (IS_VGPU_GSP_PLUGIN_OFFLOAD_ENABLED(pGpu) && RMCFG_FEATURE_PLATFORM_GSP) 221 { 222 return NV_ERR_NOT_SUPPORTED; 223 } 224 else 225 { 226 KernelNvlink *pKernelNvlink = GPU_GET_KERNEL_NVLINK(pGpu); 227 if (pKernelNvlink == NULL) 228 { 229 NV_PRINTF(LEVEL_INFO, "Kernel NVLink is unavailable. Returning.\n"); 230 return NV_OK; 231 } 232 // With MIG memory partitioning, NvLink P2P or sysmem accesses are not allowed 233 if (bMIGNvLinkP2PSupported) 234 { 235 // 236 // Some links might have passed receiver detect (bridge is present), 237 // but might have failed to transition to safe mode (marginal links) 238 // Update connectedLinks and bridgedLinks mask for these links 239 // 240 knvlinkFilterBridgeLinks_HAL(pGpu, pKernelNvlink); 241 } 242 _calculateNvlinkCaps(pGpu, pKernelNvlink->bridgeSensableLinks, pKernelNvlink->bridgedLinks, pKernelNvlink->ipVerNvlink, bMIGNvLinkP2PSupported, pParams); 243 244 pParams->discoveredLinkMask = knvlinkGetDiscoveredLinkMask(pGpu, pKernelNvlink); 245 pParams->enabledLinkMask = knvlinkGetEnabledLinkMask(pGpu, pKernelNvlink); 246 } 247 248 return NV_OK; 249 } 250 251 void 252 static _getNvlinkStatus 253 ( 254 OBJGPU *pGpu, 255 NV2080_CTRL_NVLINK_GET_LINK_AND_CLOCK_INFO_PARAMS *nvlinkLinkAndClockInfoParams, 256 NvU32 bridgeSensableLinks, 257 NvU32 bridgedLinks, 258 NvU32 ipVerNvlink, 259 NvlinkLinkStatus nvlinkLinks[NVLINK_MAX_LINKS_SW], 260 NvBool bNvlinkEnabled, 261 NvBool bL2PowerStateEnabled, 262 NvBool bForcedConfig, 263 NV2080_CTRL_CMD_NVLINK_GET_NVLINK_STATUS_PARAMS *pParams 264 ) 265 { 266 NvU8 i = 0; 267 NvU8 tempCaps[NV2080_CTRL_NVLINK_CAPS_TBL_SIZE]; 268 NvU32 r = 0; 269 OBJGPU *remotePeer0 = NULL; 270 NvBool bPeerLink, bSysmemLink, bSwitchLink; 271 272 r = pParams->enabledLinkMask; 273 while (r >>= 1 ) i++; 274 275 NV_ASSERT(i <= NV2080_CTRL_NVLINK_MAX_LINKS); 276 277 FOR_EACH_INDEX_IN_MASK(32, i, pParams->enabledLinkMask) 278 { 279 bPeerLink = NV_FALSE; 280 bSysmemLink = NV_FALSE; 281 bSwitchLink = NV_FALSE; 282 NV2080_CTRL_NVLINK_GET_LINK_AND_CLOCK_VALUES *pLinkAndClockValues; 283 284 pLinkAndClockValues = &nvlinkLinkAndClockInfoParams->linkInfo[i]; 285 286 portMemSet(tempCaps, 0, NV2080_CTRL_NVLINK_CAPS_TBL_SIZE); 287 288 if (bNvlinkEnabled) 289 { 290 RMCTRL_SET_CAP(tempCaps, NV2080_CTRL_NVLINK_CAPS, _VALID); 291 } 292 293 if (pLinkAndClockValues->bLinkConnectedToSystem) 294 { 295 // Tag as a Sysmem link 296 bSysmemLink = NV_TRUE; 297 298 RMCTRL_SET_CAP(tempCaps, NV2080_CTRL_NVLINK_CAPS, _SYSMEM_ACCESS); 299 300 // NVLink versions beyond the first support sysmem atomics 301 if (ipVerNvlink != NV2080_CTRL_NVLINK_CAPS_NVLINK_VERSION_1_0 ) 302 { 303 RMCTRL_SET_CAP(tempCaps, NV2080_CTRL_NVLINK_CAPS, _SYSMEM_ATOMICS); 304 } 305 } 306 307 if (nvlinkLinks[i].bConnected) 308 { 309 // Tag as Peer link 310 bPeerLink = NV_TRUE; 311 312 RMCTRL_SET_CAP(tempCaps, NV2080_CTRL_NVLINK_CAPS, _P2P_SUPPORTED); 313 RMCTRL_SET_CAP(tempCaps, NV2080_CTRL_NVLINK_CAPS, _P2P_ATOMICS); 314 } 315 316 // Indicate per-link bridge sense status 317 if (bridgeSensableLinks & NVBIT(i)) 318 { 319 RMCTRL_SET_CAP(tempCaps, NV2080_CTRL_NVLINK_CAPS, _SLI_BRIDGE_SENSABLE); 320 } 321 322 // Indicate per-link bridge status 323 if (bridgedLinks & NVBIT(i)) 324 { 325 RMCTRL_SET_CAP(tempCaps, NV2080_CTRL_NVLINK_CAPS, _SLI_BRIDGE); 326 } 327 328 // Set the power states caps 329 switch (ipVerNvlink) 330 { 331 case NV2080_CTRL_NVLINK_CAPS_NVLINK_VERSION_2_0: 332 RMCTRL_SET_CAP(tempCaps, NV2080_CTRL_NVLINK_CAPS, _POWER_STATE_L0); 333 break; 334 335 case NV2080_CTRL_NVLINK_CAPS_NVLINK_VERSION_4_0: 336 case NV2080_CTRL_NVLINK_CAPS_NVLINK_VERSION_3_1: 337 case NV2080_CTRL_NVLINK_CAPS_NVLINK_VERSION_3_0: 338 case NV2080_CTRL_NVLINK_CAPS_NVLINK_VERSION_2_2: 339 RMCTRL_SET_CAP(tempCaps, NV2080_CTRL_NVLINK_CAPS, _POWER_STATE_L0); 340 if (bL2PowerStateEnabled) 341 { 342 RMCTRL_SET_CAP(tempCaps, NV2080_CTRL_NVLINK_CAPS, _POWER_STATE_L2); 343 } 344 break; 345 346 default: 347 RMCTRL_SET_CAP(tempCaps, NV2080_CTRL_NVLINK_CAPS, _POWER_STATE_L0); 348 break; 349 } 350 351 portMemCopy(&pParams->linkInfo[i].capsTbl, NV2080_CTRL_NVLINK_CAPS_TBL_SIZE, tempCaps, NV2080_CTRL_NVLINK_CAPS_TBL_SIZE); 352 353 pParams->linkInfo[i].phyType = NV2080_CTRL_NVLINK_STATUS_PHY_NVHS; 354 pParams->linkInfo[i].subLinkWidth = pLinkAndClockValues->subLinkWidth; 355 pParams->linkInfo[i].linkState = pLinkAndClockValues->linkState; 356 pParams->linkInfo[i].txSublinkStatus = (NvU8) pLinkAndClockValues->txSublinkState; 357 pParams->linkInfo[i].rxSublinkStatus = (NvU8) pLinkAndClockValues->rxSublinkState; 358 359 // Initialize the lane reversal state information for the link 360 pParams->linkInfo[i].bLaneReversal = pLinkAndClockValues->bLaneReversal; 361 362 switch (ipVerNvlink) 363 { 364 case NV2080_CTRL_NVLINK_CAPS_NVLINK_VERSION_4_0: 365 pParams->linkInfo[i].nvlinkVersion = NV2080_CTRL_NVLINK_STATUS_NVLINK_VERSION_4_0; 366 pParams->linkInfo[i].nciVersion = NV2080_CTRL_NVLINK_STATUS_NCI_VERSION_4_0; 367 break; 368 case NV2080_CTRL_NVLINK_CAPS_NVLINK_VERSION_3_1: 369 pParams->linkInfo[i].nvlinkVersion = NV2080_CTRL_NVLINK_STATUS_NVLINK_VERSION_3_1; 370 pParams->linkInfo[i].nciVersion = NV2080_CTRL_NVLINK_STATUS_NCI_VERSION_3_1; 371 break; 372 case NV2080_CTRL_NVLINK_CAPS_NVLINK_VERSION_3_0: 373 pParams->linkInfo[i].nvlinkVersion = NV2080_CTRL_NVLINK_STATUS_NVLINK_VERSION_3_0; 374 pParams->linkInfo[i].nciVersion = NV2080_CTRL_NVLINK_STATUS_NCI_VERSION_3_0; 375 break; 376 case NV2080_CTRL_NVLINK_CAPS_NVLINK_VERSION_2_2: 377 pParams->linkInfo[i].nvlinkVersion = NV2080_CTRL_NVLINK_STATUS_NVLINK_VERSION_2_2; 378 pParams->linkInfo[i].nciVersion = NV2080_CTRL_NVLINK_STATUS_NCI_VERSION_2_2; 379 break; 380 case NV2080_CTRL_NVLINK_CAPS_NVLINK_VERSION_2_0: 381 pParams->linkInfo[i].nvlinkVersion = NV2080_CTRL_NVLINK_STATUS_NVLINK_VERSION_2_0; 382 pParams->linkInfo[i].nciVersion = NV2080_CTRL_NVLINK_STATUS_NCI_VERSION_2_0; 383 break; 384 default: 385 pParams->linkInfo[i].nvlinkVersion = NV2080_CTRL_NVLINK_STATUS_NVLINK_VERSION_1_0; 386 pParams->linkInfo[i].nciVersion = NV2080_CTRL_NVLINK_STATUS_NCI_VERSION_1_0; 387 break; 388 } 389 pParams->linkInfo[i].phyVersion = NV2080_CTRL_NVLINK_STATUS_NVHS_VERSION_1_0; 390 391 // Initialize the connection information for the link 392 pParams->linkInfo[i].connected = NV2080_CTRL_NVLINK_STATUS_CONNECTED_FALSE; 393 pParams->linkInfo[i].remoteDeviceLinkNumber = NV2080_CTRL_NVLINK_STATUS_REMOTE_LINK_NUMBER_INVALID; 394 pParams->linkInfo[i].remoteDeviceInfo.deviceType = NV2080_CTRL_NVLINK_DEVICE_INFO_DEVICE_TYPE_NONE; 395 pParams->linkInfo[i].localDeviceLinkNumber = i; 396 pParams->linkInfo[i].laneRxdetStatusMask = nvlinkLinks[i].laneRxdetStatusMask; 397 398 // Set the device information for the local end of the link 399 pParams->linkInfo[i].localDeviceInfo.domain = gpuGetDomain(pGpu); 400 pParams->linkInfo[i].localDeviceInfo.bus = gpuGetBus(pGpu); 401 pParams->linkInfo[i].localDeviceInfo.device = gpuGetDevice(pGpu); 402 pParams->linkInfo[i].localDeviceInfo.function = 0; 403 pParams->linkInfo[i].localDeviceInfo.pciDeviceId = pGpu->idInfo.PCIDeviceID; 404 pParams->linkInfo[i].localDeviceInfo.deviceType = NV2080_CTRL_NVLINK_DEVICE_INFO_DEVICE_TYPE_GPU; 405 406 // Record the local end of the link's deviceIdFlags 407 if(pGpu->idInfo.PCIDeviceID) 408 { 409 pParams->linkInfo[i].localDeviceInfo.deviceIdFlags = 410 FLD_SET_DRF(2080_CTRL_NVLINK, _DEVICE_INFO, _DEVICE_ID_FLAGS, _PCI, 411 pParams->linkInfo[i].localDeviceInfo.deviceIdFlags); 412 } 413 414 // 415 // Get clock related state 416 // NOTE: to be depricated HW terminology conforming versions 417 // 418 pParams->linkInfo[i].nvlinkLinkClockKHz = pLinkAndClockValues->nvlinkLinkClockKHz; 419 pParams->linkInfo[i].nvlinkRefClkSpeedKHz = nvlinkLinkAndClockInfoParams->nvlinkRefClkSpeedKHz; 420 pParams->linkInfo[i].nvlinkCommonClockSpeedKHz = pParams->linkInfo[i].nvlinkLinkClockKHz / 16; 421 422 pParams->linkInfo[i].nvlinkCommonClockSpeedMhz = pParams->linkInfo[i].nvlinkCommonClockSpeedKHz / 1000; 423 424 // Clock speed and Data rate info conforming with HW terminology 425 pParams->linkInfo[i].nvlinkLineRateMbps = pLinkAndClockValues->nvlinkLineRateMbps; 426 pParams->linkInfo[i].nvlinkLinkClockMhz = pLinkAndClockValues->nvlinkLinkClockMhz; 427 pParams->linkInfo[i].nvlinkLinkDataRateKiBps = pLinkAndClockValues->nvlinkLinkDataRateKiBps; 428 pParams->linkInfo[i].nvlinkRefClkType = pLinkAndClockValues->nvlinkRefClkType; 429 pParams->linkInfo[i].nvlinkRefClkSpeedMhz = pLinkAndClockValues->nvlinkReqLinkClockMhz; 430 431 if (nvlinkLinks[i].bConnected) 432 { 433 pParams->linkInfo[i].connected = NV2080_CTRL_NVLINK_STATUS_CONNECTED_TRUE; 434 pParams->linkInfo[i].remoteDeviceLinkNumber = (NvU8) nvlinkLinks[i].remoteLinkNumber; 435 pParams->linkInfo[i].remoteLinkSid = nvlinkLinks[i].remoteChipSid; 436 437 // Set the device information for the remote end of the link 438 pParams->linkInfo[i].remoteDeviceInfo.domain = nvlinkLinks[i].remoteDomain; 439 pParams->linkInfo[i].remoteDeviceInfo.bus = nvlinkLinks[i].remoteBus; 440 pParams->linkInfo[i].remoteDeviceInfo.device = nvlinkLinks[i].remoteDevice; 441 pParams->linkInfo[i].remoteDeviceInfo.function = nvlinkLinks[i].remoteFunction; 442 pParams->linkInfo[i].remoteDeviceInfo.pciDeviceId = nvlinkLinks[i].remotePciDeviceId; 443 pParams->linkInfo[i].remoteDeviceInfo.deviceType = nvlinkLinks[i].remoteDeviceType; 444 445 // Update the device Id flags for PCI 446 if (nvlinkLinks[i].remotePciDeviceId) 447 { 448 pParams->linkInfo[i].remoteDeviceInfo.deviceIdFlags |= 449 FLD_SET_DRF(2080_CTRL_NVLINK, _DEVICE_INFO, _DEVICE_ID_FLAGS, _PCI, 450 pParams->linkInfo[i].remoteDeviceInfo.deviceIdFlags); 451 } 452 453 // Check the PCI dbdf values to confirm the device on remote end 454 if (NV2080_CTRL_NVLINK_DEVICE_INFO_DEVICE_ID_FLAGS_PCI & pParams->linkInfo[i].remoteDeviceInfo.deviceIdFlags) 455 { 456 if (!nvlinkLinks[i].bLoopbackSupported) 457 { 458 pParams->linkInfo[i].loopProperty = NV2080_CTRL_NVLINK_STATUS_LOOP_PROPERTY_NONE; 459 continue; 460 } 461 } 462 463 pParams->linkInfo[i].loopProperty = pParams->linkInfo[i].remoteDeviceLinkNumber == i ? 464 NV2080_CTRL_NVLINK_STATUS_LOOP_PROPERTY_LOOPBACK : 465 NV2080_CTRL_NVLINK_STATUS_LOOP_PROPERTY_LOOPOUT; 466 } 467 468 if (!(IS_VGPU_GSP_PLUGIN_OFFLOAD_ENABLED(pGpu) && RMCFG_FEATURE_PLATFORM_GSP)) 469 { 470 // Per-link ForceConfig handling (non-legacy Arch ForceConfig only) 471 if (bForcedConfig) 472 { 473 if (!pGpu->getProperty(pGpu, PDB_PROP_GPU_EMULATION)) 474 { 475 pParams->linkInfo[i].linkState = NV2080_CTRL_NVLINK_STATUS_LINK_STATE_ACTIVE; 476 pParams->linkInfo[i].rxSublinkStatus = NV2080_CTRL_NVLINK_STATUS_SUBLINK_RX_STATE_HIGH_SPEED_1; 477 pParams->linkInfo[i].txSublinkStatus = NV2080_CTRL_NVLINK_STATUS_SUBLINK_TX_STATE_HIGH_SPEED_1; 478 } 479 480 pParams->linkInfo[i].connected = NV_TRUE; 481 pParams->linkInfo[i].loopProperty = NV_FALSE; 482 pParams->linkInfo[i].remoteDeviceLinkNumber = i; 483 if (!pParams->linkInfo[i].nvlinkLinkClockMhz) 484 pParams->linkInfo[i].nvlinkLinkClockMhz = pLinkAndClockValues->nvlinkReqLinkClockMhz; 485 486 // Expose remote device as EBRIDGE if forced only sysmem 487 if (bSysmemLink && !bPeerLink && !bSwitchLink) 488 { 489 pParams->linkInfo[i].remoteDeviceInfo.domain = 0; 490 pParams->linkInfo[i].remoteDeviceInfo.bus = FORCED_SYSMEM_PCI_BUS; 491 pParams->linkInfo[i].remoteDeviceInfo.device = 0; 492 pParams->linkInfo[i].remoteDeviceInfo.function = i; 493 pParams->linkInfo[i].remoteDeviceInfo.pciDeviceId = FORCED_SYSMEM_DEVICE_ID; 494 pParams->linkInfo[i].remoteDeviceInfo.deviceType = FORCED_SYSMEM_DEVICE_TYPE; 495 496 pParams->linkInfo[i].remoteDeviceInfo.deviceIdFlags |= 497 FLD_SET_DRF(2080_CTRL_NVLINK, _DEVICE_INFO, _DEVICE_ID_FLAGS, _PCI, 498 pParams->linkInfo[i].remoteDeviceInfo.deviceIdFlags); 499 } 500 501 // Expose remote device as GPU if forced only peer 502 if (bPeerLink && !bSysmemLink && !bSwitchLink) 503 { 504 remotePeer0 = gpumgrGetGpu(pGpu->gpuInstance == 0 ? 1 : 0); 505 if (NULL == remotePeer0) 506 { 507 remotePeer0 = pGpu; 508 } 509 510 // 511 // Ensure the remote is actually a GPU that supports NVLink. 512 // If it is not, we should stick with the current GPU as 513 // this is likely a loopback config. See Bug 1786206. 514 // 515 if (remotePeer0 != pGpu) 516 { 517 KernelNvlink *pRemoteKernelNvlink = GPU_GET_KERNEL_NVLINK(remotePeer0); 518 if (pRemoteKernelNvlink) 519 { 520 if (pRemoteKernelNvlink->discoveredLinks == 0) 521 { 522 // There are no links on this remote, fall back to loopback. 523 remotePeer0 = pGpu; 524 } 525 } 526 else 527 { 528 // NVLink not present on this remote, fall back to loopback. 529 remotePeer0 = pGpu; 530 } 531 } 532 533 pParams->linkInfo[i].remoteDeviceInfo.domain = gpuGetDomain(remotePeer0); 534 pParams->linkInfo[i].remoteDeviceInfo.bus = gpuGetBus(remotePeer0); 535 pParams->linkInfo[i].remoteDeviceInfo.device = gpuGetDevice(remotePeer0); 536 pParams->linkInfo[i].remoteDeviceInfo.function = 0; 537 pParams->linkInfo[i].remoteDeviceInfo.pciDeviceId = remotePeer0->idInfo.PCIDeviceID; 538 pParams->linkInfo[i].remoteDeviceInfo.deviceType = NV2080_CTRL_NVLINK_DEVICE_INFO_DEVICE_TYPE_GPU; 539 540 // This config is either in loopback or real 1/1 P2P, nothing else. 541 if (gpuGetDBDF(remotePeer0) == gpuGetDBDF(pGpu)) 542 { 543 pParams->linkInfo[i].loopProperty = NV2080_CTRL_NVLINK_STATUS_LOOP_PROPERTY_LOOPBACK; 544 } 545 546 pParams->linkInfo[i].remoteDeviceInfo.deviceIdFlags |= 547 FLD_SET_DRF(2080_CTRL_NVLINK, _DEVICE_INFO, _DEVICE_ID_FLAGS, _PCI, 548 pParams->linkInfo[i].remoteDeviceInfo.deviceIdFlags); 549 } 550 551 // 552 // Expose remote device as Switch if requested 553 // Requested can be either forced sysmem and peer or 554 // if either and requested as switch 555 // 556 if ( (bSysmemLink && bPeerLink) || 557 ((bSysmemLink || bPeerLink) && bSwitchLink)) 558 { 559 pParams->linkInfo[i].remoteDeviceInfo.domain = 0; 560 pParams->linkInfo[i].remoteDeviceInfo.bus = FORCED_SWITCH_PCI_BUS; 561 pParams->linkInfo[i].remoteDeviceInfo.device = 0; 562 pParams->linkInfo[i].remoteDeviceInfo.function = i; 563 pParams->linkInfo[i].remoteDeviceInfo.pciDeviceId = FORCED_SWITCH_DEVICE_ID; 564 pParams->linkInfo[i].remoteDeviceInfo.deviceType = NV2080_CTRL_NVLINK_DEVICE_INFO_DEVICE_TYPE_SWITCH; 565 566 pParams->linkInfo[i].remoteDeviceInfo.deviceIdFlags |= 567 FLD_SET_DRF(2080_CTRL_NVLINK, _DEVICE_INFO, _DEVICE_ID_FLAGS, _PCI, 568 pParams->linkInfo[i].remoteDeviceInfo.deviceIdFlags); 569 } 570 } 571 } 572 } 573 FOR_EACH_INDEX_IN_MASK_END; 574 } 575 576 // 577 // subdeviceCtrlCmdBusGetNvlinkStatus 578 // Get the Nvlink per link capabilities 579 // 580 NV_STATUS 581 subdeviceCtrlCmdBusGetNvlinkStatus_IMPL 582 ( 583 Subdevice *pSubdevice, 584 NV2080_CTRL_CMD_NVLINK_GET_NVLINK_STATUS_PARAMS *pParams 585 ) 586 { 587 OBJGPU *pGpu = GPU_RES_GET_GPU(pSubdevice); 588 KernelMIGManager *pKernelMIGManager = GPU_GET_KERNEL_MIG_MANAGER(pGpu); 589 NvBool bMIGNvLinkP2PSupported = ((pKernelMIGManager != NULL) && 590 kmigmgrIsMIGNvlinkP2PSupported(pGpu, pKernelMIGManager)); 591 NV_STATUS status = NV_OK; 592 NvU8 i = 0; 593 struct 594 { 595 NV2080_CTRL_NVLINK_GET_LINK_AND_CLOCK_INFO_PARAMS nvlinkLinkAndClockInfoParams; 596 NvlinkLinkStatus nvlinkLinks[NVLINK_MAX_LINKS_SW]; 597 } *pTmpData = NULL; 598 599 // 600 // vGPU: 601 // 602 // Since vGPU does all real hardware management in the 603 // host, if we are in guest OS (where IS_VIRTUAL(pGpu) is true), 604 // do an RPC to the host to get blacklist information from host RM 605 // 606 if (IS_VIRTUAL(pGpu)) 607 { 608 // RPC for this RmCtrl was implemented as an effort of enabling NVLINK P2P 609 // on vGPU. As NVLINK P2P is supported Pascal+ onwards, we return NOT_SUPPORTED 610 // pre-Pascal. 611 if (IsPASCALorBetter(pGpu)) 612 { 613 CALL_CONTEXT *pCallContext = resservGetTlsCallContext(); 614 RmCtrlParams *pRmCtrlParams = pCallContext->pControlParams; 615 616 NV_RM_RPC_CONTROL(pGpu, pRmCtrlParams->hClient, pRmCtrlParams->hObject, pRmCtrlParams->cmd, 617 pRmCtrlParams->pParams, pRmCtrlParams->paramsSize, status); 618 619 if (IS_VGPU_GSP_PLUGIN_OFFLOAD_ENABLED(pGpu)) 620 { 621 FOR_EACH_INDEX_IN_MASK(32, i, pParams->enabledLinkMask) 622 { 623 NV2080_CTRL_NVLINK_DEVICE_INFO *pDeviceInfo = &pParams->linkInfo[i].remoteDeviceInfo; 624 OBJGPU *pLoopGpu = gpumgrGetGpuFromUuid(pDeviceInfo->deviceUUID, 625 DRF_DEF(2080_GPU_CMD, _GPU_GET_GID_FLAGS, _TYPE, _SHA1) | 626 DRF_DEF(2080_GPU_CMD, _GPU_GET_GID_FLAGS, _FORMAT, _BINARY)); 627 628 // Clear output if no gpu on the other end 629 if (pLoopGpu == NULL) 630 { 631 portMemSet(&pParams->linkInfo[i], 0, sizeof(NV2080_CTRL_NVLINK_LINK_STATUS_INFO)); 632 } 633 else 634 { 635 pDeviceInfo->domain = (pLoopGpu->gpuId >> 16) & 0xffff; 636 pDeviceInfo->bus = (pLoopGpu->gpuId >> 8) & 0xff; 637 pDeviceInfo->device = pLoopGpu->gpuId & 0xff; 638 639 // Clear UUID 640 portMemSet(pDeviceInfo->deviceUUID, 0, sizeof(pDeviceInfo->deviceUUID)); 641 } 642 } 643 FOR_EACH_INDEX_IN_MASK_END; 644 } 645 646 return status; 647 } 648 else 649 { 650 return NV_ERR_NOT_SUPPORTED; 651 } 652 } 653 654 // Initialize link mask to 0 655 pParams->enabledLinkMask = 0; 656 657 if (!bMIGNvLinkP2PSupported) 658 { 659 NV_PRINTF(LEVEL_ERROR, "MIG NVLink P2P is not supported.\n"); 660 status = NV_OK; 661 return status; 662 } 663 664 pTmpData = portMemAllocNonPaged(sizeof(*pTmpData)); 665 666 if (pTmpData == NULL) 667 { 668 return NV_ERR_NO_MEMORY; 669 } 670 portMemSet(pTmpData, 0, sizeof(*pTmpData)); 671 672 if (IS_VGPU_GSP_PLUGIN_OFFLOAD_ENABLED(pGpu) && RMCFG_FEATURE_PLATFORM_GSP) 673 { 674 status = NV_ERR_NOT_SUPPORTED; 675 goto done; 676 } 677 else 678 { 679 KernelNvlink *pKernelNvlink = GPU_GET_KERNEL_NVLINK(pGpu); 680 681 if (pKernelNvlink == NULL) 682 { 683 NV_PRINTF(LEVEL_INFO, "Kernel NVLink is unavailable. Returning.\n"); 684 status = NV_OK; 685 goto done; 686 } 687 688 // Get the remote ends of the links from the nvlink core 689 if (!knvlinkIsForcedConfig(pGpu, pKernelNvlink) && 690 !(IS_RTLSIM(pGpu) && !pKernelNvlink->bForceEnableCoreLibRtlsims)) 691 { 692 // 693 // Get the nvlink connections for this device from the core 694 // If the function fails then the corelib doesn't have enough 695 // info to validate connectivity so we should mark the API call 696 // as not ready 697 // 698 status = knvlinkCoreGetRemoteDeviceInfo(pGpu, pKernelNvlink); 699 if (status != NV_OK) 700 { 701 NV_PRINTF(LEVEL_INFO, "Nvlink is not ready yet!\n"); 702 status = NV_ERR_NOT_READY; 703 goto done; 704 } 705 } 706 707 // 708 // Some links might have passed receiver detect (bridge is present), 709 // but might have failed to transition to safe mode (marginal links) 710 // Update connectedLinks and bridgedLinks mask for these links 711 // 712 knvlinkFilterBridgeLinks_HAL(pGpu, pKernelNvlink); 713 714 pParams->enabledLinkMask = pKernelNvlink->enabledLinks; 715 716 pTmpData->nvlinkLinkAndClockInfoParams.linkMask = pParams->enabledLinkMask; 717 pTmpData->nvlinkLinkAndClockInfoParams.bSublinkStateInst = pParams->bSublinkStateInst; 718 719 status = knvlinkExecGspRmRpc(pGpu, pKernelNvlink, 720 NV2080_CTRL_CMD_NVLINK_GET_LINK_AND_CLOCK_INFO, 721 (void *)&pTmpData->nvlinkLinkAndClockInfoParams, 722 sizeof(pTmpData->nvlinkLinkAndClockInfoParams)); 723 if (status != NV_OK) 724 { 725 NV_PRINTF(LEVEL_ERROR, "Failed to collect nvlink status info!\n"); 726 goto done; 727 } 728 729 FOR_EACH_INDEX_IN_MASK(32, i, pParams->enabledLinkMask) 730 { 731 pTmpData->nvlinkLinks[i].laneRxdetStatusMask = pKernelNvlink->nvlinkLinks[i].laneRxdetStatusMask; 732 733 #if defined(INCLUDE_NVLINK_LIB) 734 pTmpData->nvlinkLinks[i].bConnected = pKernelNvlink->nvlinkLinks[i].remoteEndInfo.bConnected; 735 pTmpData->nvlinkLinks[i].remoteLinkNumber = pKernelNvlink->nvlinkLinks[i].remoteEndInfo.linkNumber; 736 pTmpData->nvlinkLinks[i].remoteDeviceType = pKernelNvlink->nvlinkLinks[i].remoteEndInfo.deviceType; 737 pTmpData->nvlinkLinks[i].remoteChipSid = pKernelNvlink->nvlinkLinks[i].remoteEndInfo.chipSid; 738 pTmpData->nvlinkLinks[i].remoteDomain = pKernelNvlink->nvlinkLinks[i].remoteEndInfo.domain; 739 pTmpData->nvlinkLinks[i].remoteBus = pKernelNvlink->nvlinkLinks[i].remoteEndInfo.bus; 740 pTmpData->nvlinkLinks[i].remoteDevice = pKernelNvlink->nvlinkLinks[i].remoteEndInfo.device; 741 pTmpData->nvlinkLinks[i].remoteFunction = pKernelNvlink->nvlinkLinks[i].remoteEndInfo.function; 742 pTmpData->nvlinkLinks[i].remotePciDeviceId = pKernelNvlink->nvlinkLinks[i].remoteEndInfo.pciDeviceId; 743 pTmpData->nvlinkLinks[i].bLoopbackSupported = knvlinkIsP2pLoopbackSupportedPerLink(pGpu, pKernelNvlink, i); 744 745 if (pKernelNvlink->nvlinkLinks[i].core_link) 746 { 747 pParams->linkInfo[i].localLinkSid = pKernelNvlink->nvlinkLinks[i].core_link->localSid; 748 } 749 #endif 750 } 751 FOR_EACH_INDEX_IN_MASK_END; 752 753 _getNvlinkStatus(pGpu, 754 &pTmpData->nvlinkLinkAndClockInfoParams, 755 pKernelNvlink->bridgeSensableLinks, 756 pKernelNvlink->bridgedLinks, 757 pKernelNvlink->ipVerNvlink, 758 pTmpData->nvlinkLinks, 759 pKernelNvlink->getProperty(pKernelNvlink, PDB_PROP_KNVLINK_ENABLED), 760 pKernelNvlink->getProperty(pKernelNvlink, PDB_PROP_KNVLINK_L2_POWER_STATE_ENABLED), 761 knvlinkIsForcedConfig(pGpu, pKernelNvlink), 762 pParams); 763 } 764 done: 765 portMemFree(pTmpData); 766 767 return status; 768 } 769