/*
 * SPDX-FileCopyrightText: Copyright (c) 2020-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 * SPDX-License-Identifier: MIT
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 */

#define NVOC_KERNEL_NVLINK_H_PRIVATE_ACCESS_ALLOWED

#include "os/os.h"
#include "core/hal.h"
#include "core/info_block.h"
#include "core/locks.h"
#include "core/thread_state.h"
#include "gpu/gpu.h"

#include "kernel/gpu/nvlink/kernel_nvlink.h"
#include "kernel/gpu/nvlink/kernel_ioctrl.h"

#include "nverror.h"

#if defined(INCLUDE_NVLINK_LIB)
#include "nvlink_os.h"
#endif

static void _knvlinkRetrainLinkPrologue(OBJGPU *, KernelNvlink *, NvU32);

#if defined(INCLUDE_NVLINK_LIB)

static NV_STATUS _knvlinkActivateDiscoveredConns(OBJGPU *, KernelNvlink *, NvBool);
static NV_STATUS _knvlinkActivateDiscoveredP2pConn(OBJGPU *, KernelNvlink *, NvU32);
static NV_STATUS _knvlinkActivateDiscoveredSwitchConn(OBJGPU *, KernelNvlink *, NvU32);
static NV_STATUS _knvlinkActivateDiscoveredSysmemConn(OBJGPU *, KernelNvlink *, NvU32);
static NV_STATUS _knvlinkEnterSleep(OBJGPU *, KernelNvlink *, NvU32);
static NV_STATUS _knvlinkExitSleep(OBJGPU *, KernelNvlink *, NvU32);
static NvBool    _knvlinkUpdateSwitchLinkMasks(OBJGPU *, KernelNvlink *, NvU32);
static NvBool    _knvlinkUpdateSwitchLinkMasksGpuDegraded(OBJGPU *, KernelNvlink *);
static void      _knvlinkUpdatePeerConfigs(OBJGPU *, KernelNvlink *);
static void      _knvlinkPrintTopologySummary(OBJGPU *, KernelNvlink *);

#endif

/*!
 * @brief Get the nvlink connections for the GPU.
 *        This function calls into the core library to trigger topology discovery
 *        on the set of links that have been registered with the core library.
 *
 * @param[in] pGpu           OBJGPU pointer
 * @param[in] pKernelNvlink  KernelNvlink pointer
 *
 * @return  NV_OK on success
 */
NV_STATUS
knvlinkCoreGetRemoteDeviceInfo_IMPL
(
    OBJGPU       *pGpu,
    KernelNvlink *pKernelNvlink
)
{
    NV_STATUS status = NV_OK;

#if defined(INCLUDE_NVLINK_LIB)

    OBJSYS *pSys                  = SYS_GET_INSTANCE();
    NvU32   flags                 = NVLINK_STATE_CHANGE_SYNC;
    NvBool  bNvswitchProxyPresent = NV_FALSE;
    NvBool  bUpdateConnStatus     = NV_FALSE;
    NvBool  bCheckDegradedMode    = NV_FALSE;
    nvlink_conn_info conn_info    = {0};
    NvU32   linkId;
    NvU32   numActiveLinksPerIoctrl = 0;
    NvU32   numLinksPerIoctrl       = 0;

    //
    // Topology discovery should NOT be triggered in the RTD3/FGC6 exit path if L2
    // is supported. The remote information will be restored when RM state is restored.
    //
    if (!knvlinkPoweredUpForD3_HAL(pGpu, pKernelNvlink))
    {
        //
        // Optimization: Check for nvlink proxy only when system fabric is externally
        // managed. This avoids RPCs in non-nvswitch cases.
        //
        if (pSys->getProperty(pSys, PDB_PROP_SYS_FABRIC_IS_EXTERNALLY_MANAGED))
        {
            bNvswitchProxyPresent = knvlinkIsNvswitchProxyPresent(pGpu, pKernelNvlink);
        }

        // UpdatePostRxDetect has to happen only if there is a disconnected link
        if (pKernelNvlink->disconnectedLinkMask && pKernelNvlink->bEnableAli)
        {
            // Update the post Rx Det link Mask for the GPU
            knvlinkUpdatePostRxDetectLinkMask(pGpu, pKernelNvlink);
        }

        if (pKernelNvlink->ipVerNvlink >= NVLINK_VERSION_40 &&
            !bNvswitchProxyPresent &&
            !pSys->getProperty(pSys, PDB_PROP_SYS_FABRIC_IS_EXTERNALLY_MANAGED) &&
            pKernelNvlink->pNvlinkDev != NULL &&
            !pKernelNvlink->bFloorSwept)
        {
            numLinksPerIoctrl = knvlinkGetTotalNumLinksPerIoctrl(pGpu, pKernelNvlink);
            status = knvlinkFloorSweep(pGpu, pKernelNvlink,
                                       numLinksPerIoctrl, &numActiveLinksPerIoctrl);
            if (status != NV_OK)
            {
                NV_PRINTF(LEVEL_ERROR, "Failed to floorsweep valid nvlink config!\n");
                return NV_ERR_NOT_READY;
            }
        }

        // We only need to look at links that are still considered disconnected
        FOR_EACH_INDEX_IN_MASK(32, linkId, pKernelNvlink->disconnectedLinkMask)
        {
            //
            // If we are using ALI training, make sure the disconnected link is a
            // valid link that is progressing past RxDet
            //
            if (pKernelNvlink->bEnableAli &&
                !(pKernelNvlink->postRxDetLinkMask & NVBIT(linkId)))
            {
                continue;
            }

            bUpdateConnStatus = NV_FALSE;

            if (pKernelNvlink->nvlinkLinks[linkId].core_link)
            {
                // Call the core library to get the remote end information
                if (pSys->getProperty(pSys, PDB_PROP_SYS_FABRIC_IS_EXTERNALLY_MANAGED))
                {
                    if (gpuFabricProbeIsSupported(pGpu))
                    {
                        //
                        // If FM doesn't talk to the NVLink driver using control calls
                        // (i.e. uses NVLink inband comm instead) such as
                        // IOCTL CTRL_NVLINK_DISCOVER_INTRANODE_CONNS,
                        // discover remote information explicitly.
                        //
                        nvlink_lib_discover_and_get_remote_conn_info(
                            pKernelNvlink->nvlinkLinks[linkId].core_link, &conn_info, flags);
                    }
                    else
                    {
                        nvlink_lib_get_remote_conn_info(
                            pKernelNvlink->nvlinkLinks[linkId].core_link, &conn_info);
                    }

                    //
                    // nvlink_lib_get_remote_conn_info could fail to return connection info if
                    // it runs on a shared-NVSwitch virtualization model (HGX-2) where GPU nodes
                    // can't see NVSwitches. In that case, examine the NVLink scratch register
                    // for connectivity information.
                    //
                    if (!conn_info.bConnected &&
                        (bNvswitchProxyPresent ||
                         GPU_IS_NVSWITCH_DETECTED(pGpu)))
                    {
                        conn_info.bConnected  = NV_TRUE;
                        conn_info.deviceType  = NVLINK_DEVICE_TYPE_NVSWITCH;
                        conn_info.pciDeviceId = NV_U32_MAX;
                        conn_info.domain      = NV_U32_MAX;
                        conn_info.bus         = NV_U16_MAX;
                        conn_info.device      = NV_U16_MAX;
                        conn_info.function    = NV_U16_MAX;
                    }

                    //
                    // A new connection was discovered from the core library. But we don't
                    // know if this connection was shut down or reset by fabric manager while
                    // enabling degraded mode. So, we have to check for degraded mode.
                    //
                    if (conn_info.bConnected)
                    {
                        bCheckDegradedMode = NV_TRUE;
                    }
                }
                else
                {
                    // Asynchronous link initialization for IP 2.2
                    if (pKernelNvlink->ipVerNvlink == NVLINK_VERSION_22)
                    {
                        flags = NVLINK_STATE_CHANGE_ASYNC;
                    }

                    nvlink_lib_discover_and_get_remote_conn_info(
                        pKernelNvlink->nvlinkLinks[linkId].core_link, &conn_info, flags);
                }

                // RPC into GSP-RM to update the link connected status only if it's required
                if (pKernelNvlink->nvlinkLinks[linkId].remoteEndInfo.bConnected != conn_info.bConnected)
                    bUpdateConnStatus = NV_TRUE;

                pKernelNvlink->nvlinkLinks[linkId].remoteEndInfo.bConnected = conn_info.bConnected;

                if (pKernelNvlink->nvlinkLinks[linkId].remoteEndInfo.bConnected)
                {
                    // Update the RM cache for the remote device information for the link
                    pKernelNvlink->nvlinkLinks[linkId].remoteEndInfo.domain      = conn_info.domain;
                    pKernelNvlink->nvlinkLinks[linkId].remoteEndInfo.bus         = conn_info.bus;
                    pKernelNvlink->nvlinkLinks[linkId].remoteEndInfo.device      = conn_info.device;
                    pKernelNvlink->nvlinkLinks[linkId].remoteEndInfo.function    = conn_info.function;
                    pKernelNvlink->nvlinkLinks[linkId].remoteEndInfo.pciDeviceId = conn_info.pciDeviceId;
                    pKernelNvlink->nvlinkLinks[linkId].remoteEndInfo.deviceType  = conn_info.deviceType;
                    pKernelNvlink->nvlinkLinks[linkId].remoteEndInfo.linkNumber  = conn_info.linkNumber;
                    pKernelNvlink->nvlinkLinks[linkId].remoteEndInfo.chipSid     = conn_info.chipSid;

                    nvlink_memcpy(pKernelNvlink->nvlinkLinks[linkId].remoteEndInfo.devUuid,
                                  conn_info.devUuid,
                                  NV_UUID_LEN);
                }

                if (bUpdateConnStatus)
                {
                    // RPC into GSP-RM to update the link remote connection status for pGpu
                    status = knvlinkUpdateLinkConnectionStatus(pGpu, pKernelNvlink, linkId);
                    if (status != NV_OK)
                    {
                        return status;
                    }
                }
            }
        }
        FOR_EACH_INDEX_IN_MASK_END;
    }
    else
    {
        NV_PRINTF(LEVEL_INFO,
                  "L2 supported. Skip topology discovery on GPU%d in RTD3/FGC6 exit\n",
                  pGpu->gpuInstance);
    }

    //
    // Update the RM cache for the discovered connections and then activate
    // those connections. This includes all the post-topology settings like
    // buffer-ready and interrupt enables
    //
    status = _knvlinkActivateDiscoveredConns(pGpu, pKernelNvlink, bCheckDegradedMode);
    if (status != NV_OK)
    {
        NV_PRINTF(LEVEL_INFO,
                  "Failed to activate the discovered connections on GPU%d\n",
                  pGpu->gpuInstance);
    }

#endif

    return status;
}
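
/*
 * Usage sketch (illustrative only, not part of the driver): a hypothetical
 * state-load path could trigger topology discovery as below. The surrounding
 * locking and state-load plumbing is assumed and elided; the wrapper name
 * without the _IMPL suffix is assumed to be the usual NVOC dispatch.
 *
 *     KernelNvlink *pKernelNvlink = GPU_GET_KERNEL_NVLINK(pGpu);
 *     if (pKernelNvlink != NULL)
 *     {
 *         // Discover remote endpoints and activate any newly found connections
 *         NV_ASSERT_OK_OR_RETURN(knvlinkCoreGetRemoteDeviceInfo(pGpu, pKernelNvlink));
 *     }
 */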
 *
 * @param[in] pGpu           OBJGPU pointer
 * @param[in] pKernelNvlink  KernelNvlink pointer
 *
 * @return  NV_OK on success
 */
NV_STATUS
knvlinkTrainSysmemLinksToActive_IMPL
(
    OBJGPU       *pGpu,
    KernelNvlink *pKernelNvlink
)
{
#if defined(INCLUDE_NVLINK_LIB)

    OBJSYS *pSys = SYS_GET_INSTANCE();
    NvU32   i;

    // On Fmodel, sysmem link training is not supported
    if (IS_FMODEL(pGpu))
    {
        NV_PRINTF(LEVEL_INFO,
                  "Skipping unsupported sysmem link training on GPU%d\n",
                  pGpu->gpuInstance);

        return NV_OK;
    }

    // Return if link training is force disabled through regkey
    if (pKernelNvlink->bSkipLinkTraining)
    {
        NV_PRINTF(LEVEL_INFO,
                  "Skipping link training due to regkey on GPU%d\n",
                  pGpu->gpuInstance);

        return NV_OK;
    }

    // If fabric is managed by FM, return
    if (pSys->getProperty(pSys, PDB_PROP_SYS_FABRIC_IS_EXTERNALLY_MANAGED))
    {
        NV_PRINTF(LEVEL_INFO,
                  "Fabric is externally managed, skip link training\n");

        return NV_OK;
    }

    NV_PRINTF(LEVEL_INFO, "Training sysmem links for GPU%d\n",
              pGpu->gpuInstance);

    FOR_EACH_INDEX_IN_MASK(32, i, pKernelNvlink->enabledLinks)
    {
        if (pKernelNvlink->nvlinkLinks[i].remoteEndInfo.bConnected &&
            ((pKernelNvlink->nvlinkLinks[i].remoteEndInfo.deviceType == NVLINK_DEVICE_TYPE_IBMNPU)    ||
             (pKernelNvlink->nvlinkLinks[i].remoteEndInfo.deviceType == NVLINK_DEVICE_TYPE_TEGRASHIM) ||
             (pKernelNvlink->nvlinkLinks[i].remoteEndInfo.deviceType == NVLINK_DEVICE_TYPE_EBRIDGE)))
        {
            if (nvlink_lib_train_links_from_swcfg_to_active(
                    &pKernelNvlink->nvlinkLinks[i].core_link, 1, NVLINK_STATE_CHANGE_SYNC)
                    != NVL_SUCCESS)
            {
                nvErrorLog_va((void *)pGpu, NVLINK_ERROR,
                              "NVLink: failed to train link %d to remote PCI:%04x:%02x:%02x",
                              i,
                              pKernelNvlink->nvlinkLinks[i].remoteEndInfo.domain,
                              pKernelNvlink->nvlinkLinks[i].remoteEndInfo.bus,
                              pKernelNvlink->nvlinkLinks[i].remoteEndInfo.device);

                return NV_ERR_NOT_SUPPORTED;
            }
        }
    }
    FOR_EACH_INDEX_IN_MASK_END;

    //
    // After training links, we may have used up most of the available 4s
    // timeout during GPU state load. As a WAR in lieu of improving the
    // performance of link training SW, reset the timeout for now.
    //
    NV_PRINTF(LEVEL_INFO, "resetting timeout after link training\n");

    threadStateResetTimeout(pGpu);

#endif

    return NV_OK;
}

/*!
 * @brief Ensure links are trained and put into active.
 *
 * @param[in] pGpu0           OBJGPU pointer
 * @param[in] pGpu1           OBJGPU pointer
 * @param[in] pKernelNvlink0  KernelNvlink pointer
 *
 * @return  NV_OK on success
 */
NV_STATUS
knvlinkCheckTrainingIsComplete_IMPL
(
    OBJGPU       *pGpu0,
    OBJGPU       *pGpu1,
    KernelNvlink *pKernelNvlink0
)
{
    NV_STATUS status = NV_OK;

#if defined(INCLUDE_NVLINK_LIB)

    OBJSYS       *pSys           = SYS_GET_INSTANCE();
    NvU32         version        = pKernelNvlink0->ipVerNvlink;
    KernelNvlink *pKernelNvlink1 = GPU_GET_KERNEL_NVLINK(pGpu1);
    NvU32         count          = 0;
    NvU32         i;

    if (pKernelNvlink1 == NULL)
    {
        NV_PRINTF(LEVEL_ERROR,
                  "Input mask contains a GPU on which NVLink is disabled.\n");

        return NV_ERR_INVALID_ARGUMENT;
    }

    nvlink_link *pLinks[NVLINK_MAX_LINKS_SW] = { 0 };

    // Link training will be triggered from KMD in the L2 exit path
    if (knvlinkPoweredUpForD3_HAL(pGpu0, pKernelNvlink0))
    {
        NV_PRINTF(LEVEL_INFO,
                  "Skip link training on GPU%d in RTD3/FGC6 exit. Links will train to "
                  "ACTIVE in L2 exit path\n", pGpu0->gpuInstance);
        return NV_OK;
    }

    // Minion and SW training are disabled by default on RTL
    if (IS_RTLSIM(pGpu0) && !pKernelNvlink0->bForceEnableCoreLibRtlsims)
    {
        return NV_OK;
    }

    // Return if link training is force disabled through regkey
    if (pKernelNvlink0->bSkipLinkTraining)
    {
        NV_PRINTF(LEVEL_INFO,
                  "Skipping link training due to regkey on GPU%d\n",
                  pGpu0->gpuInstance);
        return NV_OK;
    }

    // Return if forced config, since SW training is not supported
    if (knvlinkIsForcedConfig(pGpu0, pKernelNvlink0))
    {
        NV_PRINTF(LEVEL_INFO, "Skipping link training due to forced configuration\n");
        return NV_OK;
    }

    // If fabric is managed by FM, return
    if (pSys->getProperty(pSys, PDB_PROP_SYS_FABRIC_IS_EXTERNALLY_MANAGED))
    {
        NV_PRINTF(LEVEL_INFO,
                  "Fabric is externally managed, skip link training\n");
        return NV_OK;
    }

    //
    // If ALI then ensure it has completed
    // Else run through training for legacy nvlink versions
    //
    if (pKernelNvlink0->bEnableAli || pKernelNvlink1->bEnableAli)
    {
        // Polling for train complete is only allowed for NVLink 4.0+
        NV_ASSERT(version >= NVLINK_VERSION_40);

        //
        // Check to make sure that the links for the first GPU have
        // all completed training
        //
        FOR_EACH_INDEX_IN_MASK(32, i, pKernelNvlink0->postRxDetLinkMask)
        {
            pLinks[count] = pKernelNvlink0->nvlinkLinks[i].core_link;
            count++;
        }
        FOR_EACH_INDEX_IN_MASK_END;

        // If the return code is non-zero, links are still training
        if (nvlink_lib_check_training_complete(pLinks, count) != 0)
        {
            NV_PRINTF(LEVEL_INFO, "Links aren't fully trained yet!\n");
            knvlinkLogAliDebugMessages(pGpu0, pKernelNvlink0);
            return NV_ERR_GENERIC;
        }

        //
        // For all links in the postRxDetLinkMask, get their peer
        // link information
        //
        FOR_EACH_INDEX_IN_MASK(32, i, pKernelNvlink0->postRxDetLinkMask)
        {
            NV2080_CTRL_NVLINK_UPDATE_REMOTE_LOCAL_SID_PARAMS params;
            portMemSet(&params, 0, sizeof(params));

            params.linkId = i;

            status = knvlinkExecGspRmRpc(pGpu0, pKernelNvlink0,
                                         NV2080_CTRL_CMD_NVLINK_UPDATE_REMOTE_LOCAL_SID,
                                         (void *)&params, sizeof(params));
            if (status != NV_OK)
            {
                NV_PRINTF(LEVEL_ERROR, "Error updating Local/Remote Sid Info!\n");
                return status;
            }

            pKernelNvlink0->nvlinkLinks[i].core_link->remoteSid =
                params.remoteLocalSidInfo.remoteSid;
            pKernelNvlink0->nvlinkLinks[i].core_link->remoteDeviceType =
                params.remoteLocalSidInfo.remoteDeviceType;
            pKernelNvlink0->nvlinkLinks[i].core_link->remoteLinkId =
                params.remoteLocalSidInfo.remoteLinkId;
            pKernelNvlink0->nvlinkLinks[i].core_link->localSid =
                params.remoteLocalSidInfo.localSid;
        }
        FOR_EACH_INDEX_IN_MASK_END;

        // Only enter if not in loopBack
        if (pKernelNvlink0 != pKernelNvlink1)
        {
            //
            // Check to make sure that the links for the second GPU have
            // all completed training. Reset count for this GPU prior
            // to querying for the links
            //
            count = 0;
            FOR_EACH_INDEX_IN_MASK(32, i, pKernelNvlink1->postRxDetLinkMask)
            {
                pLinks[count] = pKernelNvlink1->nvlinkLinks[i].core_link;
                count++;
            }
            FOR_EACH_INDEX_IN_MASK_END;

            // If the return code is non-zero, links are still training
            if (nvlink_lib_check_training_complete(pLinks, count) != 0)
            {
                NV_PRINTF(LEVEL_INFO, "Links aren't fully trained yet!\n");
                knvlinkLogAliDebugMessages(pGpu1, pKernelNvlink1);
                return NV_ERR_GENERIC;
            }

            //
            // For all links in the postRxDetLinkMask, get their peer
            // link information
            //
            FOR_EACH_INDEX_IN_MASK(32, i, pKernelNvlink1->postRxDetLinkMask)
            {
                NV2080_CTRL_NVLINK_UPDATE_REMOTE_LOCAL_SID_PARAMS params;
                portMemSet(&params, 0, sizeof(params));

                params.linkId = i;

                status = knvlinkExecGspRmRpc(pGpu1, pKernelNvlink1,
                                             NV2080_CTRL_CMD_NVLINK_UPDATE_REMOTE_LOCAL_SID,
                                             (void *)&params, sizeof(params));
                if (status != NV_OK)
                {
                    NV_PRINTF(LEVEL_ERROR, "Error updating Local/Remote Sid Info!\n");
                    return status;
                }

                pKernelNvlink1->nvlinkLinks[i].core_link->remoteSid =
                    params.remoteLocalSidInfo.remoteSid;
                pKernelNvlink1->nvlinkLinks[i].core_link->remoteDeviceType =
                    params.remoteLocalSidInfo.remoteDeviceType;
                pKernelNvlink1->nvlinkLinks[i].core_link->remoteLinkId =
                    params.remoteLocalSidInfo.remoteLinkId;
                pKernelNvlink1->nvlinkLinks[i].core_link->localSid =
                    params.remoteLocalSidInfo.localSid;
            }
            FOR_EACH_INDEX_IN_MASK_END;
        }
    }

#endif

    return status;
}
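
/*
 * Polling sketch (illustrative only): since this function returns NV_ERR_GENERIC
 * while ALI training is still in flight, a caller is expected to poll it under a
 * timeout, similar to the loop in _knvlinkExitSleep further below. The timeout
 * value and error handling shown here are only examples.
 *
 *     RMTIMEOUT timeout;
 *     gpuSetTimeout(pGpu0, GPU_TIMEOUT_DEFAULT, &timeout, 0);
 *     do
 *     {
 *         if (knvlinkCheckTrainingIsComplete(pGpu0, pGpu1, pKernelNvlink0) == NV_OK)
 *             break;
 *         osSpinLoop();
 *     } while (gpuCheckTimeout(pGpu0, &timeout) != NV_ERR_TIMEOUT);
 */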
/*!
 * @brief Train all the connected links between the two given devices
 *        to active through the nvlink core library.
 *
 * @param[in] pGpu0           OBJGPU pointer
 * @param[in] pGpu1           OBJGPU pointer
 * @param[in] pKernelNvlink0  KernelNvlink pointer
 *
 * @return  NV_OK on success
 */
NV_STATUS
knvlinkTrainP2pLinksToActive_IMPL
(
    OBJGPU       *pGpu0,
    OBJGPU       *pGpu1,
    KernelNvlink *pKernelNvlink0
)
{
    NV_STATUS status = NV_OK;

#if defined(INCLUDE_NVLINK_LIB)

    OBJSYS       *pSys           = SYS_GET_INSTANCE();
    KernelNvlink *pKernelNvlink1 = GPU_GET_KERNEL_NVLINK(pGpu1);
    NvU32         version        = pKernelNvlink0->ipVerNvlink;
    NvBool        bTrainLinks    = NV_FALSE;
    NvU32         count          = 0;
    NvU32         remoteLink;
    NvU32         i;

    nvlink_link *pLinks[NVLINK_MAX_LINKS_SW] = { 0 };

    // Link training will be triggered from KMD in the L2 exit path
    if (knvlinkPoweredUpForD3_HAL(pGpu0, pKernelNvlink0))
    {
        NV_PRINTF(LEVEL_INFO,
                  "Skip link training on GPU%d in RTD3/FGC6 exit. Links will train to "
                  "ACTIVE in L2 exit path\n", pGpu0->gpuInstance);

        return NV_OK;
    }

    // Minion and SW training are disabled by default on RTL
    if (IS_RTLSIM(pGpu0) && !pKernelNvlink0->bForceEnableCoreLibRtlsims)
    {
        return NV_OK;
    }

    // Return if link training is force disabled through regkey
    if (pKernelNvlink0->bSkipLinkTraining)
    {
        NV_PRINTF(LEVEL_INFO,
                  "Skipping link training due to regkey on GPU%d\n",
                  pGpu0->gpuInstance);

        return NV_OK;
    }

    // Return if forced config, since SW training is not supported
    if (knvlinkIsForcedConfig(pGpu0, pKernelNvlink0))
    {
        NV_PRINTF(LEVEL_INFO, "Skipping link training due to forced configuration\n");

        return NV_OK;
    }

    // If fabric is managed by FM, return
    if (pSys->getProperty(pSys, PDB_PROP_SYS_FABRIC_IS_EXTERNALLY_MANAGED))
    {
        NV_PRINTF(LEVEL_INFO,
                  "Fabric is externally managed, skip link training\n");

        return NV_OK;
    }

    //
    // Bug# 3601144: On Ampere+ systems, return if links are already initialized,
    // since that implies links are already trained.
    //
    if (IsAMPEREorBetter(pGpu0))
    {
        NvU32 localMask  = 0;
        NvU32 remoteMask = 0;

        FOR_EACH_INDEX_IN_MASK(32, i, pKernelNvlink0->enabledLinks)
        {
            if (KNVLINK_IS_LINK_CONNECTED_TO_GPU(pKernelNvlink0, i, pGpu1))
            {
                remoteLink = pKernelNvlink0->nvlinkLinks[i].remoteEndInfo.linkNumber;

                localMask  |= BIT(i);
                remoteMask |= BIT(remoteLink);
            }
        }
        FOR_EACH_INDEX_IN_MASK_END;

        if (((pKernelNvlink0->initializedLinks & localMask)  == localMask) &&
            ((pKernelNvlink1->initializedLinks & remoteMask) == remoteMask))
        {
            NV_PRINTF(LEVEL_INFO, "P2P links are all trained already, return\n");
            return NV_OK;
        }
    }

    // Get the link train status for the enabled link masks
    NV2080_CTRL_NVLINK_ARE_LINKS_TRAINED_PARAMS linkTrainedParams;

    portMemSet(&linkTrainedParams, 0, sizeof(linkTrainedParams));
    linkTrainedParams.linkMask    = pKernelNvlink0->enabledLinks;
    linkTrainedParams.bActiveOnly = NV_TRUE;

    // Reset timeout to clear any accumulated timeouts from link init
    if (IS_GSP_CLIENT(pGpu0))
    {
        threadStateResetTimeout(pGpu0);
    }

    status = knvlinkExecGspRmRpc(pGpu0, pKernelNvlink0,
                                 NV2080_CTRL_CMD_NVLINK_ARE_LINKS_TRAINED,
                                 (void *)&linkTrainedParams,
                                 sizeof(linkTrainedParams));
    if (status != NV_OK)
    {
        NV_PRINTF(LEVEL_ERROR, "Failed to get the link train status for links\n");
        return status;
    }

    //
    // Bug# 3424466: Optimization - Return if all enabled links for this GPU are
    // already trained. The core library makes several callbacks to check link
    // state, which results in numerous RPCs on GSP-RM platforms, resulting in low
    // perf on chips which have low link training latency and low link count.
    //
    FOR_EACH_INDEX_IN_MASK(32, i, pKernelNvlink0->enabledLinks)
    {
        if (!KNVLINK_IS_LINK_CONNECTED_TO_GPU(pKernelNvlink0, i, pGpu1))
        {
            continue;
        }

        if (!linkTrainedParams.bIsLinkActive[i])
        {
            bTrainLinks = NV_TRUE;
            break;
        }
    }
    FOR_EACH_INDEX_IN_MASK_END;

    if (!bTrainLinks)
    {
        NV_PRINTF(LEVEL_INFO, "Enabled links are all trained already, return\n");
        return NV_OK;
    }

    // Train the mask of enabled links to ACTIVE state
    FOR_EACH_INDEX_IN_MASK(32, i, pKernelNvlink0->enabledLinks)
    {
        if (!KNVLINK_IS_LINK_CONNECTED_TO_GPU(pKernelNvlink0, i, pGpu1))
        {
            continue;
        }

        if (version >= NVLINK_VERSION_22)
        {
            // Capture links for parallel link training
            pLinks[count] = pKernelNvlink0->nvlinkLinks[i].core_link;
            count++;
        }
        else
        {
            // Invoke link training for NVLINK <= 2.0
            (void)nvlink_lib_train_links_from_swcfg_to_active(
                &pKernelNvlink0->nvlinkLinks[i].core_link, 1, NVLINK_STATE_CHANGE_SYNC);
        }
    }
    FOR_EACH_INDEX_IN_MASK_END;

    // Invoke link training for NVLINK >= 2.2
    if (count > 0)
    {
        //
        // nvlink_lib_train_links_from_swcfg_to_active with the
        // NVLINK_STATE_CHANGE_ASYNC flag invokes link training asynchronously,
        // but the call itself is synchronous i.e. it will poll for link
        // training to complete.
        //
        NV_ASSERT(version >= NVLINK_VERSION_22);
        (void)nvlink_lib_train_links_from_swcfg_to_active(
            pLinks, count, NVLINK_STATE_CHANGE_ASYNC);
    }

    // Get the link train status for the enabled link masks
    portMemSet(&linkTrainedParams, 0, sizeof(linkTrainedParams));
    linkTrainedParams.linkMask    = pKernelNvlink0->enabledLinks;
    linkTrainedParams.bActiveOnly = NV_TRUE;

    // Reset timeout to clear any accumulated timeouts from link init
    if (IS_GSP_CLIENT(pGpu0))
    {
        threadStateResetTimeout(pGpu0);
    }

    status = knvlinkExecGspRmRpc(pGpu0, pKernelNvlink0,
                                 NV2080_CTRL_CMD_NVLINK_ARE_LINKS_TRAINED,
                                 (void *)&linkTrainedParams,
                                 sizeof(linkTrainedParams));
    if (status != NV_OK)
    {
        NV_PRINTF(LEVEL_ERROR, "Failed to get the link train status for links\n");
        return status;
    }

    // Check if the links are trained to "active" state.
    FOR_EACH_INDEX_IN_MASK(32, i, pKernelNvlink0->enabledLinks)
    {
        if (!KNVLINK_IS_LINK_CONNECTED_TO_GPU(pKernelNvlink0, i, pGpu1))
        {
            continue;
        }

        if (linkTrainedParams.bIsLinkActive[i])
        {
            continue;
        }

        nvErrorLog_va((void *)pGpu0, NVLINK_ERROR,
                      "NVLink: Failed to train link %d to remote PCI:%04x:%02x:%02x",
                      i,
                      pKernelNvlink0->nvlinkLinks[i].remoteEndInfo.domain,
                      pKernelNvlink0->nvlinkLinks[i].remoteEndInfo.bus,
                      pKernelNvlink0->nvlinkLinks[i].remoteEndInfo.device);

        status = NV_ERR_INVALID_STATE;
    }
    FOR_EACH_INDEX_IN_MASK_END;

#endif

    return status;
}
/*!
 * knvlinkTrainFabricLinksToActive_IMPL
 *     Set up NVLinks between 2 peers connected to a switch. Train the links to
 *     High Speed.
 *
 * Note: The desired sequence to set up NVLink P2P is:
 *     1. A client queries P2P capability among GPUs.
 *     2. If the GPUs are P2P compatible, create an NV50_P2P object, which invokes
 *        link training.
 *
 * However, existing GPU<->GPU link training happens during step 1 through
 * gpumgrGetP2PCaps - which gets called on RmInitAdapter and may lead to timeouts
 * depending on the time consumed by costly link training operations.
 *
 * For now, we are fixing this for nvswitch systems by adding this helper
 * function which should just get invoked during NV50_P2P object creation.
 *
 * This issue needs to be fixed for non-nvswitch systems as well. Bug:200285708.
 * Once the bug is fixed, knvlinkTrainFabricLinksToActive can be called from
 * knvlinkTrainP2pLinksToActive.
 *
 * @param[in] pGpu           OBJGPU pointer
 * @param[in] pKernelNvlink  KernelNvlink pointer
 *
 * @return  NV_OK on success
 */
NV_STATUS
knvlinkTrainFabricLinksToActive_IMPL
(
    OBJGPU       *pGpu,
    KernelNvlink *pKernelNvlink
)
{
#if defined(INCLUDE_NVLINK_LIB)

    OBJSYS *pSys = SYS_GET_INSTANCE();
    NvU32   i;

    // Minion and SW training are disabled by default on RTL
    if (IS_RTLSIM(pGpu) && !pKernelNvlink->bForceEnableCoreLibRtlsims)
    {
        return NV_OK;
    }

    // Return if link training is force disabled through regkey
    if (pKernelNvlink->bSkipLinkTraining)
    {
        NV_PRINTF(LEVEL_INFO,
                  "Skipping link training due to regkey on GPU%d\n",
                  pGpu->gpuInstance);

        return NV_OK;
    }

    // If fabric is managed by FM, return
    if (pSys->getProperty(pSys, PDB_PROP_SYS_FABRIC_IS_EXTERNALLY_MANAGED))
    {
        NV_PRINTF(LEVEL_INFO,
                  "Fabric is externally managed, skip link training\n");

        return NV_OK;
    }

    if (knvlinkIsForcedConfig(pGpu, pKernelNvlink))
    {
        NV_PRINTF(LEVEL_INFO,
                  "Nvlink in Forced Config - skip link training.\n");

        return NV_OK;
    }

    FOR_EACH_INDEX_IN_MASK(32, i, pKernelNvlink->enabledLinks)
    {
        if (pKernelNvlink->nvlinkLinks[i].remoteEndInfo.bConnected &&
            (pKernelNvlink->nvlinkLinks[i].remoteEndInfo.deviceType ==
                                            NVLINK_DEVICE_TYPE_NVSWITCH))
        {
            if (nvlink_lib_train_links_from_swcfg_to_active(
                    &pKernelNvlink->nvlinkLinks[i].core_link, 1, NVLINK_STATE_CHANGE_SYNC)
                    != NVL_SUCCESS)
            {
                nvErrorLog_va((void *)pGpu, NVLINK_ERROR,
                              "NVLink: failed to train link %d to remote PCI:%04x:%02x:%02x",
                              i,
                              pKernelNvlink->nvlinkLinks[i].remoteEndInfo.domain,
                              pKernelNvlink->nvlinkLinks[i].remoteEndInfo.bus,
                              pKernelNvlink->nvlinkLinks[i].remoteEndInfo.device);

                return NV_ERR_INVALID_STATE;
            }
        }
    }
    FOR_EACH_INDEX_IN_MASK_END;

#endif

    return NV_OK;
}
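
/*
 * Sequence sketch (illustrative only): per the note above, on nvswitch systems
 * this helper is meant to be invoked from NV50_P2P object creation, roughly:
 *
 *     // 1. Client checks P2P capability between the GPUs (gpumgrGetP2PCaps path).
 *     // 2. On NV50_P2P object creation, fabric links are trained to High Speed:
 *     NV_CHECK_OK_OR_RETURN(LEVEL_ERROR,
 *         knvlinkTrainFabricLinksToActive(pGpu, pKernelNvlink));
 *
 * The wrapper name without the _IMPL suffix is assumed to be the usual NVOC
 * dispatch; the error handling shown is only an example.
 */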
/*!
 * @brief Transition/Wakeup the links into/from sleep (L2) state
 *
 * @param[in] pGpu           OBJGPU pointer
 * @param[in] pKernelNvlink  KernelNvlink pointer
 * @param[in] linkMask       Mask of links
 * @param[in] bEntry         Enter/Exit sleep (L2)
 *
 * @return  NV_OK on success
 */
NV_STATUS
knvlinkEnterExitSleep_IMPL
(
    OBJGPU       *pGpu,
    KernelNvlink *pKernelNvlink,
    NvU32         linkMask,
    NvBool        bEntry
)
{
#if defined(INCLUDE_NVLINK_LIB)

    OBJSYS *pSys = SYS_GET_INSTANCE();
    NvU32   linkId;

    // NVLink L2 as a feature should be enabled
    if (!pKernelNvlink->getProperty(pKernelNvlink,
                                    PDB_PROP_KNVLINK_L2_POWER_STATE_ENABLED))
    {
        NV_PRINTF(LEVEL_ERROR, "NVLink L2 is not supported. Returning\n");

        return NV_ERR_NOT_SUPPORTED;
    }

    // Return error if NVLink fabric is managed by FM
    if (pSys->getProperty(pSys, PDB_PROP_SYS_FABRIC_IS_EXTERNALLY_MANAGED))
    {
        NV_PRINTF(LEVEL_ERROR,
                  "Skipping L2 entry/exit since fabric is externally managed\n");

        return NV_ERR_NOT_SUPPORTED;
    }

    // Check if all the links in the mask are connected
    FOR_EACH_INDEX_IN_MASK(32, linkId, linkMask)
    {
        if (!pKernelNvlink->nvlinkLinks[linkId].remoteEndInfo.bConnected)
        {
            NV_PRINTF(LEVEL_ERROR,
                      "GPU%d: Link%d is not connected. Returning\n",
                      pGpu->gpuInstance, linkId);

            return NV_ERR_NOT_SUPPORTED;
        }
    }
    FOR_EACH_INDEX_IN_MASK_END;

    // Links that share a PLL must enter/exit L2 together
    FOR_EACH_INDEX_IN_MASK(32, linkId, linkMask)
    {
        // If the link is a PLL master, consider the slave link
        if (pKernelNvlink->nvlinkLinks[linkId].pllMasterLinkId == linkId)
        {
            // If the slave link exists and is not init-disabled, it should be included
            if ((pKernelNvlink->nvlinkLinks[linkId].pllSlaveLinkId != NVLINK_MAX_LINKS_SW) &&
                (NVBIT(pKernelNvlink->nvlinkLinks[linkId].pllSlaveLinkId) & pKernelNvlink->enabledLinks) &&
               !(NVBIT(pKernelNvlink->nvlinkLinks[linkId].pllSlaveLinkId) & linkMask))
            {
                NV_PRINTF(LEVEL_ERROR,
                          "GPU%d: Links sharing PLL should enter/exit L2 together. Returning\n",
                          pGpu->gpuInstance);

                return NV_ERR_NOT_SUPPORTED;
            }
        }
        else
        {
            // For a slave link, its PLL master should be included if not init-disabled
            if ((NVBIT(pKernelNvlink->nvlinkLinks[linkId].pllMasterLinkId) & pKernelNvlink->enabledLinks) &&
               !(NVBIT(pKernelNvlink->nvlinkLinks[linkId].pllMasterLinkId) & linkMask))
            {
                NV_PRINTF(LEVEL_ERROR,
                          "GPU%d: Links sharing PLL should enter/exit L2 together. Returning\n",
                          pGpu->gpuInstance);

                return NV_ERR_NOT_SUPPORTED;
            }
        }
    }
    FOR_EACH_INDEX_IN_MASK_END;

    // Device must be registered in the nvlink core library
    if (!pKernelNvlink->pNvlinkDev)
    {
        NV_PRINTF(LEVEL_ERROR,
                  "GPU%d: not registered in core lib. Returning\n",
                  pGpu->gpuInstance);

        return NV_ERR_NOT_SUPPORTED;
    }

    if (bEntry)
    {
        // Remove the peer mapping in HSHUB and transition links to sleep (L2)
        return _knvlinkEnterSleep(pGpu, pKernelNvlink, linkMask);
    }
    else
    {
        // Wakeup the links from sleep (L2) and setup the peer mapping in HSHUB
        return _knvlinkExitSleep(pGpu, pKernelNvlink, linkMask);
    }
#endif

    return NV_OK;
}
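
/*
 * Caller sketch (illustrative only): the _knvlinkEnterSleep helper (defined later
 * in this file) can return NV_WARN_MORE_PROCESSING_REQUIRED when the remote
 * endpoints have not yet requested L2 entry, so a hypothetical power-management
 * path would treat that warning as "retry later" rather than as a failure:
 *
 *     NV_STATUS status = knvlinkEnterExitSleep(pGpu, pKernelNvlink, linkMask, NV_TRUE);
 *     if (status == NV_WARN_MORE_PROCESSING_REQUIRED)
 *     {
 *         // L2 entry is pending on the remote end; poll/retry from the caller.
 *     }
 *     else if (status != NV_OK)
 *     {
 *         // Genuine failure (unsupported config, unconnected link, etc.).
 *     }
 */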
/*!
 * @brief Shutdown all the connected links associated with the device
 *        through the nvlink core library.
 *
 * @param[in] pGpu           OBJGPU pointer
 * @param[in] pKernelNvlink  KernelNvlink pointer
 *
 * @return  NV_OK on success
 */
NV_STATUS
knvlinkCoreShutdownDeviceLinks_IMPL
(
    OBJGPU       *pGpu,
    KernelNvlink *pKernelNvlink,
    NvBool        bForceShutdown
)
{
#if defined(INCLUDE_NVLINK_LIB)

    nvlink_link *pLinks[NVLINK_MAX_LINKS_SW] = {0};
    OBJSYS      *pSys  = SYS_GET_INSTANCE();
    NvU32        count = 0;
    NvU32        linkId;

    // Skip link shutdown when fabric manager is present, for nvlink versions below 4.0
    if ((pKernelNvlink->ipVerNvlink < NVLINK_VERSION_40 &&
         pSys->getProperty(pSys, PDB_PROP_SYS_FABRIC_IS_EXTERNALLY_MANAGED)) ||
        (pKernelNvlink->pNvlinkDev == NULL))
    {
        NV_PRINTF(LEVEL_INFO,
                  "core lib device is either externally managed or not present, skipping\n");

        return NV_OK;
    }

    // Return early if there are no enabled links
    if (pKernelNvlink->enabledLinks == 0)
    {
        NV_PRINTF(LEVEL_INFO, "No links to shutdown for the GPU%d\n",
                  pGpu->gpuInstance);

        return NV_OK;
    }

    if (!bForceShutdown && pKernelNvlink->getProperty(pKernelNvlink, PDB_PROP_KNVLINK_MINION_GFW_BOOT))
    {
        NV_PRINTF(LEVEL_INFO,
                  "GFW boot is enabled. Link shutdown is not required, skipping\n");

        return NV_OK;
    }

    FOR_EACH_INDEX_IN_MASK(32, linkId, pKernelNvlink->enabledLinks)
    {
        // Capture the links for lane shutdown through core lib if supported
        if (pKernelNvlink->getProperty(pKernelNvlink, PDB_PROP_KNVLINK_LANE_SHUTDOWN_ENABLED))
        {
            // Skip GPU in reset
            if (pKernelNvlink->nvlinkLinks[linkId].remoteEndInfo.deviceType ==
                                NV2080_CTRL_NVLINK_DEVICE_INFO_DEVICE_TYPE_GPU)
            {
                OBJGPU *pRemoteGpu = gpumgrGetGpuFromBusInfo(
                                        pKernelNvlink->nvlinkLinks[linkId].remoteEndInfo.domain,
                                        pKernelNvlink->nvlinkLinks[linkId].remoteEndInfo.bus,
                                        pKernelNvlink->nvlinkLinks[linkId].remoteEndInfo.device);
                if (API_GPU_IN_RESET_SANITY_CHECK(pRemoteGpu))
                {
                    continue;
                }
            }
            pLinks[count] = pKernelNvlink->nvlinkLinks[linkId].core_link;
            count++;
        }
        else
        {
            nvlink_lib_powerdown_links_from_active_to_swcfg(
                &pKernelNvlink->nvlinkLinks[linkId].core_link,
                1, NVLINK_STATE_CHANGE_SYNC);
        }
    }
    FOR_EACH_INDEX_IN_MASK_END;

    // Trigger lane shutdown through core lib if shutdown is supported
    if (pKernelNvlink->getProperty(pKernelNvlink, PDB_PROP_KNVLINK_LANE_SHUTDOWN_ENABLED) && (count > 0))
    {
        if (nvlink_lib_powerdown_links_from_active_to_off(
                pLinks, count, NVLINK_STATE_CHANGE_SYNC))
        {
            NV_PRINTF(LEVEL_ERROR, "Unable to turn off links for the GPU%d\n",
                      pGpu->gpuInstance);

            return NV_ERR_INVALID_STATE;
        }
    }

#endif

    return NV_OK;
}

/*!
 * @brief Reset all the connected links associated with the device
 *        through the nvlink core library.
 *
 * @param[in] pGpu           OBJGPU pointer
 * @param[in] pKernelNvlink  KernelNvlink pointer
 *
 * @return  NV_OK on success
 */
NV_STATUS
knvlinkCoreResetDeviceLinks_IMPL
(
    OBJGPU       *pGpu,
    KernelNvlink *pKernelNvlink
)
{
#if defined(INCLUDE_NVLINK_LIB)

    nvlink_link *pLinks[NVLINK_MAX_LINKS_SW] = {0};
    OBJSYS      *pSys  = SYS_GET_INSTANCE();
    NvU32        count = 0;
    NvU32        linkId;

    // Skip link reset when fabric manager is present, for nvlink versions below 4.0
    if ((pKernelNvlink->ipVerNvlink < NVLINK_VERSION_40 &&
         pSys->getProperty(pSys, PDB_PROP_SYS_FABRIC_IS_EXTERNALLY_MANAGED)) ||
        (pKernelNvlink->pNvlinkDev == NULL))
    {
        NV_PRINTF(LEVEL_INFO,
                  "core lib device is either externally managed or not present, skipping\n");

        return NV_OK;
    }

    // Return early if there are no enabled links
    if (pKernelNvlink->enabledLinks == 0)
    {
        NV_PRINTF(LEVEL_INFO, "No links to reset for the GPU%d\n",
                  pGpu->gpuInstance);

        return NV_OK;
    }

    // We only perform the link reset if lane shutdown is enabled
    if (pKernelNvlink->getProperty(pKernelNvlink, PDB_PROP_KNVLINK_LANE_SHUTDOWN_ENABLED))
    {
        FOR_EACH_INDEX_IN_MASK(32, linkId, pKernelNvlink->enabledLinks)
        {
            // Skip GPU in reset
            if (pKernelNvlink->nvlinkLinks[linkId].remoteEndInfo.deviceType ==
                                NV2080_CTRL_NVLINK_DEVICE_INFO_DEVICE_TYPE_GPU)
            {
                OBJGPU *pRemoteGpu = gpumgrGetGpuFromBusInfo(
                                        pKernelNvlink->nvlinkLinks[linkId].remoteEndInfo.domain,
                                        pKernelNvlink->nvlinkLinks[linkId].remoteEndInfo.bus,
                                        pKernelNvlink->nvlinkLinks[linkId].remoteEndInfo.device);
                if (API_GPU_IN_RESET_SANITY_CHECK(pRemoteGpu))
                {
                    continue;
                }
            }
            pLinks[count] = pKernelNvlink->nvlinkLinks[linkId].core_link;
            count++;
        }
        FOR_EACH_INDEX_IN_MASK_END;

        if (nvlink_lib_reset_links(pLinks, count, NVLINK_STATE_CHANGE_SYNC) && (count > 0))
        {
            NV_PRINTF(LEVEL_ERROR, "Unable to reset link(s) for GPU%d\n",
                      pGpu->gpuInstance);

            return NV_ERR_INVALID_STATE;
        }
    }
    else
    {
        NV_PRINTF(LEVEL_INFO,
                  "Lane shutdown not enabled, skipping link(s) reset for GPU%d\n",
                  pGpu->gpuInstance);

        return NV_ERR_INVALID_STATE;
    }

#endif

    return NV_OK;
}
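
/*
 * Teardown sketch (illustrative only): a hypothetical unload/teardown path would
 * quiesce the links by shutting them down and then resetting them, using the two
 * routines above. bForceShutdown = NV_TRUE bypasses the GFW-boot skip; the exact
 * call site and ordering in the driver may differ.
 *
 *     NV_CHECK_OK_OR_RETURN(LEVEL_ERROR,
 *         knvlinkCoreShutdownDeviceLinks(pGpu, pKernelNvlink, NV_TRUE));
 *     NV_CHECK_OK_OR_RETURN(LEVEL_ERROR,
 *         knvlinkCoreResetDeviceLinks(pGpu, pKernelNvlink));
 */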
/*!
 * @brief Retrain a link from either safe mode or off.
 *
 * @param[in] pGpu           OBJGPU pointer
 * @param[in] pKernelNvlink  KernelNvlink pointer
 * @param[in] linkId         Link ID of the link in question
 * @param[in] bFromOff       Whether link should be retrained from SAFE/OFF
 *
 * @returns NV_OK if link retraining was successful
 */
NV_STATUS
knvlinkRetrainLink_IMPL
(
    OBJGPU       *pGpu,
    KernelNvlink *pKernelNvlink,
    NvU32         linkId,
    NvBool        bFromOff
)
{
    NV_STATUS status = NV_OK;

    // If NVLINK_LIB isn't enabled, we just execute the prologue and return.
    _knvlinkRetrainLinkPrologue(pGpu, pKernelNvlink, linkId);

    OBJSYS *pSys = SYS_GET_INSTANCE();

    // If fabric is managed by FM
    if (pSys->getProperty(pSys, PDB_PROP_SYS_FABRIC_IS_EXTERNALLY_MANAGED))
    {
#if defined(INCLUDE_NVLINK_LIB)

        //
        // Notify FM for link re-training.
        //
        // Note, at this point all DL interrupts should be disabled. The interrupts
        // will be enabled through nvlinkCoreReenableLinkInterruptsCallback only if
        // links can be successfully re-trained.
        //
        // It is the responsibility of FM to highlight link re-training failures to
        // the system admin. Hence, we shouldn't be logging Xid in this case.
        //
        // It is worth noting that there is no race between the interrupt
        // enable/disable register updates, as we notify FM only after disabling
        // interrupts.
        //
        gpuNotifySubDeviceEvent(pGpu,
                                NV2080_NOTIFIERS_NVLINK_ERROR_RECOVERY_REQUIRED,
                                NULL, 0, 0, (NvV16)NV2080_CTRL_NVLINK_UNIT_DL);

        return NV_OK;
#endif
    }

#if defined(INCLUDE_NVLINK_LIB)
    //
    // If this is a slave endpoint requesting the retrain, kick off a request
    // to the master instead. There is no need to (and indeed, we should not)
    // hold the master endpoint lock here.
    //
    if (!pKernelNvlink->nvlinkLinks[linkId].core_link->master)
    {
        nvlink_link_change *link_change;
        nvlink_link *slave, *master;

        slave = pKernelNvlink->nvlinkLinks[linkId].core_link;
        if (nvlink_lib_get_link_master(slave, &master) != NVL_SUCCESS)
        {
            NV_PRINTF(LEVEL_ERROR,
                      "link master could not be found from GPU%u link %u\n",
                      gpuGetInstance(pGpu), linkId);

            return NV_ERR_INVALID_STATE;
        }

        NV_ASSERT_OR_RETURN(master != slave, NV_ERR_INVALID_STATE);

        link_change = &slave->link_change;
        link_change->slave  = slave;
        link_change->master = master;
        link_change->change_type = bFromOff ? nvlink_retrain_from_off :
                                              nvlink_retrain_from_safe;

        if (master->link_handlers->queue_link_change(link_change) != NVL_SUCCESS)
        {
            return NV_ERR_GENERIC;
        }

        //
        // Because the link retrain request to the master is asynchronous,
        // tell the caller they'll need to wait.
        //
        return NV_WARN_MORE_PROCESSING_REQUIRED;
    }
#endif

    if (bFromOff)
    {
        status = knvlinkRetrainLinkFromOff(pGpu, pKernelNvlink, linkId);
    }
    else
    {
        status = knvlinkRetrainLinkFromSafe(pGpu, pKernelNvlink, linkId);
    }

    return status;
}
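
/*
 * Caller sketch (illustrative only): because a retrain requested from the slave
 * endpoint is forwarded to the master asynchronously, a hypothetical
 * error-recovery path has to tolerate NV_WARN_MORE_PROCESSING_REQUIRED:
 *
 *     NV_STATUS status = knvlinkRetrainLink(pGpu, pKernelNvlink, linkId, NV_FALSE);
 *     if (status == NV_WARN_MORE_PROCESSING_REQUIRED)
 *     {
 *         // Retrain was queued to the master endpoint; completion is reported later.
 *     }
 */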
/*!
 * @brief Floorsweep the nvlink config for the chip
 *
 * @param[in]  pGpu                      OBJGPU pointer
 * @param[in]  pKernelNvlink             KernelNvlink pointer
 * @param[in]  numLinksPerIoctrl         number of total links found in discovery
 * @param[out] pNumActiveLinksPerIoctrl  number of links needed to be active
 *
 * @returns On success, NV_OK.
 *          On failure, returns NV_ERR_XXX.
 */
NV_STATUS
knvlinkFloorSweep_IMPL
(
    OBJGPU       *pGpu,
    KernelNvlink *pKernelNvlink,
    NvU32         numLinksPerIoctrl,
    NvU32        *pNumActiveLinksPerIoctrl
)
{
#if defined(INCLUDE_NVLINK_LIB)
    NV_STATUS status = NV_OK;
    NvU32     linkId;
    NvU32     tmpDisabledLinkMask = 0;
    NvU32     tmpEnabledLinkMask  = 0;
    nvlink_conn_info conn_info;

    *pNumActiveLinksPerIoctrl = knvlinkGetNumActiveLinksPerIoctrl(pGpu, pKernelNvlink);
    if (!knvlinkIsFloorSweepingNeeded_HAL(pGpu, pKernelNvlink, *pNumActiveLinksPerIoctrl, numLinksPerIoctrl))
    {
        return NV_OK;
    }

    // What matters here is triggering discovery, not the returned connection info
    FOR_EACH_INDEX_IN_MASK(32, linkId, pKernelNvlink->enabledLinks)
    {
        nvlink_lib_discover_and_get_remote_conn_info(
            pKernelNvlink->nvlinkLinks[linkId].core_link, &conn_info, 0);
    }
    FOR_EACH_INDEX_IN_MASK_END;

    //
    // This call must happen before the floorsweep to cache the NVLink bridge
    // information in physical RM.
    //
    knvlinkDirectConnectCheck_HAL(pGpu, pKernelNvlink);

    // Floorsweeping in the corelib will update the connection info that RM will query below
    (void)nvlink_lib_powerdown_floorswept_links_to_off(pKernelNvlink->pNvlinkDev);

    //
    // If a link in the enabledLinkMask is not trained after floorsweeping,
    // add it to a tmp disabled linkMask
    //

    // Get the link train status for the enabled link masks
    NV2080_CTRL_NVLINK_ARE_LINKS_TRAINED_PARAMS linkTrainedParams;

    portMemSet(&linkTrainedParams, 0, sizeof(linkTrainedParams));
    linkTrainedParams.linkMask    = pKernelNvlink->enabledLinks;
    linkTrainedParams.bActiveOnly = NV_TRUE;

    // Reset timeout to clear any accumulated timeouts from link init
    if (IS_GSP_CLIENT(pGpu))
    {
        threadStateResetTimeout(pGpu);
    }

    status = knvlinkExecGspRmRpc(pGpu, pKernelNvlink,
                                 NV2080_CTRL_CMD_NVLINK_ARE_LINKS_TRAINED,
                                 (void *)&linkTrainedParams,
                                 sizeof(linkTrainedParams));
    if (status != NV_OK)
    {
        NV_PRINTF(LEVEL_ERROR, "Failed to get the link train status for links\n");
        return status;
    }

    //
    // Create a temporary mask of all links that are now enabled:
    // classified as a link in active
    //
    FOR_EACH_INDEX_IN_MASK(32, linkId, pKernelNvlink->enabledLinks)
    {
        if (linkTrainedParams.bIsLinkActive[linkId])
        {
            tmpEnabledLinkMask |= BIT(linkId);
        }
        else
        {
            tmpDisabledLinkMask |= BIT(linkId);
        }
    }
    FOR_EACH_INDEX_IN_MASK_END;

    // Redo linkMasks based on the search above being the ground truth
    pKernelNvlink->enabledLinks = tmpEnabledLinkMask;

    //
    // Remove any links not in active in the tmpEnabledLinkMask from all
    // other link masks as these have been floorswept by the corelib
    //
    pKernelNvlink->disconnectedLinkMask  = tmpEnabledLinkMask;
    pKernelNvlink->initDisabledLinksMask = tmpDisabledLinkMask;

    status = knvlinkProcessInitDisabledLinks(pGpu, pKernelNvlink);
    if (status != NV_OK)
    {
        NV_ASSERT(status == NV_OK);
        return status;
    }

    // Re-sync the link masks with GSP
    status = knvlinkSyncLinkMasksAndVbiosInfo(pGpu, pKernelNvlink);
    if (status != NV_OK)
    {
        NV_ASSERT(status == NV_OK);
        return status;
    }

    //
    // Assert that the number of links in active is always less than
    // or equal to the number of active links on the chips
    //
    if (!(nvPopCount32(tmpEnabledLinkMask) <= *pNumActiveLinksPerIoctrl * nvPopCount32(pKernelNvlink->ioctrlMask)))
    {
        NV_PRINTF(LEVEL_INFO,
                  "Floorsweeping didn't work! enabledMaskCount: 0x%x and numActiveLinksTotal: 0x%x. "
                  "Current link info cached in SW: discoveredLinks: 0x%x; enabledLinks: 0x%x; "
                  "disconnectedLinks: 0x%x; initDisabledLinksMask: 0x%x\n",
                  nvPopCount32(tmpEnabledLinkMask),
                  *pNumActiveLinksPerIoctrl * nvPopCount32(pKernelNvlink->ioctrlMask),
                  pKernelNvlink->discoveredLinks, pKernelNvlink->enabledLinks,
                  pKernelNvlink->disconnectedLinkMask, pKernelNvlink->initDisabledLinksMask);

        return NV_ERR_NOT_READY;
    }

    pKernelNvlink->bFloorSwept = NV_TRUE;
#endif //INCLUDE_NVLINK_LIB
    return NV_OK;
}
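
/*
 * Worked example (illustrative only) of the invariant checked at the end of
 * knvlinkFloorSweep_IMPL: with ioctrlMask = 0x3 (two IOCTRLs) and
 * *pNumActiveLinksPerIoctrl = 4, at most 4 * 2 = 8 links may remain active, so
 * nvPopCount32(tmpEnabledLinkMask) must be <= 8 after floorsweeping; otherwise
 * the function bails out with NV_ERR_NOT_READY.
 */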
/*!
 * @brief Retrain the link from OFF state
 *
 * @param[in] pGpu           OBJGPU pointer
 * @param[in] pKernelNvlink  KernelNvlink pointer
 * @param[in] linkId         Link ID of the link in question
 *
 * @returns NV_OK if link retraining was successful
 */
NV_STATUS
knvlinkRetrainLinkFromOff
(
    OBJGPU       *pGpu,
    KernelNvlink *pKernelNvlink,
    NvU32         linkId
)
{

    return NV_OK;
}

/*!
 * @brief Retrain the link from SAFE state
 *
 * @param[in] pGpu           OBJGPU pointer
 * @param[in] pKernelNvlink  KernelNvlink pointer
 * @param[in] linkId         Link ID of the link in question
 *
 * @returns NV_OK if link retraining was successful
 */
NV_STATUS
knvlinkRetrainLinkFromSafe
(
    OBJGPU       *pGpu,
    KernelNvlink *pKernelNvlink,
    NvU32         linkId
)
{

    return NV_OK;
}

/*!
 * @brief _knvlinkRetrainLinkPrologue currently disables DL interrupts
 *
 * @param[in] pGpu           OBJGPU pointer
 * @param[in] pKernelNvlink  KernelNvlink pointer
 * @param[in] linkId         Link ID of the link in question
 */
static void
_knvlinkRetrainLinkPrologue
(
    OBJGPU       *pGpu,
    KernelNvlink *pKernelNvlink,
    NvU32         linkId
)
{

    return;
}

#if defined(INCLUDE_NVLINK_LIB)

/*!
 * @brief Activate the connections discovered in topology discovery
 *
 * @param[in] pGpu                OBJGPU pointer
 * @param[in] pKernelNvlink       KernelNvlink pointer
 * @param[in] bCheckDegradedMode  Whether to check for degraded mode
 *
 * @return  NV_OK on success
 */
static NV_STATUS
_knvlinkActivateDiscoveredConns
(
    OBJGPU       *pGpu,
    KernelNvlink *pKernelNvlink,
    NvBool        bCheckDegradedMode
)
{
    NvU32     initDisconnectedLinkMask = pKernelNvlink->disconnectedLinkMask;
    NvU32     switchLinkMasks          = 0;
    NvBool    bPeerUpdated             = NV_FALSE;
    NV_STATUS status                   = NV_OK;
    NvU32     linkId;

    //
    // Degraded Mode on LR10+ systems. Check for degraded mode if this was not done before
    // and if new connections were discovered from the core library.
    //
    if (bCheckDegradedMode)
    {
        status = knvlinkApplyNvswitchDegradedModeSettings_HAL(pGpu, pKernelNvlink,
                                                              &switchLinkMasks);
    }

    // We only need to look at links that are considered disconnected
    FOR_EACH_INDEX_IN_MASK(32, linkId, initDisconnectedLinkMask)
    {
        if (pKernelNvlink->nvlinkLinks[linkId].remoteEndInfo.bConnected)
        {
            // This link is now marked connected
            pKernelNvlink->disconnectedLinkMask &= ~NVBIT(linkId);

            if (pKernelNvlink->nvlinkLinks[linkId].remoteEndInfo.deviceType
                                                   == NVLINK_DEVICE_TYPE_GPU)
            {
                bPeerUpdated = NV_TRUE;

                //
                // Activate the p2p link. This includes copying the remote device
                // information for the remote link and enabling the post topology
                // steps on both the ends of the link.
                //
                // NOTE: HSHUB will not be setup for the discovered peer link here
                //       and will only be configured when a P2P object is created
                //
                status = _knvlinkActivateDiscoveredP2pConn(pGpu, pKernelNvlink, linkId);
            }
            else if (pKernelNvlink->nvlinkLinks[linkId].remoteEndInfo.deviceType
                                                   == NVLINK_DEVICE_TYPE_NVSWITCH)
            {
                status = _knvlinkActivateDiscoveredSwitchConn(pGpu, pKernelNvlink, linkId);

                //
                // There is no need to mark the link as a master. On NVSwitch systems,
                // External Fabric Management should be enabled by default.
                //
                switchLinkMasks |= NVBIT(linkId);
            }
            else
            {
                //
                // Activate the sysmem link. This includes even training the link to
                // ACTIVE, since for sysmem links post-topology steps should be setup
                // only after ACTIVE
                //
                status = _knvlinkActivateDiscoveredSysmemConn(pGpu, pKernelNvlink, linkId);
            }

            // If any of the above failed, return failure
            if (status != NV_OK)
            {
                NV_PRINTF(LEVEL_ERROR,
                          "Failed to activate link%d on GPU%d!!!\n", linkId,
                          pGpu->gpuInstance);

                return status;
            }
        }
    }
    FOR_EACH_INDEX_IN_MASK_END;

#if defined(NVCPU_PPC64LE) || defined(NVCPU_AARCH64)
    if (pKernelNvlink->getProperty(pKernelNvlink, PDB_PROP_KNVLINK_SYSMEM_SUPPORT_ENABLED))
    {
        // Credits should be released after Active for sysmem
        status = knvlinkEnableLinksPostTopology_HAL(pGpu, pKernelNvlink, pKernelNvlink->enabledLinks);
        if (status != NV_OK)
        {
            return status;
        }

        // Enable SYSMEM links in HSHUB. On P9 this must happen after Active
        knvlinkUpdateCurrentConfig(pGpu, pKernelNvlink);
    }
#endif

    // If any new connection was discovered in this call
    if (initDisconnectedLinkMask != pKernelNvlink->disconnectedLinkMask)
    {
        if (pKernelNvlink->disconnectedLinkMask == pKernelNvlink->enabledLinks) // GPU degraded case
        {
            bPeerUpdated |= _knvlinkUpdateSwitchLinkMasksGpuDegraded(pGpu, pKernelNvlink);
        }
        else // other cases
        {
            bPeerUpdated |= _knvlinkUpdateSwitchLinkMasks(pGpu, pKernelNvlink,
                                                          switchLinkMasks);
        }

        _knvlinkPrintTopologySummary(pGpu, pKernelNvlink);

        //
        // Make sure we update the CE mappings for this GPU, if the known set
        // of peers has changed.
        //
        knvlinkUpdateCurrentConfig(pGpu, pKernelNvlink);
        if (bPeerUpdated)
        {
            //
            // Request that any peers updated also update their CE mappings,
            // since they now have a new peer.
            //
            _knvlinkUpdatePeerConfigs(pGpu, pKernelNvlink);
        }
    }

    return status;
}

/*!
 * @brief Activate the given P2P connection
 *        This function updates the RM state for the discovered P2P connection
 *        and enables post-topology steps on both ends of the connection. But,
 *        it does not configure HSHUB on any end of the connection. HSHUB will
 *        be configured only when a P2P object is created
 *
 * @param[in] pGpu           OBJGPU pointer
 * @param[in] pKernelNvlink  KernelNvlink pointer
 * @param[in] linkId         Link ID
 *
 * @return  NV_OK on success
 */
static NV_STATUS
_knvlinkActivateDiscoveredP2pConn
(
    OBJGPU       *pGpu,
    KernelNvlink *pKernelNvlink,
    NvU32         linkId
)
{
    OBJGPU       *pGpu0             = pGpu;
    OBJGPU       *pGpu1             = NULL;
    KernelNvlink *pKernelNvlink0    = GPU_GET_KERNEL_NVLINK(pGpu0);
    NV_STATUS     status            = NV_OK;
    NvBool        bUpdateConnStatus = NV_FALSE;
    NvU32         remoteLinkId;
    NvU32         gpuInst;

    // Get the remote OBJGPU and Nvlink
    for (gpuInst = 0; gpuInst < NV_MAX_DEVICES; gpuInst++)
    {
        pGpu1 = gpumgrGetGpu(gpuInst);

        if (pGpu1 &&
            // Just rely on PCIe DBDF values for detecting the remote
            (pKernelNvlink0->nvlinkLinks[linkId].remoteEndInfo.domain   == gpuGetDomain(pGpu1)) &&
            (pKernelNvlink0->nvlinkLinks[linkId].remoteEndInfo.bus      == gpuGetBus(pGpu1))    &&
            (pKernelNvlink0->nvlinkLinks[linkId].remoteEndInfo.device   == gpuGetDevice(pGpu1)) &&
            (pKernelNvlink0->nvlinkLinks[linkId].remoteEndInfo.function == 0))
        {
            KernelNvlink *pKernelNvlink1 = GPU_GET_KERNEL_NVLINK(pGpu1);

            // Map the remote GPU's instance number to the associated links on this GPU.
            status = knvlinkSetLinkMaskToPeer(pGpu0, pKernelNvlink0, pGpu1,
                                              (pKernelNvlink0->peerLinkMasks[gpuInst] | NVBIT(linkId)));
            if (status != NV_OK)
                return status;

            //
            // Post Topology enable on the local end of the link.
            // Needs to happen before HSHUB is setup for this link on any end.
            //
            status = knvlinkEnableLinksPostTopology_HAL(pGpu0, pKernelNvlink0, NVBIT(linkId));
            if (status != NV_OK)
            {
                return status;
            }

            // Set the remote device information for the remote device
            if (pKernelNvlink1)
            {
                remoteLinkId = pKernelNvlink0->nvlinkLinks[linkId].remoteEndInfo.linkNumber;

                // RPC into GSP-RM to update the link remote connection status only if it's required
                if (pKernelNvlink1->nvlinkLinks[remoteLinkId].remoteEndInfo.bConnected == NV_FALSE)
                    bUpdateConnStatus = NV_TRUE;

                // Set the PCI information for remote end
                pKernelNvlink1->nvlinkLinks[remoteLinkId].remoteEndInfo.bConnected  = NV_TRUE;
                pKernelNvlink1->nvlinkLinks[remoteLinkId].remoteEndInfo.domain      = pKernelNvlink0->pNvlinkDev->pciInfo.domain;
                pKernelNvlink1->nvlinkLinks[remoteLinkId].remoteEndInfo.bus         = pKernelNvlink0->pNvlinkDev->pciInfo.bus;
                pKernelNvlink1->nvlinkLinks[remoteLinkId].remoteEndInfo.device      = pKernelNvlink0->pNvlinkDev->pciInfo.device;
                pKernelNvlink1->nvlinkLinks[remoteLinkId].remoteEndInfo.function    = pKernelNvlink0->pNvlinkDev->pciInfo.function;
                pKernelNvlink1->nvlinkLinks[remoteLinkId].remoteEndInfo.pciDeviceId = pKernelNvlink0->pNvlinkDev->pciInfo.pciDeviceId;
                pKernelNvlink1->nvlinkLinks[remoteLinkId].remoteEndInfo.deviceType  = pKernelNvlink0->pNvlinkDev->type;
                pKernelNvlink1->nvlinkLinks[remoteLinkId].remoteEndInfo.chipSid     = pKernelNvlink0->nvlinkLinks[linkId].core_link->localSid;
                pKernelNvlink1->nvlinkLinks[remoteLinkId].remoteEndInfo.linkNumber  = linkId;

                // Update the DLPL revision in the connection information
                pKernelNvlink0->nvlinkLinks[linkId].remoteEndInfo.ipVerDlPl       = pKernelNvlink1->nvlinkLinks[remoteLinkId].ipVerDlPl;
                pKernelNvlink1->nvlinkLinks[remoteLinkId].remoteEndInfo.ipVerDlPl = pKernelNvlink0->nvlinkLinks[linkId].ipVerDlPl;

                if (bUpdateConnStatus)
                {
                    // RPC into GSP-RM to update the link remote connection status for pGpu1 for the given link
                    status = knvlinkUpdateLinkConnectionStatus(pGpu1, pKernelNvlink1, remoteLinkId);
                    if (status != NV_OK)
                    {
                        return status;
                    }
                }

                pKernelNvlink1->disconnectedLinkMask &= ~NVBIT(remoteLinkId);

                // Map this GPU's instance number to the associated link on the remote end.
                status = knvlinkSetLinkMaskToPeer(pGpu1, pKernelNvlink1, pGpu0,
                                                  (pKernelNvlink1->peerLinkMasks[gpuGetInstance(pGpu0)] | NVBIT(remoteLinkId)));
                if (status != NV_OK)
                    return status;

                //
                // Post Topology enable on the remote end of the link.
                // Needs to happen before HSHUB is setup for this link on any end.
                //
                status = knvlinkEnableLinksPostTopology_HAL(pGpu1, pKernelNvlink1, NVBIT(remoteLinkId));
                if (status != NV_OK)
                {
                    return status;
                }

                // Set the deviceUUID
                portMemCopy(pKernelNvlink1->nvlinkLinks[remoteLinkId].remoteEndInfo.devUuid,
                            NV_UUID_LEN,
                            pGpu0->gpuUuid.uuid,
                            NV_UUID_LEN);

                //
                // The master of a GPU <-> GPU link depends on instance number. This is so that when locking
                // (which requires the master to be locked before the slave), the lower GPU instance number
                // will always be locked first, which is how rmGpuLocksAcquire acquires them. For loopback,
                // fall back to link ID instead.
                //
                if ((gpuGetInstance(pGpu0) < gpuGetInstance(pGpu1)) ||
                    ((gpuGetInstance(pGpu0) == gpuGetInstance(pGpu1)) &&
                     (linkId < remoteLinkId)))
                {
                    NV_ASSERT(NVL_SUCCESS == nvlink_lib_set_link_master(
                        pKernelNvlink0->nvlinkLinks[linkId].core_link));
                }
                else if ((gpuGetInstance(pGpu1) < gpuGetInstance(pGpu0)) ||
                         ((gpuGetInstance(pGpu1) == gpuGetInstance(pGpu0)) &&
                          (remoteLinkId < linkId)))
                {
                    NV_ASSERT(NVL_SUCCESS == nvlink_lib_set_link_master(
                        pKernelNvlink1->nvlinkLinks[remoteLinkId].core_link));
                }

                break;
            }
        }
    }

    return status;
}

/*!
 * @brief Activate the given switch connection
 *
 * @param[in] pGpu           OBJGPU pointer
 * @param[in] pKernelNvlink  KernelNvlink pointer
 * @param[in] linkId         Link ID
 *
 * @return  NV_OK on success
 */
static NV_STATUS
_knvlinkActivateDiscoveredSwitchConn
(
    OBJGPU       *pGpu,
    KernelNvlink *pKernelNvlink,
    NvU32         linkId
)
{
    NV_STATUS status = NV_OK;

    // Post Topology enablement for switch links
    status = knvlinkEnableLinksPostTopology_HAL(pGpu, pKernelNvlink, NVBIT(linkId));
    if (status != NV_OK)
    {
        return status;
    }

    return NV_OK;
}

/*!
 * @brief Activate the given sysmem connection
 *        This function updates the RM state for the discovered sysmem
 *        connection and trains the connection to ACTIVE because, for a
 *        sysmem link, post-topology steps can only be configured after
 *        ACTIVE. HSHUB is also configured for the sysmem link here.
 *
 * @param[in] pGpu           OBJGPU pointer
 * @param[in] pKernelNvlink  KernelNvlink pointer
 * @param[in] linkId         Link ID
 *
 * @return  NV_OK on success
 */
static NV_STATUS
_knvlinkActivateDiscoveredSysmemConn
(
    OBJGPU       *pGpu,
    KernelNvlink *pKernelNvlink,
    NvU32         linkId
)
{
    NV_STATUS status = NV_OK;

    NV2080_CTRL_NVLINK_UPDATE_HSHUB_MUX_PARAMS    updateHshubMuxParams;
    NV2080_CTRL_NVLINK_SETUP_NVLINK_SYSMEM_PARAMS nvlinkSysmemParams;

    pKernelNvlink->sysmemLinkMask |= NVBIT(linkId);

    portMemSet(&nvlinkSysmemParams, 0, sizeof(nvlinkSysmemParams));
    nvlinkSysmemParams.sysmemLinkMask = pKernelNvlink->sysmemLinkMask;

    status = knvlinkExecGspRmRpc(pGpu, pKernelNvlink,
                                 NV2080_CTRL_CMD_NVLINK_SETUP_NVLINK_SYSMEM,
                                 (void *)&nvlinkSysmemParams,
                                 sizeof(nvlinkSysmemParams));
    if (status != NV_OK)
    {
        NV_PRINTF(LEVEL_ERROR, "Failed to setup HSHUB NVLink sysmem links state\n");
        return status;
    }

    // Always make the GPU side the master for NPU connections
    NV_ASSERT(NVL_SUCCESS == nvlink_lib_set_link_master(
        pKernelNvlink->nvlinkLinks[linkId].core_link));

    // Train SYSMEM links to Active, and only then enable traffic
    status = knvlinkTrainSysmemLinksToActive(pGpu, pKernelNvlink);
    if (status != NV_OK)
    {
        NV_PRINTF(LEVEL_ERROR,
                  "FAILED TO TRAIN CPU/SYSMEM LINKS TO ACTIVE on GPU%d!!!\n",
                  pGpu->gpuInstance);

        NV_ASSERT(0);
    }

    portMemSet(&updateHshubMuxParams, 0, sizeof(updateHshubMuxParams));
    updateHshubMuxParams.updateType = NV2080_CTRL_NVLINK_UPDATE_HSHUB_MUX_TYPE_PROGRAM;
    updateHshubMuxParams.bSysMem    = NV_TRUE;

    status = knvlinkExecGspRmRpc(pGpu, pKernelNvlink,
                                 NV2080_CTRL_CMD_NVLINK_UPDATE_HSHUB_MUX,
                                 (void *)&updateHshubMuxParams,
                                 sizeof(updateHshubMuxParams));
    return status;
}

/*!
 * @brief Transition the mask of links into sleep (L2) state
 *
 * @param[in] pGpu           OBJGPU pointer
 * @param[in] pKernelNvlink  KernelNvlink pointer
 * @param[in] linkMask       Mask of links
 *
 * @return NV_OK on success
 */
static NV_STATUS
_knvlinkEnterSleep
(
    OBJGPU       *pGpu,
    KernelNvlink *pKernelNvlink,
    NvU32         linkMask
)
{
    NV_STATUS retStatus = NV_OK;
    NvlStatus status    = NVL_SUCCESS;

    NV2080_CTRL_NVLINK_PROGRAM_BUFFERREADY_PARAMS      programBufferRdyParams;
    NV2080_CTRL_NVLINK_SAVE_RESTORE_HSHUB_STATE_PARAMS saveRestoreHshubStateParams;

    portMemSet(&programBufferRdyParams, 0, sizeof(programBufferRdyParams));
    programBufferRdyParams.flags        = NV2080_CTRL_NVLINK_PROGRAM_BUFFERREADY_FLAGS_SAVE;
    programBufferRdyParams.bSysmem      = NV_FALSE;
    programBufferRdyParams.peerLinkMask = linkMask;

    // Save Bufferready state for the mask of links entering L2
    status = knvlinkExecGspRmRpc(pGpu, pKernelNvlink,
                                 NV2080_CTRL_CMD_NVLINK_PROGRAM_BUFFERREADY,
                                 (void *)&programBufferRdyParams,
                                 sizeof(programBufferRdyParams));
    if (status != NV_OK)
        return status;

    portMemSet(&saveRestoreHshubStateParams, 0, sizeof(saveRestoreHshubStateParams));
    saveRestoreHshubStateParams.linkMask = linkMask;
    saveRestoreHshubStateParams.bSave    = NV_TRUE;

    // Save HSHUB SW state for the links which will need to be restored later
    status = knvlinkExecGspRmRpc(pGpu, pKernelNvlink,
                                 NV2080_CTRL_CMD_NVLINK_SAVE_RESTORE_HSHUB_STATE,
                                 (void *)&saveRestoreHshubStateParams,
                                 sizeof(saveRestoreHshubStateParams));
    if (status != NV_OK)
        return status;

    // In L2 Entry path
    pKernelNvlink->bL2Entry = NV_TRUE;

    // Put the mask of links of the device to sleep
    status = nvlink_lib_powerdown_links_from_active_to_L2(pKernelNvlink->pNvlinkDev,
                                                          linkMask,
                                                          NVLINK_STATE_CHANGE_ASYNC);
    if (status == NVL_MORE_PROCESSING_REQUIRED)
    {
        NV_PRINTF(LEVEL_INFO,
                  "Transition to L2 for GPU%d: linkMask 0x%x in progress... Waiting for "
                  "remote endpoints to request L2 entry\n", pGpu->gpuInstance,
                  linkMask);

        return NV_WARN_MORE_PROCESSING_REQUIRED;
    }

    if (status != NVL_SUCCESS)
    {
        NV_PRINTF(LEVEL_ERROR,
                  "Unable to put the linkmask 0x%x of GPU%d to SLEEP\n",
                  linkMask, pGpu->gpuInstance);

        return NV_ERR_GENERIC;
    }

    return retStatus;
}

/*!
 * @brief Wake up the mask of links from sleep (L2) state
 *
 * @param[in] pGpu           OBJGPU pointer
 * @param[in] pKernelNvlink  KernelNvlink pointer
 * @param[in] linkMask       Mask of links
 *
 * @return NV_OK on success
 */
static NV_STATUS
_knvlinkExitSleep
(
    OBJGPU       *pGpu,
    KernelNvlink *pKernelNvlink,
    NvU32         linkMask
)
{
    NvlStatus status              = NVL_SUCCESS;
    NvlStatus trainingStatus      = NVL_SUCCESS;
    NvU32     linkId;
    NvU32     remoteLinkId;
    NvU32     gpuInst;
    RMTIMEOUT timeout;
    NvU32     linkTrainingTimeout = 10000000;

    NV2080_CTRL_NVLINK_PROGRAM_BUFFERREADY_PARAMS      programBufferRdyParams;
    NV2080_CTRL_NVLINK_SAVE_RESTORE_HSHUB_STATE_PARAMS saveRestoreHshubStateParams;

    pKernelNvlink->bL2Entry = NV_FALSE;

    // Kick-off ALI if it is enabled
    if (pKernelNvlink->bEnableAli)
    {
        //
        // For each link, request a change to active.
        // Don't have to wait for the request to finish as links
        // will be queried via DLSTAT to know their status and training
        // progression.
        //
        FOR_EACH_INDEX_IN_MASK(32, linkId, linkMask)
        {
            status = knvlinkTrainLinksToActiveAli(pGpu, pKernelNvlink, NVBIT(linkId), NV_FALSE);
            if (status != NV_OK)
            {
                NV_PRINTF(LEVEL_ERROR,
                          "Failed to request Link %d to transition to active\n", linkId);
            }
#if defined(INCLUDE_NVLINK_LIB)
            pKernelNvlink->nvlinkLinks[linkId].core_link->bStateSaved = NV_FALSE;
#endif
        }
        FOR_EACH_INDEX_IN_MASK_END;

        //
        // Get all links that are past RxDet after L2 exit and poll on those
        // links until they reach active
        //
        if (knvlinkDiscoverPostRxDetLinks_HAL(pGpu, pKernelNvlink, pGpu) == NV_OK)
        {
            gpuSetTimeout(pGpu, linkTrainingTimeout, &timeout, IS_SILICON(pGpu) ?
                (GPU_TIMEOUT_FLAGS_BYPASS_THREAD_STATE | GPU_TIMEOUT_FLAGS_DEFAULT) : 0);
            do
            {

                status = gpuCheckTimeout(pGpu, &timeout);
                trainingStatus = knvlinkCheckTrainingIsComplete(pGpu, pGpu, pKernelNvlink);
                if (trainingStatus == NV_OK)
                {
                    break;
                }
                osSpinLoop();
            }
            while (status != NV_ERR_TIMEOUT);

            if (status == NV_ERR_TIMEOUT)
            {
                NV_PRINTF(LEVEL_ERROR, "Timed out while checking whether training is complete!\n");
            }
        }
    }
    else
    {
        // Wake up the mask of links of the device from sleep using legacy L2 exit
        status = nvlink_lib_train_links_from_L2_to_active(pKernelNvlink->pNvlinkDev,
                                                          linkMask,
                                                          NVLINK_STATE_CHANGE_ASYNC);
    }

    if (status == NVL_SUCCESS)
    {
        // Perform post-initialization setup for links that exited L2
        FOR_EACH_INDEX_IN_MASK(32, linkId, linkMask)
        {
            // Post topology link enable for pre-Ampere. This sets up buffer ready
            status = knvlinkEnableLinksPostTopology_HAL(pGpu, pKernelNvlink, NVBIT(linkId));
            if (status != NV_OK)
            {
                return status;
            }

            // Update the current NVLink configuration
            knvlinkUpdateCurrentConfig(pGpu, pKernelNvlink);

            // Perform post-topology initialization steps on the remote endpoint
            if (pKernelNvlink->nvlinkLinks[linkId].remoteEndInfo.deviceType == NVLINK_DEVICE_TYPE_GPU)
            {
                OBJGPU       *pGpu1          = NULL;
                KernelNvlink *pKernelNvlink1 = NULL;

                // Get the remote OBJGPU and Nvlink
                for (gpuInst = 0; gpuInst < NV_MAX_DEVICES; gpuInst++)
                {
                    pGpu1 = gpumgrGetGpu(gpuInst);

                    if (pGpu1 &&
                        (pKernelNvlink->nvlinkLinks[linkId].remoteEndInfo.domain   == gpuGetDomain(pGpu1) &&
                         pKernelNvlink->nvlinkLinks[linkId].remoteEndInfo.bus      == gpuGetBus(pGpu1) &&
                         pKernelNvlink->nvlinkLinks[linkId].remoteEndInfo.device   == gpuGetDevice(pGpu1) &&
                         pKernelNvlink->nvlinkLinks[linkId].remoteEndInfo.function == 0))
                    {
                        pKernelNvlink1 = GPU_GET_KERNEL_NVLINK(pGpu1);
                        remoteLinkId   = pKernelNvlink->nvlinkLinks[linkId].remoteEndInfo.linkNumber;

                        // Post topology link enable for pre-Ampere. This sets up buffer ready
                        status = knvlinkEnableLinksPostTopology_HAL(pGpu1, pKernelNvlink1, NVBIT(remoteLinkId));
                        if (status != NV_OK)
                        {
                            return status;
                        }

                        // Update the current NVLink configuration
                        knvlinkUpdateCurrentConfig(pGpu1, pKernelNvlink1);

                        break;
                    }
                }
            }
        }
        FOR_EACH_INDEX_IN_MASK_END;
    }

    //
    // Restore HSHUB *ONLY AFTER* links have been trained and post-topology setup is complete
    // on both ends of the link. Only then can HSHUB be configured for P2P on either side of the link.
    //
    if (status == NVL_SUCCESS)
    {
        portMemSet(&saveRestoreHshubStateParams, 0, sizeof(saveRestoreHshubStateParams));
        saveRestoreHshubStateParams.linkMask = linkMask;
        saveRestoreHshubStateParams.bSave    = NV_FALSE;

        // Restore HSHUB SW state for the links which exited L2 state
        status = knvlinkExecGspRmRpc(pGpu, pKernelNvlink,
                                     NV2080_CTRL_CMD_NVLINK_SAVE_RESTORE_HSHUB_STATE,
                                     (void *)&saveRestoreHshubStateParams,
                                     sizeof(saveRestoreHshubStateParams));
        if (status != NV_OK)
            return status;

        knvlinkUpdateCurrentConfig(pGpu, pKernelNvlink);

        portMemSet(&programBufferRdyParams, 0, sizeof(programBufferRdyParams));
        programBufferRdyParams.flags        = NV2080_CTRL_NVLINK_PROGRAM_BUFFERREADY_FLAGS_RESTORE;
        programBufferRdyParams.bSysmem      = NV_FALSE;
        programBufferRdyParams.peerLinkMask = linkMask;

        // Restore Bufferready state for the links which exited L2 state
        status = knvlinkExecGspRmRpc(pGpu, pKernelNvlink,
                                     NV2080_CTRL_CMD_NVLINK_PROGRAM_BUFFERREADY,
                                     (void *)&programBufferRdyParams,
                                     sizeof(programBufferRdyParams));
        if (status != NV_OK)
            return status;

        FOR_EACH_INDEX_IN_MASK(32, linkId, linkMask)
        {
            if (pKernelNvlink->nvlinkLinks[linkId].remoteEndInfo.deviceType == NVLINK_DEVICE_TYPE_GPU)
            {
                OBJGPU       *pGpu1          = NULL;
                KernelNvlink *pKernelNvlink1 = NULL;

                // Get the remote OBJGPU and Nvlink
                for (gpuInst = 0; gpuInst < NV_MAX_DEVICES; gpuInst++)
                {
                    pGpu1 = gpumgrGetGpu(gpuInst);

                    if (pGpu1 &&
                        (pKernelNvlink->nvlinkLinks[linkId].remoteEndInfo.domain   == gpuGetDomain(pGpu1) &&
                         pKernelNvlink->nvlinkLinks[linkId].remoteEndInfo.bus      == gpuGetBus(pGpu1) &&
                         pKernelNvlink->nvlinkLinks[linkId].remoteEndInfo.device   == gpuGetDevice(pGpu1) &&
                         pKernelNvlink->nvlinkLinks[linkId].remoteEndInfo.function == 0))
                    {
                        pKernelNvlink1 = GPU_GET_KERNEL_NVLINK(pGpu1);
                        remoteLinkId   = pKernelNvlink->nvlinkLinks[linkId].remoteEndInfo.linkNumber;

                        portMemSet(&saveRestoreHshubStateParams, 0, sizeof(saveRestoreHshubStateParams));
                        saveRestoreHshubStateParams.linkMask = NVBIT(remoteLinkId);
                        saveRestoreHshubStateParams.bSave    = NV_FALSE;

                        // Restore HSHUB SW state for the links which exited L2 state
                        status = knvlinkExecGspRmRpc(pGpu1, pKernelNvlink1,
                                                     NV2080_CTRL_CMD_NVLINK_SAVE_RESTORE_HSHUB_STATE,
                                                     (void *)&saveRestoreHshubStateParams,
                                                     sizeof(saveRestoreHshubStateParams));
                        if (status != NV_OK)
                            return status;

                        knvlinkUpdateCurrentConfig(pGpu1, pKernelNvlink1);

                        portMemSet(&programBufferRdyParams, 0, sizeof(programBufferRdyParams));
                        programBufferRdyParams.flags        = NV2080_CTRL_NVLINK_PROGRAM_BUFFERREADY_FLAGS_RESTORE;
                        programBufferRdyParams.bSysmem      = NV_FALSE;
                        programBufferRdyParams.peerLinkMask = NVBIT(remoteLinkId);

                        //
                        // Restore Buffer Ready state for the links from cached SW state after HSHUB
                        // settings have been restored
                        //
                        status = knvlinkExecGspRmRpc(pGpu1, pKernelNvlink1,
                                                     NV2080_CTRL_CMD_NVLINK_PROGRAM_BUFFERREADY,
                                                     (void *)&programBufferRdyParams,
                                                     sizeof(programBufferRdyParams));
                        if (status != NV_OK)
                            return status;

                        break;
                    }
                }
            }
        }
        FOR_EACH_INDEX_IN_MASK_END;
    }

    if (status == NVL_MORE_PROCESSING_REQUIRED)
    {
        NV_PRINTF(LEVEL_INFO,
                  "Transition to L0 for GPU%d: linkMask 0x%x in progress... Waiting for "
                  "remote endpoints to request L2 exit\n", pGpu->gpuInstance,
                  linkMask);

        return NV_WARN_MORE_PROCESSING_REQUIRED;
    }

    if (status != NVL_SUCCESS)
    {
        NV_PRINTF(LEVEL_ERROR,
                  "Unable to wakeup the linkmask 0x%x of GPU%d from SLEEP\n",
                  linkMask, pGpu->gpuInstance);

        return NV_ERR_GENERIC;
    }

    return NV_OK;
}

/*!
 * @brief Updates GPU peer info (peerMask) based on switchLinkMasks
 *
 * @param[in] pGpu             OBJGPU pointer
 * @param[in] pKernelNvlink    KernelNvlink pointer
 * @param[in] switchLinkMasks  Mask of switch links
 *
 * @return Returns NV_TRUE if peerMask is updated
 */
static NvBool
_knvlinkUpdateSwitchLinkMasks
(
    OBJGPU       *pGpu,
    KernelNvlink *pKernelNvlink,
    NvU32         switchLinkMasks
)
{
    KernelNvlink *pKernelNvlink1 = NULL;
    OBJGPU       *pGpu1          = NULL;
    NvBool        bPeerUpdated   = NV_FALSE;
    NV_STATUS     status         = NV_OK;
    NvU32         gpuInst;

    //
    // On NvSwitch systems, all the enabled and connected GPU links should
    // go through NvSwitch. We don't support GPU<->GPU or GPU<->NPU direct
    // connections on NvSwitch systems.
    //
    if (!knvlinkIsGpuConnectedToNvswitch(pGpu, pKernelNvlink))
    {
        return bPeerUpdated;
    }

    for (gpuInst = 0; gpuInst < NV_MAX_DEVICES; gpuInst++)
    {
        pGpu1 = gpumgrGetGpu(gpuInst);
        if (!pGpu1)
        {
            continue;
        }

        // No support for SLI P2P on nvswitch systems.
        if (IsSLIEnabled(pGpu1))
        {
            continue;
        }

        pKernelNvlink1 = GPU_GET_KERNEL_NVLINK(pGpu1);

        if (!pKernelNvlink1)
        {
            continue;
        }

        if (!pKernelNvlink1->discoveredLinks)
        {
            continue;
        }

        if (!knvlinkIsGpuConnectedToNvswitch(pGpu1, pKernelNvlink1))
        {
            continue;
        }

        // Update local peerLinkMasks.
        status = knvlinkSetLinkMaskToPeer(pGpu, pKernelNvlink, pGpu1, switchLinkMasks);
        if (status != NV_OK)
            return NV_FALSE;

        //
        // Update remote peerLinkMasks only if a remote endpoint is connected.
        //
        // We are deliberately picking up the loopback peerLinkMask, because it
        // represents the actual nvswitch connection mask for that GPU and
        // guarantees that the endpoint is connected to nvswitch.
        //
        status = knvlinkSetLinkMaskToPeer(pGpu1, pKernelNvlink1, pGpu,
                                          pKernelNvlink1->peerLinkMasks[gpuGetInstance(pGpu1)]);
        if (status != NV_OK)
            return NV_FALSE;

        bPeerUpdated = NV_TRUE;
    }

    return bPeerUpdated;
}

/*!
 * @brief Updates GPU peer info (peerMask) when a GPU is degraded
 *
 * @param[in] pGpu           OBJGPU pointer
 * @param[in] pKernelNvlink  KernelNvlink pointer
 *
 * @return Returns NV_TRUE if peerMask is updated
 */
static NvBool
_knvlinkUpdateSwitchLinkMasksGpuDegraded
(
    OBJGPU       *pGpu,
    KernelNvlink *pKernelNvlink
)
{
    KernelNvlink *pKernelNvlink1 = NULL;
    OBJGPU       *pGpu1          = NULL;
    NvBool        bPeerUpdated   = NV_FALSE;
    NV_STATUS     status         = NV_OK;
    NvU32         gpuInst;

    for (gpuInst = 0; gpuInst < NV_MAX_DEVICES; gpuInst++)
    {
        pGpu1 = gpumgrGetGpu(gpuInst);
        if (!pGpu1)
        {
            continue;
        }

        // No support for SLI P2P on nvswitch systems.
        if (IsSLIEnabled(pGpu1))
        {
            continue;
        }

        pKernelNvlink1 = GPU_GET_KERNEL_NVLINK(pGpu1);

        if (!pKernelNvlink1)
        {
            continue;
        }

        if (!pKernelNvlink1->discoveredLinks)
        {
            continue;
        }

        if (!knvlinkIsGpuConnectedToNvswitch(pGpu1, pKernelNvlink1))
        {
            continue;
        }

        // Update local peerLinkMasks.
        status = knvlinkSetLinkMaskToPeer(pGpu, pKernelNvlink, pGpu1, 0);
        if (status != NV_OK)
            return NV_FALSE;

        // Update remote peerLinkMasks
        status = knvlinkSetLinkMaskToPeer(pGpu1, pKernelNvlink1, pGpu, 0);
        if (status != NV_OK)
            return NV_FALSE;

        bPeerUpdated = NV_TRUE;
    }

    return bPeerUpdated;
}

/*!
 * For each known peer, update their configurations, now that another
 * one of their peers (this GPU) has been initialized.
 *
 * This will update the PCE-LCE mappings, but it will not trigger any
 * HSHUB updates since peer IDs shouldn't have been allocated at this
 * point.
 *
 * @param[in] pGpu           OBJGPU pointer
 * @param[in] pKernelNvlink  KernelNvlink pointer
 */
static void
_knvlinkUpdatePeerConfigs
(
    OBJGPU       *pGpu,
    KernelNvlink *pKernelNvlink
)
{
    NvU32 gpuInst;

    for (gpuInst = 0; gpuInst < NV_ARRAY_ELEMENTS(pKernelNvlink->peerLinkMasks); gpuInst++)
    {
        if (pKernelNvlink->peerLinkMasks[gpuInst] != 0)
        {
            OBJGPU *pRemoteGpu = gpumgrGetGpu(gpuInst);

            if (pRemoteGpu != NULL)
            {
                KernelNvlink *pRemoteKernelNvlink = GPU_GET_KERNEL_NVLINK(pRemoteGpu);

                if (pRemoteKernelNvlink != NULL)
                {
                    NV_PRINTF(LEVEL_INFO,
                              "GPU%u requesting GPU%u NVLINK config update\n",
                              gpuGetInstance(pGpu),
                              gpuGetInstance(pRemoteGpu));

                    _knvlinkPrintTopologySummary(pRemoteGpu, pRemoteKernelNvlink);

                    // Update CE mappings on remote GPUs since we have new connections
                    knvlinkUpdateCurrentConfig(pRemoteGpu, pRemoteKernelNvlink);
                }
            }
        }
    }
}

/*!
 * Print the nvlink topology for this GPU
 *
 * @param[in] pGpu           OBJGPU pointer
 * @param[in] pKernelNvlink  KernelNvlink pointer
 */
static void
_knvlinkPrintTopologySummary
(
    OBJGPU       *pGpu,
    KernelNvlink *pKernelNvlink
)
{
#if NV_PRINTF_ENABLED

    NvU32     i;
    NV_STATUS status;

    if (DBG_RMMSG_CHECK(LEVEL_INFO) == 0)
    {
        return;
    }

    NV_PRINTF(LEVEL_INFO, "GPU%02u cached topology:\n", gpuGetInstance(pGpu));

    NV2080_CTRL_NVLINK_HSHUB_GET_SYSMEM_NVLINK_MASK_PARAMS params;
    portMemSet(&params, 0, sizeof(params));

    status = knvlinkExecGspRmRpc(pGpu, pKernelNvlink,
                                 NV2080_CTRL_CMD_NVLINK_HSHUB_GET_SYSMEM_NVLINK_MASK,
                                 (void *)&params, sizeof(params));
    if (status != NV_OK)
    {
        NV_PRINTF(LEVEL_ERROR, "Unable to determine sysmem link mask\n");
        return;
    }

    // Print the discovered sysmem links
    if (params.sysmemLinkMask != 0)
    {
        NV_PRINTF(LEVEL_INFO, " sysmem link mask : 0x%x\n", params.sysmemLinkMask);
    }

    // Print the discovered p2p links
    for (i = 0; i < NV_ARRAY_ELEMENTS(pKernelNvlink->peerLinkMasks); i++)
    {
        if (pKernelNvlink->peerLinkMasks[i] != 0)
        {
            NV_PRINTF(LEVEL_INFO, " GPU%02u link mask : 0x%x\n", i,
                      pKernelNvlink->peerLinkMasks[i]);
        }
    }

    // Print the links which do not have a connection yet
    if (pKernelNvlink->disconnectedLinkMask != 0)
    {
        NV_PRINTF(LEVEL_INFO, " unknown link mask: 0x%x\n",
                  pKernelNvlink->disconnectedLinkMask);
    }

#endif
}

#endif