/*
 * SPDX-FileCopyrightText: Copyright (c) 2020-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 * SPDX-License-Identifier: MIT
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 */

#define NVOC_KERNEL_NVLINK_H_PRIVATE_ACCESS_ALLOWED

#include "os/os.h"
#include "core/hal.h"
#include "core/info_block.h"
#include "core/locks.h"
#include "core/thread_state.h"
#include "gpu/gpu.h"

#include "kernel/gpu/nvlink/kernel_nvlink.h"
#include "kernel/gpu/nvlink/kernel_ioctrl.h"

#include "nverror.h"

#if defined(INCLUDE_NVLINK_LIB)
#include "nvlink_os.h"
#endif

static void _knvlinkRetrainLinkPrologue(OBJGPU *, KernelNvlink *, NvU32);

#if defined(INCLUDE_NVLINK_LIB)

static NV_STATUS _knvlinkActivateDiscoveredConns(OBJGPU *, KernelNvlink *, NvBool);
static NV_STATUS _knvlinkActivateDiscoveredP2pConn(OBJGPU *, KernelNvlink *, NvU32);
static NV_STATUS _knvlinkActivateDiscoveredSwitchConn(OBJGPU *, KernelNvlink *, NvU32);
static NV_STATUS _knvlinkActivateDiscoveredSysmemConn(OBJGPU *, KernelNvlink *, NvU32);
static NV_STATUS _knvlinkEnterSleep(OBJGPU *, KernelNvlink *, NvU32);
static NV_STATUS _knvlinkExitSleep(OBJGPU *, KernelNvlink *, NvU32);
static NvBool    _knvlinkUpdateSwitchLinkMasks(OBJGPU *, KernelNvlink *, NvU32);
static NvBool    _knvlinkUpdateSwitchLinkMasksGpuDegraded(OBJGPU *, KernelNvlink *);
static void      _knvlinkUpdatePeerConfigs(OBJGPU *, KernelNvlink *);
static void      _knvlinkPrintTopologySummary(OBJGPU *, KernelNvlink *);

#endif

/*!
 * @brief Get the nvlink connections for the GPU.
 *        This function calls into the core library to trigger topology discovery
 *        on the set of links that have been registered with the core library.
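 *
 *        Link masks throughout this file are walked with the
 *        FOR_EACH_INDEX_IN_MASK macro; a minimal sketch of the idiom (purely
 *        illustrative, the mask shown is just one of several used below):
 *
 *            NvU32 linkId;
 *            FOR_EACH_INDEX_IN_MASK(32, linkId, pKernelNvlink->disconnectedLinkMask)
 *            {
 *                // linkId visits the bit position of each set bit in the 32-bit mask
 *            }
 *            FOR_EACH_INDEX_IN_MASK_END;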
 *
 * @param[in]  pGpu           OBJGPU pointer
 * @param[in]  pKernelNvlink  KernelNvlink pointer
 *
 * @return  NV_OK on success
 */
NV_STATUS
knvlinkCoreGetRemoteDeviceInfo_IMPL
(
    OBJGPU       *pGpu,
    KernelNvlink *pKernelNvlink
)
{
    NV_STATUS status = NV_OK;

#if defined(INCLUDE_NVLINK_LIB)

    OBJSYS *pSys                  = SYS_GET_INSTANCE();
    NvU32   flags                 = NVLINK_STATE_CHANGE_SYNC;
    NvBool  bNvswitchProxyPresent = NV_FALSE;
    NvBool  bUpdateConnStatus     = NV_FALSE;
    NvBool  bCheckDegradedMode    = NV_FALSE;
    nvlink_conn_info conn_info    = {0};
    NvU32   linkId;
    NvU32   numActiveLinksPerIoctrl = 0;
    NvU32   numLinksPerIoctrl       = 0;

    //
    // Topology discovery should NOT be triggered in RTD3/FGC6 exit path if L2 is
    // supported. The remote information will be restored when RM state is restored.
    //
    if (!knvlinkPoweredUpForD3_HAL(pGpu, pKernelNvlink))
    {
        //
        // Optimization: Check for nvlink proxy only when system fabric is externally
        // managed. This would avoid RPCs in non-nvswitch cases.
        //
        if (pSys->getProperty(pSys, PDB_PROP_SYS_FABRIC_IS_EXTERNALLY_MANAGED))
        {
            bNvswitchProxyPresent = knvlinkIsNvswitchProxyPresent(pGpu, pKernelNvlink);
        }

        if (pKernelNvlink->bEnableAli)
        {
            // Update the post Rx Det link Mask for the GPU
            knvlinkUpdatePostRxDetectLinkMask(pGpu, pKernelNvlink);
        }

        if (pKernelNvlink->ipVerNvlink >= NVLINK_VERSION_40 &&
            !bNvswitchProxyPresent &&
            !pSys->getProperty(pSys, PDB_PROP_SYS_FABRIC_IS_EXTERNALLY_MANAGED) &&
            pKernelNvlink->pNvlinkDev != NULL &&
            !pKernelNvlink->bFloorSwept)
        {
            numLinksPerIoctrl = knvlinkGetTotalNumLinksPerIoctrl(pGpu, pKernelNvlink);
            status = knvlinkFloorSweep(pGpu, pKernelNvlink,
                                       numLinksPerIoctrl, &numActiveLinksPerIoctrl);

            if (status != NV_OK)
            {
                NV_PRINTF(LEVEL_ERROR, "Failed to floorsweep valid nvlink config!\n");
                return NV_ERR_NOT_READY;
            }
        }

        // We only need to look at links that are still considered disconnected
        FOR_EACH_INDEX_IN_MASK(32, linkId, pKernelNvlink->disconnectedLinkMask)
        {
            //
            // If we are using ALI training, make sure the
            // disconnected link is a valid link that is progressing
            // past RxDet
            //
            if (pKernelNvlink->bEnableAli &&
                !(pKernelNvlink->postRxDetLinkMask & NVBIT(linkId)))
            {
                continue;
            }

            bUpdateConnStatus = NV_FALSE;

            if (pKernelNvlink->nvlinkLinks[linkId].core_link)
            {
                // Call the core library to get the remote end information
                if (pSys->getProperty(pSys, PDB_PROP_SYS_FABRIC_IS_EXTERNALLY_MANAGED))
                {
                    if (gpuFabricProbeIsSupported(pGpu))
                    {
                        //
                        // If FM doesn't talk to NVLink driver using control calls
                        // (i.e. uses NVLink inband comm instead) such as
                        // IOCTL CTRL_NVLINK_DISCOVER_INTRANODE_CONNS,
                        // discover remote information explicitly.
                        //
                        nvlink_lib_discover_and_get_remote_conn_info(
                            pKernelNvlink->nvlinkLinks[linkId].core_link, &conn_info, flags);
                    }
                    else
                    {
                        nvlink_lib_get_remote_conn_info(
                            pKernelNvlink->nvlinkLinks[linkId].core_link, &conn_info);
                    }

                    //
                    // nvlink_lib_get_remote_conn_info could fail to return connection info if
                    // it runs on a shared-NVSwitch virtualization model (HGX-2) where GPU nodes
                    // can't see NVSwitches. In that case, examine the NVLink scratch register
                    // for connectivity information.
                    //
                    if (!conn_info.bConnected &&
                        (bNvswitchProxyPresent ||
                         GPU_IS_NVSWITCH_DETECTED(pGpu)))
                    {
                        conn_info.bConnected  = NV_TRUE;
                        conn_info.deviceType  = NVLINK_DEVICE_TYPE_NVSWITCH;
                        conn_info.pciDeviceId = NV_U32_MAX;
                        conn_info.domain      = NV_U32_MAX;
                        conn_info.bus         = NV_U16_MAX;
                        conn_info.device      = NV_U16_MAX;
                        conn_info.function    = NV_U16_MAX;
                    }

                    //
                    // New connection is discovered from core library. But we don't know if this
                    // connection was shut down or reset by fabric manager while enabling degraded
                    // mode. So, we have to check for degraded mode.
                    //
                    if (conn_info.bConnected)
                    {
                        bCheckDegradedMode = NV_TRUE;
                    }
                }
                else
                {
                    // Asynchronous link initialization for IP 2.2
                    if (pKernelNvlink->ipVerNvlink == NVLINK_VERSION_22)
                    {
                        flags = NVLINK_STATE_CHANGE_ASYNC;
                    }

                    nvlink_lib_discover_and_get_remote_conn_info(
                        pKernelNvlink->nvlinkLinks[linkId].core_link, &conn_info, flags);
                }

                // RPC into GSP-RM to update the link connected status only if it's required
                if (pKernelNvlink->nvlinkLinks[linkId].remoteEndInfo.bConnected != conn_info.bConnected)
                    bUpdateConnStatus = NV_TRUE;

                pKernelNvlink->nvlinkLinks[linkId].remoteEndInfo.bConnected = conn_info.bConnected;

                if (pKernelNvlink->nvlinkLinks[linkId].remoteEndInfo.bConnected)
                {
                    // Update the RM cache for the remote device information for the link
                    pKernelNvlink->nvlinkLinks[linkId].remoteEndInfo.domain      = conn_info.domain;
                    pKernelNvlink->nvlinkLinks[linkId].remoteEndInfo.bus         = conn_info.bus;
                    pKernelNvlink->nvlinkLinks[linkId].remoteEndInfo.device      = conn_info.device;
                    pKernelNvlink->nvlinkLinks[linkId].remoteEndInfo.function    = conn_info.function;
                    pKernelNvlink->nvlinkLinks[linkId].remoteEndInfo.pciDeviceId = conn_info.pciDeviceId;
                    pKernelNvlink->nvlinkLinks[linkId].remoteEndInfo.deviceType  = conn_info.deviceType;
                    pKernelNvlink->nvlinkLinks[linkId].remoteEndInfo.linkNumber  = conn_info.linkNumber;
                    pKernelNvlink->nvlinkLinks[linkId].remoteEndInfo.chipSid     = conn_info.chipSid;

                    nvlink_memcpy(pKernelNvlink->nvlinkLinks[linkId].remoteEndInfo.devUuid,
                                  conn_info.devUuid,
                                  NV_UUID_LEN);
                }

                if (bUpdateConnStatus)
                {
                    // RPC into GSP-RM to update the link remote connection status for pGpu
                    status = knvlinkUpdateLinkConnectionStatus(pGpu, pKernelNvlink, linkId);
                    if (status != NV_OK)
                    {
                        return status;
                    }
                }
            }
        }
        FOR_EACH_INDEX_IN_MASK_END;
    }
    else
    {
        NV_PRINTF(LEVEL_INFO,
                  "L2 supported. Skip topology discovery on GPU%d in RTD3/FGC6 exit\n",
                  pGpu->gpuInstance);
    }

    //
    // Update the RM cache for the discovered connections and then activate
    // those connections. This includes all the post-topology settings like
    // buffer-ready and interrupt enables
    //
    status = _knvlinkActivateDiscoveredConns(pGpu, pKernelNvlink, bCheckDegradedMode);
    if (status != NV_OK)
    {
        NV_PRINTF(LEVEL_INFO,
                  "Failed to activate the discovered connections on GPU%d\n",
                  pGpu->gpuInstance);
    }

#endif

    return status;
}

/*!
 * @brief Train all the connected sysmem links associated with the device
 *        to active through the nvlink core library.
271 * 272 * @param[in] pGpu OBJGPU pointer 273 * @param[in] pKernelNvlink KernelNvlink pointer 274 * 275 * @return NV_OK on success 276 */ 277 NV_STATUS 278 knvlinkTrainSysmemLinksToActive_IMPL 279 ( 280 OBJGPU *pGpu, 281 KernelNvlink *pKernelNvlink 282 ) 283 { 284 #if defined(INCLUDE_NVLINK_LIB) 285 286 OBJSYS *pSys = SYS_GET_INSTANCE(); 287 NvU32 i; 288 289 // On Fmodel, sysmem link training is not supported 290 if (IS_FMODEL(pGpu)) 291 { 292 NV_PRINTF(LEVEL_INFO, 293 "Skipping unsupported sysmem link training on GPU%d\n", 294 pGpu->gpuInstance); 295 296 return NV_OK; 297 } 298 299 // Return if link training is force disabled through regkey 300 if (pKernelNvlink->bSkipLinkTraining) 301 { 302 NV_PRINTF(LEVEL_INFO, 303 "Skipping link training due to regkey on GPU%d\n", 304 pGpu->gpuInstance); 305 306 return NV_OK; 307 } 308 309 // If fabric is managed by FM, return 310 if (pSys->getProperty(pSys, PDB_PROP_SYS_FABRIC_IS_EXTERNALLY_MANAGED)) 311 { 312 NV_PRINTF(LEVEL_INFO, 313 "Fabric is externally managed, skip link training\n"); 314 315 return NV_OK; 316 } 317 318 NV_PRINTF(LEVEL_INFO, "Training sysmem links for GPU%d\n", 319 pGpu->gpuInstance); 320 321 FOR_EACH_INDEX_IN_MASK(32, i, pKernelNvlink->enabledLinks) 322 { 323 if (pKernelNvlink->nvlinkLinks[i].remoteEndInfo.bConnected && 324 ((pKernelNvlink->nvlinkLinks[i].remoteEndInfo.deviceType == NVLINK_DEVICE_TYPE_IBMNPU) || 325 (pKernelNvlink->nvlinkLinks[i].remoteEndInfo.deviceType == NVLINK_DEVICE_TYPE_TEGRASHIM) || 326 (pKernelNvlink->nvlinkLinks[i].remoteEndInfo.deviceType == NVLINK_DEVICE_TYPE_EBRIDGE))) 327 { 328 if (nvlink_lib_train_links_from_swcfg_to_active( 329 &pKernelNvlink->nvlinkLinks[i].core_link, 1, NVLINK_STATE_CHANGE_SYNC) 330 != NVL_SUCCESS) 331 { 332 nvErrorLog_va((void *)pGpu, NVLINK_ERROR, 333 "NVLink: failed to train link %d to remote PCI:%04x:%02x:%02x", 334 i, 335 pKernelNvlink->nvlinkLinks[i].remoteEndInfo.domain, 336 pKernelNvlink->nvlinkLinks[i].remoteEndInfo.bus, 337 pKernelNvlink->nvlinkLinks[i].remoteEndInfo.device); 338 339 return NV_ERR_NOT_SUPPORTED; 340 } 341 } 342 } 343 FOR_EACH_INDEX_IN_MASK_END; 344 345 // 346 // After training links, we may have used up most of the available 4s 347 // timeout during GPU state load. As a WAR in lieu of improving the 348 // performance of link training SW, reset the timeout for now. 349 // 350 NV_PRINTF(LEVEL_INFO, "resetting timeout after link training\n"); 351 352 threadStateResetTimeout(pGpu); 353 354 #endif 355 356 return NV_OK; 357 } 358 359 /*! 360 * @brief Ensure links are trained and put into active. 
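 *
 *        A caller will typically poll this function until it reports NV_OK or
 *        a timeout expires; a minimal sketch of that pattern (mirroring the
 *        loop in _knvlinkExitSleep below, with timeoutUs/timeout assumed to be
 *        locals of the caller):
 *
 *            gpuSetTimeout(pGpu0, timeoutUs, &timeout, 0);
 *            do
 *            {
 *                if (knvlinkCheckTrainingIsComplete(pGpu0, pGpu1, pKernelNvlink0) == NV_OK)
 *                    break;
 *                osSpinLoop();
 *            } while (gpuCheckTimeout(pGpu0, &timeout) != NV_ERR_TIMEOUT);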
361 * 362 * @param[in] pGpu0 OBJGPU pointer 363 * @param[in] pGpu1 OBJGPU pointer 364 * @param[in] pKernelNvlink0 KernelNvlink pointer 365 * 366 * @return NV_OK on success 367 */ 368 NV_STATUS 369 knvlinkCheckTrainingIsComplete_IMPL 370 ( 371 OBJGPU *pGpu0, 372 OBJGPU *pGpu1, 373 KernelNvlink *pKernelNvlink0 374 ) 375 { 376 NV_STATUS status = NV_OK; 377 378 #if defined(INCLUDE_NVLINK_LIB) 379 380 OBJSYS *pSys = SYS_GET_INSTANCE(); 381 NvU32 version = pKernelNvlink0->ipVerNvlink; 382 KernelNvlink *pKernelNvlink1 = GPU_GET_KERNEL_NVLINK(pGpu1); 383 NvU32 count = 0; 384 NvU32 i; 385 386 if (pKernelNvlink1 == NULL) 387 { 388 NV_PRINTF(LEVEL_ERROR, 389 "Input mask contains a GPU on which NVLink is disabled.\n"); 390 391 return NV_ERR_INVALID_ARGUMENT; 392 } 393 394 nvlink_link *pLinks[NVLINK_MAX_LINKS_SW] = { 0 }; 395 396 // Link training will be triggered from KMD in L2 exit path 397 if (knvlinkPoweredUpForD3_HAL(pGpu0, pKernelNvlink0)) 398 { 399 NV_PRINTF(LEVEL_INFO, 400 "Skip link training on GPU%d in RTD3/FGC6 exit. Links will train to " 401 "ACTIVE in L2 exit path\n", pGpu0->gpuInstance); 402 return NV_OK; 403 } 404 405 // Minion and SW training is by default disabled on RTL 406 if (IS_RTLSIM(pGpu0) && !pKernelNvlink0->bForceEnableCoreLibRtlsims) 407 { 408 return NV_OK; 409 } 410 411 // Return if link training is force disabled through regkey 412 if (pKernelNvlink0->bSkipLinkTraining) 413 { 414 NV_PRINTF(LEVEL_INFO, 415 "Skipping link training due to regkey on GPU%d\n", 416 pGpu0->gpuInstance); 417 return NV_OK; 418 } 419 420 // Return if forced config, since SW training is not supported 421 if (knvlinkIsForcedConfig(pGpu0, pKernelNvlink0)) 422 { 423 NV_PRINTF(LEVEL_INFO, "Skipping link due to forced configuration\n"); 424 return NV_OK; 425 } 426 427 // If fabric is managed by FM, return 428 if (pSys->getProperty(pSys, PDB_PROP_SYS_FABRIC_IS_EXTERNALLY_MANAGED)) 429 { 430 NV_PRINTF(LEVEL_INFO, 431 "Fabric is externally managed, skip link training\n"); 432 return NV_OK; 433 } 434 435 // 436 // If ALI then ensure it has completed 437 // Else run through training for legacy nvlink versions 438 // 439 if (pKernelNvlink0->bEnableAli || pKernelNvlink1->bEnableAli) 440 { 441 // polling for train complete is only allowed for NvLink 4.0+ 442 NV_ASSERT(version >= NVLINK_VERSION_40); 443 444 // 445 // Check to make sure that the links for the first GPU have 446 // all completed training 447 // 448 FOR_EACH_INDEX_IN_MASK(32, i, pKernelNvlink0->postRxDetLinkMask) 449 { 450 pLinks[count] = pKernelNvlink0->nvlinkLinks[i].core_link; 451 count++; 452 } 453 FOR_EACH_INDEX_IN_MASK_END; 454 455 // If the return code is non-zero, links are still training 456 if (nvlink_lib_check_training_complete(pLinks, count) != 0) 457 { 458 NV_PRINTF(LEVEL_INFO, "Links aren't fully trained yet!\n"); 459 knvlinkLogAliDebugMessages(pGpu0, pKernelNvlink0); 460 return NV_ERR_GENERIC; 461 } 462 463 // 464 // For all links in the postRxDetLinkMask, get it's peer 465 // links information 466 // 467 FOR_EACH_INDEX_IN_MASK(32, i, pKernelNvlink0->postRxDetLinkMask) 468 { 469 NV2080_CTRL_NVLINK_UPDATE_REMOTE_LOCAL_SID_PARAMS params; 470 portMemSet(¶ms, 0, sizeof(params)); 471 472 params.linkId = i; 473 474 status = knvlinkExecGspRmRpc(pGpu0, pKernelNvlink0, 475 NV2080_CTRL_CMD_NVLINK_UPDATE_REMOTE_LOCAL_SID, 476 (void *)¶ms, sizeof(params)); 477 if (status != NV_OK) 478 { 479 NV_PRINTF(LEVEL_ERROR, "Error updating Local/Remote Sid Info!\n"); 480 return status; 481 } 482 483 pKernelNvlink0->nvlinkLinks[i].core_link->remoteSid = 484 
params.remoteLocalSidInfo.remoteSid; 485 pKernelNvlink0->nvlinkLinks[i].core_link->remoteDeviceType = 486 params.remoteLocalSidInfo.remoteDeviceType; 487 pKernelNvlink0->nvlinkLinks[i].core_link->remoteLinkId = 488 params.remoteLocalSidInfo.remoteLinkId; 489 pKernelNvlink0->nvlinkLinks[i].core_link->localSid = 490 params.remoteLocalSidInfo.localSid; 491 } 492 FOR_EACH_INDEX_IN_MASK_END; 493 494 // Only enter if not in loopBack 495 if (pKernelNvlink0 != pKernelNvlink1) 496 { 497 // 498 // Check to make sure that the links for the second GPU have 499 // all completed training. Reset count for this GPU prior 500 // to querying for the links 501 // 502 count = 0; 503 FOR_EACH_INDEX_IN_MASK(32, i, pKernelNvlink1->postRxDetLinkMask) 504 { 505 pLinks[count] = pKernelNvlink1->nvlinkLinks[i].core_link; 506 count++; 507 } 508 FOR_EACH_INDEX_IN_MASK_END; 509 510 // If the return code is non-zero, links are still training 511 if (nvlink_lib_check_training_complete(pLinks, count) != 0) 512 { 513 NV_PRINTF(LEVEL_INFO, "Links aren't fully trained yet!\n"); 514 knvlinkLogAliDebugMessages(pGpu1, pKernelNvlink1); 515 return NV_ERR_GENERIC; 516 } 517 518 // 519 // For all links in the postRxDetLinkMask, get it's peer 520 // links information 521 // 522 FOR_EACH_INDEX_IN_MASK(32, i, pKernelNvlink1->postRxDetLinkMask) 523 { 524 NV2080_CTRL_NVLINK_UPDATE_REMOTE_LOCAL_SID_PARAMS params; 525 portMemSet(¶ms, 0, sizeof(params)); 526 527 params.linkId = i; 528 529 status = knvlinkExecGspRmRpc(pGpu1, pKernelNvlink1, 530 NV2080_CTRL_CMD_NVLINK_UPDATE_REMOTE_LOCAL_SID, 531 (void *)¶ms, sizeof(params)); 532 if (status != NV_OK) 533 { 534 NV_PRINTF(LEVEL_ERROR, "Error updating Local/Remote Sid Info!\n"); 535 return status; 536 } 537 538 pKernelNvlink1->nvlinkLinks[i].core_link->remoteSid = 539 params.remoteLocalSidInfo.remoteSid; 540 pKernelNvlink1->nvlinkLinks[i].core_link->remoteDeviceType = 541 params.remoteLocalSidInfo.remoteDeviceType; 542 pKernelNvlink1->nvlinkLinks[i].core_link->remoteLinkId = 543 params.remoteLocalSidInfo.remoteLinkId; 544 pKernelNvlink1->nvlinkLinks[i].core_link->localSid = 545 params.remoteLocalSidInfo.localSid; 546 } 547 FOR_EACH_INDEX_IN_MASK_END; 548 } 549 } 550 551 #endif 552 553 return status; 554 } 555 556 /*! 557 * @brief Train all the connected links between the two given devices 558 * to active through the nvlink core library. 559 * 560 * @param[in] pGpu0 OBJGPU pointer 561 * @param[in] pGpu1 OBJGPU pointer 562 * @param[in] pKernelNvlink0 KernelNvlink pointer 563 * 564 * @return NV_OK on success 565 */ 566 NV_STATUS 567 knvlinkTrainP2pLinksToActive_IMPL 568 ( 569 OBJGPU *pGpu0, 570 OBJGPU *pGpu1, 571 KernelNvlink *pKernelNvlink0 572 ) 573 { 574 NV_STATUS status = NV_OK; 575 576 #if defined(INCLUDE_NVLINK_LIB) 577 578 OBJSYS *pSys = SYS_GET_INSTANCE(); 579 KernelNvlink *pKernelNvlink1 = GPU_GET_KERNEL_NVLINK(pGpu1); 580 NvU32 version = pKernelNvlink0->ipVerNvlink; 581 NvBool bTrainLinks = NV_FALSE; 582 NvU32 count = 0; 583 NvU32 remoteLink; 584 NvU32 i; 585 586 nvlink_link *pLinks[NVLINK_MAX_LINKS_SW] = { 0 }; 587 588 // Link training will be triggered from KMD in L2 exit path 589 if (knvlinkPoweredUpForD3_HAL(pGpu0, pKernelNvlink0)) 590 { 591 NV_PRINTF(LEVEL_INFO, 592 "Skip link training on GPU%d in RTD3/FGC6 exit. 
Links will train to " 593 "ACTIVE in L2 exit path\n", pGpu0->gpuInstance); 594 595 return NV_OK; 596 } 597 598 // Minion and SW training is by default disabled on RTL 599 if (IS_RTLSIM(pGpu0) && !pKernelNvlink0->bForceEnableCoreLibRtlsims) 600 { 601 return NV_OK; 602 } 603 604 // Return if link training is force disabled through regkey 605 if (pKernelNvlink0->bSkipLinkTraining) 606 { 607 NV_PRINTF(LEVEL_INFO, 608 "Skipping link training due to regkey on GPU%d\n", 609 pGpu0->gpuInstance); 610 611 return NV_OK; 612 } 613 614 // Return if forced config, since SW training is not supported 615 if (knvlinkIsForcedConfig(pGpu0, pKernelNvlink0)) 616 { 617 NV_PRINTF(LEVEL_INFO, "Skipping link due to forced configuration\n"); 618 619 return NV_OK; 620 } 621 622 // If fabric is managed by FM, return 623 if (pSys->getProperty(pSys, PDB_PROP_SYS_FABRIC_IS_EXTERNALLY_MANAGED)) 624 { 625 NV_PRINTF(LEVEL_INFO, 626 "Fabric is externally managed, skip link training\n"); 627 628 return NV_OK; 629 } 630 631 // 632 // Bug# 3601144: On Ampere+ systems, return if links are already initialized, 633 // since that implies links are already trained. 634 // 635 if (IsAMPEREorBetter(pGpu0)) 636 { 637 NvU32 localMask = 0; 638 NvU32 remoteMask = 0; 639 640 FOR_EACH_INDEX_IN_MASK(32, i, pKernelNvlink0->enabledLinks) 641 { 642 if (KNVLINK_IS_LINK_CONNECTED_TO_GPU(pKernelNvlink0, i, pGpu1)) 643 { 644 remoteLink = pKernelNvlink0->nvlinkLinks[i].remoteEndInfo.linkNumber; 645 646 localMask |= BIT(i); 647 remoteMask |= BIT(remoteLink); 648 } 649 } 650 FOR_EACH_INDEX_IN_MASK_END; 651 652 if (((pKernelNvlink0->initializedLinks & localMask) == localMask) && 653 ((pKernelNvlink1->initializedLinks & remoteMask) == remoteMask)) 654 { 655 NV_PRINTF(LEVEL_INFO, "P2P links are all trained already, return\n"); 656 return NV_OK; 657 } 658 } 659 660 // Get the link train status for the enabled link masks 661 NV2080_CTRL_NVLINK_ARE_LINKS_TRAINED_PARAMS linkTrainedParams; 662 663 portMemSet(&linkTrainedParams, 0, sizeof(linkTrainedParams)); 664 linkTrainedParams.linkMask = pKernelNvlink0->enabledLinks; 665 linkTrainedParams.bActiveOnly = NV_TRUE; 666 667 // Reset timeout to clear any accumulated timeouts from link init 668 if (IS_GSP_CLIENT(pGpu0)) 669 { 670 threadStateResetTimeout(pGpu0); 671 } 672 673 status = knvlinkExecGspRmRpc(pGpu0, pKernelNvlink0, 674 NV2080_CTRL_CMD_NVLINK_ARE_LINKS_TRAINED, 675 (void *)&linkTrainedParams, 676 sizeof(linkTrainedParams)); 677 if (status != NV_OK) 678 { 679 NV_PRINTF(LEVEL_ERROR, "Failed to get the link train status for links\n"); 680 return status; 681 } 682 683 // 684 // Bug# 3424466: Optimization - Return if all enabled links for this GPU are 685 // already trained. The core library makes several callbacks to check link 686 // state which results in numerous RPCs on GSP-RM platforms resulting in low 687 // perf on chips which have low link training latency and low links count. 
688 // 689 FOR_EACH_INDEX_IN_MASK(32, i, pKernelNvlink0->enabledLinks) 690 { 691 if (!KNVLINK_IS_LINK_CONNECTED_TO_GPU(pKernelNvlink0, i, pGpu1)) 692 { 693 continue; 694 } 695 696 if (!linkTrainedParams.bIsLinkActive[i]) 697 { 698 bTrainLinks = NV_TRUE; 699 break; 700 } 701 } 702 FOR_EACH_INDEX_IN_MASK_END; 703 704 if (!bTrainLinks) 705 { 706 NV_PRINTF(LEVEL_INFO, "Enabled links are all trained already, return\n"); 707 return NV_OK; 708 } 709 710 // Train the mask of enabled links to ACTIVE state 711 FOR_EACH_INDEX_IN_MASK(32, i, pKernelNvlink0->enabledLinks) 712 { 713 if (!KNVLINK_IS_LINK_CONNECTED_TO_GPU(pKernelNvlink0, i, pGpu1)) 714 { 715 continue; 716 } 717 718 if (version >= NVLINK_VERSION_22) 719 { 720 // Capture links for parallel link training 721 pLinks[count] = pKernelNvlink0->nvlinkLinks[i].core_link; 722 count++; 723 } 724 else 725 { 726 // Invoke link training for NVLINK <= 2.0 727 (void)nvlink_lib_train_links_from_swcfg_to_active( 728 &pKernelNvlink0->nvlinkLinks[i].core_link, 1, NVLINK_STATE_CHANGE_SYNC); 729 } 730 } 731 FOR_EACH_INDEX_IN_MASK_END; 732 733 // Invoke link training for NVLINK >= 2.2 734 if (count > 0) 735 { 736 // 737 // nvlink_lib_train_links_from_swcfg_to_active with 738 // NVLINK_STATE_CHANGE_ASYNC flag invokes link training asynchronously, 739 // but the call itself is synchronous i.e. it will poll for link 740 // training to complete. 741 // 742 NV_ASSERT(version >= NVLINK_VERSION_22); 743 (void)nvlink_lib_train_links_from_swcfg_to_active( 744 pLinks, count, NVLINK_STATE_CHANGE_ASYNC); 745 } 746 747 // Get the link train status for the enabled link masks 748 portMemSet(&linkTrainedParams, 0, sizeof(linkTrainedParams)); 749 linkTrainedParams.linkMask = pKernelNvlink0->enabledLinks; 750 linkTrainedParams.bActiveOnly = NV_TRUE; 751 752 // Reset timeout to clear any accumulated timeouts from link init 753 if (IS_GSP_CLIENT(pGpu0)) 754 { 755 threadStateResetTimeout(pGpu0); 756 } 757 758 status = knvlinkExecGspRmRpc(pGpu0, pKernelNvlink0, 759 NV2080_CTRL_CMD_NVLINK_ARE_LINKS_TRAINED, 760 (void *)&linkTrainedParams, 761 sizeof(linkTrainedParams)); 762 if (status != NV_OK) 763 { 764 NV_PRINTF(LEVEL_ERROR, "Failed to get the link train status for links\n"); 765 return status; 766 } 767 768 // Check if the links are trained to "active" state. 769 FOR_EACH_INDEX_IN_MASK(32, i, pKernelNvlink0->enabledLinks) 770 { 771 if (!KNVLINK_IS_LINK_CONNECTED_TO_GPU(pKernelNvlink0, i, pGpu1)) 772 { 773 continue; 774 } 775 776 if (linkTrainedParams.bIsLinkActive[i]) 777 { 778 continue; 779 } 780 781 nvErrorLog_va((void *)pGpu0, NVLINK_ERROR, 782 "NVLink: Failed to train link %d to remote PCI:%04x:%02x:%02x", 783 i, 784 pKernelNvlink0->nvlinkLinks[i].remoteEndInfo.domain, 785 pKernelNvlink0->nvlinkLinks[i].remoteEndInfo.bus, 786 pKernelNvlink0->nvlinkLinks[i].remoteEndInfo.device); 787 788 status = NV_ERR_INVALID_STATE; 789 } 790 FOR_EACH_INDEX_IN_MASK_END; 791 792 #endif 793 794 return status; 795 } 796 797 /*! 798 * knvlinkTrainFabricLinksToActive_IMPL 799 * Setup NVLinks between 2 peers connected to switch. Train the links to 800 * High Speed. 801 * 802 * Note: Desired sequence to setup NvLink P2P is: 803 * 1. A client queries P2P capability among GPUs. 804 * 2. If the GPUs are P2P compatible, create NV50_P2P object which invokes 805 * link training. 
806 * However, existing GPU<->GPU link training happens during step 1 through 807 * gpumgrGetP2PCaps - which gets called on RmInitAdapter and may lead to timeout 808 * based upon the time consumed by costly link training operations. 809 * 810 * For now, we are fixing this for nvswitch systems by adding this helper 811 * function which should just get invoked during NV50_P2P object creation. 812 * 813 * This issue needs to be fixed for non-nvswitch systems as well. Bug:200285708. 814 * Once the bug is fixed, knvlinkTrainFabricLinksToActive can be called from 815 * knvlinkTrainP2pLinksToActive. 816 * 817 * @param[in] pGpu OBJGPU pointer 818 * @param[in] pKernelNvlink KernelNvlink pointer 819 * 820 * @return NV_OK on success 821 */ 822 NV_STATUS 823 knvlinkTrainFabricLinksToActive_IMPL 824 ( 825 OBJGPU *pGpu, 826 KernelNvlink *pKernelNvlink 827 ) 828 { 829 #if defined(INCLUDE_NVLINK_LIB) 830 831 OBJSYS *pSys = SYS_GET_INSTANCE(); 832 NvU32 i; 833 834 // Minion and SW training is by default disabled on RTL 835 if (IS_RTLSIM(pGpu) && !pKernelNvlink->bForceEnableCoreLibRtlsims) 836 { 837 return NV_OK; 838 } 839 840 // Return if link training is force disabled through regkey 841 if (pKernelNvlink->bSkipLinkTraining) 842 { 843 NV_PRINTF(LEVEL_INFO, 844 "Skipping link training due to regkey on GPU%d\n", 845 pGpu->gpuInstance); 846 847 return NV_OK; 848 } 849 850 // If fabric is managed by FM, return 851 if (pSys->getProperty(pSys, PDB_PROP_SYS_FABRIC_IS_EXTERNALLY_MANAGED)) 852 { 853 NV_PRINTF(LEVEL_INFO, 854 "Fabric is externally managed, skip link training\n"); 855 856 return NV_OK; 857 } 858 859 if (knvlinkIsForcedConfig(pGpu, pKernelNvlink)) 860 { 861 NV_PRINTF(LEVEL_INFO, 862 "Nvlink in Forced Config - skip link training.\n"); 863 864 return NV_OK; 865 } 866 867 FOR_EACH_INDEX_IN_MASK(32, i, pKernelNvlink->enabledLinks) 868 { 869 if ( pKernelNvlink->nvlinkLinks[i].remoteEndInfo.bConnected && 870 (pKernelNvlink->nvlinkLinks[i].remoteEndInfo.deviceType == 871 NVLINK_DEVICE_TYPE_NVSWITCH)) 872 { 873 if (nvlink_lib_train_links_from_swcfg_to_active( 874 &pKernelNvlink->nvlinkLinks[i].core_link, 1, NVLINK_STATE_CHANGE_SYNC) 875 != NVL_SUCCESS) 876 { 877 nvErrorLog_va((void *)pGpu, NVLINK_ERROR, 878 "NVLink: failed to train link %d to remote PCI:%04x:%02x:%02x", 879 i, 880 pKernelNvlink->nvlinkLinks[i].remoteEndInfo.domain, 881 pKernelNvlink->nvlinkLinks[i].remoteEndInfo.bus, 882 pKernelNvlink->nvlinkLinks[i].remoteEndInfo.device); 883 884 return NV_ERR_INVALID_STATE; 885 } 886 } 887 } 888 FOR_EACH_INDEX_IN_MASK_END; 889 890 #endif 891 892 return NV_OK; 893 } 894 895 /*! 896 * @brief Transition/Wakeup the links into/from sleep (L2) state 897 * 898 * @param[in] pGpu OBJGPU pointer 899 * @param[in] pKernelNvlink KernelNvlink pointer 900 * @param[in] linkMask Mask of links 901 * @param[in] bEntry Enter/Exit sleep (L2) 902 * 903 * @return NV_OK on success 904 */ 905 NV_STATUS 906 knvlinkEnterExitSleep_IMPL 907 ( 908 OBJGPU *pGpu, 909 KernelNvlink *pKernelNvlink, 910 NvU32 linkMask, 911 NvBool bEntry 912 ) 913 { 914 #if defined(INCLUDE_NVLINK_LIB) 915 916 OBJSYS *pSys = SYS_GET_INSTANCE(); 917 NvU32 linkId; 918 919 // NVLink L2 as a feature should be enabled 920 if (!pKernelNvlink->getProperty(pKernelNvlink, 921 PDB_PROP_KNVLINK_L2_POWER_STATE_ENABLED)) 922 { 923 NV_PRINTF(LEVEL_ERROR, "NVLink L2 is not supported. 
Returning\n"); 924 925 return NV_ERR_NOT_SUPPORTED; 926 } 927 928 // Return error if NVLink fabric is managed by FM 929 if (pSys->getProperty(pSys, PDB_PROP_SYS_FABRIC_IS_EXTERNALLY_MANAGED)) 930 { 931 NV_PRINTF(LEVEL_ERROR, 932 "Skipping L2 entry/exit since fabric is externally managed\n"); 933 934 return NV_ERR_NOT_SUPPORTED; 935 } 936 937 // Check if all the links in the mask are connected 938 FOR_EACH_INDEX_IN_MASK(32, linkId, linkMask) 939 { 940 if (!pKernelNvlink->nvlinkLinks[linkId].remoteEndInfo.bConnected) 941 { 942 NV_PRINTF(LEVEL_ERROR, 943 "GPU%d: Link%d is not connected. Returning\n", 944 pGpu->gpuInstance, linkId); 945 946 return NV_ERR_NOT_SUPPORTED; 947 } 948 } 949 FOR_EACH_INDEX_IN_MASK_END; 950 951 // Links that share a PLL must enter/exit L2 together 952 FOR_EACH_INDEX_IN_MASK(32, linkId, linkMask) 953 { 954 // If the link is a PLL master, consider the slave link 955 if (pKernelNvlink->nvlinkLinks[linkId].pllMasterLinkId == linkId) 956 { 957 // If the slave link exists and is not init-disabled, it should be included 958 if ( (pKernelNvlink->nvlinkLinks[linkId].pllSlaveLinkId != NVLINK_MAX_LINKS_SW) && 959 (NVBIT(pKernelNvlink->nvlinkLinks[linkId].pllSlaveLinkId) & pKernelNvlink->enabledLinks) && 960 !(NVBIT(pKernelNvlink->nvlinkLinks[linkId].pllSlaveLinkId) & linkMask) ) 961 { 962 NV_PRINTF(LEVEL_ERROR, 963 "GPU%d: Links sharing PLL should enter/exit L2 together. Returning\n", 964 pGpu->gpuInstance); 965 966 return NV_ERR_NOT_SUPPORTED; 967 } 968 } 969 else 970 { 971 // For a slave link, its PLL master should be included if not init-disabled 972 if ( (NVBIT(pKernelNvlink->nvlinkLinks[linkId].pllMasterLinkId) & pKernelNvlink->enabledLinks) && 973 !(NVBIT(pKernelNvlink->nvlinkLinks[linkId].pllMasterLinkId) & linkMask) ) 974 { 975 NV_PRINTF(LEVEL_ERROR, 976 "GPU%d: Links sharing PLL should enter/exit L2 together. Returning\n", 977 pGpu->gpuInstance); 978 979 return NV_ERR_NOT_SUPPORTED; 980 } 981 } 982 } 983 FOR_EACH_INDEX_IN_MASK_END; 984 985 // Device must be registered in the nvlink core library 986 if (!pKernelNvlink->pNvlinkDev) 987 { 988 NV_PRINTF(LEVEL_ERROR, 989 "GPU%d: not registered in core lib. Returning\n", 990 pGpu->gpuInstance); 991 992 return NV_ERR_NOT_SUPPORTED; 993 } 994 995 if (bEntry) 996 { 997 // Remove the peer mapping in HSHUB and transition links to sleep (L2) 998 return _knvlinkEnterSleep(pGpu, pKernelNvlink, linkMask); 999 } 1000 else 1001 { 1002 // Wakeup the links from sleep (L2) and setup the peer mapping in HSHUB 1003 return _knvlinkExitSleep(pGpu, pKernelNvlink, linkMask); 1004 } 1005 #endif 1006 1007 return NV_OK; 1008 } 1009 1010 /*! 1011 * @brief Shutdown all the connected links associated with the device 1012 * through the nvlink core library. 
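 *
 *        A minimal call sketch (context assumed; not the only valid usage):
 *
 *            status = knvlinkCoreShutdownDeviceLinks(pGpu, pKernelNvlink, NV_FALSE);
 *
 *        Passing bForceShutdown = NV_FALSE lets a GFW-booted device skip the
 *        shutdown, as checked at the top of the function.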
1013 * 1014 * @param[in] pGpu OBJGPU pointer 1015 * @param[in] pKernelNvlink KernelNvlink pointer 1016 * 1017 * @return NV_OK on success 1018 */ 1019 NV_STATUS 1020 knvlinkCoreShutdownDeviceLinks_IMPL 1021 ( 1022 OBJGPU *pGpu, 1023 KernelNvlink *pKernelNvlink, 1024 NvBool bForceShutdown 1025 ) 1026 { 1027 #if defined(INCLUDE_NVLINK_LIB) 1028 1029 nvlink_link *pLinks[NVLINK_MAX_LINKS_SW] = {0}; 1030 OBJSYS *pSys = SYS_GET_INSTANCE(); 1031 NvU32 count = 0; 1032 NvU32 linkId; 1033 1034 // Skip link shutdown where fabric manager is present, for nvlink version bellow 4.0 1035 if ((pKernelNvlink->ipVerNvlink < NVLINK_VERSION_40 && 1036 pSys->getProperty(pSys, PDB_PROP_SYS_FABRIC_IS_EXTERNALLY_MANAGED)) || 1037 (pKernelNvlink->pNvlinkDev == NULL)) 1038 { 1039 NV_PRINTF(LEVEL_INFO, 1040 "core lib device is either externally managed or not present, skipping\n"); 1041 1042 return NV_OK; 1043 } 1044 1045 // return early if there are no enabled links 1046 if (pKernelNvlink->enabledLinks == 0) 1047 { 1048 NV_PRINTF(LEVEL_INFO, "No links to shutdown for the GPU%d\n", 1049 pGpu->gpuInstance); 1050 1051 return NV_OK; 1052 } 1053 1054 if (!bForceShutdown && pKernelNvlink->getProperty(pKernelNvlink, PDB_PROP_KNVLINK_MINION_GFW_BOOT)) 1055 { 1056 NV_PRINTF(LEVEL_INFO, 1057 "GFW boot is enabled. Link shutdown is not required, skipping\n"); 1058 1059 return NV_OK; 1060 } 1061 1062 FOR_EACH_INDEX_IN_MASK(32, linkId, pKernelNvlink->enabledLinks) 1063 { 1064 // Capture the links for lane shutdown through core lib if supported 1065 if (pKernelNvlink->getProperty(pKernelNvlink, PDB_PROP_KNVLINK_LANE_SHUTDOWN_ENABLED)) 1066 { 1067 // Skip GPU in reset 1068 if (pKernelNvlink->nvlinkLinks[linkId].remoteEndInfo.deviceType == 1069 NV2080_CTRL_NVLINK_DEVICE_INFO_DEVICE_TYPE_GPU) 1070 { 1071 OBJGPU* pRemoteGpu = gpumgrGetGpuFromBusInfo( 1072 pKernelNvlink->nvlinkLinks[linkId].remoteEndInfo.domain, 1073 pKernelNvlink->nvlinkLinks[linkId].remoteEndInfo.bus, 1074 pKernelNvlink->nvlinkLinks[linkId].remoteEndInfo.device); 1075 if (API_GPU_IN_RESET_SANITY_CHECK(pRemoteGpu)) 1076 { 1077 continue; 1078 } 1079 } 1080 pLinks[count] = pKernelNvlink->nvlinkLinks[linkId].core_link; 1081 count++; 1082 } 1083 else 1084 { 1085 nvlink_lib_powerdown_links_from_active_to_swcfg( 1086 &pKernelNvlink->nvlinkLinks[linkId].core_link, 1087 1, NVLINK_STATE_CHANGE_SYNC); 1088 } 1089 } 1090 FOR_EACH_INDEX_IN_MASK_END; 1091 1092 // Trigger laneshutdown through core lib if shutdown is supported 1093 if (pKernelNvlink->getProperty(pKernelNvlink, PDB_PROP_KNVLINK_LANE_SHUTDOWN_ENABLED) && (count > 0)) 1094 { 1095 if (nvlink_lib_powerdown_links_from_active_to_off( 1096 pLinks, count, NVLINK_STATE_CHANGE_SYNC)) 1097 { 1098 NV_PRINTF(LEVEL_ERROR, "Unable to turn off links for the GPU%d\n", 1099 pGpu->gpuInstance); 1100 1101 return NV_ERR_INVALID_STATE; 1102 } 1103 } 1104 1105 #endif 1106 1107 return NV_OK; 1108 } 1109 1110 /*! 1111 * @brief Reset all the connected links associated with the device 1112 * through the nvlink core library. 
1113 * 1114 * @param[in] pGpu OBJGPU pointer 1115 * @param[in] pKernelNvlink KernelNvlink pointer 1116 * 1117 * @return NV_OK on success 1118 */ 1119 NV_STATUS 1120 knvlinkCoreResetDeviceLinks_IMPL 1121 ( 1122 OBJGPU *pGpu, 1123 KernelNvlink *pKernelNvlink 1124 ) 1125 { 1126 #if defined(INCLUDE_NVLINK_LIB) 1127 1128 nvlink_link *pLinks[NVLINK_MAX_LINKS_SW] = {0}; 1129 OBJSYS *pSys = SYS_GET_INSTANCE(); 1130 NvU32 count = 0; 1131 NvU32 linkId; 1132 1133 // Skip link reset where fabric manager is present, for nvlink version bellow 4.0 1134 if ((pKernelNvlink->ipVerNvlink < NVLINK_VERSION_40 && 1135 pSys->getProperty(pSys, PDB_PROP_SYS_FABRIC_IS_EXTERNALLY_MANAGED)) || 1136 (pKernelNvlink->pNvlinkDev == NULL)) 1137 { 1138 NV_PRINTF(LEVEL_INFO, 1139 "core lib device is either externally managed or not present, skipping\n"); 1140 1141 return NV_OK; 1142 } 1143 1144 // return early if there are no enabled links 1145 if (pKernelNvlink->enabledLinks == 0) 1146 { 1147 NV_PRINTF(LEVEL_INFO, "No links to reset for the GPU%d\n", 1148 pGpu->gpuInstance); 1149 1150 return NV_OK; 1151 } 1152 1153 // We only perform the link reset if lane shutdown is enabled 1154 if (pKernelNvlink->getProperty(pKernelNvlink, PDB_PROP_KNVLINK_LANE_SHUTDOWN_ENABLED)) 1155 { 1156 FOR_EACH_INDEX_IN_MASK(32, linkId, pKernelNvlink->enabledLinks) 1157 { 1158 // Skip GPU in reset 1159 if (pKernelNvlink->nvlinkLinks[linkId].remoteEndInfo.deviceType == 1160 NV2080_CTRL_NVLINK_DEVICE_INFO_DEVICE_TYPE_GPU) 1161 { 1162 OBJGPU* pRemoteGpu = gpumgrGetGpuFromBusInfo( 1163 pKernelNvlink->nvlinkLinks[linkId].remoteEndInfo.domain, 1164 pKernelNvlink->nvlinkLinks[linkId].remoteEndInfo.bus, 1165 pKernelNvlink->nvlinkLinks[linkId].remoteEndInfo.device); 1166 if (API_GPU_IN_RESET_SANITY_CHECK(pRemoteGpu)) 1167 { 1168 continue; 1169 } 1170 } 1171 pLinks[count] = pKernelNvlink->nvlinkLinks[linkId].core_link; 1172 count++; 1173 } 1174 FOR_EACH_INDEX_IN_MASK_END; 1175 1176 if (nvlink_lib_reset_links(pLinks, count, NVLINK_STATE_CHANGE_SYNC) && (count > 0)) 1177 { 1178 NV_PRINTF(LEVEL_ERROR, "Unable to reset link(s) for GPU%d\n", 1179 pGpu->gpuInstance); 1180 1181 return NV_ERR_INVALID_STATE; 1182 } 1183 } 1184 else 1185 { 1186 NV_PRINTF(LEVEL_INFO, 1187 "Lane shutdown not enabled, skipping link(s) reset for GPU%d\n", 1188 pGpu->gpuInstance); 1189 1190 return NV_ERR_INVALID_STATE; 1191 } 1192 1193 #endif 1194 1195 return NV_OK; 1196 } 1197 1198 /*! 1199 * @brief Retrain a link from either safe mode or off. 1200 * 1201 * @param[in] pGpu OBJGPU pointer 1202 * @param[in] pKernelNvlink KernelNvlink pointer 1203 * @param[in] linkId Link ID of the link in question 1204 * @param[in] bFromOff Whether link should be retrained from SAFE/OFF 1205 * 1206 * @returns NV_OK if link retraining was successful 1207 */ 1208 NV_STATUS 1209 knvlinkRetrainLink_IMPL 1210 ( 1211 OBJGPU *pGpu, 1212 KernelNvlink *pKernelNvlink, 1213 NvU32 linkId, 1214 NvBool bFromOff 1215 ) 1216 { 1217 NV_STATUS status = NV_OK; 1218 1219 // If NVLINK_LIB isn't enabled, we just execute prologue and return. 1220 _knvlinkRetrainLinkPrologue(pGpu, pKernelNvlink, linkId); 1221 1222 OBJSYS *pSys = SYS_GET_INSTANCE(); 1223 1224 // If fabric is managed by FM 1225 if (pSys->getProperty(pSys, PDB_PROP_SYS_FABRIC_IS_EXTERNALLY_MANAGED)) 1226 { 1227 #if defined(INCLUDE_NVLINK_LIB) 1228 1229 // 1230 // Notify FM for link re-training. 1231 // 1232 // Note, at this point all DL interrupts should be disabled. 
The interrupts
        // will be enabled through nvlinkCoreReenableLinkInterruptsCallback only if
        // links can be successfully re-trained.
        //
        // It is the responsibility of FM to highlight link re-training failures to the
        // system admin. Hence, we shouldn't be logging Xid in this case.
        //
        // It is worth noting that there is no race between the interrupt
        // enable/disable register updates, as we notify FM only after disabling
        // interrupts.
        //
        gpuNotifySubDeviceEvent(pGpu,
                                NV2080_NOTIFIERS_NVLINK_ERROR_RECOVERY_REQUIRED,
                                NULL, 0, 0, (NvV16)NV2080_CTRL_NVLINK_UNIT_DL);

        return NV_OK;
#endif
    }

#if defined(INCLUDE_NVLINK_LIB)
    //
    // If this is a slave endpoint requesting the retrain, kick off a request
    // to the master instead. There is no need to (and indeed, we should not)
    // hold the master endpoint lock here.
    //
    if (!pKernelNvlink->nvlinkLinks[linkId].core_link->master)
    {
        nvlink_link_change *link_change;
        nvlink_link *slave, *master;

        slave = pKernelNvlink->nvlinkLinks[linkId].core_link;
        if (nvlink_lib_get_link_master(slave, &master) != NVL_SUCCESS)
        {
            NV_PRINTF(LEVEL_ERROR,
                      "link master could not be found from GPU%u link %u\n",
                      gpuGetInstance(pGpu), linkId);

            return NV_ERR_INVALID_STATE;
        }

        NV_ASSERT_OR_RETURN(master != slave, NV_ERR_INVALID_STATE);

        link_change = &slave->link_change;
        link_change->slave = slave;
        link_change->master = master;
        link_change->change_type = bFromOff ? nvlink_retrain_from_off :
                                              nvlink_retrain_from_safe;

        if (master->link_handlers->queue_link_change(link_change) != NVL_SUCCESS)
        {
            return NV_ERR_GENERIC;
        }

        //
        // Because the link retrain request to the master is asynchronous,
        // tell the caller they'll need to wait.
        //
        return NV_WARN_MORE_PROCESSING_REQUIRED;
    }
#endif

    if (bFromOff)
    {
        status = knvlinkRetrainLinkFromOff(pGpu, pKernelNvlink, linkId);
    }
    else
    {
        status = knvlinkRetrainLinkFromSafe(pGpu, pKernelNvlink, linkId);
    }

    return status;
}

/*!
 * @brief Floorsweep the nvlink config for the chip
 *
 * @param[in]  pGpu                      OBJGPU pointer
 * @param[in]  pKernelNvlink             KernelNvlink pointer
 * @param[in]  numLinksPerIoctrl         Number of total links found in discovery
 * @param[out] pNumActiveLinksPerIoctrl  Number of links needed to be active
 *
 * @returns On success, returns NV_OK.
 *          On failure, returns NV_ERR_XXX.
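 *
 * Illustrative example (mask values assumed, not taken from any real topology):
 * with enabledLinks = 0x3F, if only links 0-3 report ACTIVE once the core
 * library has floorswept, tmpEnabledLinkMask becomes 0x0F and
 * tmpDisabledLinkMask becomes 0x30; enabledLinks and disconnectedLinkMask are
 * then rewritten to 0x0F and initDisabledLinksMask to 0x30.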
 */
NV_STATUS
knvlinkFloorSweep_IMPL
(
    OBJGPU       *pGpu,
    KernelNvlink *pKernelNvlink,
    NvU32         numLinksPerIoctrl,
    NvU32        *pNumActiveLinksPerIoctrl
)
{

#if defined(INCLUDE_NVLINK_LIB)
    NV_STATUS status = NV_OK;
    NvU32     linkId;
    NvU32     tmpDisabledLinkMask = 0;
    NvU32     tmpEnabledLinkMask  = 0;
    nvlink_conn_info conn_info;

    *pNumActiveLinksPerIoctrl = knvlinkGetNumActiveLinksPerIoctrl(pGpu, pKernelNvlink);
    if (!knvlinkIsFloorSweepingNeeded_HAL(pGpu, pKernelNvlink, *pNumActiveLinksPerIoctrl, numLinksPerIoctrl))
    {
        return NV_OK;
    }

    // This call matters for the discovery path it exercises, not for the connection info it returns
    FOR_EACH_INDEX_IN_MASK(32, linkId, pKernelNvlink->enabledLinks)
    {
        nvlink_lib_discover_and_get_remote_conn_info(
            pKernelNvlink->nvlinkLinks[linkId].core_link, &conn_info, 0);
    }
    FOR_EACH_INDEX_IN_MASK_END;

    //
    // This call must happen before the floorsweep, to cache the NVLink bridge
    // information in physical RM.
    //
    knvlinkDirectConnectCheck_HAL(pGpu, pKernelNvlink);

    // Floorsweeping in the corelib will update the connection info that RM will query below
    (void)nvlink_lib_powerdown_floorswept_links_to_off(pKernelNvlink->pNvlinkDev);

    //
    // If a link in the enabledLinkMask is not trained after floorsweeping,
    // add it to a tmp disabled linkMask
    //

    // Get the link train status for the enabled link masks
    NV2080_CTRL_NVLINK_ARE_LINKS_TRAINED_PARAMS linkTrainedParams;

    portMemSet(&linkTrainedParams, 0, sizeof(linkTrainedParams));
    linkTrainedParams.linkMask    = pKernelNvlink->enabledLinks;
    linkTrainedParams.bActiveOnly = NV_TRUE;

    // Reset timeout to clear any accumulated timeouts from link init
    if (IS_GSP_CLIENT(pGpu))
    {
        threadStateResetTimeout(pGpu);
    }

    status = knvlinkExecGspRmRpc(pGpu, pKernelNvlink,
                                 NV2080_CTRL_CMD_NVLINK_ARE_LINKS_TRAINED,
                                 (void *)&linkTrainedParams,
                                 sizeof(linkTrainedParams));
    if (status != NV_OK)
    {
        NV_PRINTF(LEVEL_ERROR, "Failed to get the link train status for links\n");
        return status;
    }

    //
    // Create a temporary mask of all links that are now enabled,
    // i.e. links classified as being in ACTIVE
    //
    FOR_EACH_INDEX_IN_MASK(32, linkId, pKernelNvlink->enabledLinks)
    {
        if (linkTrainedParams.bIsLinkActive[linkId])
        {
            tmpEnabledLinkMask  |= BIT(linkId);
        }
        else
        {
            tmpDisabledLinkMask |= BIT(linkId);
        }
    }
    FOR_EACH_INDEX_IN_MASK_END;

    // Redo linkMasks based on the search above being the ground truth
    pKernelNvlink->enabledLinks = tmpEnabledLinkMask;

    //
    // Remove any links not in ACTIVE in the tmpEnabledLinkMask from all
    // other link masks, as these have been floorswept by the corelib
    //
    pKernelNvlink->disconnectedLinkMask  = tmpEnabledLinkMask;
    pKernelNvlink->initDisabledLinksMask = tmpDisabledLinkMask;


    status = knvlinkProcessInitDisabledLinks(pGpu, pKernelNvlink);
    if (status != NV_OK)
    {
        NV_ASSERT(status == NV_OK);
        return status;
    }

    // Re-sync the link masks with GSP
    status = knvlinkSyncLinkMasksAndVbiosInfo(pGpu, pKernelNvlink);
    if (status != NV_OK)
    {
        NV_ASSERT(status == NV_OK);
        return status;
    }

    //
    // Assert that the number of links in ACTIVE is always less than
    // or equal to the
number of active links on the chips 1430 // 1431 if(!(nvPopCount32(tmpEnabledLinkMask) <= *pNumActiveLinksPerIoctrl * nvPopCount32(pKernelNvlink->ioctrlMask))) 1432 { 1433 NV_PRINTF(LEVEL_INFO, 1434 "Floorsweeping didn't work! enabledMaskCount: 0x%x and numActiveLinksTotal: 0x%x. Current link info cached in SW: discoveredLinks: 0x%x; enabledLinks:0x%x; disconnectedLinks:0x%x; initDisabledLinksMask:0x%x\n", 1435 nvPopCount32(tmpEnabledLinkMask), *pNumActiveLinksPerIoctrl * nvPopCount32(pKernelNvlink->ioctrlMask), pKernelNvlink->discoveredLinks, pKernelNvlink->enabledLinks, pKernelNvlink->disconnectedLinkMask, pKernelNvlink->initDisabledLinksMask); 1436 1437 return NV_ERR_NOT_READY; 1438 } 1439 1440 pKernelNvlink->bFloorSwept = NV_TRUE; 1441 #endif //INCLUDE_NVLINK_LIB 1442 return NV_OK; 1443 } 1444 1445 /*! 1446 * @brief Retrain the link from OFF state 1447 * 1448 * @param[in] pGpu OBJGPU pointer 1449 * @param[in] pKernelNvlink KernelNvlink pointer 1450 * @param[in] linkId Link ID of the link in question 1451 * 1452 * @returns NV_OK if link retraining was successful 1453 */ 1454 NV_STATUS 1455 knvlinkRetrainLinkFromOff 1456 ( 1457 OBJGPU *pGpu, 1458 KernelNvlink *pKernelNvlink, 1459 NvU32 linkId 1460 ) 1461 { 1462 1463 return NV_OK; 1464 } 1465 1466 /*! 1467 * @brief Retrain the link from SAFE state 1468 * 1469 * @param[in] pGpu OBJGPU pointer 1470 * @param[in] pKernelNvlink KernelNvlink pointer 1471 * @param[in] linkId Link ID of the link in question 1472 * 1473 * @returns NV_OK if link retraining was successful 1474 */ 1475 NV_STATUS 1476 knvlinkRetrainLinkFromSafe 1477 ( 1478 OBJGPU *pGpu, 1479 KernelNvlink *pKernelNvlink, 1480 NvU32 linkId 1481 ) 1482 { 1483 1484 return NV_OK; 1485 } 1486 1487 /*! 1488 * @brief _knvlinkRetrainLinkPrologue currently disables DL interrupts 1489 * 1490 * @param[in] pGpu OBJGPU pointer 1491 * @param[in] pKernelNvlink KernelNvlink pointer 1492 * @param[in] linkId Link ID of the link in question 1493 */ 1494 static void 1495 _knvlinkRetrainLinkPrologue 1496 ( 1497 OBJGPU *pGpu, 1498 KernelNvlink *pKernelNvlink, 1499 NvU32 linkId 1500 ) 1501 { 1502 1503 return; 1504 } 1505 1506 #if defined(INCLUDE_NVLINK_LIB) 1507 1508 /*! 1509 * @brief Activate the connections discovered in topology discovery 1510 * 1511 * @param[in] pGpu OBJGPU pointer 1512 * @param[in] pKernelNvlink KernelNvlink pointer 1513 * @param[in] bCheckDegradedMode Whether to check for degraded mode 1514 * 1515 * @return NV_OK on success 1516 */ 1517 static NV_STATUS 1518 _knvlinkActivateDiscoveredConns 1519 ( 1520 OBJGPU *pGpu, 1521 KernelNvlink *pKernelNvlink, 1522 NvBool bCheckDegradedMode 1523 ) 1524 { 1525 NvU32 initDisconnectedLinkMask = pKernelNvlink->disconnectedLinkMask; 1526 NvU32 switchLinkMasks = 0; 1527 NvBool bPeerUpdated = NV_FALSE; 1528 NV_STATUS status = NV_OK; 1529 NvU32 linkId; 1530 1531 // 1532 // Degraded Mode on LR10+ systems. Check for degraded mode if this was not done before 1533 // and if new connections were discovered from the core library. 
1534 // 1535 if (bCheckDegradedMode) 1536 { 1537 status = knvlinkApplyNvswitchDegradedModeSettings_HAL(pGpu, pKernelNvlink, 1538 &switchLinkMasks); 1539 } 1540 1541 // We only need to look at links that are considered disconnected 1542 FOR_EACH_INDEX_IN_MASK(32, linkId, initDisconnectedLinkMask) 1543 { 1544 if (pKernelNvlink->nvlinkLinks[linkId].remoteEndInfo.bConnected) 1545 { 1546 // This link is now marked connected 1547 pKernelNvlink->disconnectedLinkMask &= ~NVBIT(linkId); 1548 1549 if (pKernelNvlink->nvlinkLinks[linkId].remoteEndInfo.deviceType 1550 == NVLINK_DEVICE_TYPE_GPU) 1551 { 1552 bPeerUpdated = NV_TRUE; 1553 1554 // 1555 // Activate the p2p link. This includes copying the remote device 1556 // information for the remote link and enabling the post topology 1557 // steps on both the ends of the link. 1558 // 1559 // NOTE: HSHUB will nott be setup for the discovered peer link here 1560 // and will only be configured when a P2P object is created 1561 // 1562 status = _knvlinkActivateDiscoveredP2pConn(pGpu, pKernelNvlink, linkId); 1563 } 1564 else if (pKernelNvlink->nvlinkLinks[linkId].remoteEndInfo.deviceType 1565 == NVLINK_DEVICE_TYPE_NVSWITCH) 1566 { 1567 status = _knvlinkActivateDiscoveredSwitchConn(pGpu, pKernelNvlink, linkId); 1568 1569 // 1570 // There is no need to mark link as a master. On NVSwitch systems, 1571 // External Fabric Management should be enabled by default. 1572 // 1573 switchLinkMasks |= NVBIT(linkId); 1574 } 1575 else 1576 { 1577 // 1578 // Activate the sysmem link. This includes even training the link to 1579 // ACTIVE, since for sysmem link post-topology steps should be setup 1580 // only after ACTIVE 1581 // 1582 status = _knvlinkActivateDiscoveredSysmemConn(pGpu, pKernelNvlink, linkId); 1583 } 1584 1585 // If any of the above failed, return failure 1586 if (status != NV_OK) 1587 { 1588 NV_PRINTF(LEVEL_ERROR, 1589 "Failed to activate link%d on GPU%d!!!\n", linkId, 1590 pGpu->gpuInstance); 1591 1592 return status; 1593 } 1594 } 1595 } 1596 FOR_EACH_INDEX_IN_MASK_END; 1597 1598 #if defined(NVCPU_PPC64LE) || defined(NVCPU_AARCH64) 1599 if (pKernelNvlink->getProperty(pKernelNvlink, PDB_PROP_KNVLINK_SYSMEM_SUPPORT_ENABLED)) 1600 { 1601 // Credits should be released after Active for sysmem 1602 knvlinkEnableLinksPostTopology_HAL(pGpu, pKernelNvlink, pKernelNvlink->enabledLinks); 1603 if (status != NV_OK) 1604 { 1605 return status; 1606 } 1607 1608 // Enable SYSMEM links in HSHUB. On P9 this must happen after Active 1609 knvlinkUpdateCurrentConfig(pGpu, pKernelNvlink); 1610 } 1611 #endif 1612 1613 // If any new connection was discovered in this call 1614 if (initDisconnectedLinkMask != pKernelNvlink->disconnectedLinkMask) 1615 { 1616 if (pKernelNvlink->disconnectedLinkMask == pKernelNvlink->enabledLinks) //GPU degraded case 1617 { 1618 bPeerUpdated |= _knvlinkUpdateSwitchLinkMasksGpuDegraded(pGpu, pKernelNvlink); 1619 } 1620 else // other cases 1621 { 1622 bPeerUpdated |= _knvlinkUpdateSwitchLinkMasks(pGpu, pKernelNvlink, 1623 switchLinkMasks); 1624 } 1625 1626 _knvlinkPrintTopologySummary(pGpu, pKernelNvlink); 1627 1628 // 1629 // Make sure we update the CE mappings for this GPU, if the known set 1630 // of peers has changed. 1631 // 1632 knvlinkUpdateCurrentConfig(pGpu, pKernelNvlink); 1633 if (bPeerUpdated) 1634 { 1635 // 1636 // Request that any peers updated also update their CE mappings, 1637 // since they now have a new peer. 1638 // 1639 _knvlinkUpdatePeerConfigs(pGpu, pKernelNvlink); 1640 } 1641 } 1642 1643 return status; 1644 } 1645 1646 /*! 
1647 * @brief Activate the given P2P connection 1648 * This function updates the RM state for the discovered P2P connection 1649 * and enables post-topology steps on both ends of the connection. But, 1650 * it does not configure HSHUB on any end of the connection. HSHUB will 1651 * be configured only when a P2P object is created 1652 * 1653 * @param[in] pGpu OBJGPU pointer 1654 * @param[in] pKernelNvlink KernelNvlink pointer 1655 * @param[in] linkId Link ID 1656 * 1657 * @return NV_OK on success 1658 */ 1659 static NV_STATUS 1660 _knvlinkActivateDiscoveredP2pConn 1661 ( 1662 OBJGPU *pGpu, 1663 KernelNvlink *pKernelNvlink, 1664 NvU32 linkId 1665 ) 1666 { 1667 OBJGPU *pGpu0 = pGpu; 1668 OBJGPU *pGpu1 = NULL; 1669 KernelNvlink *pKernelNvlink0 = GPU_GET_KERNEL_NVLINK(pGpu0); 1670 NV_STATUS status = NV_OK; 1671 NvBool bUpdateConnStatus = NV_FALSE; 1672 NvU32 remoteLinkId; 1673 NvU32 gpuInst; 1674 1675 // Get the remote OBJGPU and Nvlink 1676 for (gpuInst = 0; gpuInst < NV_MAX_DEVICES; gpuInst++) 1677 { 1678 pGpu1 = gpumgrGetGpu(gpuInst); 1679 1680 if (pGpu1 && 1681 // Just rely on PCIe DBDF values for detecting the remote 1682 (pKernelNvlink0->nvlinkLinks[linkId].remoteEndInfo.domain == gpuGetDomain(pGpu1)) && 1683 (pKernelNvlink0->nvlinkLinks[linkId].remoteEndInfo.bus == gpuGetBus(pGpu1)) && 1684 (pKernelNvlink0->nvlinkLinks[linkId].remoteEndInfo.device == gpuGetDevice(pGpu1)) && 1685 (pKernelNvlink0->nvlinkLinks[linkId].remoteEndInfo.function == 0)) 1686 { 1687 KernelNvlink *pKernelNvlink1 = GPU_GET_KERNEL_NVLINK(pGpu1); 1688 1689 // Map the remote GPU's instance number to the associated links on this GPU. 1690 status = knvlinkSetLinkMaskToPeer(pGpu0, pKernelNvlink0, pGpu1, 1691 (pKernelNvlink0->peerLinkMasks[gpuInst] | NVBIT(linkId))); 1692 if (status != NV_OK) 1693 return status; 1694 1695 // 1696 // Post Topology enable on the local end of the link. 1697 // Needs to happen before HSHUB is setup for this link on any end. 
1698 // 1699 status = knvlinkEnableLinksPostTopology_HAL(pGpu0, pKernelNvlink0, NVBIT(linkId)); 1700 if (status != NV_OK) 1701 { 1702 return status; 1703 } 1704 1705 // Set the remote device information for the remote device 1706 if (pKernelNvlink1) 1707 { 1708 remoteLinkId = pKernelNvlink0->nvlinkLinks[linkId].remoteEndInfo.linkNumber; 1709 1710 // RPC into GSP-RM to update the link remote connection status only if its required 1711 if (pKernelNvlink1->nvlinkLinks[remoteLinkId].remoteEndInfo.bConnected == NV_FALSE) 1712 bUpdateConnStatus = NV_TRUE; 1713 1714 // Set the PCI information for remote end 1715 pKernelNvlink1->nvlinkLinks[remoteLinkId].remoteEndInfo.bConnected = NV_TRUE; 1716 pKernelNvlink1->nvlinkLinks[remoteLinkId].remoteEndInfo.domain = pKernelNvlink0->pNvlinkDev->pciInfo.domain; 1717 pKernelNvlink1->nvlinkLinks[remoteLinkId].remoteEndInfo.bus = pKernelNvlink0->pNvlinkDev->pciInfo.bus; 1718 pKernelNvlink1->nvlinkLinks[remoteLinkId].remoteEndInfo.device = pKernelNvlink0->pNvlinkDev->pciInfo.device; 1719 pKernelNvlink1->nvlinkLinks[remoteLinkId].remoteEndInfo.function = pKernelNvlink0->pNvlinkDev->pciInfo.function; 1720 pKernelNvlink1->nvlinkLinks[remoteLinkId].remoteEndInfo.pciDeviceId = pKernelNvlink0->pNvlinkDev->pciInfo.pciDeviceId; 1721 pKernelNvlink1->nvlinkLinks[remoteLinkId].remoteEndInfo.deviceType = pKernelNvlink0->pNvlinkDev->type; 1722 pKernelNvlink1->nvlinkLinks[remoteLinkId].remoteEndInfo.chipSid = pKernelNvlink0->nvlinkLinks[linkId].core_link->localSid; 1723 pKernelNvlink1->nvlinkLinks[remoteLinkId].remoteEndInfo.linkNumber = linkId; 1724 1725 // Update the DLPL revision in the connection information 1726 pKernelNvlink0->nvlinkLinks[linkId].remoteEndInfo.ipVerDlPl = pKernelNvlink1->nvlinkLinks[remoteLinkId].ipVerDlPl; 1727 pKernelNvlink1->nvlinkLinks[remoteLinkId].remoteEndInfo.ipVerDlPl = pKernelNvlink0->nvlinkLinks[linkId].ipVerDlPl; 1728 1729 if (bUpdateConnStatus) 1730 { 1731 // RPC into GSP-RM to update the link remote connection status for pGpu1 for the given link 1732 status = knvlinkUpdateLinkConnectionStatus(pGpu1, pKernelNvlink1, remoteLinkId); 1733 if (status != NV_OK) 1734 { 1735 return status; 1736 } 1737 } 1738 1739 pKernelNvlink1->disconnectedLinkMask &= ~NVBIT(remoteLinkId); 1740 1741 // Map this GPU's instance number to the associated link on the remote end. 1742 status = knvlinkSetLinkMaskToPeer(pGpu1, pKernelNvlink1, pGpu0, 1743 (pKernelNvlink1->peerLinkMasks[gpuGetInstance(pGpu0)] | NVBIT(remoteLinkId))); 1744 if (status != NV_OK) 1745 return status; 1746 1747 // 1748 // Post Topology enable on the remote end of the link. 1749 // Needs to happen before HSHUB is setup for this link on any end. 1750 // 1751 status = knvlinkEnableLinksPostTopology_HAL(pGpu1, pKernelNvlink1, NVBIT(remoteLinkId)); 1752 if (status != NV_OK) 1753 { 1754 return status; 1755 } 1756 1757 // Set the deviceUUID 1758 portMemCopy(pKernelNvlink1->nvlinkLinks[remoteLinkId].remoteEndInfo.devUuid, 1759 NV_UUID_LEN, 1760 pGpu0->gpuUuid.uuid, 1761 NV_UUID_LEN); 1762 1763 // 1764 // The master of a GPU <-> GPU link depends on instance number. This is so that when locking 1765 // (which requires the master to be locked before the slave), the lower GPU instance number 1766 // will always be locked first, which is how rmGpuLocksAcquire acquires them. For loopback, 1767 // fall back to link ID instead. 
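            // For example (illustrative): for a link between GPU instance 0 and
            // GPU instance 2, the endpoint on GPU instance 0 is made the link
            // master; in loopback, the endpoint with the lower link ID becomes it.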
1768 // 1769 if ((gpuGetInstance(pGpu0) < gpuGetInstance(pGpu1)) || 1770 ((gpuGetInstance(pGpu0) == gpuGetInstance(pGpu1)) && 1771 (linkId < remoteLinkId))) 1772 { 1773 NV_ASSERT(NVL_SUCCESS == nvlink_lib_set_link_master( 1774 pKernelNvlink0->nvlinkLinks[linkId].core_link)); 1775 } 1776 else if ((gpuGetInstance(pGpu1) < gpuGetInstance(pGpu0)) || 1777 ((gpuGetInstance(pGpu1) == gpuGetInstance(pGpu0)) && 1778 (remoteLinkId < linkId))) 1779 { 1780 NV_ASSERT(NVL_SUCCESS == nvlink_lib_set_link_master( 1781 pKernelNvlink1->nvlinkLinks[remoteLinkId].core_link)); 1782 } 1783 1784 break; 1785 } 1786 } 1787 } 1788 1789 return status; 1790 } 1791 1792 /*! 1793 * @brief Activate the given switch connection 1794 * 1795 * @param[in] pGpu OBJGPU pointer 1796 * @param[in] pKernelNvlink KernelNvlink pointer 1797 * @param[in] linkId Link ID 1798 * 1799 * @return NV_OK on success 1800 */ 1801 static NV_STATUS 1802 _knvlinkActivateDiscoveredSwitchConn 1803 ( 1804 OBJGPU *pGpu, 1805 KernelNvlink *pKernelNvlink, 1806 NvU32 linkId 1807 ) 1808 { 1809 NV_STATUS status = NV_OK; 1810 1811 // Post Topology enablement for switch links 1812 status = knvlinkEnableLinksPostTopology_HAL(pGpu, pKernelNvlink, NVBIT(linkId)); 1813 if (status != NV_OK) 1814 { 1815 return status; 1816 } 1817 1818 return NV_OK; 1819 } 1820 1821 /*! 1822 * @brief Activate the given P2P connection 1823 * This function updates the RM state for the discovered sysmem 1824 * connection and trains the connection to ACTIVE, because, for 1825 * sysmem link post-topology steps can only be configured after 1826 * ACTIVE. HSHUB is also configured for sysmem link here. 1827 * 1828 * @param[in] pGpu OBJGPU pointer 1829 * @param[in] pKernelNvlink KernelNvlink pointer 1830 * @param[in] linkId Link ID 1831 * 1832 * @return NV_OK on success 1833 */ 1834 static NV_STATUS 1835 _knvlinkActivateDiscoveredSysmemConn 1836 ( 1837 OBJGPU *pGpu, 1838 KernelNvlink *pKernelNvlink, 1839 NvU32 linkId 1840 ) 1841 { 1842 NV_STATUS status = NV_OK; 1843 1844 NV2080_CTRL_NVLINK_UPDATE_HSHUB_MUX_PARAMS updateHshubMuxParams; 1845 NV2080_CTRL_NVLINK_SETUP_NVLINK_SYSMEM_PARAMS nvlinkSysmemParams; 1846 1847 pKernelNvlink->sysmemLinkMask |= NVBIT(linkId); 1848 1849 portMemSet(&nvlinkSysmemParams, 0, sizeof(nvlinkSysmemParams)); 1850 nvlinkSysmemParams.sysmemLinkMask = pKernelNvlink->sysmemLinkMask; 1851 1852 status = knvlinkExecGspRmRpc(pGpu, pKernelNvlink, 1853 NV2080_CTRL_CMD_NVLINK_SETUP_NVLINK_SYSMEM, 1854 (void *)&nvlinkSysmemParams, 1855 sizeof(nvlinkSysmemParams)); 1856 if (status != NV_OK) 1857 { 1858 NV_PRINTF(LEVEL_ERROR, "Failed to setup HSHUB NVLink sysmem links state\n"); 1859 return status; 1860 } 1861 1862 // Always make the GPU side the master for NPU connections 1863 NV_ASSERT(NVL_SUCCESS == nvlink_lib_set_link_master( 1864 pKernelNvlink->nvlinkLinks[linkId].core_link)); 1865 1866 // Train SYSMEM links to Active, and only then enable traffic 1867 status = knvlinkTrainSysmemLinksToActive(pGpu, pKernelNvlink); 1868 if (status != NV_OK) 1869 { 1870 NV_PRINTF(LEVEL_ERROR, 1871 "FAILED TO TRAIN CPU/SYSMEM LINKS TO ACTIVE on GPU%d!!!\n", 1872 pGpu->gpuInstance); 1873 1874 NV_ASSERT(0); 1875 } 1876 1877 portMemSet(&updateHshubMuxParams, 0, sizeof(updateHshubMuxParams)); 1878 updateHshubMuxParams.updateType = NV2080_CTRL_NVLINK_UPDATE_HSHUB_MUX_TYPE_PROGRAM; 1879 updateHshubMuxParams.bSysMem = NV_TRUE; 1880 1881 status = knvlinkExecGspRmRpc(pGpu, pKernelNvlink, 1882 NV2080_CTRL_CMD_NVLINK_UPDATE_HSHUB_MUX, 1883 (void *)&updateHshubMuxParams, 1884 sizeof(updateHshubMuxParams)); 
/*!
 * @brief Activate the given switch connection
 *
 * @param[in]  pGpu           OBJGPU pointer
 * @param[in]  pKernelNvlink  KernelNvlink pointer
 * @param[in]  linkId         Link ID
 *
 * @return  NV_OK on success
 */
static NV_STATUS
_knvlinkActivateDiscoveredSwitchConn
(
    OBJGPU       *pGpu,
    KernelNvlink *pKernelNvlink,
    NvU32         linkId
)
{
    NV_STATUS status = NV_OK;

    // Post Topology enablement for switch links
    status = knvlinkEnableLinksPostTopology_HAL(pGpu, pKernelNvlink, NVBIT(linkId));
    if (status != NV_OK)
    {
        return status;
    }

    return NV_OK;
}

/*!
 * @brief Activate the given sysmem connection
 *        This function updates the RM state for the discovered sysmem
 *        connection and trains the connection to ACTIVE because, for
 *        sysmem links, post-topology steps can only be configured after
 *        ACTIVE. HSHUB is also configured for the sysmem link here.
 *
 * @param[in]  pGpu           OBJGPU pointer
 * @param[in]  pKernelNvlink  KernelNvlink pointer
 * @param[in]  linkId         Link ID
 *
 * @return  NV_OK on success
 */
static NV_STATUS
_knvlinkActivateDiscoveredSysmemConn
(
    OBJGPU       *pGpu,
    KernelNvlink *pKernelNvlink,
    NvU32         linkId
)
{
    NV_STATUS status = NV_OK;

    NV2080_CTRL_NVLINK_UPDATE_HSHUB_MUX_PARAMS    updateHshubMuxParams;
    NV2080_CTRL_NVLINK_SETUP_NVLINK_SYSMEM_PARAMS nvlinkSysmemParams;

    pKernelNvlink->sysmemLinkMask |= NVBIT(linkId);

    portMemSet(&nvlinkSysmemParams, 0, sizeof(nvlinkSysmemParams));
    nvlinkSysmemParams.sysmemLinkMask = pKernelNvlink->sysmemLinkMask;

    status = knvlinkExecGspRmRpc(pGpu, pKernelNvlink,
                                 NV2080_CTRL_CMD_NVLINK_SETUP_NVLINK_SYSMEM,
                                 (void *)&nvlinkSysmemParams,
                                 sizeof(nvlinkSysmemParams));
    if (status != NV_OK)
    {
        NV_PRINTF(LEVEL_ERROR, "Failed to setup HSHUB NVLink sysmem links state\n");
        return status;
    }

    // Always make the GPU side the master for NPU connections
    NV_ASSERT(NVL_SUCCESS == nvlink_lib_set_link_master(
        pKernelNvlink->nvlinkLinks[linkId].core_link));

    // Train SYSMEM links to Active, and only then enable traffic
    status = knvlinkTrainSysmemLinksToActive(pGpu, pKernelNvlink);
    if (status != NV_OK)
    {
        NV_PRINTF(LEVEL_ERROR,
                  "FAILED TO TRAIN CPU/SYSMEM LINKS TO ACTIVE on GPU%d!!!\n",
                  pGpu->gpuInstance);

        NV_ASSERT(0);
    }

    portMemSet(&updateHshubMuxParams, 0, sizeof(updateHshubMuxParams));
    updateHshubMuxParams.updateType = NV2080_CTRL_NVLINK_UPDATE_HSHUB_MUX_TYPE_PROGRAM;
    updateHshubMuxParams.bSysMem    = NV_TRUE;

    status = knvlinkExecGspRmRpc(pGpu, pKernelNvlink,
                                 NV2080_CTRL_CMD_NVLINK_UPDATE_HSHUB_MUX,
                                 (void *)&updateHshubMuxParams,
                                 sizeof(updateHshubMuxParams));

    return status;
}
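//
// Sysmem activation ordering (summary of the function above): the link is first
// recorded in sysmemLinkMask and HSHUB sysmem state is set up over RPC, the GPU
// side is made the link master, the link is trained to ACTIVE, and only then is
// the HSHUB MUX programmed for sysmem traffic, since post-topology steps can
// only be configured once the link is ACTIVE.
//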
/*!
 * @brief Transition the mask of links into sleep (L2) state
 *
 * @param[in]  pGpu           OBJGPU pointer
 * @param[in]  pKernelNvlink  KernelNvlink pointer
 * @param[in]  linkMask       Mask of links
 *
 * @return  NV_OK on success
 */
static NV_STATUS
_knvlinkEnterSleep
(
    OBJGPU       *pGpu,
    KernelNvlink *pKernelNvlink,
    NvU32         linkMask
)
{
    NV_STATUS retStatus = NV_OK;
    NvlStatus status    = NVL_SUCCESS;

    NV2080_CTRL_NVLINK_PROGRAM_BUFFERREADY_PARAMS      programBufferRdyParams;
    NV2080_CTRL_NVLINK_SAVE_RESTORE_HSHUB_STATE_PARAMS saveRestoreHshubStateParams;

    portMemSet(&programBufferRdyParams, 0, sizeof(programBufferRdyParams));
    programBufferRdyParams.flags        = NV2080_CTRL_NVLINK_PROGRAM_BUFFERREADY_FLAGS_SAVE;
    programBufferRdyParams.bSysmem      = NV_FALSE;
    programBufferRdyParams.peerLinkMask = linkMask;

    // Save Bufferready state for the mask of links entering L2
    status = knvlinkExecGspRmRpc(pGpu, pKernelNvlink,
                                 NV2080_CTRL_CMD_NVLINK_PROGRAM_BUFFERREADY,
                                 (void *)&programBufferRdyParams,
                                 sizeof(programBufferRdyParams));
    if (status != NV_OK)
        return status;

    portMemSet(&saveRestoreHshubStateParams, 0, sizeof(saveRestoreHshubStateParams));
    saveRestoreHshubStateParams.linkMask = linkMask;
    saveRestoreHshubStateParams.bSave    = NV_TRUE;

    // Save HSHUB SW state for the links which will need to be restored later
    status = knvlinkExecGspRmRpc(pGpu, pKernelNvlink,
                                 NV2080_CTRL_CMD_NVLINK_SAVE_RESTORE_HSHUB_STATE,
                                 (void *)&saveRestoreHshubStateParams,
                                 sizeof(saveRestoreHshubStateParams));
    if (status != NV_OK)
        return status;

    // In L2 Entry path
    pKernelNvlink->bL2Entry = NV_TRUE;

    // Put the mask of links of the device to sleep
    status = nvlink_lib_powerdown_links_from_active_to_L2(pKernelNvlink->pNvlinkDev,
                                                          linkMask,
                                                          NVLINK_STATE_CHANGE_ASYNC);
    if (status == NVL_MORE_PROCESSING_REQUIRED)
    {
        NV_PRINTF(LEVEL_INFO,
                  "Transition to L2 for GPU%d: linkMask 0x%x in progress... Waiting for "
                  "remote endpoints to request L2 entry\n", pGpu->gpuInstance,
                  linkMask);

        return NV_WARN_MORE_PROCESSING_REQUIRED;
    }

    if (status != NVL_SUCCESS)
    {
        NV_PRINTF(LEVEL_ERROR,
                  "Unable to put the linkmask 0x%x of GPU%d to SLEEP\n",
                  linkMask, pGpu->gpuInstance);

        return NV_ERR_GENERIC;
    }

    return retStatus;
}
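//
// Return-value handling for _knvlinkEnterSleep (sketch, with a hypothetical
// caller): NV_WARN_MORE_PROCESSING_REQUIRED means the local request was issued
// asynchronously and L2 entry completes only once the remote endpoints also
// request it, so it is a warning rather than a failure. A caller might do:
//
//     status = _knvlinkEnterSleep(pGpu, pKernelNvlink, linkMask);
//     if (status == NV_WARN_MORE_PROCESSING_REQUIRED)
//     {
//         // L2 entry is pending on the remote end; not treated as an error here
//         status = NV_OK;
//     }
//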
/*!
 * @brief Wakeup the mask of links from sleep (L2) state
 *
 * @param[in]  pGpu           OBJGPU pointer
 * @param[in]  pKernelNvlink  KernelNvlink pointer
 * @param[in]  linkMask       Mask of links
 *
 * @return  NV_OK on success
 */
static NV_STATUS
_knvlinkExitSleep
(
    OBJGPU       *pGpu,
    KernelNvlink *pKernelNvlink,
    NvU32         linkMask
)
{
    NvlStatus status         = NVL_SUCCESS;
    NvlStatus trainingStatus = NVL_SUCCESS;
    NvU32     linkId;
    NvU32     remoteLinkId;
    NvU32     gpuInst;
    RMTIMEOUT timeout;
    NvU32     linkTrainingTimeout = 10000000;

    NV2080_CTRL_NVLINK_PROGRAM_BUFFERREADY_PARAMS      programBufferRdyParams;
    NV2080_CTRL_NVLINK_SAVE_RESTORE_HSHUB_STATE_PARAMS saveRestoreHshubStateParams;

    pKernelNvlink->bL2Entry = NV_FALSE;

    // Kick-off ALI if it is enabled
    if (pKernelNvlink->bEnableAli)
    {
        //
        // For each link, request a change to active.
        // There is no need to wait for the request to finish, as the links
        // will be queried via DLSTAT to know their status and training
        // progression.
        //
        FOR_EACH_INDEX_IN_MASK(32, linkId, linkMask)
        {
            status = knvlinkTrainLinksToActiveAli(pGpu, pKernelNvlink, NVBIT(linkId), NV_FALSE);
            if (status != NV_OK)
            {
                NV_PRINTF(LEVEL_ERROR,
                          "Failed to request Link %d to transition to active\n", linkId);
            }
#if defined(INCLUDE_NVLINK_LIB)
            pKernelNvlink->nvlinkLinks[linkId].core_link->bStateSaved = NV_FALSE;
#endif
        }
        FOR_EACH_INDEX_IN_MASK_END;

        //
        // Get all links that are past RxDet after L2 exit and poll on those
        // links till they reach active
        //
        if (knvlinkDiscoverPostRxDetLinks_HAL(pGpu, pKernelNvlink, pGpu) == NV_OK)
        {
            gpuSetTimeout(pGpu, linkTrainingTimeout, &timeout, IS_SILICON(pGpu) ?
                (GPU_TIMEOUT_FLAGS_BYPASS_THREAD_STATE | GPU_TIMEOUT_FLAGS_DEFAULT) : 0);
            do
            {
                status = gpuCheckTimeout(pGpu, &timeout);
                trainingStatus = knvlinkCheckTrainingIsComplete(pGpu, pGpu, pKernelNvlink);
                if (trainingStatus == NV_OK)
                {
                    break;
                }
                osSpinLoop();
            }
            while (status != NV_ERR_TIMEOUT);

            if (status == NV_ERR_TIMEOUT)
            {
                NV_PRINTF(LEVEL_ERROR, "Timed out while checking if training is complete!\n");
            }
        }
    }
    else
    {
        // Wakeup the mask of links of the device from sleep using legacy L2 exit
        status = nvlink_lib_train_links_from_L2_to_active(pKernelNvlink->pNvlinkDev,
                                                          linkMask,
                                                          NVLINK_STATE_CHANGE_ASYNC);
    }

    if (status == NVL_SUCCESS)
    {
        // Perform post-initialization setup for links that exited L2
        FOR_EACH_INDEX_IN_MASK(32, linkId, linkMask)
        {
            // Post topology link enable for pre-Ampere. This sets up buffer ready
            status = knvlinkEnableLinksPostTopology_HAL(pGpu, pKernelNvlink, NVBIT(linkId));
            if (status != NV_OK)
            {
                return status;
            }

            // Update the current NVLink configuration
            knvlinkUpdateCurrentConfig(pGpu, pKernelNvlink);

            // Perform post-topology initialization steps on the remote endpoint
            if (pKernelNvlink->nvlinkLinks[linkId].remoteEndInfo.deviceType == NVLINK_DEVICE_TYPE_GPU)
            {
                OBJGPU       *pGpu1          = NULL;
                KernelNvlink *pKernelNvlink1 = NULL;

                // Get the remote OBJGPU and Nvlink
                for (gpuInst = 0; gpuInst < NV_MAX_DEVICES; gpuInst++)
                {
                    pGpu1 = gpumgrGetGpu(gpuInst);

                    if (pGpu1 &&
                        (pKernelNvlink->nvlinkLinks[linkId].remoteEndInfo.domain   == gpuGetDomain(pGpu1) &&
                         pKernelNvlink->nvlinkLinks[linkId].remoteEndInfo.bus      == gpuGetBus(pGpu1)    &&
                         pKernelNvlink->nvlinkLinks[linkId].remoteEndInfo.device   == gpuGetDevice(pGpu1) &&
                         pKernelNvlink->nvlinkLinks[linkId].remoteEndInfo.function == 0))
                    {
                        pKernelNvlink1 = GPU_GET_KERNEL_NVLINK(pGpu1);
                        remoteLinkId   = pKernelNvlink->nvlinkLinks[linkId].remoteEndInfo.linkNumber;

                        // Post topology link enable for pre-Ampere. This sets up buffer ready
                        status = knvlinkEnableLinksPostTopology_HAL(pGpu1, pKernelNvlink1, NVBIT(remoteLinkId));
                        if (status != NV_OK)
                        {
                            return status;
                        }

                        // Update the current NVLink configuration
                        knvlinkUpdateCurrentConfig(pGpu1, pKernelNvlink1);

                        break;
                    }
                }
            }
        }
        FOR_EACH_INDEX_IN_MASK_END;
    }

    //
    // Restore HSHUB *ONLY AFTER* links have been trained and post-topology setup is complete
    // on both ends of the link. Only then can HSHUB be configured for P2P on either side of the link.
    //
    if (status == NVL_SUCCESS)
    {
        portMemSet(&saveRestoreHshubStateParams, 0, sizeof(saveRestoreHshubStateParams));
        saveRestoreHshubStateParams.linkMask = linkMask;
        saveRestoreHshubStateParams.bSave    = NV_FALSE;

        // Restore HSHUB SW state for the links which exited L2 state
        status = knvlinkExecGspRmRpc(pGpu, pKernelNvlink,
                                     NV2080_CTRL_CMD_NVLINK_SAVE_RESTORE_HSHUB_STATE,
                                     (void *)&saveRestoreHshubStateParams,
                                     sizeof(saveRestoreHshubStateParams));
        if (status != NV_OK)
            return status;

        knvlinkUpdateCurrentConfig(pGpu, pKernelNvlink);

        portMemSet(&programBufferRdyParams, 0, sizeof(programBufferRdyParams));
        programBufferRdyParams.flags        = NV2080_CTRL_NVLINK_PROGRAM_BUFFERREADY_FLAGS_RESTORE;
        programBufferRdyParams.bSysmem      = NV_FALSE;
        programBufferRdyParams.peerLinkMask = linkMask;

        // Restore Bufferready state for the links which exited L2 state
        status = knvlinkExecGspRmRpc(pGpu, pKernelNvlink,
                                     NV2080_CTRL_CMD_NVLINK_PROGRAM_BUFFERREADY,
                                     (void *)&programBufferRdyParams,
                                     sizeof(programBufferRdyParams));
        if (status != NV_OK)
            return status;

        FOR_EACH_INDEX_IN_MASK(32, linkId, linkMask)
        {
            if (pKernelNvlink->nvlinkLinks[linkId].remoteEndInfo.deviceType == NVLINK_DEVICE_TYPE_GPU)
            {
                OBJGPU       *pGpu1          = NULL;
                KernelNvlink *pKernelNvlink1 = NULL;

                // Get the remote OBJGPU and Nvlink
                for (gpuInst = 0; gpuInst < NV_MAX_DEVICES; gpuInst++)
                {
                    pGpu1 = gpumgrGetGpu(gpuInst);

                    if (pGpu1 &&
                        (pKernelNvlink->nvlinkLinks[linkId].remoteEndInfo.domain   == gpuGetDomain(pGpu1) &&
                         pKernelNvlink->nvlinkLinks[linkId].remoteEndInfo.bus      == gpuGetBus(pGpu1)    &&
                         pKernelNvlink->nvlinkLinks[linkId].remoteEndInfo.device   == gpuGetDevice(pGpu1) &&
                         pKernelNvlink->nvlinkLinks[linkId].remoteEndInfo.function == 0))
                    {
                        pKernelNvlink1 = GPU_GET_KERNEL_NVLINK(pGpu1);
                        remoteLinkId   = pKernelNvlink->nvlinkLinks[linkId].remoteEndInfo.linkNumber;

                        portMemSet(&saveRestoreHshubStateParams, 0, sizeof(saveRestoreHshubStateParams));
                        saveRestoreHshubStateParams.linkMask = NVBIT(remoteLinkId);
                        saveRestoreHshubStateParams.bSave    = NV_FALSE;

                        // Restore HSHUB SW state for the links which exited L2 state
                        status = knvlinkExecGspRmRpc(pGpu1, pKernelNvlink1,
                                                     NV2080_CTRL_CMD_NVLINK_SAVE_RESTORE_HSHUB_STATE,
                                                     (void *)&saveRestoreHshubStateParams,
                                                     sizeof(saveRestoreHshubStateParams));
                        if (status != NV_OK)
                            return status;

                        knvlinkUpdateCurrentConfig(pGpu1, pKernelNvlink1);

                        portMemSet(&programBufferRdyParams, 0, sizeof(programBufferRdyParams));
                        programBufferRdyParams.flags        = NV2080_CTRL_NVLINK_PROGRAM_BUFFERREADY_FLAGS_RESTORE;
                        programBufferRdyParams.bSysmem      = NV_FALSE;
                        programBufferRdyParams.peerLinkMask = NVBIT(remoteLinkId);

                        //
                        // Restore Buffer Ready state for the links from cached SW state after HSHUB
                        // settings have been restored
                        //
                        status = knvlinkExecGspRmRpc(pGpu1, pKernelNvlink1,
                                                     NV2080_CTRL_CMD_NVLINK_PROGRAM_BUFFERREADY,
                                                     (void *)&programBufferRdyParams,
                                                     sizeof(programBufferRdyParams));
                        if (status != NV_OK)
                            return status;

                        break;
                    }
                }
            }
        }
        FOR_EACH_INDEX_IN_MASK_END;
    }

    if (status == NVL_MORE_PROCESSING_REQUIRED)
    {
        NV_PRINTF(LEVEL_INFO,
                  "Transition to L0 for GPU%d: linkMask 0x%x in progress... Waiting for "
                  "remote endpoints to request L2 exit\n", pGpu->gpuInstance,
                  linkMask);

        return NV_WARN_MORE_PROCESSING_REQUIRED;
    }

    if (status != NVL_SUCCESS)
    {
        NV_PRINTF(LEVEL_ERROR,
                  "Unable to wakeup the linkmask 0x%x of GPU%d from SLEEP\n",
                  linkMask, pGpu->gpuInstance);

        return NV_ERR_GENERIC;
    }

    return NV_OK;
}
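//
// The ALI path above uses a common RM polling pattern: arm an RMTIMEOUT with
// gpuSetTimeout(), then loop on gpuCheckTimeout() and
// knvlinkCheckTrainingIsComplete(), calling osSpinLoop() between iterations,
// until training completes or the timeout expires. A minimal sketch of the same
// pattern with a generic condition (conditionIsMet is a hypothetical predicate):
//
//     gpuSetTimeout(pGpu, timeoutUs, &timeout, 0);
//     do
//     {
//         status = gpuCheckTimeout(pGpu, &timeout);
//         if (conditionIsMet(pGpu))
//             break;
//         osSpinLoop();
//     } while (status != NV_ERR_TIMEOUT);
//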
/*!
 * @brief Updates GPU peer info (peerMask) based on switchLinkMasks
 *
 * @param[in]  pGpu             OBJGPU pointer
 * @param[in]  pKernelNvlink    KernelNvlink pointer
 * @param[in]  switchLinkMasks  Mask of switch links
 *
 * @return  Returns NV_TRUE if peerMask is updated
 */
static NvBool
_knvlinkUpdateSwitchLinkMasks
(
    OBJGPU       *pGpu,
    KernelNvlink *pKernelNvlink,
    NvU32         switchLinkMasks
)
{
    KernelNvlink *pKernelNvlink1 = NULL;
    OBJGPU       *pGpu1          = NULL;
    NvBool        bPeerUpdated   = NV_FALSE;
    NV_STATUS     status         = NV_OK;
    NvU32         gpuInst;

    //
    // On NvSwitch systems, all the enabled and connected GPU links should
    // go through NvSwitch. We don't support GPU<->GPU or GPU<->NPU direct
    // connections on NvSwitch systems.
    //
    if (!knvlinkIsGpuConnectedToNvswitch(pGpu, pKernelNvlink))
    {
        return bPeerUpdated;
    }

    for (gpuInst = 0; gpuInst < NV_MAX_DEVICES; gpuInst++)
    {
        pGpu1 = gpumgrGetGpu(gpuInst);
        if (!pGpu1)
        {
            continue;
        }

        // No support for SLI P2P on nvswitch systems.
        if (IsSLIEnabled(pGpu1))
        {
            continue;
        }

        pKernelNvlink1 = GPU_GET_KERNEL_NVLINK(pGpu1);

        if (!pKernelNvlink1)
        {
            continue;
        }

        if (!pKernelNvlink1->discoveredLinks)
        {
            continue;
        }

        if (!knvlinkIsGpuConnectedToNvswitch(pGpu1, pKernelNvlink1))
        {
            continue;
        }

        // Update local peerLinkMasks.
        status = knvlinkSetLinkMaskToPeer(pGpu, pKernelNvlink, pGpu1, switchLinkMasks);
        if (status != NV_OK)
            return NV_FALSE;

        //
        // Update remote peerLinkMasks only if a remote endpoint is connected.
        //
        // We are deliberately picking up the loopback peerLinkMask, because it
        // represents the actual nvswitch connection mask for that GPU and
        // guarantees that the endpoint is connected to nvswitch.
        //
        status = knvlinkSetLinkMaskToPeer(pGpu1, pKernelNvlink1, pGpu,
                                          pKernelNvlink1->peerLinkMasks[gpuGetInstance(pGpu1)]);
        if (status != NV_OK)
            return NV_FALSE;

        bPeerUpdated = NV_TRUE;
    }

    return bPeerUpdated;
}
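//
// peerLinkMasks is indexed by the remote GPU instance, and the loopback entry
// (a GPU's own instance) holds the mask of its links wired to the NvSwitch
// fabric. Illustrative example with hypothetical values: if GPU1's
// peerLinkMasks[1] is 0x3f, then when GPU0 comes up the loop above programs
// GPU0 -> GPU1 with switchLinkMasks and GPU1 -> GPU0 with 0x3f.
//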
/*!
 * @brief Updates GPU peer info (peerMask) when a GPU is degraded
 *
 * @param[in]  pGpu           OBJGPU pointer
 * @param[in]  pKernelNvlink  KernelNvlink pointer
 *
 * @return  Returns NV_TRUE if peerMask is updated
 */
static NvBool
_knvlinkUpdateSwitchLinkMasksGpuDegraded
(
    OBJGPU       *pGpu,
    KernelNvlink *pKernelNvlink
)
{
    KernelNvlink *pKernelNvlink1 = NULL;
    OBJGPU       *pGpu1          = NULL;
    NvBool        bPeerUpdated   = NV_FALSE;
    NV_STATUS     status         = NV_OK;
    NvU32         gpuInst;

    for (gpuInst = 0; gpuInst < NV_MAX_DEVICES; gpuInst++)
    {
        pGpu1 = gpumgrGetGpu(gpuInst);
        if (!pGpu1)
        {
            continue;
        }

        // No support for SLI P2P on nvswitch systems.
        if (IsSLIEnabled(pGpu1))
        {
            continue;
        }

        pKernelNvlink1 = GPU_GET_KERNEL_NVLINK(pGpu1);

        if (!pKernelNvlink1)
        {
            continue;
        }

        if (!pKernelNvlink1->discoveredLinks)
        {
            continue;
        }

        if (!knvlinkIsGpuConnectedToNvswitch(pGpu1, pKernelNvlink1))
        {
            continue;
        }

        // Clear local peerLinkMasks for the degraded GPU.
        status = knvlinkSetLinkMaskToPeer(pGpu, pKernelNvlink, pGpu1, 0);
        if (status != NV_OK)
            return NV_FALSE;

        // Clear the remote peerLinkMasks pointing back at the degraded GPU.
        status = knvlinkSetLinkMaskToPeer(pGpu1, pKernelNvlink1, pGpu, 0);
        if (status != NV_OK)
            return NV_FALSE;

        bPeerUpdated = NV_TRUE;
    }

    return bPeerUpdated;
}

/*!
 * For each known peer, update their configurations, now that another
 * one of their peers (this GPU) has been initialized.
 *
 * This will update the PCE-LCE mappings, but it will not trigger any
 * HSHUB updates since peer IDs shouldn't have been allocated at this
 * point.
 *
 * @param[in]  pGpu           OBJGPU pointer
 * @param[in]  pKernelNvlink  KernelNvlink pointer
 */
static void
_knvlinkUpdatePeerConfigs
(
    OBJGPU       *pGpu,
    KernelNvlink *pKernelNvlink
)
{
    NvU32 gpuInst;

    for (gpuInst = 0; gpuInst < NV_ARRAY_ELEMENTS(pKernelNvlink->peerLinkMasks); gpuInst++)
    {
        if (pKernelNvlink->peerLinkMasks[gpuInst] != 0)
        {
            OBJGPU *pRemoteGpu = gpumgrGetGpu(gpuInst);

            if (pRemoteGpu != NULL)
            {
                KernelNvlink *pRemoteKernelNvlink = GPU_GET_KERNEL_NVLINK(pRemoteGpu);

                if (pRemoteKernelNvlink != NULL)
                {
                    NV_PRINTF(LEVEL_INFO,
                              "GPU%u requesting GPU%u NVLINK config update\n",
                              gpuGetInstance(pGpu),
                              gpuGetInstance(pRemoteGpu));

                    _knvlinkPrintTopologySummary(pRemoteGpu, pRemoteKernelNvlink);

                    // Update CE mappings on remote GPUs since we have new connections
                    knvlinkUpdateCurrentConfig(pRemoteGpu, pRemoteKernelNvlink);
                }
            }
        }
    }
}
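//
// Illustrative example (hypothetical instances): if this GPU is GPU0 and its
// peerLinkMasks entries for instances 1 and 2 are non-zero, the loop above
// prints the cached topology of GPU1 and GPU2 and calls
// knvlinkUpdateCurrentConfig() on each so their PCE-LCE mappings pick up the
// newly initialized peer; HSHUB is left untouched because peer IDs have not
// been allocated yet at this point.
//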
/*!
 * Print the nvlink topology for this GPU
 *
 * @param[in]  pGpu           OBJGPU pointer
 * @param[in]  pKernelNvlink  KernelNvlink pointer
 */
static void
_knvlinkPrintTopologySummary
(
    OBJGPU       *pGpu,
    KernelNvlink *pKernelNvlink
)
{
#if NV_PRINTF_ENABLED

    NvU32     i;
    NV_STATUS status;

    if (DBG_RMMSG_CHECK(LEVEL_INFO) == 0)
    {
        return;
    }

    NV_PRINTF(LEVEL_INFO, "GPU%02u cached topology:\n", gpuGetInstance(pGpu));

    NV2080_CTRL_NVLINK_HSHUB_GET_SYSMEM_NVLINK_MASK_PARAMS params;
    portMemSet(&params, 0, sizeof(params));

    status = knvlinkExecGspRmRpc(pGpu, pKernelNvlink,
                                 NV2080_CTRL_CMD_NVLINK_HSHUB_GET_SYSMEM_NVLINK_MASK,
                                 (void *)&params, sizeof(params));
    if (status != NV_OK)
    {
        NV_PRINTF(LEVEL_ERROR, "Unable to determine sysmem link mask\n");
        return;
    }

    // Print the discovered sysmem links
    if (params.sysmemLinkMask != 0)
    {
        NV_PRINTF(LEVEL_INFO, "  sysmem link mask : 0x%x\n", params.sysmemLinkMask);
    }

    // Print the discovered p2p links
    for (i = 0; i < NV_ARRAY_ELEMENTS(pKernelNvlink->peerLinkMasks); i++)
    {
        if (pKernelNvlink->peerLinkMasks[i] != 0)
        {
            NV_PRINTF(LEVEL_INFO, "  GPU%02u link mask  : 0x%x\n", i,
                      pKernelNvlink->peerLinkMasks[i]);
        }
    }

    // Print the links which do not have a connection yet
    if (pKernelNvlink->disconnectedLinkMask != 0)
    {
        NV_PRINTF(LEVEL_INFO, "  unknown link mask: 0x%x\n",
                  pKernelNvlink->disconnectedLinkMask);
    }

#endif
}

#endif
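//
// Example of the summary printed by _knvlinkPrintTopologySummary at LEVEL_INFO
// (hypothetical mask values, shown only to illustrate the format of the
// NV_PRINTF statements above):
//
//     GPU00 cached topology:
//       sysmem link mask : 0x3
//       GPU01 link mask  : 0xc
//       unknown link mask: 0x30
//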