/*
 * SPDX-FileCopyrightText: Copyright (c) 2020-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 * SPDX-License-Identifier: MIT
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 */

// Must be defined before kernel_nvlink.h is included below.
#define NVOC_KERNEL_NVLINK_H_PRIVATE_ACCESS_ALLOWED

// FIXME XXX
#define NVOC_KERNEL_IOCTRL_H_PRIVATE_ACCESS_ALLOWED

#include "os/os.h"
#include "core/hal.h"
#include "core/info_block.h"
#include "core/locks.h"
#include "core/thread_state.h"
#include "gpu/gpu.h"

#include "kernel/gpu/nvlink/kernel_nvlink.h"
#include "kernel/gpu/nvlink/kernel_ioctrl.h"

#if defined(INCLUDE_NVLINK_LIB)

//
// Forward declaration: the work item is defined after the lock/unlock
// callbacks but is referenced by knvlinkCoreQueueLinkChangeCallback.
//
static void _knvlinkCorePassiveLinkChangeCallback(NvU32, void *);

/*!
 * Compile time assert to ensure NV2080_CTRL_NVLINK_MAX_SEED_BUFFER_SIZE ==
 * NVLINK_MAX_SEED_BUFFER_SIZE
 *
 * The DL link mode callback copies seed data between buffers sized by these
 * two constants, so they have to agree.
 */
ct_assert(NV2080_CTRL_NVLINK_MAX_SEED_BUFFER_SIZE ==
          NVLINK_MAX_SEED_BUFFER_SIZE);

/*!
 * Compile time asserts to ensure NV2080_NVLINK_CORE_LINK_STATE* ==
 * NVLINK_LINKSTATE*
 */
ct_assert(NV2080_NVLINK_CORE_LINK_STATE_OFF ==
          NVLINK_LINKSTATE_OFF);
ct_assert(NV2080_NVLINK_CORE_LINK_STATE_HS ==
          NVLINK_LINKSTATE_HS);
ct_assert(NV2080_NVLINK_CORE_LINK_STATE_SAFE ==
          NVLINK_LINKSTATE_SAFE);
ct_assert(NV2080_NVLINK_CORE_LINK_STATE_FAULT ==
          NVLINK_LINKSTATE_FAULT);
ct_assert(NV2080_NVLINK_CORE_LINK_STATE_RECOVERY ==
          NVLINK_LINKSTATE_RECOVERY);
ct_assert(NV2080_NVLINK_CORE_LINK_STATE_FAIL ==
          NVLINK_LINKSTATE_FAIL);
ct_assert(NV2080_NVLINK_CORE_LINK_STATE_DETECT ==
          NVLINK_LINKSTATE_DETECT);
ct_assert(NV2080_NVLINK_CORE_LINK_STATE_RESET ==
          NVLINK_LINKSTATE_RESET);
ct_assert(NV2080_NVLINK_CORE_LINK_STATE_ENABLE_PM ==
          NVLINK_LINKSTATE_ENABLE_PM);
ct_assert(NV2080_NVLINK_CORE_LINK_STATE_DISABLE_PM ==
          NVLINK_LINKSTATE_DISABLE_PM);
ct_assert(NV2080_NVLINK_CORE_LINK_STATE_SLEEP ==
          NVLINK_LINKSTATE_SLEEP);
ct_assert(NV2080_NVLINK_CORE_LINK_STATE_SAVE_STATE ==
          NVLINK_LINKSTATE_SAVE_STATE);
ct_assert(NV2080_NVLINK_CORE_LINK_STATE_RESTORE_STATE ==
          NVLINK_LINKSTATE_RESTORE_STATE);
ct_assert(NV2080_NVLINK_CORE_LINK_STATE_PRE_HS ==
          NVLINK_LINKSTATE_PRE_HS);
ct_assert(NV2080_NVLINK_CORE_LINK_STATE_DISABLE_ERR_DETECT ==
          NVLINK_LINKSTATE_DISABLE_ERR_DETECT);
ct_assert(NV2080_NVLINK_CORE_LINK_STATE_LANE_DISABLE ==
          NVLINK_LINKSTATE_LANE_DISABLE);
ct_assert(NV2080_NVLINK_CORE_LINK_STATE_LANE_SHUTDOWN ==
          NVLINK_LINKSTATE_LANE_SHUTDOWN);
ct_assert(NV2080_NVLINK_CORE_LINK_STATE_TRAFFIC_SETUP ==
          NVLINK_LINKSTATE_TRAFFIC_SETUP);
ct_assert(NV2080_NVLINK_CORE_LINK_STATE_INITPHASE1 ==
          NVLINK_LINKSTATE_INITPHASE1);
ct_assert(NV2080_NVLINK_CORE_LINK_STATE_INITNEGOTIATE ==
          NVLINK_LINKSTATE_INITNEGOTIATE);
ct_assert(NV2080_NVLINK_CORE_LINK_STATE_POST_INITNEGOTIATE ==
          NVLINK_LINKSTATE_POST_INITNEGOTIATE);
ct_assert(NV2080_NVLINK_CORE_LINK_STATE_INITOPTIMIZE ==
          NVLINK_LINKSTATE_INITOPTIMIZE);
ct_assert(NV2080_NVLINK_CORE_LINK_STATE_POST_INITOPTIMIZE ==
          NVLINK_LINKSTATE_POST_INITOPTIMIZE);
ct_assert(NV2080_NVLINK_CORE_LINK_STATE_DISABLE_HEARTBEAT ==
          NVLINK_LINKSTATE_DISABLE_HEARTBEAT);
ct_assert(NV2080_NVLINK_CORE_LINK_STATE_CONTAIN ==
          NVLINK_LINKSTATE_CONTAIN);
ct_assert(NV2080_NVLINK_CORE_LINK_STATE_INITTL ==
          NVLINK_LINKSTATE_INITTL);
ct_assert(NV2080_NVLINK_CORE_LINK_STATE_INITPHASE5 ==
          NVLINK_LINKSTATE_INITPHASE5);
ct_assert(NV2080_NVLINK_CORE_LINK_STATE_ALI ==
          NVLINK_LINKSTATE_ALI);
ct_assert(NV2080_NVLINK_CORE_LINK_STATE_ACTIVE_PENDING ==
          NVLINK_LINKSTATE_ACTIVE_PENDING);

/*!
 * Compile time asserts to ensure NV2080_NVLINK_CORE_SUBLINK_STATE_TX* ==
 * NVLINK_SUBLINK_STATE_TX*
 */
ct_assert(NV2080_NVLINK_CORE_SUBLINK_STATE_TX_HS ==
          NVLINK_SUBLINK_STATE_TX_HS);
ct_assert(NV2080_NVLINK_CORE_SUBLINK_STATE_TX_SINGLE_LANE ==
          NVLINK_SUBLINK_STATE_TX_SINGLE_LANE);
ct_assert(NV2080_NVLINK_CORE_SUBLINK_STATE_TX_TRAIN ==
          NVLINK_SUBLINK_STATE_TX_TRAIN);
ct_assert(NV2080_NVLINK_CORE_SUBLINK_STATE_TX_SAFE ==
          NVLINK_SUBLINK_STATE_TX_SAFE);
ct_assert(NV2080_NVLINK_CORE_SUBLINK_STATE_TX_OFF ==
          NVLINK_SUBLINK_STATE_TX_OFF);
ct_assert(NV2080_NVLINK_CORE_SUBLINK_STATE_TX_COMMON_MODE ==
          NVLINK_SUBLINK_STATE_TX_COMMON_MODE);
ct_assert(NV2080_NVLINK_CORE_SUBLINK_STATE_TX_COMMON_MODE_DISABLE ==
          NVLINK_SUBLINK_STATE_TX_COMMON_MODE_DISABLE);
ct_assert(NV2080_NVLINK_CORE_SUBLINK_STATE_TX_DATA_READY ==
          NVLINK_SUBLINK_STATE_TX_DATA_READY);
ct_assert(NV2080_NVLINK_CORE_SUBLINK_STATE_TX_EQ ==
          NVLINK_SUBLINK_STATE_TX_EQ);
ct_assert(NV2080_NVLINK_CORE_SUBLINK_STATE_TX_PRBS_EN ==
          NVLINK_SUBLINK_STATE_TX_PRBS_EN);
ct_assert(NV2080_NVLINK_CORE_SUBLINK_STATE_TX_POST_HS ==
          NVLINK_SUBLINK_STATE_TX_POST_HS);

/*!
141 * Compile time asserts to ensure NV2080_NVLINK_CORE_SUBLINK_STATE_RX* == 142 * NVLINK_SUBLINK_STATE_RX* 143 */ 144 ct_assert(NV2080_NVLINK_CORE_SUBLINK_STATE_RX_HS == 145 NVLINK_SUBLINK_STATE_RX_HS); 146 ct_assert(NV2080_NVLINK_CORE_SUBLINK_STATE_RX_SINGLE_LANE == 147 NVLINK_SUBLINK_STATE_RX_SINGLE_LANE); 148 ct_assert(NV2080_NVLINK_CORE_SUBLINK_STATE_RX_TRAIN == 149 NVLINK_SUBLINK_STATE_RX_TRAIN); 150 ct_assert(NV2080_NVLINK_CORE_SUBLINK_STATE_RX_SAFE == 151 NVLINK_SUBLINK_STATE_RX_SAFE); 152 ct_assert(NV2080_NVLINK_CORE_SUBLINK_STATE_RX_OFF == 153 NVLINK_SUBLINK_STATE_RX_OFF); 154 ct_assert(NV2080_NVLINK_CORE_SUBLINK_STATE_RX_RXCAL == 155 NVLINK_SUBLINK_STATE_RX_RXCAL); 156 ct_assert(NV2080_NVLINK_CORE_SUBLINK_STATE_RX_INIT_TERM == 157 NVLINK_SUBLINK_STATE_RX_INIT_TERM); 158 159 /*! 160 * @brief Callback function for adding link to nvlink core 161 * 162 * @param[in] nvlink_link pointer 163 * 164 * @returns NVL_SUCCESS on success 165 */ 166 NvlStatus 167 knvlinkCoreAddLinkCallback 168 ( 169 nvlink_link *link 170 ) 171 { 172 return 0; 173 } 174 175 /*! 176 * @brief Callback function for removing link from nvlink core 177 * 178 * @param[in] nvlink_link pointer 179 * 180 * @returns NVL_SUCCESS on success 181 */ 182 NvlStatus 183 knvlinkCoreRemoveLinkCallback 184 ( 185 nvlink_link *link 186 ) 187 { 188 portMemFree((NvU8 *)link->linkName); 189 link->linkName = NULL; 190 link->link_handlers = NULL; 191 link->dev = NULL; 192 portMemFree(link); 193 return 0; 194 } 195 196 /*! 197 * @brief Callback function for locking a link so its state can be accessed 198 * and modified atomically. 
199 * 200 * @param[in] nvlink_link pointer 201 * 202 * @returns NVL_SUCCESS on success 203 */ 204 NvlStatus 205 knvlinkCoreLockLinkCallback 206 ( 207 nvlink_link *link 208 ) 209 { 210 KNVLINK_RM_LINK *pNvlinkLink = (KNVLINK_RM_LINK *)link->link_info; 211 OBJSYS *pSys = SYS_GET_INSTANCE(); 212 213 // Return if NVLink fabric is managed by FM 214 if (pSys->getProperty(pSys, PDB_PROP_SYS_FABRIC_IS_EXTERNALLY_MANAGED)) 215 { 216 return NVL_SUCCESS; 217 } 218 219 // 220 // We track the lock state of this API via the master/parent GPU of the 221 // subdevice, since the locking APIs currently available to us operate at 222 // the device level. 223 // 224 OBJGPU *pGpu = gpumgrGetParentGPU(pNvlinkLink->pGpu); 225 KernelNvlink *pKernelNvlink = GPU_GET_KERNEL_NVLINK(pGpu); 226 227 NV_ASSERT_OR_RETURN(pKernelNvlink != NULL, NVL_ERR_INVALID_STATE); 228 229 // First check if the lock is already held 230 if (rmDeviceGpuLockIsOwner(gpuGetInstance(pGpu))) 231 { 232 // 233 // If the lock is held with deviceLockRefcount == 0, it means the 234 // lock was acquired before this function was first called for the 235 // device, so we will not release the lock via the unlock callback. 236 // 237 if (pKernelNvlink->deviceLockRefcount > 0) 238 { 239 pKernelNvlink->deviceLockRefcount++; 240 NV_PRINTF(LEVEL_INFO, "incremented device lock refcnt to %u\n", 241 pKernelNvlink->deviceLockRefcount); 242 } 243 else 244 { 245 NV_PRINTF(LEVEL_INFO, 246 "device lock acquired outside of the core library callbacks\n"); 247 } 248 } 249 else 250 { 251 if (rmDeviceGpuLocksAcquire(pGpu, GPUS_LOCK_FLAGS_NONE, RM_LOCK_MODULES_NVLINK) == NV_OK) 252 { 253 NV_PRINTF(LEVEL_INFO, "acquired device GPU locks\n"); 254 255 pKernelNvlink->deviceLockRefcount++; 256 } 257 else 258 { 259 NV_PRINTF(LEVEL_INFO, "failed to acquire device GPU locks!\n"); 260 261 return NVL_ERR_GENERIC; 262 } 263 } 264 265 return NVL_SUCCESS; 266 } 267 268 /*! 269 * @brief Callback function for unlocking a link. 
270 * 271 * This should only be called after nvlinkCoreLockLinkCallback(). 272 * 273 * @param[in] nvlink_link pointer 274 */ 275 void 276 knvlinkCoreUnlockLinkCallback 277 ( 278 nvlink_link *link 279 ) 280 { 281 KNVLINK_RM_LINK *pNvlinkLink = (KNVLINK_RM_LINK *)link->link_info; 282 OBJSYS *pSys = SYS_GET_INSTANCE(); 283 284 // Return if NVLink fabric is managed by FM 285 if (pSys->getProperty(pSys, PDB_PROP_SYS_FABRIC_IS_EXTERNALLY_MANAGED)) 286 { 287 return; 288 } 289 290 // 291 // We track the lock state of this API via the master/parent GPU of the 292 // subdevice, since the locking APIs currently available to us operate at 293 // the device level. 294 // 295 OBJGPU *pGpu = gpumgrGetParentGPU(pNvlinkLink->pGpu); 296 KernelNvlink *pKernelNvlink = GPU_GET_KERNEL_NVLINK(pGpu); 297 298 NV_ASSERT_OR_RETURN_VOID(rmDeviceGpuLockIsOwner(gpuGetInstance(pGpu))); 299 300 if (pKernelNvlink == NULL) 301 { 302 NV_PRINTF(LEVEL_ERROR, "pKernelNvlink is NULL, returning early\n"); 303 304 return; 305 } 306 307 if (pKernelNvlink->deviceLockRefcount > 0) 308 { 309 if (--pKernelNvlink->deviceLockRefcount == 0) 310 { 311 rmDeviceGpuLocksRelease(pGpu, GPUS_LOCK_FLAGS_NONE, NULL); 312 NV_PRINTF(LEVEL_INFO, "released device GPU locks\n"); 313 } 314 else 315 { 316 NV_PRINTF(LEVEL_INFO, "decremented device lock refcnt to %u\n", 317 pKernelNvlink->deviceLockRefcount); 318 } 319 } 320 else 321 { 322 NV_PRINTF(LEVEL_INFO, 323 "device lock acquired outside of the core library callbacks\n"); 324 } 325 } 326 327 /*! 
328 * @brief Function to be executed when the master end 329 * of a link triggers the retraining of the link 330 * 331 * @param[in] gpuInstance Master GPU instance 332 * @param[in] linkChangeData Contains information of both ends 333 */ 334 static void 335 _knvlinkCorePassiveLinkChangeCallback 336 ( 337 NvU32 gpuInstance, 338 void *linkChangeData 339 ) 340 { 341 OBJGPU *pGpu = NULL; 342 OBJSYS *pSys = SYS_GET_INSTANCE(); 343 KernelNvlink *pKernelNvlink = NULL; 344 345 KNVLINK_RM_LINK *pNvlinkLink; 346 nvlink_link_change *link_change; 347 nvlink_link *slave; 348 nvlink_link *master; 349 350 link_change = *(nvlink_link_change **)linkChangeData; 351 master = link_change->master; 352 slave = link_change->slave; 353 pNvlinkLink = (KNVLINK_RM_LINK *)master->link_info; 354 355 pGpu = gpumgrGetGpu(gpuInstance); 356 NV_ASSERT(pGpu == pNvlinkLink->pGpu); 357 358 pKernelNvlink = GPU_GET_KERNEL_NVLINK(pGpu); 359 360 // If link training is disabled through regkey 361 if (pKernelNvlink && pKernelNvlink->bSkipLinkTraining) 362 { 363 return; 364 } 365 366 // If fabric is externally managed through FM, return 367 if (pSys->getProperty(pSys, PDB_PROP_SYS_FABRIC_IS_EXTERNALLY_MANAGED)) 368 { 369 return; 370 } 371 372 if (osAcquireRmSema(pSys->pSema) == NV_OK) 373 { 374 if (master->link_handlers->lock(master) == NVL_SUCCESS) 375 { 376 if (slave->link_handlers->lock(slave) == NVL_SUCCESS) 377 { 378 if (pKernelNvlink != NULL) 379 { 380 NvU32 linkId = pNvlinkLink->linkId; 381 382 // 383 // TODO: we should probably be smarter about detecting if 384 // the master has already taken its own action in 385 // retraining the link that would remove the need to 386 // process this one. But for now, just blindly process 387 // the link change request from the slave. 
388 // 389 knvlinkRetrainLink(pGpu, pKernelNvlink, linkId, 390 link_change->change_type == nvlink_retrain_from_off); 391 } 392 else 393 { 394 NV_PRINTF(LEVEL_ERROR, 395 "master GPU does not support NVLINK!\n"); 396 DBG_BREAKPOINT(); 397 } 398 slave->link_handlers->unlock(slave); 399 } 400 else 401 { 402 NV_PRINTF(LEVEL_ERROR, "failed to acquire slave lock!\n"); 403 } 404 master->link_handlers->unlock(master); 405 } 406 else 407 { 408 NV_PRINTF(LEVEL_ERROR, "failed to acquire the master lock!\n"); 409 } 410 osReleaseRmSema(pSys->pSema, NULL); 411 } 412 else 413 { 414 NV_PRINTF(LEVEL_ERROR, "failed to acquire the RM semaphore!\n"); 415 } 416 417 return; 418 } 419 420 /*! 421 * @brief Callback function for queuing a link change request from the 422 * link slave. 423 * 424 * This function is only called for links which are the master of the 425 * connection. 426 * 427 * The master link is NOT locked at the time this callback is called, 428 * and this callback must not attempt to acquire the master link lock. 
429 * 430 * @param[in] nvlink_link_change pointer 431 * 432 * @returns NVL_SUCCESS on success 433 */ 434 NvlStatus 435 knvlinkCoreQueueLinkChangeCallback 436 ( 437 nvlink_link_change *link_change 438 ) 439 { 440 441 KNVLINK_RM_LINK *pNvlinkLink; 442 OBJGPU *pGpu = NULL; 443 OBJOS *pOS = NULL; 444 NV_STATUS status = NV_OK; 445 void *pWorkItemData; 446 447 pNvlinkLink = (KNVLINK_RM_LINK *)link_change->master->link_info; 448 pGpu = pNvlinkLink->pGpu; 449 450 if (pGpu == NULL) 451 { 452 return NVL_ERR_INVALID_STATE; 453 } 454 455 // The master should be marked as such 456 NV_ASSERT_OR_RETURN(link_change->master->master, NV_ERR_INVALID_STATE); 457 458 pOS = GPU_GET_OS(pGpu); 459 460 pWorkItemData = portMemAllocNonPaged(sizeof(nvlink_link_change *)); 461 NV_ASSERT_OR_RETURN(pWorkItemData != NULL, NVL_NO_MEM); 462 463 *((nvlink_link_change **)pWorkItemData) = link_change; 464 465 // 466 // This function will free the argument if it succeeds, hence the need for 467 // the work item data wrapper. 468 // 469 status = pOS->osQueueWorkItem(pGpu, _knvlinkCorePassiveLinkChangeCallback, 470 pWorkItemData); 471 if (status != NV_OK) 472 { 473 portMemFree(pWorkItemData); 474 return NVL_ERR_GENERIC; 475 } 476 477 return NVL_SUCCESS; 478 } 479 480 /*! 
481 * @brief Callback function for setting a DL link mode 482 * 483 * @param[in] nvlink_link pointer 484 * @param[in] Link mode to be set 485 * @param[in] Flags 486 * 487 * @returns NVL_SUCCESS on success 488 */ 489 NvlStatus 490 knvlinkCoreSetDlLinkModeCallback 491 ( 492 nvlink_link *link, 493 NvU64 mode, 494 NvU32 flags 495 ) 496 { 497 KNVLINK_RM_LINK *pNvlinkLink; 498 OBJGPU *pGpu = NULL; 499 KernelNvlink *pKernelNvlink = NULL; 500 KernelIoctrl *pKernelIoctrl = NULL; 501 NV_STATUS status = NV_OK; 502 NvU8 linkIndex; 503 NV2080_CTRL_NVLINK_CORE_CALLBACK_PARAMS params; 504 NV2080_CTRL_NVLINK_CALLBACK_SET_DL_LINK_MODE_PARAMS 505 *pSetDlLinkModeParams; 506 507 portMemSet(¶ms, 0, sizeof(params)); 508 509 pNvlinkLink = (KNVLINK_RM_LINK *)link->link_info; 510 pGpu = pNvlinkLink->pGpu; 511 512 if (pGpu == NULL) 513 { 514 NV_PRINTF(LEVEL_ERROR, "Error processing link info!\n"); 515 return 1; 516 } 517 518 pKernelNvlink = GPU_GET_KERNEL_NVLINK(pGpu); 519 linkIndex = pNvlinkLink->linkId; 520 pKernelIoctrl = KNVLINK_LINK_GET_IOCTRL(pKernelNvlink, linkIndex); 521 522 if (pKernelIoctrl == NULL) 523 return 0; 524 525 // If link training is disabled through regkey 526 if (pKernelNvlink->bSkipLinkTraining) 527 { 528 return 0; 529 } 530 531 params.linkId = linkIndex; 532 params.callbackType.type = NV2080_CTRL_NVLINK_CALLBACK_TYPE_SET_DL_LINK_MODE; 533 534 pSetDlLinkModeParams = ¶ms.callbackType.callbackParams.setDlLinkMode; 535 pSetDlLinkModeParams->mode = mode; 536 pSetDlLinkModeParams->bSync = (flags == NVLINK_STATE_CHANGE_SYNC) ? 
537 NV_TRUE : NV_FALSE; 538 539 switch (mode) 540 { 541 case NVLINK_LINKSTATE_OFF: 542 { 543 pSetDlLinkModeParams->linkMode = 544 NV2080_NVLINK_CORE_LINK_STATE_OFF; 545 546 break; 547 } 548 case NVLINK_LINKSTATE_PRE_HS: 549 { 550 pSetDlLinkModeParams->linkMode = 551 NV2080_NVLINK_CORE_LINK_STATE_PRE_HS; 552 553 pSetDlLinkModeParams->linkModeParams.linkModePreHsParams.remoteDeviceType = 554 pKernelNvlink->nvlinkLinks[linkIndex].remoteEndInfo.deviceType; 555 pSetDlLinkModeParams->linkModeParams.linkModePreHsParams.ipVerDlPl = 556 pKernelNvlink->nvlinkLinks[linkIndex].remoteEndInfo.ipVerDlPl; 557 558 break; 559 } 560 case NVLINK_LINKSTATE_INITPHASE1: 561 { 562 pSetDlLinkModeParams->linkMode = 563 NV2080_NVLINK_CORE_LINK_STATE_INITPHASE1; 564 565 if (pKernelIoctrl->getProperty(pKernelIoctrl, PDB_PROP_KIOCTRL_MINION_CACHE_SEEDS)) 566 { 567 NvU32 *seedDataSrc = pKernelNvlink->nvlinkLinks[linkIndex].core_link->seedData; 568 NvU32 *seedDataDest = 569 pSetDlLinkModeParams->linkModeParams.linkModeInitPhase1Params.seedData; 570 571 portMemCopy(seedDataDest, sizeof(*seedDataDest) * NV2080_CTRL_NVLINK_MAX_SEED_BUFFER_SIZE, 572 seedDataSrc, sizeof(*seedDataSrc) * NVLINK_MAX_SEED_BUFFER_SIZE); 573 } 574 575 break; 576 } 577 case NVLINK_LINKSTATE_POST_INITNEGOTIATE: 578 { 579 pSetDlLinkModeParams->linkMode = 580 NV2080_NVLINK_CORE_LINK_STATE_POST_INITNEGOTIATE; 581 582 break; 583 } 584 case NVLINK_LINKSTATE_POST_INITOPTIMIZE: 585 { 586 NvU32 initoptimizeTimeout; 587 THREAD_STATE_NODE threadNode; 588 THREAD_STATE_NODE *pThreadNode = NULL; 589 NvBool bDoThreadStateFree = NV_FALSE; 590 591 status = threadStateGetCurrent(&pThreadNode, pGpu); 592 if (status != NV_OK) 593 { 594 NV_PRINTF(LEVEL_INFO, 595 "Thread state not initialized!\n"); 596 597 // 598 // There is a possiblity that the entrypoint to the 599 // callback does not setup threadstate (ie. MODs). 600 // If there is no thread state, then initialize it. 
601 // 602 threadStateInit(&threadNode, THREAD_STATE_FLAGS_NONE); 603 bDoThreadStateFree = NV_TRUE; 604 605 // Getting thread state a second time should not fail 606 status = threadStateGetCurrent(&pThreadNode, pGpu); 607 if (status != NV_OK) 608 { 609 NV_PRINTF(LEVEL_ERROR, "Error getting current thread!\n"); 610 threadStateFree(&threadNode, THREAD_STATE_FLAGS_NONE); 611 return 1; 612 } 613 614 NV_ASSERT(pThreadNode == &threadNode); 615 } 616 617 initoptimizeTimeout = gpuScaleTimeout(pGpu, 618 NVLINK_INITOPTIMIZE_POLL_TIMEOUT); 619 620 // 621 // Override the thread state timeout, 622 // so GSP doesn't timeout after 4 seconds 623 // 624 threadStateSetTimeoutOverride(pThreadNode, 625 (NvU64)initoptimizeTimeout / 1000); 626 627 pSetDlLinkModeParams->linkMode = 628 NV2080_NVLINK_CORE_LINK_STATE_POST_INITOPTIMIZE; 629 630 // Poll for 10 seconds to avoid GSP timeout 631 while(1) 632 { 633 status = knvlinkExecGspRmRpc(pGpu, pKernelNvlink, 634 NV2080_CTRL_CMD_NVLINK_CORE_CALLBACK, 635 (void *)¶ms, sizeof(params)); 636 if (status != NV_OK) 637 { 638 NV_PRINTF(LEVEL_ERROR, 639 "Error calling and polling for Init Optimize status! link 0x%x\n", 640 linkIndex); 641 break; 642 } 643 644 // Check if polling is done 645 if (pSetDlLinkModeParams->linkModeParams.linkModePostInitOptimizeParams.bPollDone) 646 { 647 break; 648 } 649 else 650 { 651 // Add a 1 second delay so GSP isn't spammed with commands 652 osDelay(NVLINK_INITOPTIMIZE_POLL_COUNT_DELAY_MS); 653 osSpinLoop(); 654 } 655 } 656 657 if (bDoThreadStateFree) 658 { 659 threadStateFree(&threadNode, THREAD_STATE_FLAGS_NONE); 660 } 661 662 // Nothing else to do, return early 663 return (status == NV_OK) ? 
0 : 1; 664 } 665 default: 666 { 667 // Do nothing 668 break; 669 } 670 } 671 672 status = knvlinkExecGspRmRpc(pGpu, pKernelNvlink, 673 NV2080_CTRL_CMD_NVLINK_CORE_CALLBACK, 674 (void *)¶ms, sizeof(params)); 675 if (status != NV_OK) 676 { 677 NV_PRINTF(LEVEL_ERROR, "Error setting current link state: 0x%llx!\n", mode); 678 return 1; 679 } 680 681 // Post control call operations 682 switch (mode) 683 { 684 case NVLINK_LINKSTATE_SAVE_STATE: 685 { 686 // If the request was to save the link state, update on success 687 link->bStateSaved = NV_TRUE; 688 break; 689 } 690 case NVLINK_LINKSTATE_RESTORE_STATE: 691 { 692 // If the request was to restore the link state, update on success 693 link->bStateSaved = NV_FALSE; 694 break; 695 } 696 case NVLINK_LINKSTATE_OFF: 697 { 698 if (pKernelIoctrl->getProperty(pKernelIoctrl, PDB_PROP_KIOCTRL_MINION_CACHE_SEEDS)) 699 { 700 NvU32 *seedDataSrc = pSetDlLinkModeParams->linkModeParams.linkModeOffParams.seedData; 701 NvU32 *seedDataDest = pKernelNvlink->nvlinkLinks[linkIndex].core_link->seedData; 702 703 portMemCopy(seedDataDest, sizeof(*seedDataDest) * NV2080_CTRL_NVLINK_MAX_SEED_BUFFER_SIZE, 704 seedDataSrc, sizeof(*seedDataSrc) * NVLINK_MAX_SEED_BUFFER_SIZE); 705 } 706 707 break; 708 } 709 case NVLINK_LINKSTATE_POST_INITNEGOTIATE: 710 { 711 NV2080_CTRL_NVLINK_SET_DL_LINK_MODE_POST_INITNEGOTIATE_PARAMS 712 *pPostInitNegotiateParams; 713 714 pPostInitNegotiateParams = 715 &pSetDlLinkModeParams->linkModeParams.linkModePostInitNegotiateParams; 716 717 // Save Remote/Local link SID info into core lib 718 pKernelNvlink->nvlinkLinks[linkIndex].core_link->bInitnegotiateConfigGood = 719 pPostInitNegotiateParams->bInitnegotiateConfigGood; 720 pKernelNvlink->nvlinkLinks[linkIndex].core_link->remoteSid = 721 pPostInitNegotiateParams->remoteLocalSidInfo.remoteSid; 722 pKernelNvlink->nvlinkLinks[linkIndex].core_link->remoteDeviceType = 723 pPostInitNegotiateParams->remoteLocalSidInfo.remoteDeviceType; 724 
pKernelNvlink->nvlinkLinks[linkIndex].core_link->remoteLinkId = 725 pPostInitNegotiateParams->remoteLocalSidInfo.remoteLinkId; 726 pKernelNvlink->nvlinkLinks[linkIndex].core_link->localSid = 727 pPostInitNegotiateParams->remoteLocalSidInfo.localSid; 728 729 break; 730 } 731 default: 732 { 733 // Do nothing 734 break; 735 } 736 } 737 738 return 0; 739 } 740 741 /*! 742 * @brief Callback function for getting a DL link mode 743 * 744 * @param[in] nvlink_link pointer 745 * @param[out] Current mode of the link 746 * 747 * @returns NVL_SUCCESS on success 748 */ 749 NvlStatus 750 knvlinkCoreGetDlLinkModeCallback 751 ( 752 nvlink_link *link, 753 NvU64 *mode 754 ) 755 { 756 KNVLINK_RM_LINK *pNvlinkLink; 757 OBJGPU *pGpu = NULL; 758 KernelNvlink *pKernelNvlink = NULL; 759 NV_STATUS status = NV_OK; 760 NV2080_CTRL_NVLINK_CORE_CALLBACK_PARAMS params; 761 762 portMemSet(¶ms, 0, sizeof(params)); 763 764 pNvlinkLink = (KNVLINK_RM_LINK *)link->link_info; 765 pGpu = pNvlinkLink->pGpu; 766 767 if (pGpu == NULL) 768 { 769 NV_PRINTF(LEVEL_ERROR, "Error processing link info!\n"); 770 return 1; 771 } 772 773 pKernelNvlink = GPU_GET_KERNEL_NVLINK(pGpu); 774 775 params.linkId = pNvlinkLink->linkId; 776 params.callbackType.type = NV2080_CTRL_NVLINK_CALLBACK_TYPE_GET_DL_LINK_MODE; 777 778 status = knvlinkExecGspRmRpc(pGpu, pKernelNvlink, 779 NV2080_CTRL_CMD_NVLINK_CORE_CALLBACK, 780 (void *)¶ms, sizeof(params)); 781 if (status != NV_OK) 782 { 783 NV_PRINTF(LEVEL_ERROR, "Error getting current link state!\n"); 784 return 1; 785 } 786 787 *mode = (NvU64) params.callbackType.callbackParams.getDlLinkMode.mode; 788 return 0; 789 } 790 791 /*! 
792 * @brief Callback function for setting a TL link mode 793 * 794 * @param[in] nvlink_link pointer 795 * @param[in] Link mode to be set 796 * @param[in] Flags 797 * 798 * @returns NVL_SUCCESS on success 799 */ 800 NvlStatus 801 knvlinkCoreSetTlLinkModeCallback 802 ( 803 nvlink_link *link, 804 NvU64 mode, 805 NvU32 flags 806 ) 807 { 808 KNVLINK_RM_LINK *pNvlinkLink; 809 OBJGPU *pGpu = NULL; 810 KernelNvlink *pKernelNvlink = NULL; 811 NV_STATUS status = NV_OK; 812 NV2080_CTRL_NVLINK_CORE_CALLBACK_PARAMS params; 813 NV2080_CTRL_NVLINK_CALLBACK_SET_TL_LINK_MODE_PARAMS 814 *pSetTlLinkModeParams; 815 816 portMemSet(¶ms, 0, sizeof(params)); 817 818 pNvlinkLink = (KNVLINK_RM_LINK *)link->link_info; 819 pGpu = pNvlinkLink->pGpu; 820 821 if (pGpu == NULL) 822 { 823 NV_PRINTF(LEVEL_ERROR, "Error processing link info!\n"); 824 return 1; 825 } 826 827 pKernelNvlink = GPU_GET_KERNEL_NVLINK(pGpu); 828 829 // If link training is disabled through regkey 830 if (pKernelNvlink->bSkipLinkTraining) 831 { 832 return 0; 833 } 834 835 params.linkId = pNvlinkLink->linkId; 836 params.callbackType.type = NV2080_CTRL_NVLINK_CALLBACK_TYPE_SET_TL_LINK_MODE; 837 838 pSetTlLinkModeParams = ¶ms.callbackType.callbackParams.setTlLinkMode; 839 pSetTlLinkModeParams->mode = mode; 840 pSetTlLinkModeParams->bSync = (flags == NVLINK_STATE_CHANGE_SYNC) ? 841 NV_TRUE : NV_FALSE; 842 843 status = knvlinkExecGspRmRpc(pGpu, pKernelNvlink, 844 NV2080_CTRL_CMD_NVLINK_CORE_CALLBACK, 845 (void *)¶ms, sizeof(params)); 846 if (status != NV_OK) 847 { 848 NV_PRINTF(LEVEL_ERROR, "Error setting current link state!\n"); 849 return 1; 850 } 851 852 return 0; 853 } 854 855 /*! 
856 * @brief Callback function for getting a TL link mode 857 * 858 * @param[in] nvlink_link pointer 859 * @param[out] Current mode of the link 860 * 861 * @returns NVL_SUCCESS on success 862 */ 863 NvlStatus 864 knvlinkCoreGetTlLinkModeCallback 865 ( 866 nvlink_link *link, 867 NvU64 *mode 868 ) 869 { 870 KNVLINK_RM_LINK *pNvlinkLink; 871 OBJGPU *pGpu = NULL; 872 KernelNvlink *pKernelNvlink = NULL; 873 NV_STATUS status = NV_OK; 874 NV2080_CTRL_NVLINK_CORE_CALLBACK_PARAMS params; 875 876 portMemSet(¶ms, 0, sizeof(params)); 877 878 pNvlinkLink = (KNVLINK_RM_LINK *)link->link_info; 879 pGpu = pNvlinkLink->pGpu; 880 881 if (pGpu == NULL) 882 { 883 NV_PRINTF(LEVEL_ERROR, "Error processing link info!\n"); 884 return 1; 885 } 886 887 pKernelNvlink = GPU_GET_KERNEL_NVLINK(pGpu); 888 889 params.linkId = pNvlinkLink->linkId; 890 params.callbackType.type = NV2080_CTRL_NVLINK_CALLBACK_TYPE_GET_TL_LINK_MODE; 891 892 status = knvlinkExecGspRmRpc(pGpu, pKernelNvlink, 893 NV2080_CTRL_CMD_NVLINK_CORE_CALLBACK, 894 (void *)¶ms, sizeof(params)); 895 if (status != NV_OK) 896 { 897 NV_PRINTF(LEVEL_ERROR, "Error getting current link state!\n"); 898 return 1; 899 } 900 901 *mode = (NvU64) params.callbackType.callbackParams.getTlLinkMode.mode; 902 return 0; 903 } 904 905 /*! 
906 * @brief Callback function for setting Tx sublink mode 907 * 908 * @param[in] nvlink_link pointer 909 * @param[in] TX Sublink mode to be set 910 * 911 * @returns NVL_SUCCESS on success 912 */ 913 NvlStatus 914 knvlinkCoreSetTxSublinkModeCallback 915 ( 916 nvlink_link *link, 917 NvU64 mode, 918 NvU32 flags 919 ) 920 { 921 KNVLINK_RM_LINK *pNvlinkLink; 922 OBJGPU *pGpu = NULL; 923 KernelNvlink *pKernelNvlink = NULL; 924 NV_STATUS status = NV_OK; 925 NV2080_CTRL_NVLINK_CORE_CALLBACK_PARAMS params; 926 NV2080_CTRL_NVLINK_CALLBACK_SET_TX_SUBLINK_MODE_PARAMS 927 *pSetTxSublinkModeParams; 928 929 portMemSet(¶ms, 0, sizeof(params)); 930 931 pNvlinkLink = (KNVLINK_RM_LINK *)link->link_info; 932 pGpu = pNvlinkLink->pGpu; 933 934 if (pGpu == NULL) 935 { 936 NV_PRINTF(LEVEL_ERROR, "Error processing link info!\n"); 937 return 1; 938 } 939 940 pKernelNvlink = GPU_GET_KERNEL_NVLINK(pGpu); 941 942 // If link training is disabled through regkey 943 if (pKernelNvlink->bSkipLinkTraining) 944 { 945 return 0; 946 } 947 948 params.linkId = pNvlinkLink->linkId; 949 params.callbackType.type = 950 NV2080_CTRL_NVLINK_CALLBACK_TYPE_SET_TX_SUBLINK_MODE; 951 952 pSetTxSublinkModeParams = ¶ms.callbackType.callbackParams.setTxSublinkMode; 953 pSetTxSublinkModeParams->mode = mode; 954 pSetTxSublinkModeParams->bSync = (flags == NVLINK_STATE_CHANGE_SYNC) ? 955 NV_TRUE : NV_FALSE; 956 957 status = knvlinkExecGspRmRpc(pGpu, pKernelNvlink, 958 NV2080_CTRL_CMD_NVLINK_CORE_CALLBACK, 959 (void *)¶ms, sizeof(params)); 960 if (status != NV_OK) 961 { 962 NV_PRINTF(LEVEL_ERROR, 963 "Error setting TX sublink mode. mode = 0x%08llx\n", mode); 964 return 1; 965 } 966 967 switch(mode) 968 { 969 case NVLINK_SUBLINK_STATE_TX_COMMON_MODE: 970 case NVLINK_SUBLINK_STATE_TX_EQ: 971 case NVLINK_SUBLINK_STATE_TX_DATA_READY: 972 link->tx_sublink_state = (NvU32) mode; 973 break; 974 default: 975 break; 976 } 977 978 return 0; 979 } 980 981 /*! 
982 * @brief Callback function for setting Rx sublink mode 983 * 984 * @param[in] nvlink_link pointer 985 * @param[in] RX Sublink mode to be set 986 * 987 * @returns NVL_SUCCESS on success 988 */ 989 NvlStatus 990 knvlinkCoreSetRxSublinkModeCallback 991 ( 992 nvlink_link *link, 993 NvU64 mode, 994 NvU32 flags 995 ) 996 { 997 KNVLINK_RM_LINK *pNvlinkLink; 998 OBJGPU *pGpu = NULL; 999 KernelNvlink *pKernelNvlink = NULL; 1000 NV_STATUS status = NV_OK; 1001 NV2080_CTRL_NVLINK_CORE_CALLBACK_PARAMS params; 1002 NV2080_CTRL_NVLINK_CALLBACK_SET_RX_SUBLINK_MODE_PARAMS 1003 *pSetRxSublinkModeParams; 1004 1005 pNvlinkLink = (KNVLINK_RM_LINK *)link->link_info; 1006 pGpu = pNvlinkLink->pGpu; 1007 1008 if (pGpu == NULL) 1009 { 1010 NV_PRINTF(LEVEL_ERROR, "Error processing link info!\n"); 1011 return 1; 1012 } 1013 1014 pKernelNvlink = GPU_GET_KERNEL_NVLINK(pGpu); 1015 1016 // If link training is disabled through regkey 1017 if (pKernelNvlink->bSkipLinkTraining) 1018 { 1019 return 0; 1020 } 1021 1022 params.linkId = pNvlinkLink->linkId; 1023 params.callbackType.type = 1024 NV2080_CTRL_NVLINK_CALLBACK_TYPE_SET_RX_SUBLINK_MODE; 1025 1026 pSetRxSublinkModeParams = 1027 ¶ms.callbackType.callbackParams.setRxSublinkMode; 1028 pSetRxSublinkModeParams->mode = mode; 1029 pSetRxSublinkModeParams->bSync = (flags == NVLINK_STATE_CHANGE_SYNC) ? 1030 NV_TRUE : NV_FALSE; 1031 1032 status = knvlinkExecGspRmRpc(pGpu, pKernelNvlink, 1033 NV2080_CTRL_CMD_NVLINK_CORE_CALLBACK, 1034 (void *)¶ms, sizeof(params)); 1035 if (status != NV_OK) 1036 { 1037 NV_PRINTF(LEVEL_ERROR, "Error setting RX sublink mode!\n"); 1038 return 1; 1039 } 1040 1041 switch(mode) 1042 { 1043 case NVLINK_SUBLINK_STATE_RX_RXCAL: 1044 case NVLINK_SUBLINK_STATE_RX_INIT_TERM: 1045 link->rx_sublink_state = (NvU32) mode; 1046 break; 1047 default: 1048 break; 1049 } 1050 1051 return 0; 1052 } 1053 1054 /*! 
1055 * @brief Callback function for getting Tx sublink mode 1056 * 1057 * @param[in] nvlink_link pointer 1058 * @param[out] Current mode of the TX sublink 1059 * 1060 * @returns NVL_SUCCESS on success 1061 */ 1062 NvlStatus 1063 knvlinkCoreGetTxSublinkModeCallback 1064 ( 1065 nvlink_link *link, 1066 NvU64 *mode, 1067 NvU32 *subMode 1068 ) 1069 { 1070 KNVLINK_RM_LINK *pNvlinkLink; 1071 OBJGPU *pGpu = NULL; 1072 KernelNvlink *pKernelNvlink = NULL; 1073 NV_STATUS status = NV_OK; 1074 NV2080_CTRL_NVLINK_CORE_CALLBACK_PARAMS params; 1075 1076 portMemSet(¶ms, 0, sizeof(params)); 1077 1078 // Initialize to default values 1079 params.callbackType.callbackParams.getTxSublinkMode.sublinkMode = 1080 NVLINK_SUBLINK_STATE_TX_OFF; 1081 params.callbackType.callbackParams.getTxSublinkMode.sublinkSubMode = 1082 NVLINK_SUBLINK_SUBSTATE_TX_STABLE; 1083 1084 pNvlinkLink = (KNVLINK_RM_LINK *)link->link_info; 1085 pGpu = pNvlinkLink->pGpu; 1086 1087 if (pGpu == NULL) 1088 { 1089 NV_PRINTF(LEVEL_ERROR, "Error processing link info!\n"); 1090 return 1; 1091 } 1092 1093 pKernelNvlink = GPU_GET_KERNEL_NVLINK(pGpu); 1094 1095 params.linkId = pNvlinkLink->linkId; 1096 params.callbackType.type = 1097 NV2080_CTRL_NVLINK_CALLBACK_TYPE_GET_TX_SUBLINK_MODE; 1098 1099 status = knvlinkExecGspRmRpc(pGpu, pKernelNvlink, 1100 NV2080_CTRL_CMD_NVLINK_CORE_CALLBACK, 1101 (void *)¶ms, sizeof(params)); 1102 if (status != NV_OK) 1103 { 1104 NV_PRINTF(LEVEL_ERROR, "Error getting current TX sublink state!\n"); 1105 return 1; 1106 } 1107 1108 *mode = (NvU64) params.callbackType.callbackParams.getTxSublinkMode.sublinkMode; 1109 *subMode = params.callbackType.callbackParams.getTxSublinkMode.sublinkSubMode; 1110 return 0; 1111 } 1112 1113 /*! 
1114 * @brief Callback function for getting Rx sublink mode 1115 * 1116 * @param[in] nvlink_link pointer 1117 * @param[out] Current mode of the RX sublink 1118 * 1119 * @returns NVL_SUCCESS on success 1120 */ 1121 NvlStatus 1122 knvlinkCoreGetRxSublinkModeCallback 1123 ( 1124 nvlink_link *link, 1125 NvU64 *mode, 1126 NvU32 *subMode 1127 ) 1128 { 1129 KNVLINK_RM_LINK *pNvlinkLink; 1130 OBJGPU *pGpu = NULL; 1131 KernelNvlink *pKernelNvlink = NULL; 1132 NV_STATUS status = NV_OK; 1133 NV2080_CTRL_NVLINK_CORE_CALLBACK_PARAMS params; 1134 1135 portMemSet(¶ms, 0, sizeof(params)); 1136 1137 // Initialize to default values 1138 params.callbackType.callbackParams.getRxSublinkMode.sublinkMode = 1139 NVLINK_SUBLINK_STATE_RX_OFF; 1140 params.callbackType.callbackParams.getRxSublinkMode.sublinkSubMode = 1141 NVLINK_SUBLINK_SUBSTATE_RX_STABLE; 1142 1143 pNvlinkLink = (KNVLINK_RM_LINK *)link->link_info; 1144 pGpu = pNvlinkLink->pGpu; 1145 1146 if (pGpu == NULL) 1147 { 1148 NV_PRINTF(LEVEL_ERROR, "Error processing link info!\n"); 1149 return 1; 1150 } 1151 1152 pKernelNvlink = GPU_GET_KERNEL_NVLINK(pGpu); 1153 1154 params.linkId = pNvlinkLink->linkId; 1155 params.callbackType.type = 1156 NV2080_CTRL_NVLINK_CALLBACK_TYPE_GET_RX_SUBLINK_MODE; 1157 1158 status = knvlinkExecGspRmRpc(pGpu, pKernelNvlink, 1159 NV2080_CTRL_CMD_NVLINK_CORE_CALLBACK, 1160 (void *)¶ms, sizeof(params)); 1161 if (status != NV_OK) 1162 { 1163 NV_PRINTF(LEVEL_ERROR, "Error getting current RX sublink state!\n"); 1164 return 1; 1165 } 1166 1167 *mode = (NvU64) params.callbackType.callbackParams.getRxSublinkMode.sublinkMode; 1168 *subMode = params.callbackType.callbackParams.getRxSublinkMode.sublinkSubMode;; 1169 return 0; 1170 } 1171 1172 /*! 
1173 * @brief Callback function for performing receiver detect 1174 * 1175 * @param[in] nvlink_link pointer 1176 * 1177 * @returns NVL_SUCCESS on success 1178 */ 1179 NvlStatus 1180 knvlinkCoreSetRxSublinkDetectCallback 1181 ( 1182 nvlink_link *link, 1183 NvU32 flags 1184 ) 1185 { 1186 KNVLINK_RM_LINK *pNvlinkLink; 1187 OBJGPU *pGpu = NULL; 1188 KernelNvlink *pKernelNvlink = NULL; 1189 NV_STATUS status = NV_OK; 1190 NV2080_CTRL_NVLINK_CORE_CALLBACK_PARAMS params; 1191 NV2080_CTRL_NVLINK_CALLBACK_SET_RX_DETECT_PARAMS 1192 *pSetRxDetectParams; 1193 1194 portMemSet(¶ms, 0, sizeof(params)); 1195 1196 pNvlinkLink = (KNVLINK_RM_LINK *)link->link_info; 1197 pGpu = pNvlinkLink->pGpu; 1198 1199 if (pGpu == NULL) 1200 { 1201 NV_PRINTF(LEVEL_ERROR, "Error processing link info!\n"); 1202 return 1; 1203 } 1204 1205 pKernelNvlink = GPU_GET_KERNEL_NVLINK(pGpu); 1206 1207 params.linkId = pNvlinkLink->linkId; 1208 params.callbackType.type = 1209 NV2080_CTRL_NVLINK_CALLBACK_TYPE_SET_RX_SUBLINK_DETECT; 1210 1211 pSetRxDetectParams = 1212 ¶ms.callbackType.callbackParams.setRxSublinkDetect; 1213 pSetRxDetectParams->bSync = (flags == NVLINK_STATE_CHANGE_SYNC) ? 1214 NV_TRUE : NV_FALSE; 1215 1216 status = knvlinkExecGspRmRpc(pGpu, pKernelNvlink, 1217 NV2080_CTRL_CMD_NVLINK_CORE_CALLBACK, 1218 (void *)¶ms, sizeof(params)); 1219 if (status != NV_OK) 1220 { 1221 NV_PRINTF(LEVEL_ERROR, 1222 "Error performing RXDET (Receiver Detect) on link!\n"); 1223 return 1; 1224 } 1225 1226 return 0; 1227 } 1228 1229 /*! 
1230 * @brief Callback function for getting status of receiver detect 1231 * 1232 * @param[in] nvlink_link pointer 1233 * 1234 * @returns NVL_SUCCESS on success 1235 */ 1236 NvlStatus 1237 knvlinkCoreGetRxSublinkDetectCallback 1238 ( 1239 nvlink_link *link 1240 ) 1241 { 1242 KNVLINK_RM_LINK *pNvlinkLink; 1243 OBJGPU *pGpu = NULL; 1244 KernelNvlink *pKernelNvlink = NULL; 1245 NV_STATUS status = NV_OK; 1246 NvU32 linkId; 1247 NV2080_CTRL_NVLINK_CORE_CALLBACK_PARAMS params; 1248 NV2080_CTRL_NVLINK_CALLBACK_GET_RX_DETECT_PARAMS 1249 *pGetRxDetectParams; 1250 1251 portMemSet(¶ms, 0, sizeof(params)); 1252 1253 pNvlinkLink = (KNVLINK_RM_LINK *)link->link_info; 1254 pGpu = pNvlinkLink->pGpu; 1255 linkId = pNvlinkLink->linkId; 1256 1257 if (pGpu == NULL) 1258 { 1259 NV_PRINTF(LEVEL_ERROR, "Error processing link info!\n"); 1260 return 1; 1261 } 1262 1263 pKernelNvlink = GPU_GET_KERNEL_NVLINK(pGpu); 1264 1265 params.linkId = linkId; 1266 params.callbackType.type = 1267 NV2080_CTRL_NVLINK_CALLBACK_TYPE_GET_RX_SUBLINK_DETECT; 1268 1269 pGetRxDetectParams = ¶ms.callbackType.callbackParams.getRxSublinkDetect; 1270 1271 status = knvlinkExecGspRmRpc(pGpu, pKernelNvlink, 1272 NV2080_CTRL_CMD_NVLINK_CORE_CALLBACK, 1273 (void *)¶ms, sizeof(params)); 1274 1275 // Store RXDET status mask 1276 pKernelNvlink->nvlinkLinks[linkId].laneRxdetStatusMask = 1277 pGetRxDetectParams->laneRxdetStatusMask; 1278 1279 // Update bRxDetected field based on RXDET status 1280 link->bRxDetected = (status == NV_OK ? NV_TRUE : NV_FALSE); 1281 1282 if (status != NV_OK) 1283 { 1284 NV_PRINTF(LEVEL_INFO, "RXDET (Receiver Detect) failed on link!\n"); 1285 return 1; 1286 } 1287 1288 return 0; 1289 } 1290 1291 /*! 
1292 * @brief Callback function for sending a discovery token over a link 1293 * 1294 * @param[in] nvlink_link pointer 1295 * @param[in] Token to be sent on the link 1296 * 1297 * @returns NVL_SUCCESS on success 1298 */ 1299 NvlStatus 1300 knvlinkCoreWriteDiscoveryTokenCallback 1301 ( 1302 nvlink_link *link, 1303 NvU64 token 1304 ) 1305 { 1306 KNVLINK_RM_LINK *pNvlinkLink = (KNVLINK_RM_LINK *)link->link_info; 1307 OBJGPU *pGpu = pNvlinkLink->pGpu; 1308 KernelNvlink *pKernelNvlink = NULL; 1309 NV_STATUS status = NV_OK; 1310 1311 if (pGpu == NULL) 1312 { 1313 NV_PRINTF(LEVEL_ERROR, "Error processing link info!\n"); 1314 return 1; 1315 } 1316 1317 pKernelNvlink = GPU_GET_KERNEL_NVLINK(pGpu); 1318 1319 // 1320 // If Nvlink4.0+ get the "token" values via SIDs stored 1321 // by MINION 1322 // 1323 if (pNvlinkLink->ipVerDlPl >= NVLINK_VERSION_40) 1324 { 1325 NV2080_CTRL_NVLINK_UPDATE_REMOTE_LOCAL_SID_PARAMS params; 1326 portMemSet(¶ms, 0, sizeof(params)); 1327 params.linkId = pNvlinkLink->linkId; 1328 1329 status = knvlinkExecGspRmRpc(pGpu, pKernelNvlink, 1330 NV2080_CTRL_CMD_NVLINK_UPDATE_REMOTE_LOCAL_SID, 1331 (void *)¶ms, sizeof(params)); 1332 if (status != NV_OK) 1333 { 1334 NV_PRINTF(LEVEL_ERROR, "Error updating Local/Remote SID Info!\n"); 1335 return status; 1336 } 1337 1338 link->remoteSid = 1339 params.remoteLocalSidInfo.remoteSid; 1340 link->remoteDeviceType = 1341 params.remoteLocalSidInfo.remoteDeviceType; 1342 link->remoteLinkId = 1343 params.remoteLocalSidInfo.remoteLinkId; 1344 link->localSid = 1345 params.remoteLocalSidInfo.localSid; 1346 } 1347 else 1348 { 1349 1350 NV2080_CTRL_NVLINK_CORE_CALLBACK_PARAMS params; 1351 NV2080_CTRL_NVLINK_CALLBACK_RD_WR_DISCOVERY_TOKEN_PARAMS 1352 *pWriteDiscoveryTokenParams; 1353 1354 portMemSet(¶ms, 0, sizeof(params)); 1355 params.linkId = pNvlinkLink->linkId; 1356 params.callbackType.type = 1357 NV2080_CTRL_NVLINK_CALLBACK_TYPE_WRITE_DISCOVERY_TOKEN; 1358 1359 pWriteDiscoveryTokenParams = 1360 
¶ms.callbackType.callbackParams.writeDiscoveryToken; 1361 pWriteDiscoveryTokenParams->ipVerDlPl = pNvlinkLink->ipVerDlPl; 1362 pWriteDiscoveryTokenParams->token = token; 1363 1364 status = knvlinkExecGspRmRpc(pGpu, pKernelNvlink, 1365 NV2080_CTRL_CMD_NVLINK_CORE_CALLBACK, 1366 (void *)¶ms, sizeof(params)); 1367 } 1368 1369 if (status != NV_OK) 1370 { 1371 if (status != NV_ERR_NOT_SUPPORTED) 1372 { 1373 NV_PRINTF(LEVEL_ERROR, "Error writing Discovery Token!\n"); 1374 } 1375 else 1376 { 1377 NV_PRINTF(LEVEL_INFO, "R4 Tokens not supported on the chip!\n"); 1378 } 1379 1380 return 1; 1381 } 1382 1383 return 0; 1384 } 1385 1386 /*! 1387 * @brief Callback function for getting a discovery token on a link 1388 * 1389 * @param[in] nvlink_link pointer 1390 * @param[out] Token received on the link 1391 * 1392 * @returns NVL_SUCCESS on success 1393 */ 1394 NvlStatus 1395 knvlinkCoreReadDiscoveryTokenCallback 1396 ( 1397 nvlink_link *link, 1398 NvU64 *token 1399 ) 1400 { 1401 KNVLINK_RM_LINK *pNvlinkLink = (KNVLINK_RM_LINK *)link->link_info; 1402 OBJGPU *pGpu = pNvlinkLink->pGpu; 1403 KernelNvlink *pKernelNvlink = NULL; 1404 NV_STATUS status = NV_OK; 1405 NV2080_CTRL_NVLINK_CORE_CALLBACK_PARAMS params; 1406 NV2080_CTRL_NVLINK_CALLBACK_RD_WR_DISCOVERY_TOKEN_PARAMS 1407 *pReadDiscoveryTokenParams; 1408 1409 portMemSet(¶ms, 0, sizeof(params)); 1410 1411 if (token == NULL) 1412 { 1413 NV_PRINTF(LEVEL_ERROR, "Bad token address provided!\n"); 1414 return 1; 1415 } 1416 1417 if (pGpu == NULL) 1418 { 1419 NV_PRINTF(LEVEL_ERROR, "Error processing link info!\n"); 1420 return 1; 1421 } 1422 1423 pKernelNvlink = GPU_GET_KERNEL_NVLINK(pGpu); 1424 1425 // If Nvlink4.0+ then reading tokens is no longer supported 1426 if (pNvlinkLink->ipVerDlPl >= NVLINK_VERSION_40) 1427 { 1428 status = NV_ERR_NOT_SUPPORTED; 1429 } 1430 else 1431 { 1432 params.linkId = pNvlinkLink->linkId; 1433 params.callbackType.type = 1434 NV2080_CTRL_NVLINK_CALLBACK_TYPE_READ_DISCOVERY_TOKEN; 1435 1436 
pReadDiscoveryTokenParams = 1437 ¶ms.callbackType.callbackParams.readDiscoveryToken; 1438 pReadDiscoveryTokenParams->ipVerDlPl = pNvlinkLink->ipVerDlPl; 1439 1440 status = knvlinkExecGspRmRpc(pGpu, pKernelNvlink, 1441 NV2080_CTRL_CMD_NVLINK_CORE_CALLBACK, 1442 (void *)¶ms, sizeof(params)); 1443 } 1444 1445 if (status != NV_OK) 1446 { 1447 if (status != NV_ERR_NOT_SUPPORTED) 1448 { 1449 NV_PRINTF(LEVEL_ERROR, "Error reading discovery token!\n"); 1450 } 1451 else 1452 { 1453 NV_PRINTF(LEVEL_INFO, "R4 Tokens not supported on the chip!\n"); 1454 } 1455 1456 return 1; 1457 } 1458 1459 *token = pReadDiscoveryTokenParams->token; 1460 1461 return 0; 1462 } 1463 1464 /*! 1465 * @brief Callback function for post link training tasks. 1466 * 1467 * @param[in] nvlink_link pointer 1468 */ 1469 void 1470 knvlinkCoreTrainingCompleteCallback 1471 ( 1472 nvlink_link *link 1473 ) 1474 { 1475 KNVLINK_RM_LINK *pNvlinkLink; 1476 OBJGPU *pGpu = NULL; 1477 KernelNvlink *pKernelNvlink = NULL; 1478 NV_STATUS status; 1479 NV2080_CTRL_NVLINK_CORE_CALLBACK_PARAMS params; 1480 1481 portMemSet(¶ms, 0, sizeof(params)); 1482 1483 pNvlinkLink = (KNVLINK_RM_LINK *)link->link_info; 1484 1485 pGpu = pNvlinkLink->pGpu; 1486 pKernelNvlink = GPU_GET_KERNEL_NVLINK(pGpu); 1487 1488 params.linkId = pNvlinkLink->linkId; 1489 params.callbackType.type = NV2080_CTRL_NVLINK_CALLBACK_TYPE_TRAINING_COMPLETE; 1490 1491 status = knvlinkExecGspRmRpc(pGpu, pKernelNvlink, 1492 NV2080_CTRL_CMD_NVLINK_CORE_CALLBACK, 1493 (void *)¶ms, sizeof(params)); 1494 if (status != NV_OK) 1495 { 1496 NV_PRINTF(LEVEL_ERROR, "Error issuing NvLink Training Complete callback!\n"); 1497 } 1498 } 1499 1500 /* 1501 * @brief nvlinkCoreGetUphyLoadCallback send ALI training on the specified link 1502 * 1503 * @param[in] link nvlink_link pointer 1504 */ 1505 NvlStatus 1506 knvlinkCoreAliTrainingCallback 1507 ( 1508 nvlink_link *link 1509 ) 1510 { 1511 KNVLINK_RM_LINK *pNvlinkLink = (KNVLINK_RM_LINK *) link->link_info; 1512 OBJGPU *pGpu = 
pNvlinkLink->pGpu; 1513 KernelNvlink * pKernelNvlink = NULL; 1514 NV_STATUS status; 1515 1516 if (pGpu == NULL) 1517 { 1518 NV_PRINTF(LEVEL_ERROR, "Error processing link info!\n"); 1519 return 1; 1520 } 1521 1522 pKernelNvlink = GPU_GET_KERNEL_NVLINK(pGpu); 1523 1524 status = knvlinkPreTrainLinksToActiveAli(pGpu, pKernelNvlink, 1525 BIT(pNvlinkLink->linkId), NV_TRUE); 1526 if (status != NV_OK) 1527 { 1528 goto knvlinkCoreAliTrainingCallback_end; 1529 } 1530 1531 status = knvlinkTrainLinksToActiveAli(pGpu, pKernelNvlink, NVBIT(pNvlinkLink->linkId), NV_FALSE); 1532 1533 knvlinkCoreAliTrainingCallback_end: 1534 if (status != NV_OK) 1535 { 1536 NV_PRINTF(LEVEL_ERROR, 1537 "Failed to request Link %d to transition to active\n", pNvlinkLink->linkId); 1538 return 1; 1539 } 1540 1541 return 0; 1542 } 1543 1544 /*! 1545 * @brief nvlinkCoreGetUphyLoadCallback checks if uphy is locked 1546 * 1547 * @param[in] pGpu OBJGPU pointer 1548 * @param[in] link nvlink_link pointer 1549 * @param[out] bUnlocked Uphy is locked or unlocked 1550 */ 1551 void 1552 knvlinkCoreGetUphyLoadCallback 1553 ( 1554 nvlink_link *link, 1555 NvBool *bUnlocked 1556 ) 1557 { 1558 KNVLINK_RM_LINK *pNvlinkLink = (KNVLINK_RM_LINK *)link->link_info; 1559 OBJGPU *pGpu = pNvlinkLink->pGpu; 1560 KernelNvlink *pKernelNvlink = NULL; 1561 NV_STATUS status; 1562 NV2080_CTRL_NVLINK_CORE_CALLBACK_PARAMS params; 1563 NV2080_CTRL_NVLINK_CALLBACK_GET_UPHY_LOAD_PARAMS 1564 *pGetUphyLoadParams; 1565 1566 portMemSet(¶ms, 0, sizeof(params)); 1567 1568 if (pGpu == NULL) 1569 { 1570 NV_PRINTF(LEVEL_ERROR, "Error processing link info!\n"); 1571 return; 1572 } 1573 1574 pKernelNvlink = GPU_GET_KERNEL_NVLINK(pGpu); 1575 1576 params.linkId = pNvlinkLink->linkId; 1577 params.callbackType.type = 1578 NV2080_CTRL_NVLINK_CALLBACK_TYPE_GET_UPHY_LOAD; 1579 1580 pGetUphyLoadParams = 1581 ¶ms.callbackType.callbackParams.getUphyLoad; 1582 1583 status = knvlinkExecGspRmRpc(pGpu, pKernelNvlink, 1584 NV2080_CTRL_CMD_NVLINK_CORE_CALLBACK, 1585 
(void *)¶ms, sizeof(params)); 1586 if (status != NV_OK) 1587 { 1588 NV_PRINTF(LEVEL_ERROR, "Error issuing NvLink Get Uphy Load callback!\n"); 1589 } 1590 1591 *bUnlocked = pGetUphyLoadParams->bUnlocked; 1592 } 1593 1594 #endif 1595