1 /******************************************************************************* 2 Copyright (c) 2019-2020 NVidia Corporation 3 4 Permission is hereby granted, free of charge, to any person obtaining a copy 5 of this software and associated documentation files (the "Software"), to 6 deal in the Software without restriction, including without limitation the 7 rights to use, copy, modify, merge, publish, distribute, sublicense, and/or 8 sell copies of the Software, and to permit persons to whom the Software is 9 furnished to do so, subject to the following conditions: 10 11 The above copyright notice and this permission notice shall be 12 included in all copies or substantial portions of the Software. 13 14 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 15 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 16 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 17 THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 18 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 19 FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 20 DEALINGS IN THE SOFTWARE. 21 *******************************************************************************/ 22 23 #include "nvlink.h" 24 #include "nvlink_export.h" 25 #include "nvlink_os.h" 26 #include "../nvlink_ctx.h" 27 #include "../nvlink_helper.h" 28 29 static void _nvlink_core_set_sublink_pre_hs_settings(nvlink_link *, NvU32); 30 static void _nvlink_core_set_link_pre_active_settings(nvlink_link *, NvU32); 31 static void _nvlink_core_set_link_post_active_settings(nvlink_link *, NvU32); 32 33 NvlStatus 34 nvlink_core_train_check_link_ready_ALI 35 ( 36 nvlink_link **links, 37 NvU32 linkCount 38 ) 39 { 40 NvU32 i = 0; 41 NvlStatus status = NVL_SUCCESS; 42 43 if (links == NULL) 44 { 45 return NVL_BAD_ARGS; 46 } 47 48 for (i = 0; i < linkCount; i++) 49 { 50 if (links[i] == NULL) 51 continue; 52 53 if (!nvlink_core_check_link_state(links[i], NVLINK_LINKSTATE_ALI)) 54 { 55 // If link is not in active, update status to be error and continue 56 status = NVL_ERR_GENERIC; 57 continue; 58 } 59 60 links[i]->link_handlers->training_complete(links[i]); 61 } 62 63 return status; 64 } 65 66 /** 67 * Link training 68 * Train the internode connection link from SWCFG to ACTIVE 69 * 70 * @param[in] conn NVLink connection pointer 71 * @param[in] isMasterEnd Is this the master end of the connection 72 * @param[in] flags Flags to track if training is sync/async 73 * 74 * return NVL_SUCCESS if the link trains successfully 75 */ 76 NvlStatus 77 nvlink_core_train_internode_conns_from_swcfg_to_active 78 ( 79 nvlink_internode_conn **conns, 80 NvU32 connCount, 81 NvU32 *isMasterEnd, 82 NvU32 flags 83 ) 84 { 85 NvlStatus status = NVL_SUCCESS; 86 NvU32 i; 87 NvBool skipConn[NVLINK_MAX_SYSTEM_LINK_NUM] = {0}; 88 89 if ((conns == NULL) || (connCount == 0) || (isMasterEnd == 0)) 90 { 91 NVLINK_PRINT((DBG_MODULE_NVLINK_CORE, NVLINK_DBG_LEVEL_ERRORS, 92 "%s: No connections to train to ACTIVE\n", 93 __FUNCTION__)); 94 95 return NVL_BAD_ARGS; 96 } 97 98 for (i = 0; i < connCount; i++) 99 { 100 if (conns[i] == NULL) 101 continue; 102 103 // Don't do anything if the link is already at HS. 104 if ((nvlink_core_check_link_state(conns[i]->local_end, NVLINK_LINKSTATE_HS)) && 105 (nvlink_core_check_tx_sublink_state(conns[i]->local_end, 106 NVLINK_SUBLINK_STATE_TX_HS)) && 107 (nvlink_core_check_rx_sublink_state(conns[i]->local_end, 108 NVLINK_SUBLINK_STATE_RX_HS))) 109 { 110 // 111 // Note: On NVLink version < 3.0, bufferready is set prior to link state 112 // change to ACTIVE. So, return early. For NVLink version >= 3.0, 113 // bufferready is only set after link is ACTIVE. Hence, proceed to 114 // the subsequent code 115 // 116 if (conns[i]->local_end->version < NVLINK_DEVICE_VERSION_30) 117 { 118 skipConn[i] = NV_TRUE; 119 } 120 } 121 122 // 123 // For NVLink version < 3.0, we can train link to ACTIVE only when link is at 124 // SWCFG and sublink are at HS 125 // 126 if (conns[i]->local_end->version < NVLINK_DEVICE_VERSION_30) 127 { 128 if (!(nvlink_core_check_link_state(conns[i]->local_end, NVLINK_LINKSTATE_SAFE)) || 129 !(nvlink_core_check_tx_sublink_state(conns[i]->local_end, 130 NVLINK_SUBLINK_STATE_TX_HS)) || 131 !(nvlink_core_check_rx_sublink_state(conns[i]->local_end, 132 NVLINK_SUBLINK_STATE_RX_HS))) 133 { 134 NVLINK_PRINT((DBG_MODULE_NVLINK_CORE, NVLINK_DBG_LEVEL_ERRORS, 135 "%s: Invalid link/sublink mode while training link to HIGH SPEED" 136 " %s:%s \n", 137 __FUNCTION__, 138 conns[i]->local_end->dev->deviceName, conns[i]->local_end->linkName)); 139 nvlink_core_print_link_state(conns[i]->local_end); 140 skipConn[i] = NV_TRUE; 141 } 142 } 143 } 144 145 for (i = 0; i < connCount; i++) 146 { 147 if ((conns[i] == NULL) || skipConn[i]) 148 { 149 continue; 150 } 151 152 _nvlink_core_set_link_pre_active_settings(conns[i]->local_end, flags); 153 154 // Change mode for master link. The other link end should transition to active. 155 if (isMasterEnd[i] == NV_TRUE) 156 { 157 conns[i]->local_end->link_handlers->set_dl_link_mode(conns[i]->local_end, 158 NVLINK_LINKSTATE_HS, 159 flags); 160 } 161 } 162 163 for (i = 0; i < connCount; i++) 164 { 165 if (conns[i] == NULL) 166 continue; 167 168 // Wait for the link state to change. 169 status = nvlink_core_poll_link_state(conns[i]->local_end, 170 NVLINK_LINKSTATE_HS, 171 NVLINK_TRANSITION_HS_TIMEOUT); 172 if (status != NVL_SUCCESS) 173 { 174 NVLINK_PRINT((DBG_MODULE_NVLINK_CORE, NVLINK_DBG_LEVEL_ERRORS, 175 "%s: Unable to set link state to ACTIVE for link" 176 " %s:%s \n", 177 __FUNCTION__, 178 conns[i]->local_end->dev->deviceName, conns[i]->local_end->linkName)); 179 } 180 else 181 { 182 NVLINK_PRINT((DBG_MODULE_NVLINK_CORE, NVLINK_DBG_LEVEL_SETUP, 183 "%s: Successfully able to set link state to ACTIVE for link" 184 " %s:%s \n", 185 __FUNCTION__, 186 conns[i]->local_end->dev->deviceName, conns[i]->local_end->linkName)); 187 } 188 189 // Do all the miscellaneous settings once the link is trained to ACTIVE. 190 _nvlink_core_set_link_post_active_settings(conns[i]->local_end, flags); 191 } 192 193 // 194 // Always return success to FM on training failures 195 // FM will read link states to determine sucessfull training 196 // 197 return NVL_SUCCESS; 198 } 199 200 /** 201 * Link training 202 * Train the internode connection sublink to enter high speed 203 * 204 * @param[in] conn NVLink connection pointer 205 * @param[in] flags Flags to track if the training is sync/async 206 * 207 * return NVL_SUCCESS if the sublink trained successfully 208 */ 209 NvlStatus 210 nvlink_core_train_internode_conn_sublink_from_safe_to_hs 211 ( 212 nvlink_internode_conn *conn, 213 NvU32 flags 214 ) 215 { 216 NvlStatus status = NVL_SUCCESS; 217 218 if (conn == NULL) 219 { 220 return NVL_BAD_ARGS; 221 } 222 223 // NVLink 3.0 onwards this is handled through INITOPTIMIZE, return error 224 if (conn->local_end->version >= NVLINK_DEVICE_VERSION_30) 225 { 226 return NVL_ERR_NOT_SUPPORTED; 227 } 228 229 _nvlink_core_set_sublink_pre_hs_settings(conn->local_end, flags); 230 231 // don't do anything if the link is already at HS. 232 if ((nvlink_core_check_link_state(conn->local_end, NVLINK_LINKSTATE_HS)) && 233 (nvlink_core_check_tx_sublink_state(conn->local_end, 234 NVLINK_SUBLINK_STATE_TX_HS)) && 235 (nvlink_core_check_rx_sublink_state(conn->local_end, 236 NVLINK_SUBLINK_STATE_RX_HS))) 237 { 238 // both link and sublinks are at HS. don't do anything. 239 return NVL_SUCCESS; 240 } 241 242 // we can train sublink to HS only when link is at SWCFG. 243 if (!nvlink_core_check_link_state(conn->local_end, NVLINK_LINKSTATE_SAFE)) 244 { 245 NVLINK_PRINT((DBG_MODULE_NVLINK_CORE, NVLINK_DBG_LEVEL_ERRORS, 246 "%s: Invalid link mode while training sublink to HIGH SPEED" 247 " %s:%s \n", 248 __FUNCTION__, 249 conn->local_end->dev->deviceName, conn->local_end->linkName)); 250 nvlink_core_print_link_state(conn->local_end); 251 return NVL_ERR_INVALID_STATE; 252 } 253 254 // tx sublink state must be in SAFE as well. 255 if (!nvlink_core_check_tx_sublink_state(conn->local_end, 256 NVLINK_SUBLINK_STATE_TX_SAFE)) 257 { 258 NVLINK_PRINT((DBG_MODULE_NVLINK_CORE, NVLINK_DBG_LEVEL_ERRORS, 259 "%s: Invalid tx sublink mode while training sublink to HIGH SPEED" 260 " %s:%s \n", 261 __FUNCTION__, 262 conn->local_end->dev->deviceName, conn->local_end->linkName)); 263 nvlink_core_print_link_state(conn->local_end); 264 return NVL_ERR_INVALID_STATE; 265 } 266 267 // 268 // rx sublink state may be in SAFE mode or in HS, if the other end of the 269 // connection already toggled tx sublink mode to HS. 270 // 271 if (!((nvlink_core_check_rx_sublink_state(conn->local_end, 272 NVLINK_SUBLINK_STATE_RX_SAFE)) || 273 (nvlink_core_check_rx_sublink_state(conn->local_end, 274 NVLINK_SUBLINK_STATE_RX_HS)))) 275 { 276 NVLINK_PRINT((DBG_MODULE_NVLINK_CORE, NVLINK_DBG_LEVEL_ERRORS, 277 "%s: Invalid rx sublink mode while training sublink to HIGH SPEED" 278 " %s:%s \n", 279 __FUNCTION__, 280 conn->local_end->dev->deviceName, conn->local_end->linkName)); 281 nvlink_core_print_link_state(conn->local_end); 282 return NVL_ERR_INVALID_STATE; 283 } 284 285 // Put TX sublink in HS 286 conn->local_end->link_handlers->set_tx_mode(conn->local_end, 287 NVLINK_SUBLINK_STATE_TX_HS, 288 flags); 289 290 // Wait for sublink to go in HS. 291 status = nvlink_core_poll_tx_sublink_state(conn->local_end, 292 NVLINK_SUBLINK_STATE_TX_HS, 293 NVLINK_SUBLINK_SUBSTATE_TX_STABLE, 294 NVLINK_TRANSITION_HS_TIMEOUT); 295 if (status != NVL_SUCCESS) 296 { 297 NVLINK_PRINT((DBG_MODULE_NVLINK_CORE, NVLINK_DBG_LEVEL_ERRORS, 298 "%s: Unable to set sublink state to HIGH SPEED for link" 299 " %s:%s \n", 300 __FUNCTION__, 301 conn->local_end->dev->deviceName, conn->local_end->linkName)); 302 return status; 303 } 304 305 NVLINK_PRINT((DBG_MODULE_NVLINK_CORE, NVLINK_DBG_LEVEL_SETUP, 306 "%s:Successfully able to set sublink state to HIGH SPEED for link" 307 " %s:%s \n", 308 __FUNCTION__, 309 conn->local_end->dev->deviceName, conn->local_end->linkName)); 310 311 return status; 312 } 313 314 /** 315 * Train a given set of intranode connections from L2 to ACTIVE state 316 * 317 * @param[in] conns Array of connections to train 318 * @param[in] connCount Number of connections in the array 319 * @param[in] flags Flags to track if training is sync/async 320 * 321 * return NVL_SUCCESS if the connections train successfully 322 */ 323 NvlStatus 324 nvlink_core_train_intranode_conns_from_from_L2_to_active 325 ( 326 nvlink_intranode_conn **conns, 327 NvU32 connCount, 328 NvU32 flags 329 ) 330 { 331 NvlStatus status = NVL_SUCCESS; 332 NvU64 linkMode = NVLINK_LINKSTATE_OFF; 333 NvU32 i; 334 335 if ((conns == NULL) || (connCount == 0)) 336 { 337 NVLINK_PRINT((DBG_MODULE_NVLINK_CORE, NVLINK_DBG_LEVEL_ERRORS, 338 "%s: No connections to exit L2\n", 339 __FUNCTION__)); 340 341 return NVL_ERR_GENERIC; 342 } 343 344 /**************** Start the L2 exit sequence for the connections ***************/ 345 346 // STEP 1: Reset all endpoints of the links. This clears any link state 347 for (i = 0; i < connCount; i++) 348 { 349 if (conns[i] == NULL) 350 continue; 351 352 conns[i]->end0->link_handlers->set_dl_link_mode(conns[i]->end0, 353 NVLINK_LINKSTATE_RESET, 354 flags); 355 conns[i]->end1->link_handlers->set_dl_link_mode(conns[i]->end1, 356 NVLINK_LINKSTATE_RESET, 357 flags); 358 } 359 360 // STEP 2: NVLink 3 and beyond, we also need to perform INITPHASE1 361 for (i = 0; i < connCount; i++) 362 { 363 if (conns[i] == NULL) 364 continue; 365 366 conns[i]->end0->link_handlers->set_dl_link_mode(conns[i]->end0, 367 NVLINK_LINKSTATE_INITPHASE1, 368 flags); 369 if (conns[i]->end0 != conns[i]->end1) 370 { 371 conns[i]->end1->link_handlers->set_dl_link_mode(conns[i]->end1, 372 NVLINK_LINKSTATE_INITPHASE1, 373 flags); 374 } 375 } 376 377 // Get link state on all endpoints. This ensures that NVLINK_LINKSTATE_INITPHASE1 completes 378 if (flags == NVLINK_STATE_CHANGE_ASYNC) 379 { 380 for (i = 0; i < connCount; i++) 381 { 382 if (conns[i] == NULL) 383 continue; 384 385 status = conns[i]->end0->link_handlers->get_dl_link_mode(conns[i]->end0, &linkMode); 386 if ((status != NVL_SUCCESS) || 387 (linkMode == NVLINK_LINKSTATE_FAIL) || (linkMode == NVLINK_LINKSTATE_FAULT)) 388 { 389 return status; 390 } 391 392 status = conns[i]->end1->link_handlers->get_dl_link_mode(conns[i]->end1, &linkMode); 393 if ((status != NVL_SUCCESS) || 394 (linkMode == NVLINK_LINKSTATE_FAIL) || (linkMode == NVLINK_LINKSTATE_FAULT)) 395 { 396 return status; 397 } 398 } 399 } 400 401 // Verify that all the endpoints are now in INIT state 402 for (i = 0; i < connCount; i++) 403 { 404 if (conns[i] == NULL) 405 continue; 406 407 status = nvlink_core_check_intranode_conn_state(conns[i], NVLINK_LINKSTATE_OFF); 408 if (status != NVL_SUCCESS) 409 { 410 NVLINK_PRINT((DBG_MODULE_NVLINK_CORE, NVLINK_DBG_LEVEL_ERRORS, 411 "%s: Connection did not transition to INIT. ", 412 __FUNCTION__)); 413 nvlink_core_print_intranode_conn(conns[i]); 414 415 return status; 416 } 417 } 418 419 // STEP 3: Restore all end point state saved while entering SLEEP state 420 for (i = 0; i < connCount; i++) 421 { 422 if (conns[i] == NULL) 423 continue; 424 425 if (conns[i]->end0->bStateSaved) 426 { 427 conns[i]->end0->link_handlers->set_dl_link_mode(conns[i]->end0, 428 NVLINK_LINKSTATE_RESTORE_STATE, 429 flags); 430 } 431 432 if (conns[i]->end1->bStateSaved) 433 { 434 conns[i]->end1->link_handlers->set_dl_link_mode(conns[i]->end1, 435 NVLINK_LINKSTATE_RESTORE_STATE, 436 flags); 437 } 438 } 439 440 // Get link state on all endpoints. This ensures that NVLINK_LINKSTATE_RESTORE_STATE completes 441 if (flags == NVLINK_STATE_CHANGE_ASYNC) 442 { 443 for (i = 0; i < connCount; i++) 444 { 445 if (conns[i] == NULL) 446 continue; 447 448 status = conns[i]->end0->link_handlers->get_dl_link_mode(conns[i]->end0, &linkMode); 449 if ((status != NVL_SUCCESS) || 450 (linkMode == NVLINK_LINKSTATE_FAIL) || (linkMode == NVLINK_LINKSTATE_FAULT)) 451 { 452 return status; 453 } 454 455 status = conns[i]->end1->link_handlers->get_dl_link_mode(conns[i]->end1, &linkMode); 456 if ((status != NVL_SUCCESS) || 457 (linkMode == NVLINK_LINKSTATE_FAIL) || (linkMode == NVLINK_LINKSTATE_FAULT)) 458 { 459 return status; 460 } 461 } 462 } 463 464 // STEP 4: Initialize RX Termination on all end points 465 for (i = 0; i < connCount; i++) 466 { 467 if (conns[i] == NULL) 468 continue; 469 470 conns[i]->end0->link_handlers->set_rx_mode(conns[i]->end0, 471 NVLINK_SUBLINK_STATE_RX_INIT_TERM, 472 flags); 473 conns[i]->end1->link_handlers->set_rx_mode(conns[i]->end1, 474 NVLINK_SUBLINK_STATE_RX_INIT_TERM, 475 flags); 476 } 477 478 // Get link state on all endpoints. This ensures that NVLINK_SUBLINK_STATE_RX_INIT_TERM completes 479 if (flags == NVLINK_STATE_CHANGE_ASYNC) 480 { 481 for (i = 0; i < connCount; i++) 482 { 483 if (conns[i] == NULL) 484 continue; 485 486 status = conns[i]->end0->link_handlers->get_dl_link_mode(conns[i]->end0, &linkMode); 487 if ((status != NVL_SUCCESS) || 488 (linkMode == NVLINK_LINKSTATE_FAIL) || (linkMode == NVLINK_LINKSTATE_FAULT)) 489 { 490 return status; 491 } 492 493 status = conns[i]->end1->link_handlers->get_dl_link_mode(conns[i]->end1, &linkMode); 494 if ((status != NVL_SUCCESS) || 495 (linkMode == NVLINK_LINKSTATE_FAIL) || (linkMode == NVLINK_LINKSTATE_FAULT)) 496 { 497 return status; 498 } 499 } 500 } 501 502 // STEP 5: Enable Common mode on Tx's of all endpoints 503 for (i = 0; i < connCount; i++) 504 { 505 if (conns[i] == NULL) 506 continue; 507 508 if (!((conns[i]->end0->tx_sublink_state == NVLINK_SUBLINK_STATE_TX_COMMON_MODE) || 509 (conns[i]->end0->tx_sublink_state == NVLINK_SUBLINK_STATE_TX_COMMON_MODE_DISABLE) || 510 (conns[i]->end0->tx_sublink_state == NVLINK_SUBLINK_STATE_TX_DATA_READY))) 511 { 512 conns[i]->end0->link_handlers->set_tx_mode(conns[i]->end0, 513 NVLINK_SUBLINK_STATE_TX_COMMON_MODE, 514 flags); 515 } 516 if (!((conns[i]->end1->tx_sublink_state == NVLINK_SUBLINK_STATE_TX_COMMON_MODE) || 517 (conns[i]->end1->tx_sublink_state == NVLINK_SUBLINK_STATE_TX_COMMON_MODE_DISABLE) || 518 (conns[i]->end1->tx_sublink_state == NVLINK_SUBLINK_STATE_TX_DATA_READY))) 519 { 520 conns[i]->end1->link_handlers->set_tx_mode(conns[i]->end1, 521 NVLINK_SUBLINK_STATE_TX_COMMON_MODE, 522 flags); 523 } 524 } 525 526 // Get link state on all endpoints. This ensures that NVLINK_SUBLINK_STATE_TX_COMMON_MODE completes 527 if (flags == NVLINK_STATE_CHANGE_ASYNC) 528 { 529 for (i = 0; i < connCount; i++) 530 { 531 if (conns[i] == NULL) 532 continue; 533 534 status = conns[i]->end0->link_handlers->get_dl_link_mode(conns[i]->end0, &linkMode); 535 if ((status != NVL_SUCCESS) || 536 (linkMode == NVLINK_LINKSTATE_FAIL) || (linkMode == NVLINK_LINKSTATE_FAULT)) 537 { 538 return status; 539 } 540 541 status = conns[i]->end1->link_handlers->get_dl_link_mode(conns[i]->end1, &linkMode); 542 if ((status != NVL_SUCCESS) || 543 (linkMode == NVLINK_LINKSTATE_FAIL) || (linkMode == NVLINK_LINKSTATE_FAULT)) 544 { 545 return status; 546 } 547 } 548 } 549 550 // STEP 6: Put all Rx's in RXCAL 551 for (i = 0; i < connCount; i++) 552 { 553 if (conns[i] == NULL) 554 continue; 555 556 if (conns[i]->end0->rx_sublink_state != NVLINK_SUBLINK_STATE_RX_RXCAL) 557 { 558 conns[i]->end0->link_handlers->set_rx_mode(conns[i]->end0, 559 NVLINK_SUBLINK_STATE_RX_RXCAL, 560 flags); 561 } 562 if (conns[i]->end1->rx_sublink_state != NVLINK_SUBLINK_STATE_RX_RXCAL) 563 { 564 conns[i]->end1->link_handlers->set_rx_mode(conns[i]->end1, 565 NVLINK_SUBLINK_STATE_RX_RXCAL, 566 flags); 567 } 568 } 569 570 // STEP 7: Disable Tx common mode 571 for (i = 0; i < connCount; i++) 572 { 573 if (conns[i] == NULL) 574 continue; 575 576 if (!((conns[i]->end0->tx_sublink_state == NVLINK_SUBLINK_STATE_TX_COMMON_MODE_DISABLE) || 577 (conns[i]->end0->tx_sublink_state == NVLINK_SUBLINK_STATE_TX_DATA_READY))) 578 { 579 conns[i]->end0->link_handlers->set_tx_mode(conns[i]->end0, 580 NVLINK_SUBLINK_STATE_TX_COMMON_MODE_DISABLE, 581 flags); 582 } 583 if (!((conns[i]->end1->tx_sublink_state == NVLINK_SUBLINK_STATE_TX_COMMON_MODE_DISABLE) || 584 (conns[i]->end1->tx_sublink_state == NVLINK_SUBLINK_STATE_TX_DATA_READY))) 585 { 586 conns[i]->end1->link_handlers->set_tx_mode(conns[i]->end1, 587 NVLINK_SUBLINK_STATE_TX_COMMON_MODE_DISABLE, 588 flags); 589 } 590 } 591 592 // STEP 8: Set Data Ready and Enable 593 for (i = 0; i < connCount; i++) 594 { 595 if (conns[i] == NULL) 596 continue; 597 598 if (conns[i]->end0->tx_sublink_state != NVLINK_SUBLINK_STATE_TX_DATA_READY) 599 { 600 conns[i]->end0->link_handlers->set_tx_mode(conns[i]->end0, 601 NVLINK_SUBLINK_STATE_TX_DATA_READY, 602 flags); 603 } 604 if (conns[i]->end1->tx_sublink_state != NVLINK_SUBLINK_STATE_TX_DATA_READY) 605 { 606 conns[i]->end1->link_handlers->set_tx_mode(conns[i]->end1, 607 NVLINK_SUBLINK_STATE_TX_DATA_READY, 608 flags); 609 } 610 } 611 612 // Get link state on all endpoints. This ensures that NVLINK_SUBLINK_STATE_TX_DATA_READY completes 613 if (flags == NVLINK_STATE_CHANGE_ASYNC) 614 { 615 for (i = 0; i < connCount; i++) 616 { 617 if (conns[i] == NULL) 618 continue; 619 620 status = conns[i]->end0->link_handlers->get_dl_link_mode(conns[i]->end0, &linkMode); 621 if ((status != NVL_SUCCESS) || 622 (linkMode == NVLINK_LINKSTATE_FAIL) || (linkMode == NVLINK_LINKSTATE_FAULT)) 623 { 624 return status; 625 } 626 627 status = conns[i]->end1->link_handlers->get_dl_link_mode(conns[i]->end1, &linkMode); 628 if ((status != NVL_SUCCESS) || 629 (linkMode == NVLINK_LINKSTATE_FAIL) || (linkMode == NVLINK_LINKSTATE_FAULT)) 630 { 631 return status; 632 } 633 } 634 } 635 636 // STEP 9: Set link mode to SAFE 637 for (i = 0; i < connCount; i++) 638 { 639 if (conns[i] == NULL) 640 continue; 641 642 conns[i]->end0->link_handlers->set_dl_link_mode(conns[i]->end0, 643 NVLINK_LINKSTATE_SAFE, 644 flags); 645 if (conns[i]->end0 != conns[i]->end1) 646 { 647 conns[i]->end1->link_handlers->set_dl_link_mode(conns[i]->end1, 648 NVLINK_LINKSTATE_SAFE, 649 flags); 650 } 651 } 652 653 // Verify all the endpoints link state now reflect SAFE state 654 for (i = 0; i < connCount; i++) 655 { 656 if (conns[i] == NULL) 657 continue; 658 659 status = nvlink_core_poll_link_state(conns[i]->end0, 660 NVLINK_LINKSTATE_SAFE, 661 NVLINK_TRANSITION_SAFE_TIMEOUT); 662 if (status != NVL_SUCCESS) 663 { 664 return status; 665 } 666 667 status = nvlink_core_poll_link_state(conns[i]->end1, 668 NVLINK_LINKSTATE_SAFE, 669 NVLINK_TRANSITION_SAFE_TIMEOUT); 670 if (status != NVL_SUCCESS) 671 { 672 return status; 673 } 674 675 status = nvlink_core_poll_sublink_state(conns[i]->end0, 676 NVLINK_SUBLINK_STATE_TX_SAFE, 677 NVLINK_SUBLINK_SUBSTATE_TX_STABLE, 678 conns[i]->end1, 679 NVLINK_SUBLINK_STATE_RX_SAFE, 680 NVLINK_SUBLINK_SUBSTATE_RX_STABLE, 681 NVLINK_TRANSITION_SAFE_TIMEOUT); 682 if (status != NVL_SUCCESS) 683 { 684 return status; 685 } 686 687 status = nvlink_core_poll_sublink_state(conns[i]->end1, 688 NVLINK_SUBLINK_STATE_TX_SAFE, 689 NVLINK_SUBLINK_SUBSTATE_TX_STABLE, 690 conns[i]->end0, 691 NVLINK_SUBLINK_STATE_RX_SAFE, 692 NVLINK_SUBLINK_SUBSTATE_RX_STABLE, 693 NVLINK_TRANSITION_SAFE_TIMEOUT); 694 if (status != NVL_SUCCESS) 695 { 696 return status; 697 } 698 } 699 700 // STEP 9: Set INITNEOGOTIATE 701 for (i = 0; i < connCount; i++) 702 { 703 if (conns[i] == NULL) 704 continue; 705 706 conns[i]->end0->link_handlers->set_dl_link_mode(conns[i]->end0, 707 NVLINK_LINKSTATE_INITNEGOTIATE, 708 flags); 709 if (conns[i]->end0 != conns[i]->end1) 710 { 711 conns[i]->end1->link_handlers->set_dl_link_mode(conns[i]->end1, 712 NVLINK_LINKSTATE_INITNEGOTIATE, 713 flags); 714 } 715 } 716 717 // Bug 2398907 mentioned that a link pair can take upto 125us for DL stat to have CONFIG_GOOD. 718 nvlink_sleep(1); 719 720 // STEP 8: Set POST_INITNEGOTIATE 721 for (i = 0; i < connCount; i++) 722 { 723 if (conns[i] == NULL) 724 continue; 725 726 conns[i]->end0->link_handlers->set_dl_link_mode(conns[i]->end0, 727 NVLINK_LINKSTATE_POST_INITNEGOTIATE, 728 flags); 729 conns[i]->end1->link_handlers->set_dl_link_mode(conns[i]->end1, 730 NVLINK_LINKSTATE_POST_INITNEGOTIATE, 731 flags); 732 } 733 734 if (connCount != 0) 735 { 736 // STEP 11: Train the sublinks to HS and links to ACTIVE 737 if (conns[0]->end0->version >= NVLINK_DEVICE_VERSION_30) 738 { 739 // NVLink 3.0+ ALT training 740 status = nvlink_core_train_intranode_conns_from_swcfg_to_active_ALT(conns, 741 connCount, 742 flags); 743 } 744 else 745 { 746 // Legacy training 747 status = nvlink_core_train_intranode_conns_from_swcfg_to_active_legacy(conns, 748 connCount, 749 flags); 750 } 751 752 if (status != NVL_SUCCESS) 753 { 754 return status; 755 } 756 757 for (i = 0; i < connCount; i++) 758 { 759 if (conns[i] == NULL) 760 continue; 761 762 // Update the power state transition status of the link 763 conns[i]->end0->powerStateTransitionStatus = nvlink_power_state_in_L0; 764 conns[i]->end1->powerStateTransitionStatus = nvlink_power_state_in_L0; 765 } 766 } 767 768 /***************** End of L2 exit sequence for the connections *****************/ 769 770 return status; 771 } 772 773 /** 774 * Train intranode connections associated with a list of links to HS 775 * using non-ALI sequence 776 * 777 * @param[in] conns Array of connections to train 778 * @param[in] connCount Number of connections in the array 779 * @param[in] flags Flags to track if training is sync/async 780 * 781 * return NVL_SUCCESS if the connections train successfully 782 */ 783 NvlStatus 784 nvlink_core_train_intranode_conns_from_swcfg_to_active_non_ALI 785 ( 786 nvlink_intranode_conn **conns, 787 NvU32 connCount, 788 NvU32 flags 789 ) 790 { 791 NvlStatus status = NVL_SUCCESS; 792 NvlStatus pollStatus = NVL_SUCCESS; 793 NvU32 i; 794 795 if ((conns == NULL) || (connCount == 0)) 796 { 797 NVLINK_PRINT((DBG_MODULE_NVLINK_CORE, NVLINK_DBG_LEVEL_ERRORS, 798 "%s: No connections to train to ACTIVE\n", 799 __FUNCTION__)); 800 801 return NVL_ERR_GENERIC; 802 } 803 804 // Trigger INITOPTIMIZE on both ends of the connection 805 for (i = 0; i < connCount; i++) 806 { 807 if (conns[i] == NULL) 808 continue; 809 810 conns[i]->end0->link_handlers->set_dl_link_mode(conns[i]->end0, 811 NVLINK_LINKSTATE_INITOPTIMIZE, 812 flags); 813 814 // On loopback, only send INITOPTIMIZE to one side. 815 if (conns[i]->end0 != conns[i]->end1) 816 { 817 conns[i]->end1->link_handlers->set_dl_link_mode(conns[i]->end1, 818 NVLINK_LINKSTATE_INITOPTIMIZE, 819 flags); 820 } 821 } 822 823 // Trigger POST_INITOPTIMIZE (Checks INITOPTIMIZE was successful) on both ends of the connection 824 for (i = 0; i < connCount; i++) 825 { 826 if (conns[i] == NULL) 827 continue; 828 829 conns[i]->end0->link_handlers->set_dl_link_mode(conns[i]->end0, 830 NVLINK_LINKSTATE_POST_INITOPTIMIZE, 831 flags); 832 833 // On loopback, only send POST_INITOPTIMIZE to one side. 834 if (conns[i]->end0 != conns[i]->end1) 835 { 836 conns[i]->end1->link_handlers->set_dl_link_mode(conns[i]->end1, 837 NVLINK_LINKSTATE_POST_INITOPTIMIZE, 838 flags); 839 } 840 } 841 842 // Set link modes to ACTIVE 843 for (i = 0; i < connCount; i++) 844 { 845 if (conns[i] == NULL) 846 continue; 847 848 // Some settings required before moving to ACTIVE 849 _nvlink_core_set_link_pre_active_settings(conns[i]->end0, flags); 850 _nvlink_core_set_link_pre_active_settings(conns[i]->end1, flags); 851 852 conns[i]->end0->link_handlers->set_dl_link_mode(conns[i]->end0, 853 NVLINK_LINKSTATE_HS, 854 flags); 855 856 // If not in loopback send the dl link mode 857 if (conns[i]->end0 != conns[i]->end1) 858 { 859 860 conns[i]->end1->link_handlers->set_dl_link_mode(conns[i]->end1, 861 NVLINK_LINKSTATE_HS, 862 flags); 863 } 864 865 } 866 867 // Verify link mode HS on the endpoints 868 for (i = 0; i < connCount; i++) 869 { 870 if (conns[i] == NULL) 871 continue; 872 873 pollStatus = nvlink_core_poll_link_state(conns[i]->end0, 874 NVLINK_LINKSTATE_HS, 875 NVLINK_TRANSITION_HS_TIMEOUT); 876 if (pollStatus != NVL_SUCCESS) 877 { 878 status = pollStatus; 879 } 880 881 pollStatus = nvlink_core_poll_link_state(conns[i]->end1, 882 NVLINK_LINKSTATE_HS, 883 NVLINK_TRANSITION_HS_TIMEOUT); 884 if (pollStatus != NVL_SUCCESS) 885 { 886 status = pollStatus; 887 } 888 889 conns[i]->end0->link_handlers->set_dl_link_mode(conns[i]->end0, 890 NVLINK_LINKSTATE_INITTL, 891 flags); 892 893 // On loopback, only send once 894 if (conns[i]->end0 != conns[i]->end1) 895 { 896 conns[i]->end1->link_handlers->set_dl_link_mode(conns[i]->end1, 897 NVLINK_LINKSTATE_INITTL, 898 flags); 899 } 900 901 conns[i]->end0->link_handlers->training_complete(conns[i]->end0); 902 903 // On loopback, only send once 904 if (conns[i]->end0 != conns[i]->end1) 905 { 906 conns[i]->end1->link_handlers->training_complete(conns[i]->end1); 907 } 908 909 conns[i]->end0->link_handlers->set_tx_mode(conns[i]->end0, 910 NVLINK_SUBLINK_STATE_TX_POST_HS, 911 flags); 912 // On loopback, only send once 913 if (conns[i]->end0 != conns[i]->end1) 914 { 915 conns[i]->end1->link_handlers->set_tx_mode(conns[i]->end1, 916 NVLINK_SUBLINK_STATE_TX_POST_HS, 917 flags); 918 } 919 920 conns[i]->end0->link_handlers->set_dl_link_mode(conns[i]->end0, 921 NVLINK_LINKSTATE_TRAFFIC_SETUP, 922 flags); 923 // On loopback, only send once 924 if (conns[i]->end0 != conns[i]->end1) 925 { 926 conns[i]->end1->link_handlers->set_dl_link_mode(conns[i]->end1, 927 NVLINK_LINKSTATE_TRAFFIC_SETUP, 928 flags); 929 } 930 } 931 932 return status; 933 } 934 935 /** 936 * Train intranode connections associated with a list of links to HS 937 * using non-ALI sequence 938 * 939 * @param[in] links Array of links to train 940 * @param[in] numLinks Number of links in the array 941 * 942 * return NVL_SUCCESS if the connections train successfully 943 */ 944 NvlStatus 945 nvlink_core_train_intranode_conns_from_off_to_active_ALI 946 ( 947 nvlink_link **pLinks, 948 NvU32 numLinks 949 ) 950 { 951 NvlStatus status = NVL_SUCCESS; 952 NvlStatus returnStatus = NVL_SUCCESS; 953 NvU32 i; 954 955 if ((pLinks == NULL) || (numLinks == 0)) 956 { 957 NVLINK_PRINT((DBG_MODULE_NVLINK_CORE, NVLINK_DBG_LEVEL_ERRORS, 958 "%s: No links to train to ACTIVE\n", 959 __FUNCTION__)); 960 961 return NVL_ERR_GENERIC; 962 } 963 964 for (i = 0; i < numLinks; ++i) 965 { 966 if (pLinks[i] == NULL) 967 continue; 968 969 status = pLinks[i]->link_handlers->ali_training(pLinks[i]); 970 if (status != NVL_SUCCESS) 971 { 972 NVLINK_PRINT((DBG_MODULE_NVLINK_CORE, NVLINK_DBG_LEVEL_ERRORS, 973 "%s: failed to send ALI link training on link 0x%x\n", 974 __FUNCTION__, pLinks[i]->linkNumber)); 975 returnStatus = status; 976 } 977 } 978 979 return returnStatus; 980 } 981 982 /** 983 * Train intranode connections associated with a list of links to HS 984 * using ALT sequence 985 * 986 * @param[in] conns Array of connections to train 987 * @param[in] connCount Number of connections in the array 988 * @param[in] flags Flags to track if training is sync/async 989 * 990 * return NVL_SUCCESS if the connections train successfully 991 */ 992 NvlStatus 993 nvlink_core_train_intranode_conns_from_swcfg_to_active_ALT 994 ( 995 nvlink_intranode_conn **conns, 996 NvU32 connCount, 997 NvU32 flags 998 ) 999 { 1000 NvlStatus status = NVL_SUCCESS; 1001 NvlStatus pollStatus = NVL_SUCCESS; 1002 NvU64 linkMode = NVLINK_LINKSTATE_OFF; 1003 NvU32 i; 1004 NvBool skipConn[NVLINK_MAX_SYSTEM_LINK_NUM] = {0}; 1005 1006 if ((conns == NULL) || (connCount == 0)) 1007 { 1008 NVLINK_PRINT((DBG_MODULE_NVLINK_CORE, NVLINK_DBG_LEVEL_ERRORS, 1009 "%s: No connections to train to ACTIVE\n", 1010 __FUNCTION__)); 1011 1012 return NVL_ERR_GENERIC; 1013 } 1014 1015 for (i = 0; i < connCount; i++) 1016 { 1017 if (conns[i] == NULL) 1018 continue; 1019 1020 status = conns[i]->end0->link_handlers->get_dl_link_mode(conns[i]->end0, &linkMode); 1021 if (status != NVL_SUCCESS) 1022 { 1023 NVLINK_PRINT((DBG_MODULE_NVLINK_CORE, NVLINK_DBG_LEVEL_ERRORS, 1024 "%s: Unable to get link mode for %s:%s\n", 1025 __FUNCTION__, conns[i]->end0->dev->deviceName, conns[i]->end0->linkName)); 1026 continue; 1027 } 1028 1029 // 1030 // Skip training if links are in HS 1031 // Only checking end0 here because HS implies both sides are HS 1032 // 1033 if (linkMode == NVLINK_LINKSTATE_HS) 1034 { 1035 skipConn[i] = NV_TRUE; 1036 } 1037 } 1038 1039 // Trigger INITOPTIMIZE on both ends of the connection 1040 for (i = 0; i < connCount; i++) 1041 { 1042 if ((conns[i] == NULL) || skipConn[i]) 1043 { 1044 continue; 1045 } 1046 1047 conns[i]->end0->link_handlers->set_dl_link_mode(conns[i]->end0, 1048 NVLINK_LINKSTATE_INITOPTIMIZE, 1049 flags); 1050 1051 // On loopback, only send INITOPTIMIZE to one side. 1052 if (conns[i]->end0 != conns[i]->end1) 1053 { 1054 conns[i]->end1->link_handlers->set_dl_link_mode(conns[i]->end1, 1055 NVLINK_LINKSTATE_INITOPTIMIZE, 1056 flags); 1057 } 1058 } 1059 1060 // Trigger POST_INITOPTIMIZE (Checks INITOPTIMIZE was successful) on both ends of the connection 1061 for (i = 0; i < connCount; i++) 1062 { 1063 if ((conns[i] == NULL) || skipConn[i]) 1064 { 1065 continue; 1066 } 1067 1068 conns[i]->end0->link_handlers->set_dl_link_mode(conns[i]->end0, 1069 NVLINK_LINKSTATE_POST_INITOPTIMIZE, 1070 flags); 1071 1072 // On loopback, only send POST_INITOPTIMIZE to one side. 1073 if (conns[i]->end0 != conns[i]->end1) 1074 { 1075 conns[i]->end1->link_handlers->set_dl_link_mode(conns[i]->end1, 1076 NVLINK_LINKSTATE_POST_INITOPTIMIZE, 1077 flags); 1078 } 1079 } 1080 1081 // Set link modes to ACTIVE 1082 for (i = 0; i < connCount; i++) 1083 { 1084 if ((conns[i] == NULL) || skipConn[i]) 1085 { 1086 continue; 1087 } 1088 1089 // Some settings required before moving to ACTIVE 1090 _nvlink_core_set_link_pre_active_settings(conns[i]->end0, flags); 1091 _nvlink_core_set_link_pre_active_settings(conns[i]->end1, flags); 1092 1093 // 1094 // Put only end0 in ACTIVE mode. The other end should automatically go to Active. 1095 // If it does not go to ACTIVE then we need to do fault handling. 1096 // 1097 conns[i]->end0->link_handlers->set_dl_link_mode(conns[i]->end0, 1098 NVLINK_LINKSTATE_HS, 1099 flags); 1100 } 1101 1102 // Verify link mode HS on the endpoints 1103 for (i = 0; i < connCount; i++) 1104 { 1105 if ((conns[i] == NULL) || skipConn[i]) 1106 { 1107 continue; 1108 } 1109 1110 pollStatus = nvlink_core_poll_link_state(conns[i]->end1, 1111 NVLINK_LINKSTATE_HS, 1112 NVLINK_TRANSITION_HS_TIMEOUT); 1113 if (pollStatus != NVL_SUCCESS) 1114 { 1115 status = pollStatus; 1116 } 1117 else 1118 { 1119 NVLINK_PRINT((DBG_MODULE_NVLINK_CORE, NVLINK_DBG_LEVEL_INFO, 1120 "%s: Successfully able to set linkstate to ACTIVE for links" 1121 " %s:%s<->%s:%s\n", 1122 __FUNCTION__, 1123 conns[i]->end0->dev->deviceName, conns[i]->end0->linkName, 1124 conns[i]->end1->dev->deviceName, conns[i]->end1->linkName)); 1125 } 1126 1127 conns[i]->end0->link_handlers->training_complete(conns[i]->end0); 1128 1129 // On loopback, only send once 1130 if (conns[i]->end0 != conns[i]->end1) 1131 { 1132 conns[i]->end1->link_handlers->training_complete(conns[i]->end1); 1133 } 1134 1135 conns[i]->end0->link_handlers->set_tx_mode(conns[i]->end0, 1136 NVLINK_SUBLINK_STATE_TX_POST_HS, 1137 flags); 1138 // On loopback, only send once 1139 if (conns[i]->end0 != conns[i]->end1) 1140 { 1141 conns[i]->end1->link_handlers->set_tx_mode(conns[i]->end1, 1142 NVLINK_SUBLINK_STATE_TX_POST_HS, 1143 flags); 1144 } 1145 1146 conns[i]->end0->link_handlers->set_dl_link_mode(conns[i]->end0, 1147 NVLINK_LINKSTATE_TRAFFIC_SETUP, 1148 flags); 1149 // On loopback, only send once 1150 if (conns[i]->end0 != conns[i]->end1) 1151 { 1152 conns[i]->end1->link_handlers->set_dl_link_mode(conns[i]->end1, 1153 NVLINK_LINKSTATE_TRAFFIC_SETUP, 1154 flags); 1155 } 1156 1157 conns[i]->end0->link_handlers->set_dl_link_mode(conns[i]->end0, 1158 NVLINK_LINKSTATE_ENABLE_PM, 1159 flags); 1160 // On loopback, only send once 1161 if (conns[i]->end0 != conns[i]->end1) 1162 { 1163 conns[i]->end1->link_handlers->set_dl_link_mode(conns[i]->end1, 1164 NVLINK_LINKSTATE_ENABLE_PM, 1165 flags); 1166 } 1167 } 1168 1169 return status; 1170 } 1171 1172 /** 1173 * Train a single intranode connection associated with a list of links to HS 1174 * using legacy pre-Ampere sequence 1175 * 1176 * @param[in] conns Array of connections to train 1177 * @param[in] connCount Number of connections in the array 1178 * @param[in] flags Flags to track if training is sync/async 1179 * 1180 * return NVL_SUCCESS if the connections train successfully 1181 */ 1182 NvlStatus 1183 nvlink_core_train_intranode_conns_from_swcfg_to_active_legacy 1184 ( 1185 nvlink_intranode_conn **conns, 1186 NvU32 connCount, 1187 NvU32 flags 1188 ) 1189 { 1190 NvlStatus status = NVL_SUCCESS; 1191 NvlStatus pollStatus = NVL_SUCCESS; 1192 NvU32 i; 1193 1194 if ((conns == NULL) || (connCount == 0)) 1195 { 1196 NVLINK_PRINT((DBG_MODULE_NVLINK_CORE, NVLINK_DBG_LEVEL_ERRORS, 1197 "%s: No connections to train to ACTIVE\n", 1198 __FUNCTION__)); 1199 1200 return NVL_ERR_GENERIC; 1201 } 1202 1203 // Enable PRBS generator on both ends of the link 1204 for (i = 0; i < connCount; i++) 1205 { 1206 if (conns[i] == NULL) 1207 continue; 1208 1209 _nvlink_core_set_sublink_pre_hs_settings(conns[i]->end0, flags); 1210 _nvlink_core_set_sublink_pre_hs_settings(conns[i]->end1, flags); 1211 } 1212 1213 // Put TX sublink on both ends in High Speed 1214 for (i = 0; i < connCount; i++) 1215 { 1216 if (conns[i] == NULL) 1217 continue; 1218 1219 conns[i]->end0->link_handlers->set_tx_mode(conns[i]->end0, 1220 NVLINK_SUBLINK_STATE_TX_HS, 1221 flags); 1222 conns[i]->end1->link_handlers->set_tx_mode(conns[i]->end1, 1223 NVLINK_SUBLINK_STATE_TX_HS, 1224 flags); 1225 } 1226 1227 // Wait for sublinks to go in High Speed. 1228 for (i = 0; i < connCount; i++) 1229 { 1230 if (conns[i] == NULL) 1231 continue; 1232 1233 pollStatus = nvlink_core_poll_sublink_state(conns[i]->end0, 1234 NVLINK_SUBLINK_STATE_TX_HS, 1235 NVLINK_SUBLINK_SUBSTATE_TX_STABLE, 1236 conns[i]->end1, 1237 NVLINK_SUBLINK_STATE_RX_HS, 1238 NVLINK_SUBLINK_SUBSTATE_RX_STABLE, 1239 NVLINK_TRANSITION_HS_TIMEOUT); 1240 if (pollStatus != NVL_SUCCESS) 1241 { 1242 NVLINK_PRINT((DBG_MODULE_NVLINK_CORE, NVLINK_DBG_LEVEL_ERRORS, 1243 "%s: Unable to set sublinks in High Speed mode", 1244 __FUNCTION__)); 1245 1246 status = pollStatus; 1247 } 1248 1249 pollStatus = nvlink_core_poll_sublink_state(conns[i]->end1, 1250 NVLINK_SUBLINK_STATE_TX_HS, 1251 NVLINK_SUBLINK_SUBSTATE_TX_STABLE, 1252 conns[i]->end0, 1253 NVLINK_SUBLINK_STATE_RX_HS, 1254 NVLINK_SUBLINK_SUBSTATE_RX_STABLE, 1255 NVLINK_TRANSITION_HS_TIMEOUT); 1256 if (pollStatus != NVL_SUCCESS) 1257 { 1258 NVLINK_PRINT((DBG_MODULE_NVLINK_CORE, NVLINK_DBG_LEVEL_ERRORS, 1259 "%s: Unable to set sublinks in High Speed mode", 1260 __FUNCTION__)); 1261 1262 status = pollStatus; 1263 } 1264 } 1265 1266 // Some settings required before moving to ACTIVE 1267 for (i = 0; i < connCount; i++) 1268 { 1269 if (conns[i] == NULL) 1270 continue; 1271 1272 _nvlink_core_set_link_pre_active_settings(conns[i]->end0, flags); 1273 _nvlink_core_set_link_pre_active_settings(conns[i]->end1, flags); 1274 1275 // 1276 // Put only end0 in ACTIVE mode. The other end should automatically go to Active. 1277 // If it does not go to ACTIVE then we need to do fault handling. 1278 // 1279 conns[i]->end0->link_handlers->set_dl_link_mode(conns[i]->end0, 1280 NVLINK_LINKSTATE_HS, 1281 flags); 1282 } 1283 1284 // Verify link mode HS on the endpoints 1285 for (i = 0; i < connCount; i++) 1286 { 1287 if (conns[i] == NULL) 1288 continue; 1289 1290 pollStatus = nvlink_core_poll_link_state(conns[i]->end1, 1291 NVLINK_LINKSTATE_HS, 1292 NVLINK_TRANSITION_HS_TIMEOUT); 1293 if (pollStatus == NVL_SUCCESS) 1294 { 1295 NVLINK_PRINT((DBG_MODULE_NVLINK_CORE, NVLINK_DBG_LEVEL_INFO, 1296 "%s: Successfully able to train an intranode connection to Active", 1297 __FUNCTION__)); 1298 nvlink_core_print_intranode_conn(conns[i]); 1299 } 1300 else 1301 { 1302 NVLINK_PRINT((DBG_MODULE_NVLINK_CORE, NVLINK_DBG_LEVEL_ERRORS, 1303 "%s: Unable to train an intranode connection to Active", 1304 __FUNCTION__)); 1305 1306 nvlink_core_print_intranode_conn(conns[i]); 1307 status = NVL_ERR_GENERIC; 1308 } 1309 1310 conns[i]->end0->link_handlers->training_complete(conns[i]->end0); 1311 conns[i]->end1->link_handlers->training_complete(conns[i]->end1); 1312 1313 conns[i]->end0->link_handlers->set_tx_mode(conns[i]->end0, 1314 NVLINK_SUBLINK_STATE_TX_POST_HS, 1315 flags); 1316 conns[i]->end1->link_handlers->set_tx_mode(conns[i]->end1, 1317 NVLINK_SUBLINK_STATE_TX_POST_HS, 1318 flags); 1319 1320 conns[i]->end0->link_handlers->set_dl_link_mode(conns[i]->end0, 1321 NVLINK_LINKSTATE_TRAFFIC_SETUP, 1322 flags); 1323 conns[i]->end1->link_handlers->set_dl_link_mode(conns[i]->end1, 1324 NVLINK_LINKSTATE_TRAFFIC_SETUP, 1325 flags); 1326 1327 conns[i]->end0->link_handlers->set_dl_link_mode(conns[i]->end0, 1328 NVLINK_LINKSTATE_ENABLE_PM, 1329 flags); 1330 conns[i]->end1->link_handlers->set_dl_link_mode(conns[i]->end1, 1331 NVLINK_LINKSTATE_ENABLE_PM, 1332 flags); 1333 } 1334 1335 return status; 1336 } 1337 1338 /** 1339 * Miscellaneous pre High Speed settings. 1340 * Do all the sublink specific settings before it is trained to HS mode 1341 * 1342 * @param[in] link NVLink Link pointer 1343 * @param[in] flags Flags to track if the step is sync/async 1344 */ 1345 static void 1346 _nvlink_core_set_sublink_pre_hs_settings 1347 ( 1348 nvlink_link *link, 1349 NvU32 flags 1350 ) 1351 { 1352 if (link == NULL) 1353 return; 1354 1355 // 1356 // Before training the sublinks to HS, the PROD values must be loaded. 1357 // On Volta/NVSwitch, the PROD values get loaded by UCODE during DLPL Init. 1358 // So, this PRBS setting is not a prerequisite for High Speed transition. 1359 // However, for GP100, these values and several other RX end parameters get 1360 // loaded as part of PRBS enable. If these values are not initialized, then 1361 // the RX end of the link won't transition to High Speed. 1362 // 1363 1364 // Enable PRBS generator 1365 link->link_handlers->set_tx_mode(link, NVLINK_SUBLINK_STATE_TX_PRBS_EN, flags); 1366 } 1367 1368 /** 1369 * Miscellaneous pre Active mode settings 1370 * Do all the link specific settings before it is trained to Active mode. 1371 * 1372 * @param[in] link NVLink Link pointer 1373 * @param[in] flags Flags to track if the step is sync/async 1374 */ 1375 static void 1376 _nvlink_core_set_link_pre_active_settings 1377 ( 1378 nvlink_link *link, 1379 NvU32 flags 1380 ) 1381 { 1382 if (link == NULL) 1383 return; 1384 1385 // Some settings required before moving to ACTIVE 1386 link->link_handlers->set_dl_link_mode(link, NVLINK_LINKSTATE_PRE_HS, flags); 1387 } 1388 1389 /** 1390 * Miscellaneous post Active mode settings 1391 * Do all the link specific settings once it is trained to Active mode. 1392 * 1393 * @param[in] link NVLink Link pointer 1394 * @param[in] flags Flags to track if the step is sync/async 1395 */ 1396 static void 1397 _nvlink_core_set_link_post_active_settings 1398 ( 1399 nvlink_link *link, 1400 NvU32 flags 1401 ) 1402 { 1403 if (link == NULL) 1404 return; 1405 1406 link->link_handlers->training_complete(link); 1407 1408 link->link_handlers->set_tx_mode(link, NVLINK_SUBLINK_STATE_TX_POST_HS, flags); 1409 1410 link->link_handlers->set_dl_link_mode(link, NVLINK_LINKSTATE_TRAFFIC_SETUP, flags); 1411 1412 link->link_handlers->set_dl_link_mode(link, NVLINK_LINKSTATE_ENABLE_PM, flags); 1413 } 1414