1 /* 2 * SPDX-FileCopyrightText: Copyright (c) 2021-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 3 * SPDX-License-Identifier: MIT 4 * 5 * Permission is hereby granted, free of charge, to any person obtaining a 6 * copy of this software and associated documentation files (the "Software"), 7 * to deal in the Software without restriction, including without limitation 8 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 9 * and/or sell copies of the Software, and to permit persons to whom the 10 * Software is furnished to do so, subject to the following conditions: 11 * 12 * The above copyright notice and this permission notice shall be included in 13 * all copies or substantial portions of the Software. 14 * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 21 * DEALINGS IN THE SOFTWARE. 22 */ 23 24 #include "gpu/gpu.h" 25 #include "gpu/ce/kernel_ce.h" 26 #include "gpu/nvlink/kernel_nvlink.h" 27 #include "gpu/ce/kernel_ce_private.h" 28 #include "gpu/bif/kernel_bif.h" 29 #include "platform/chipset/chipset.h" 30 31 #include "published/hopper/gh100/dev_ce.h" 32 #include "published/hopper/gh100/dev_xtl_ep_pcfg_gpu.h" 33 34 #define NV_CE_INVALID_TOPO_IDX 0xFFFF 35 36 // Defines for PCE-LCE mapping algorithm 37 #define NV_CE_MAX_HSHUBS 5 38 #define NV_CE_LCE_MASK_INIT 0xFFFFFFFF 39 #define NV_CE_GRCE_ALLOWED_LCE_MASK 0x03 40 #define NV_CE_MAX_GRCE 2 41 #define NV_CE_EVEN_ASYNC_LCE_MASK 0x55555550 42 #define NV_CE_ODD_ASYNC_LCE_MASK 0xAAAAAAA0 43 #define NV_CE_MAX_LCE_MASK 0x3FF 44 #define NV_CE_PCE_PER_HSHUB 4 45 #define NV_CE_NUM_FBPCE 4 46 #define NV_CE_NUM_PCES_NO_LINK_CASE 12 47 #define NV_CE_MAX_PCE_PER_GRCE 2 48 #define NV_CE_HSHUBNVL_ID_0 2 49 50 /* 51 * Table for setting the PCE2LCE mapping for WAR configs that cannot be implemented 52 * using the algorithm because the config does not conform to the algorithm's set 53 * of requirements/assumptions 54 */ 55 static NVLINK_CE_AUTO_CONFIG_TABLE nvLinkCeAutoConfigTable_GH100[] = 56 { 57 // 58 // #systmem #max #peers Symmetric Switch PCE-LCE GRCE exposeCe 59 // links (links/peer) Config? Config Map Config Mask 60 61 // Default minimal configuration - NOTE: do not add entrys before this 62 // Default is for CG1 63 {0x0, 0x0, 0x0, NV_FALSE, NV_FALSE, {0x4,0x6,0x4,0x6,0x4,0x6,0x4,0x6, 64 0x4,0x4,0x4,0x6,0x4,0x6,0x4,0x6}, {0x4,0x6}, 0x53}, 65 {0x0, 0x0, 0x0, NV_TRUE, NV_FALSE, {0x4,0x6,0x4,0x6,0x4,0x6,0x4,0x6, 66 0x4,0x4,0x4,0x6,0x4,0x6,0x4,0x6}, {0x4,0x6}, 0x53}, 67 68 // Switch cases - Ranger mapping 69 {0x0, 0x12, 0x1, NV_TRUE, NV_TRUE, {0x4,0x6,0x4,0x6,0x4,0x6,0x4,0x6, 70 0x4,0x4,0x4,0x6,0x4,0x6,0x4,0x6}, {0x4,0x6}, 0x53}, 71 {0x0, 0x6, 0x1, NV_TRUE, NV_TRUE, {0x4,0x6,0x4,0x6,0x4,0x6,0x4,0x6, 72 0x4,0x4,0x4,0x6,0x4,0x6,0x4,0x6}, {0x4,0x6}, 0x53}, 73 // CG4 mapping 74 {0x0, 0x6, 0x3, NV_TRUE, NV_FALSE, {0x4,0x4,0x4,0x4,0x8,0x8,0x8,0x4, 75 0x4,0x6,0x4,0x6,0x4,0x6,0x4,0x6}, {0x4,0x6}, 0x153}, 76 {0x0, 0x6, 0x3, NV_FALSE, NV_FALSE, {0x4,0x4,0x4,0x4,0x8,0x8,0x8,0x4, 77 0x4,0x6,0x4,0x6,0x4,0x6,0x4,0x6}, {0x4,0x6}, 0x153}, 78 {0x0, 0x6, 0x2, NV_TRUE, NV_FALSE, {0x4,0x4,0x4,0x4,0xF,0xF,0xF,0x4, 79 0x4,0x6,0x4,0x6,0x4,0x6,0x4,0x6}, {0x4,0x6}, 0x53}, 80 {0x0, 0x6, 0x2, NV_FALSE, NV_FALSE, {0x4,0x4,0x4,0x4,0xF,0xF,0xF,0x4, 81 0x4,0x6,0x4,0x6,0x4,0x6,0x4,0x6}, {0x4,0x6}, 0x53}, 82 {0x0, 0x6, 0x1, NV_TRUE, NV_FALSE, {0x4,0x4,0x4,0x4,0xF,0xF,0xF,0x4, 83 0x4,0xF,0x4,0xF,0x4,0xF,0x4,0xF}, {0x4,0xF}, 0x13}, 84 {0x0, 0x6, 0x1, NV_FALSE, NV_FALSE, {0x4,0x4,0x4,0x4,0xF,0xF,0xF,0x4, 85 0x4,0xF,0x4,0xF,0x4,0xF,0x4,0xF}, {0x4,0xF}, 0x13}, 86 {0x0, 0x5, 0x3, NV_TRUE, NV_FALSE, {0x4,0x4,0x4,0x4,0x8,0x8,0x8,0x4, 87 0x4,0x6,0x4,0x6,0x4,0x6,0x4,0x6}, {0x4,0x6}, 0x153}, 88 {0x0, 0x5, 0x3, NV_FALSE, NV_FALSE, {0x4,0x4,0x4,0x4,0x8,0x8,0x8,0x4, 89 0x4,0x6,0x4,0x6,0x4,0x6,0x4,0x6}, {0x4,0x6}, 0x153}, 90 {0x0, 0x5, 0x2, NV_TRUE, NV_FALSE, {0x4,0x4,0x4,0x4,0xF,0xF,0xF,0x4, 91 0x4,0x6,0x4,0x6,0x4,0x6,0x4,0x6}, {0x4,0x6}, 0x53}, 92 {0x0, 0x5, 0x2, NV_FALSE, NV_FALSE, {0x4,0x4,0x4,0x4,0xF,0xF,0xF,0x4, 93 0x4,0x6,0x4,0x6,0x4,0x6,0x4,0x6}, {0x4,0x6}, 0x53}, 94 {0x0, 0x5, 0x1, NV_TRUE, NV_FALSE, {0x4,0x4,0x4,0x4,0xF,0xF,0xF,0x4, 95 0x4,0xF,0x4,0xF,0x4,0xF,0x4,0xF}, {0x4,0xF}, 0x13}, 96 {0x0, 0x5, 0x1, NV_FALSE, NV_FALSE, {0x4,0x4,0x4,0x4,0xF,0xF,0xF,0x4, 97 0x4,0xF,0x4,0xF,0x4,0xF,0x4,0xF}, {0x4,0xF}, 0x13}, 98 {0x0, 0x4, 0x3, NV_TRUE, NV_FALSE, {0x4,0x4,0x4,0x4,0x8,0x8,0x8,0x4, 99 0x4,0x6,0x4,0x6,0x4,0x6,0x4,0x6}, {0x4,0x6}, 0x153}, 100 {0x0, 0x4, 0x3, NV_FALSE, NV_FALSE, {0x4,0x4,0x4,0x4,0x8,0x8,0x8,0x4, 101 0x4,0x6,0x4,0x6,0x4,0x6,0x4,0x6}, {0x4,0x6}, 0x153}, 102 {0x0, 0x4, 0x2, NV_TRUE, NV_FALSE, {0x4,0x4,0x4,0x4,0xF,0xF,0xF,0x4, 103 0x4,0x6,0x4,0x6,0x4,0x6,0x4,0x6}, {0x4,0x6}, 0x53}, 104 {0x0, 0x4, 0x2, NV_FALSE, NV_FALSE, {0x4,0x4,0x4,0x4,0xF,0xF,0xF,0x4, 105 0x4,0x6,0x4,0x6,0x4,0x6,0x4,0x6}, {0x4,0x6}, 0x53}, 106 {0x0, 0x4, 0x1, NV_TRUE, NV_FALSE, {0x4,0x4,0x4,0x4,0xF,0xF,0xF,0x4, 107 0x4,0xF,0x4,0xF,0x4,0xF,0x4,0xF}, {0x4,0xF}, 0x13}, 108 {0x0, 0x4, 0x1, NV_FALSE, NV_FALSE, {0x4,0x4,0x4,0x4,0xF,0xF,0xF,0x4, 109 0x4,0xF,0x4,0xF,0x4,0xF,0x4,0xF}, {0x4,0xF}, 0x13}, 110 {0x0, 0x3, 0x3, NV_TRUE, NV_FALSE, {0x4,0x4,0x4,0x4,0x8,0x8,0x8,0x4, 111 0x4,0x6,0x4,0x6,0x4,0x6,0x4,0x6}, {0x4,0x6}, 0x153}, 112 {0x0, 0x3, 0x3, NV_FALSE, NV_FALSE, {0x4,0x4,0x4,0x4,0x8,0x8,0x8,0x4, 113 0x4,0x6,0x4,0x6,0x4,0x6,0x4,0x6}, {0x4,0x6}, 0x153}, 114 {0x0, 0x3, 0x2, NV_TRUE, NV_FALSE, {0x4,0x4,0x4,0x4,0xF,0xF,0xF,0x4, 115 0x4,0x6,0x4,0x6,0x4,0x6,0x4,0x6}, {0x4,0x6}, 0x53}, 116 {0x0, 0x3, 0x2, NV_FALSE, NV_FALSE, {0x4,0x4,0x4,0x4,0xF,0xF,0xF,0x4, 117 0x4,0x6,0x4,0x6,0x4,0x6,0x4,0x6}, {0x4,0x6}, 0x53}, 118 {0x0, 0x3, 0x1, NV_TRUE, NV_FALSE, {0x4,0x4,0x4,0x4,0xF,0xF,0xF,0x4, 119 0x4,0xF,0x4,0xF,0x4,0xF,0x4,0xF}, {0x4,0xF}, 0x13}, 120 {0x0, 0x3, 0x1, NV_FALSE, NV_FALSE, {0x4,0x4,0x4,0x4,0xF,0xF,0xF,0x4, 121 0x4,0xF,0x4,0xF,0x4,0xF,0x4,0xF}, {0x4,0xF}, 0x13}, 122 {0x0, 0x2, 0x3, NV_TRUE, NV_FALSE, {0x4,0x4,0x4,0x4,0x8,0x8,0x8,0x4, 123 0x4,0x6,0x4,0x6,0x4,0x6,0x4,0x6}, {0x4,0x6}, 0x153}, 124 {0x0, 0x2, 0x3, NV_FALSE, NV_FALSE, {0x4,0x4,0x4,0x4,0x8,0x8,0x8,0x4, 125 0x4,0x6,0x4,0x6,0x4,0x6,0x4,0x6}, {0x4,0x6}, 0x153}, 126 {0x0, 0x2, 0x2, NV_TRUE, NV_FALSE, {0x4,0x4,0x4,0x4,0xF,0xF,0xF,0x4, 127 0x4,0x6,0x4,0x6,0x4,0x6,0x4,0x6}, {0x4,0x6}, 0x53}, 128 {0x0, 0x2, 0x2, NV_FALSE, NV_FALSE, {0x4,0x4,0x4,0x4,0xF,0xF,0xF,0x4, 129 0x4,0x6,0x4,0x6,0x4,0x6,0x4,0x6}, {0x4,0x6}, 0x53}, 130 {0x0, 0x2, 0x1, NV_TRUE, NV_FALSE, {0x4,0x4,0x4,0x4,0xF,0xF,0xF,0x4, 131 0x4,0xF,0x4,0xF,0x4,0xF,0x4,0xF}, {0x4,0xF}, 0x13}, 132 {0x0, 0x2, 0x1, NV_FALSE, NV_FALSE, {0x4,0x4,0x4,0x4,0xF,0xF,0xF,0x4, 133 0x4,0xF,0x4,0xF,0x4,0xF,0x4,0xF}, {0x4,0xF}, 0x13}, 134 {0x0, 0x1, 0x3, NV_TRUE, NV_FALSE, {0x4,0x4,0x4,0x4,0x8,0x8,0x8,0x4, 135 0x4,0x6,0x4,0x6,0x4,0x6,0x4,0x6}, {0x4,0x6}, 0x153}, 136 {0x0, 0x1, 0x3, NV_FALSE, NV_FALSE, {0x4,0x4,0x4,0x4,0x8,0x8,0x8,0x4, 137 0x4,0x6,0x4,0x6,0x4,0x6,0x4,0x6}, {0x4,0x6}, 0x153}, 138 {0x0, 0x1, 0x2, NV_TRUE, NV_FALSE, {0x4,0x4,0x4,0x4,0xF,0xF,0xF,0x4, 139 0x4,0x6,0x4,0x6,0x4,0x6,0x4,0x6}, {0x4,0x6}, 0x53}, 140 {0x0, 0x1, 0x2, NV_FALSE, NV_FALSE, {0x4,0x4,0x4,0x4,0xF,0xF,0xF,0x4, 141 0x4,0x6,0x4,0x6,0x4,0x6,0x4,0x6}, {0x4,0x6}, 0x53}, 142 {0x0, 0x1, 0x1, NV_TRUE, NV_FALSE, {0x4,0x4,0x4,0x4,0xF,0xF,0xF,0x4, 143 0x4,0xF,0x4,0xF,0x4,0xF,0x4,0xF}, {0x4,0xF}, 0x13}, 144 {0x0, 0x1, 0x1, NV_FALSE, NV_FALSE, {0x4,0x4,0x4,0x4,0xF,0xF,0xF,0x4, 145 0x4,0xF,0x4,0xF,0x4,0xF,0x4,0xF}, {0x4,0xF}, 0x13} 146 }; 147 148 /*! 149 * @brief Returns the size of the PCE2LCE register array 150 * 151 * 152 * @param[in] pGpu OBJGPU pointer 153 * @param[in] pCe OBJCE pointer 154 * 155 * @return NV_CE_PCE2LCE_CONFIG__SIZE_1 156 * 157 */ 158 NvU32 159 kceGetPce2lceConfigSize1_GH100 160 ( 161 KernelCE *pKCe 162 ) 163 { 164 return NV_CE_PCE2LCE_CONFIG__SIZE_1; 165 } 166 167 /** 168 * @brief This function takes in a link mask and returns the minimum number 169 * of PCE connections required. This is decided based on a round up approach 170 * where each PCE can handle 1.5 links. 171 */ 172 NvU32 173 kceGetNumPceRequired 174 ( 175 NvU32 numLinks 176 ) 177 { 178 switch(numLinks) 179 { 180 case 6: 181 return 4; 182 case 5: 183 case 4: 184 return 3; 185 case 3: 186 return 2; 187 case 2: 188 case 1: 189 default: 190 return 1; 191 } 192 } 193 194 /* 195 * Look up entry in NVLINK_CE_AUTO_CONFIG_TABLE 196 * 197 * @param[in] pGpu OBJGPU pointer 198 * @param[in] pCe OBJCE pointer 199 * @param[in] pCurrentTopo NVLINK_TOPOLOGY_INFO pointer 200 * @param[in] pAutoConfigTable NVLINK_CE_AUTO_CONFIG_TABLE pointer 201 * @param[in] autoConfigNumEntries NvU32 num entries within pAutoConfigTable 202 * @param[out] pIdx NvU32 pointer 203 * @param[out] pExposeCeMask NvU32 pointer 204 * 205 * Returns: NV_TRUE if entry is found 206 * NV_FALSE otheriwse 207 */ 208 NvBool 209 kceGetAutoConfigTableEntry_GH100 210 ( 211 OBJGPU *pGpu, 212 KernelCE *pKCe, 213 NVLINK_TOPOLOGY_PARAMS *pCurrentTopo, 214 NVLINK_CE_AUTO_CONFIG_TABLE *pTable, 215 NvU32 autoConfigNumEntries, 216 NvU32 *pIdx, 217 NvU32 *pExposeCeMask 218 ) 219 { 220 NvU32 i; 221 222 // 223 // The auto config table entries will only be applicable 224 // from this function in SHH cases. Rather than 225 // introduced a new entry in the table to note SHH, 226 // in order to preserve backwards compatibility this 227 // function will only attempt to map if we are confirmed 228 // to be in SHH path. 229 // 230 if (!gpuIsSelfHosted(pGpu)) 231 { 232 return NV_FALSE; 233 } 234 235 for (i = 0; i < autoConfigNumEntries; i++) 236 { 237 if ((pTable[i].sysmemLinks == pCurrentTopo->sysmemLinks ) && 238 (pTable[i].maxLinksPerPeer == pCurrentTopo->maxLinksPerPeer) && 239 (pTable[i].bSymmetric == pCurrentTopo->bSymmetric ) && 240 (pTable[i].bSwitchConfig == pCurrentTopo->bSwitchConfig ) && 241 (pTable[i].numPeers == pCurrentTopo->numPeers )) 242 { 243 *pIdx = i; 244 *pExposeCeMask = pTable[i].exposeCeMask; 245 return NV_TRUE; 246 } 247 } 248 return NV_FALSE; 249 } 250 251 /** 252 * @brief This function returns the pceIndex for a particular link ID 253 * Must always be called with the hshub ID for the calling link ID 254 * 255 * @param[in] pGpu OBJGPU pointer 256 * @param[in] pKCe KernelCE pointer 257 * @param[in] pceAvailableMaskPerHshub Pointer to CEs available per HSHUB 258 * @param[out] pceIndex Pointer to caller pceIndex 259 * @param[out] pHshubId Pointer to caller HSHUB ID 260 */ 261 static void 262 _ceGetAlgorithmPceIndex 263 ( 264 OBJGPU *pGpu, 265 KernelCE *pKCe, 266 NvU32 *pceAvailableMaskPerHshub, 267 NvU32 *pceIndex, 268 NvU8 *pHshubId 269 ) 270 { 271 NvU8 pHshubIdRequested; 272 NvU32 i; 273 274 if ((pceIndex != NULL) && *pceIndex >= kceGetPce2lceConfigSize1_HAL(pKCe)) 275 { 276 NV_PRINTF(LEVEL_ERROR, "Invalid PCE request. pceIndex = %d pceCnt = %d\n", *pceIndex, kceGetPce2lceConfigSize1_HAL(pKCe)); 277 return; 278 } 279 280 if (!(NVBIT32(*pceIndex) & pceAvailableMaskPerHshub[*pHshubId])) 281 { 282 // 283 // 1. We couldn't find an applicable strided PCE in given HSHUB 284 // So, we'll assign the next consecutive PCE on the same HSHUB 285 // 286 *pceIndex = CE_GET_LOWEST_AVAILABLE_IDX(pceAvailableMaskPerHshub[*pHshubId]); 287 if (!(NVBIT32(*pceIndex) & pceAvailableMaskPerHshub[*pHshubId])) 288 { 289 // 2. If this is not a valid PCE on given HSHUB, assign PCE from alternative HSHUB 290 pHshubIdRequested = *pHshubId; 291 for (i = pHshubIdRequested + 1; i != pHshubIdRequested; i++) { 292 if (i > 4) { 293 i = 1; 294 continue; 295 } 296 297 *pceIndex = CE_GET_LOWEST_AVAILABLE_IDX(pceAvailableMaskPerHshub[i]); 298 if (NVBIT32(*pceIndex) & pceAvailableMaskPerHshub[i]) { 299 break; 300 } 301 } 302 303 if (i == pHshubIdRequested) 304 { 305 // If we've reached this point, then we have no more available PCEs to assign 306 NV_PRINTF(LEVEL_ERROR, "No more available PCEs to assign!\n"); 307 NV_ASSERT(0); 308 } 309 } 310 } 311 return; 312 } 313 314 /** 315 * @brief This function assigns LCE 2 and 3 mappings for C2C cases. 316 * 317 * @param[in] pGpu OBJGPU pointer 318 * @param[in] pKCe KernelCE pointer 319 * @param[in] pceAvailableMaskPerHshub Pointer to CEs available per HSHUB 320 * @param[out] pLocalPceLceMap Pointer to PCE-LCE array 321 * @param[out] pLocalExposeCeMask Pointer to LCE Mask 322 */ 323 NV_STATUS 324 kceMapPceLceForC2C_GH100 325 ( 326 OBJGPU *pGpu, 327 KernelCE *pKCe, 328 NvU32 *pceAvailableMaskPerHshub, 329 NvU32 *pLocalPceLceMap, 330 NvU32 *pLocalExposeCeMask 331 ) 332 { 333 NV_STATUS status = NV_OK; 334 KernelBif *pKernelBif = GPU_GET_KERNEL_BIF(pGpu); 335 NvU32 pceIndex, i, hshubId, lceMask, lceIndex; 336 NvU32 numNvLinkPeers = 0; 337 NvU32 selectPcePerHshub = 2; 338 NvBool c2cEnabled = pKernelBif->getProperty(pKernelBif, PDB_PROP_KBIF_IS_C2C_LINK_UP); 339 340 numNvLinkPeers = pKCe->nvlinkNumPeers; 341 if (gpuIsCCFeatureEnabled(pGpu) || (c2cEnabled && numNvLinkPeers == 0 && IS_MIG_IN_USE(pGpu))) 342 { 343 lceMask = NVBIT32(2) | NVBIT32(3); 344 *pLocalExposeCeMask |= lceMask; 345 346 lceIndex = CE_GET_LOWEST_AVAILABLE_IDX(lceMask); 347 pceIndex = NVBIT32(0); 348 pLocalPceLceMap[pceIndex] = lceIndex; 349 lceMask &= (~(NVBIT32(lceIndex))); 350 351 lceIndex = CE_GET_LOWEST_AVAILABLE_IDX(lceMask); 352 pceIndex = NVBIT32(1); 353 pLocalPceLceMap[pceIndex] = lceIndex; 354 } 355 else if (c2cEnabled && numNvLinkPeers == 0) 356 { 357 lceMask = NVBIT32(2); 358 *pLocalExposeCeMask |= lceMask; 359 lceIndex = CE_GET_LOWEST_AVAILABLE_IDX(lceMask); 360 361 for (hshubId = 2; hshubId < NV_CE_MAX_HSHUBS; hshubId++) 362 { 363 for (i = 0; i < selectPcePerHshub; i++) 364 { 365 pceIndex = CE_GET_LOWEST_AVAILABLE_IDX(pceAvailableMaskPerHshub[hshubId]); 366 if (pceIndex < kceGetPce2lceConfigSize1_HAL(pKCe)) 367 { 368 pceAvailableMaskPerHshub[hshubId] &= (~(NVBIT32(pceIndex))); 369 pLocalPceLceMap[pceIndex] = lceIndex; 370 } 371 } 372 } 373 374 lceMask = NVBIT32(4); 375 *pLocalExposeCeMask |= lceMask; 376 lceIndex = CE_GET_LOWEST_AVAILABLE_IDX(lceMask); 377 378 for (hshubId = 2; hshubId < NV_CE_MAX_HSHUBS; hshubId++) 379 { 380 for (i = 0; i < selectPcePerHshub; i++) 381 { 382 pceIndex = CE_GET_LOWEST_AVAILABLE_IDX(pceAvailableMaskPerHshub[hshubId]); 383 if (pceIndex < kceGetPce2lceConfigSize1_HAL(pKCe)) 384 { 385 pceAvailableMaskPerHshub[hshubId] &= (~(NVBIT32(pceIndex))); 386 pLocalPceLceMap[pceIndex] = lceIndex; 387 } 388 } 389 } 390 } 391 else 392 { 393 status = NV_WARN_NOTHING_TO_DO; 394 } 395 396 return status; 397 } 398 399 /** 400 * @brief This function checks for root port gen speed or GPU 401 * gen speed to determine if we should apply genX+ mapping 402 * or genX- mapping 403 * 404 * @param[in] pGpu OBJGPU pointer 405 * @param[in] pKCe KernelCE pointer 406 * @param[in] checkGen gen X for query 407 */ 408 NvBool 409 kceIsGenXorHigherSupported_GH100 410 ( 411 OBJGPU *pGpu, 412 KernelCE *pKCe, 413 NvU32 checkGen 414 ) 415 { 416 OBJSYS *pSys = SYS_GET_INSTANCE(); 417 OBJCL *pCl = SYS_GET_CL(pSys); 418 NvU8 genSpeed = 0; 419 NvU32 busSpeed = 0; 420 NV_STATUS status = NV_OK; 421 NvBool bIsGenXorHigher = NV_FALSE; 422 423 status = clPcieGetRootGenSpeed(pGpu, pCl, &genSpeed); 424 if (status != NV_OK) 425 { 426 NV_PRINTF(LEVEL_ERROR, "Could not get root gen speed - check for GPU gen speed!\n"); 427 // Check for GPU gen speed 428 if (GPU_BUS_CFG_CYCLE_RD32(pGpu, NV_EP_PCFG_GPU_LINK_CONTROL_STATUS, &busSpeed) != NV_OK) 429 { 430 NV_PRINTF(LEVEL_ERROR, "Unable to read NV_EP_PCFG_GPU_LINK_CONTROL_STATUS from config space.\n"); 431 return bIsGenXorHigher; 432 } 433 genSpeed = GPU_DRF_VAL(_EP_PCFG_GPU, _LINK_CONTROL_STATUS, _CURRENT_LINK_SPEED, busSpeed); 434 } 435 NV_PRINTF(LEVEL_INFO, "Gen Speed = %d\n", genSpeed); 436 437 if ((genSpeed >= checkGen)) 438 { 439 bIsGenXorHigher = NV_TRUE; 440 } 441 442 return bIsGenXorHigher; 443 } 444 445 /** 446 * @brief This function assigns PCE-LCE mappings for GRCE LCEs 0 and 1. 447 * This function additionally takes care of mappings for LCE 2 and 3 448 * in the default case. 449 * 450 * @param[in] pGpu OBJGPU pointer 451 * @param[in] pKCe KernelCE pointer 452 * @param[in] pceAvailableMaskPerHshub Pointer to CEs available per HSHUB 453 * @param[out] pLocalPceLceMap Pointer to PCE-LCE array 454 * @param[out] pLocalExposeCeMask Pointer to LCE Mask 455 */ 456 void 457 kceMapPceLceForGRCE_GH100 458 ( 459 OBJGPU *pGpu, 460 KernelCE *pKCe, 461 NvU32 *pceAvailableMaskPerHshub, 462 NvU32 *pLocalPceLceMap, 463 NvU32 *pLocalExposeCeMask, 464 NvU32 *pLocalGrceMap, 465 NvU32 fbPceMask 466 ) 467 { 468 KernelBif *pKernelBif = GPU_GET_KERNEL_BIF(pGpu); 469 NvU32 grceIdx, pceIndex, i; 470 NvU32 lceIndex = 0; 471 NvU32 lceMask = 0; 472 NvU32 numNvLinkPeers = 0; 473 NvU32 grceMappings[NV_CE_NUM_FBPCE] = {12, 14, 13, 15}; 474 NvBool gen5OrHigher = kceIsGenXorHigherSupported_HAL(pGpu, pKCe, 5); 475 NvBool c2cEnabled = pKernelBif->getProperty(pKernelBif, PDB_PROP_KBIF_IS_C2C_LINK_UP); 476 477 numNvLinkPeers = pKCe->nvlinkNumPeers; 478 479 if (gpuIsCCFeatureEnabled(pGpu) || (c2cEnabled && numNvLinkPeers == 0)) 480 { 481 lceMask = NVBIT32(0) | NVBIT32(1); 482 *pLocalExposeCeMask |= lceMask; 483 484 for (grceIdx = 0; grceIdx < NV_CE_MAX_GRCE; grceIdx++) 485 { 486 for (i = 0; i < NV_CE_MAX_PCE_PER_GRCE; i++) 487 { 488 pceIndex = grceMappings[grceIdx * 2 + i]; 489 490 // 491 // floorswept PCE or 492 // PCIe <= Gen4 experience high latency and requires a 493 // different mapping for LCE2 and LCE3 compared to Gen5. 494 // In PCIe <= Gen4 cases, only link 1 PCE to LCE by 495 // skipping every other PCE in the grceMappings array. 496 // 497 if (pceIndex == 0 || (!gen5OrHigher && (i % 2 == 1))) 498 continue; 499 500 lceIndex = CE_GET_LOWEST_AVAILABLE_IDX(lceMask); 501 pLocalPceLceMap[pceIndex] = lceIndex; 502 } 503 504 lceMask &= (~(NVBIT32(lceIndex))); 505 } 506 } 507 else 508 { 509 // Default case which will result in sharing LCE 2 and 3 with LCE 0 and 1 510 lceMask = NVBIT32(2) | NVBIT32(3); 511 *pLocalExposeCeMask |= lceMask; 512 for (grceIdx = 0; grceIdx < NV_CE_MAX_GRCE; grceIdx++) 513 { 514 for (i = 0; i < NV_CE_MAX_PCE_PER_GRCE; i++) 515 { 516 pceIndex = grceMappings[grceIdx * 2 + i]; 517 518 // floorswept PCE or account for PCIe latency in Gen <= 4 519 if (pceIndex == 0 || (!gen5OrHigher && (i % 2 == 1))) 520 continue; 521 522 lceIndex = CE_GET_LOWEST_AVAILABLE_IDX(lceMask); 523 pLocalPceLceMap[pceIndex] = lceIndex; 524 } 525 526 // update lceMask now that all PCEs are assigned to this LCE 527 lceMask &= (~(NVBIT32(lceIndex))); 528 } 529 530 // GRCE Cases 531 lceMask = kceGetGrceSupportedLceMask_HAL(pGpu, pKCe); 532 *pLocalExposeCeMask |= lceMask; 533 534 for (grceIdx = 0; grceIdx < NV_CE_MAX_GRCE; grceIdx++) 535 { 536 for (i = 0; i < NV_CE_MAX_PCE_PER_GRCE; i++) 537 { 538 pceIndex = grceMappings[grceIdx * 2 + i]; 539 fbPceMask &= (~(NVBIT32(pceIndex))); 540 541 // floorswept PCE 542 if (pceIndex == 0 || (!gen5OrHigher && (i % 2 == 1))) 543 continue; 544 545 // Sharing use case 546 if ((NVBIT32(pLocalPceLceMap[pceIndex])) & *pLocalExposeCeMask) 547 { 548 // GRCE is shared - set the status and shared LCE # in register field 549 lceIndex = pLocalPceLceMap[pceIndex]; 550 pLocalGrceMap[grceIdx] = DRF_NUM(_CE, _GRCE_CONFIG, _SHARED, 1) | 551 DRF_NUM(_CE, _GRCE_CONFIG, _SHARED_LCE, lceIndex); 552 } 553 else 554 { 555 // GRCE got its own FBHUB PCE 556 // Store the LCE in the associated PCE for GRCE 557 lceIndex = CE_GET_LOWEST_AVAILABLE_IDX(lceMask); 558 pLocalPceLceMap[pceIndex] = lceIndex; 559 // Reflect non-sharing status in register field 560 pLocalGrceMap[grceIdx] = DRF_NUM(_CE, _GRCE_CONFIG, _SHARED, 0) | 561 DRF_DEF(_CE, _GRCE_CONFIG, _SHARED_LCE, _NONE); 562 } 563 } 564 565 // update lceMask now that all PCEs are assigned to this LCE 566 lceMask &= (~(NVBIT32(lceIndex))); 567 } 568 } 569 } 570 571 /** 572 * @brief This function performs PCE-LCE mappings in the CC case where 573 * a 1-1 mapping is applied. 574 * 575 * @param[in] pGpu OBJGPU pointer 576 * @param[in] pKCe KernelCE pointer 577 * @param[in] pceAvailableMaskPerHshub Pointer to CEs available per HSHUB 578 * @param[out] pLocalPceLceMap Pointer to PCE-LCE array 579 * @param[out] pLocalExposeCeMask Pointer to LCE Mask 580 */ 581 static void 582 kceMapPceLceForCC 583 ( 584 OBJGPU *pGpu, 585 KernelCE *pKCe, 586 NvU32 *pceAvailableMaskPerHshub, 587 NvU32 *pLocalPceLceMap, 588 NvU32 *pLocalGrceMap, 589 NvU32 *pLocalExposeCeMask 590 ) 591 { 592 NvU32 pceIndex = 0; 593 NvU32 maxLceIdx, lceMask, lceIndex, grceIdx; 594 595 // Apply 1-1 mapping for async LCEs 596 lceMask = NV_CE_MAX_LCE_MASK & (~kceGetGrceSupportedLceMask_HAL(pGpu, pKCe)); 597 maxLceIdx = lceMask; 598 HIGHESTBITIDX_32(maxLceIdx); 599 *pLocalExposeCeMask |= lceMask; 600 lceIndex = CE_GET_LOWEST_AVAILABLE_IDX(lceMask); 601 for(; lceIndex <= maxLceIdx; lceIndex++) 602 { 603 pLocalPceLceMap[pceIndex] = lceIndex; 604 lceMask &= (~(NVBIT32(lceIndex))); 605 pceIndex++; 606 } 607 608 609 // Map GRCEs as non sharing. At this point, no PCEs have been mapped 610 lceMask = kceGetGrceSupportedLceMask_HAL(pGpu, pKCe); 611 *pLocalExposeCeMask |= lceMask; 612 for (grceIdx = 0; grceIdx < NV_CE_MAX_GRCE; grceIdx++) 613 { 614 lceIndex = CE_GET_LOWEST_AVAILABLE_IDX(lceMask); 615 if (((NVBIT32(lceIndex) & NV_CE_MAX_LCE_MASK) != 0) && (pceIndex < kceGetPce2lceConfigSize1_HAL(pKCe))) 616 { 617 pLocalPceLceMap[pceIndex] = lceIndex; 618 lceMask &= (~(NVBIT32(lceIndex))); 619 pLocalGrceMap[grceIdx] = DRF_NUM(_CE, _GRCE_CONFIG, _SHARED, 0) | 620 DRF_DEF(_CE, _GRCE_CONFIG, _SHARED_LCE, _NONE); 621 pceIndex++; 622 } 623 } 624 } 625 626 /** 627 * @brief This function assigns PCE-LCE mappings for NVLink peers 628 * Based on HSHUBs that the links associated with a peer connect to, 629 * algorithm will attempt to assign a PCE from associated HSHUB taking into 630 * account striding as well. 631 * 632 * @param[in] pGpu OBJGPU pointer 633 * @param[in] pKCe KernelCE pointer 634 * @param[in] pceAvailableMaskPerHshub Pointer to CEs available per HSHUB 635 * @param[out] pLocalPceLceMap Pointer to PCE-LCE array 636 * @param[out] pLocalExposeCeMask Pointer to LCE Mask 637 * 638 * Returns NV_OK if successful in assigning PCEs and LCEs for each of the NVLink peers 639 */ 640 NV_STATUS 641 kceMapPceLceForNvlinkPeers_GH100 642 ( 643 OBJGPU *pGpu, 644 KernelCE *pKCe, 645 NvU32 *pceAvailableMaskPerHshub, 646 NvU32 *pLocalPceLceMap, 647 NvU32 *pLocalExposeCeMask 648 ) 649 { 650 KernelNvlink *pKernelNvlink = GPU_GET_KERNEL_NVLINK(pGpu); 651 OBJSYS *pSys = SYS_GET_INSTANCE(); 652 NV_STATUS status = NV_OK; 653 NvU32 lceMask = 0; 654 NvU32 pceMask = 0; 655 NvU32 peerLinkMask = 0; 656 KernelCE *pKCeLce = NULL; 657 NvBool bPeerAssigned = NV_FALSE; 658 NvU32 peerAvailableLceMask = NV_CE_LCE_MASK_INIT; 659 OBJGPU *pRemoteGpu; 660 NvU32 numPcePerLink; 661 NvU32 lceIndex, pceIndex; 662 NvU8 hshubId = 0, i; 663 NvU32 linkId, gpuMask, gpuInstance = 0, j; 664 665 NV2080_CTRL_INTERNAL_HSHUB_GET_HSHUB_ID_FOR_LINKS_PARAMS params; 666 667 if (pKernelNvlink == NULL) 668 { 669 return NV_WARN_NOTHING_TO_DO; 670 } 671 672 peerAvailableLceMask = kceGetNvlinkPeerSupportedLceMask_HAL(pGpu, pKCe, peerAvailableLceMask); 673 pKCe->nvlinkNumPeers = 0; 674 675 if (knvlinkIsGpuConnectedToNvswitch(pGpu, pKernelNvlink)) 676 { 677 // 678 // On NVSwitch systems, we only create 1 aperture for all p2p connections. 679 // For PCE2LCE mapping, we should only assign 1 LCE for this connection. 680 // 681 // Since we mark the loopback connections in peerLinkMasks with the appropriate 682 // links (see _nvlinkUpdateSwitchLinkMasks), we can use that to calculate 683 // the PCE2LCE config. 684 // 685 gpuMask = NVBIT32(pGpu->gpuInstance); 686 } 687 else 688 { 689 // On direct connected systems, we'll loop over each GPU in the system 690 // and assign a peer LCE for each connection 691 (void)gpumgrGetGpuAttachInfo(NULL, &gpuMask); 692 } 693 694 while ((pRemoteGpu = gpumgrGetNextGpu(gpuMask, &gpuInstance)) != NULL) 695 { 696 NvU32 numLinksToPeer = knvlinkGetNumLinksToPeer(pGpu, pKernelNvlink, 697 pRemoteGpu); 698 NvU32 maxLceCnt = NV_CE_MAX_LCE_MASK; 699 700 if (numLinksToPeer == 0) 701 { 702 continue; 703 } 704 705 pceMask = 0; 706 lceMask = 0; 707 708 if (peerAvailableLceMask == 0) 709 { 710 // 711 // peerAvailableLceMask is initialized to even async LCEs at the 712 // top of the function. 713 // As a result, if at any point in the loop, this mask == 0, 714 // it implies we have used up all even async LCEs and should move to 715 // using odd async LCEs. 716 // 717 peerAvailableLceMask = kceGetNvlinkPeerSupportedLceMask_HAL(pGpu, pKCe, peerAvailableLceMask); 718 } 719 720 // Each peer gets 1 LCE 721 lceIndex = CE_GET_LOWEST_AVAILABLE_IDX(peerAvailableLceMask); 722 HIGHESTBITIDX_32(maxLceCnt); 723 if (lceIndex < maxLceCnt) 724 { 725 lceMask |= NVBIT32(lceIndex); 726 // Clear out the chosen LCE 727 peerAvailableLceMask &= (~(NVBIT32(lceIndex))); 728 } 729 730 pKCe->nvlinkNumPeers++; 731 732 peerLinkMask = knvlinkGetLinkMaskToPeer(pGpu, pKernelNvlink, pRemoteGpu); 733 if (peerLinkMask == 0) 734 { 735 NV_PRINTF(LEVEL_INFO, "GPU%d has nvlink disabled. Skip programming\n", pRemoteGpu->gpuInstance); 736 continue; 737 } 738 739 portMemSet(¶ms, 0, sizeof(params)); 740 params.linkMask = peerLinkMask; 741 742 status = knvlinkExecGspRmRpc(pGpu, pKernelNvlink, 743 NV2080_CTRL_CMD_INTERNAL_HSHUB_GET_HSHUB_ID_FOR_LINKS, 744 (void *)¶ms, sizeof(params)); 745 NV_ASSERT_OK_OR_RETURN(status); 746 747 // Iterate through links by HSHUB 748 NvU32 linksPerHshub[NV_CE_MAX_HSHUBS] = {0}; 749 750 FOR_EACH_INDEX_IN_MASK(32, linkId, peerLinkMask) 751 { 752 hshubId = params.hshubIds[linkId]; 753 // Update link count for this hshub 754 linksPerHshub[hshubId]++; 755 } 756 FOR_EACH_INDEX_IN_MASK_END; 757 758 for (i = 0; i < NV_CE_MAX_HSHUBS; i++) 759 { 760 if (linksPerHshub[i] == 0) 761 continue; 762 763 pceIndex = CE_GET_LOWEST_AVAILABLE_IDX(pceAvailableMaskPerHshub[i]); 764 numPcePerLink = kceGetNumPceRequired(linksPerHshub[i]); 765 766 for (j = 0; j < numPcePerLink; j++) 767 { 768 _ceGetAlgorithmPceIndex(pGpu, pKCe, pceAvailableMaskPerHshub, &pceIndex, &i); 769 pceMask |= NVBIT32(pceIndex); 770 // Clear out the assigned PCE 771 pceAvailableMaskPerHshub[i] &= (~(NVBIT32(pceIndex))); 772 } 773 774 } 775 776 // Now, assign the PCE-LCE association for the current peer 777 if (pceMask != 0) 778 { 779 // We just need at least one peer to set this to TRUE 780 bPeerAssigned = NV_TRUE; 781 782 FOR_EACH_INDEX_IN_MASK(32, pceIndex, pceMask) 783 { 784 pLocalPceLceMap[pceIndex] = lceIndex; 785 NV_PRINTF(LEVEL_INFO, "GPU%d <-> GPU%d PCE Index: %d LCE Index: %d\n", 786 pGpu->gpuInstance, pRemoteGpu->gpuInstance, pceIndex, lceIndex); 787 } 788 FOR_EACH_INDEX_IN_MASK_END; 789 790 // Store lceMask in the exposeCeMask before moving on 791 *pLocalExposeCeMask |= lceMask; 792 } 793 794 pKCeLce = GPU_GET_KCE(pGpu, lceIndex); 795 pKCeLce->nvlinkPeerMask |= NVBIT(pRemoteGpu->gpuInstance); 796 797 // 798 // Bug 200659256 - Looping over GPUs rather than peers (CL 28776130) 799 // does not handle multi-GPUs/Peer as is the case on switch systems. 800 // We must only take this loop once on switch systems to account for this. 801 // If we need to support multiple peer connections with switch systems 802 // in the future, this code must be revisited 803 // 804 if (pSys->getProperty(pSys, PDB_PROP_SYS_FABRIC_IS_EXTERNALLY_MANAGED)) 805 { 806 break; 807 } 808 809 } 810 811 if (bPeerAssigned == NV_FALSE) 812 { 813 status = NV_WARN_NOTHING_TO_DO; 814 } 815 816 return status; 817 } 818 819 /** 820 * @brief Some clients rely on LCE 4 also being turned on when there 821 * are no NVLink peers. This function sets up the default links. 822 * 823 * @param[in] pGpu OBJGPU pointer 824 * @param[in] pKCe KernelCE pointer 825 * @param[in] pceAvailableMaskPerHshub Pointer to CEs available per HSHUB 826 * @param[out] pLocalPceLceMap Pointer to PCE-LCE array 827 * @param[out] pLocalExposeCeMask Pointer to LCE Mask 828 * 829 * Returns NV_OK if successful in assigning PCEs to a default async LCE (>= 4) 830 */ 831 NV_STATUS 832 kceMapAsyncLceDefault_GH100 833 ( 834 OBJGPU *pGpu, 835 KernelCE *pKCe, 836 NvU32 *pceAvailableMaskPerHshub, 837 NvU32 *pLocalPceLceMap, 838 NvU32 *pLocalExposeCeMask, 839 NvU32 numDefaultPces 840 ) 841 { 842 NvU32 peerAvailableLceMask = NV_CE_LCE_MASK_INIT; 843 NvU32 lceMask = 0; 844 NvU32 pceMask = 0; 845 NvU32 lceIndex, pceIndex, hshubId, i; 846 NvU32 maxLceCnt = NV_CE_MAX_LCE_MASK; 847 848 peerAvailableLceMask = kceGetNvlinkPeerSupportedLceMask_HAL(pGpu, pKCe, peerAvailableLceMask); 849 hshubId = 1; 850 851 // 852 // If no peers were found, then no async LCEs (>= 4) will be turned on. 853 // However, some clients rely on LCE 4 being present even without any 854 // NVLink peers being found. So, turn on the 1st available async LCE (>= 4) 855 // Reference bug 3042556 856 // 857 lceIndex = CE_GET_LOWEST_AVAILABLE_IDX(peerAvailableLceMask); 858 HIGHESTBITIDX_32(maxLceCnt); 859 if (lceIndex < maxLceCnt) 860 { 861 lceMask |= NVBIT32(lceIndex); 862 // Clear out the chosen LCE 863 peerAvailableLceMask &= (~(NVBIT32(lceIndex))); 864 } 865 866 // Assign PCEs to this LCE based on input request 867 for (i = 0; i < numDefaultPces; i++) 868 { 869 if (i % NV_CE_PCE_PER_HSHUB == 0) 870 hshubId++; 871 872 pceIndex = CE_GET_LOWEST_AVAILABLE_IDX(pceAvailableMaskPerHshub[hshubId]); 873 if (pceIndex < kceGetPce2lceConfigSize1_HAL(pKCe)) 874 { 875 pceMask |= NVBIT32(pceIndex); 876 pceAvailableMaskPerHshub[hshubId] &= (~(NVBIT32(pceIndex))); 877 } 878 } 879 880 FOR_EACH_INDEX_IN_MASK(32, pceIndex, pceMask) 881 { 882 pLocalPceLceMap[pceIndex] = lceIndex; 883 NV_PRINTF(LEVEL_INFO, "GPU%d <-> GPU%d PCE Index: %d LCE Index: %d\n", 884 pGpu->gpuInstance, pGpu->gpuInstance, pceIndex, lceIndex); 885 } 886 FOR_EACH_INDEX_IN_MASK_END; 887 888 // Store lceMask in the exposeCeMask before moving on 889 *pLocalExposeCeMask |= lceMask; 890 891 return NV_OK; 892 893 } 894 895 NV_STATUS 896 kceGetMappings_GH100 897 ( 898 OBJGPU *pGpu, 899 KernelCE *pKCe, 900 NVLINK_TOPOLOGY_PARAMS *pTopoParams, 901 NvU32 *pLocalPceLceMap, 902 NvU32 *pLocalGrceMap, 903 NvU32 *pExposeCeMask 904 ) 905 { 906 NV_STATUS status = NV_OK; 907 NV_STATUS statusC2C = NV_OK; 908 KernelNvlink *pKernelNvlink = GPU_GET_KERNEL_NVLINK(pGpu); 909 NvU32 topoIdx = NV_CE_INVALID_TOPO_IDX; 910 NvBool bEntryExists = NV_FALSE; 911 NvU32 pce2lceConfigSize1 = kceGetPce2lceConfigSize1_HAL(pKCe); 912 NvU32 grceConfigSize1 = kceGetGrceConfigSize1_HAL(pKCe); 913 NvU32 pceIdx, grceIdx; 914 915 // 916 // In the self hosted case, utilize table entries 917 // with pre defined mappings. Calling from the parent would result in 918 // using the incorrect autoconfig table so instead set the necessary 919 // values here if config is found in the table. 920 // 921 if (gpuIsSelfHosted(pGpu) && !(pGpu->getProperty(pGpu, PDB_PROP_GPU_SKIP_TABLE_CE_MAP))) 922 { 923 bEntryExists = kceGetAutoConfigTableEntry_HAL(pGpu, pKCe, pTopoParams, nvLinkCeAutoConfigTable_GH100, 924 NV_ARRAY_ELEMENTS(nvLinkCeAutoConfigTable_GH100), 925 &topoIdx, pExposeCeMask); 926 if (bEntryExists) 927 { 928 // Since entry exists, fill local variables with the associated table entry 929 for (pceIdx = 0; pceIdx < pce2lceConfigSize1; pceIdx++) 930 { 931 pLocalPceLceMap[pceIdx] = nvLinkCeAutoConfigTable_GH100[topoIdx].pceLceMap[pceIdx]; 932 } 933 for (grceIdx = 0; grceIdx < grceConfigSize1; grceIdx++) 934 { 935 pLocalGrceMap[grceIdx] = nvLinkCeAutoConfigTable_GH100[topoIdx].grceConfig[grceIdx]; 936 } 937 938 pTopoParams->maxTopoIdx = topoIdx; 939 pTopoParams->sysmemLinks = nvLinkCeAutoConfigTable_GH100[topoIdx].sysmemLinks; 940 pTopoParams->maxLinksPerPeer = nvLinkCeAutoConfigTable_GH100[topoIdx].maxLinksPerPeer; 941 pTopoParams->numPeers = nvLinkCeAutoConfigTable_GH100[topoIdx].numPeers; 942 pTopoParams->bSymmetric = nvLinkCeAutoConfigTable_GH100[topoIdx].bSymmetric; 943 pTopoParams->bSwitchConfig = nvLinkCeAutoConfigTable_GH100[topoIdx].bSwitchConfig; 944 945 return NV_OK; 946 } 947 948 } 949 950 // In CC case a 1-1 mapping should be applied and other mappings are not required 951 if (gpuIsCCFeatureEnabled(pGpu)) 952 { 953 kceMapPceLceForCC(pGpu, pKCe, pTopoParams->pceAvailableMaskPerHshub, 954 pLocalPceLceMap, pLocalGrceMap, pExposeCeMask); 955 goto returnSuccess; 956 } 957 958 //Prepare the per-HSHUB/FBHUB available PCE mask 959 kceGetAvailableHubPceMask(pGpu, pKCe, pTopoParams); 960 961 // Assign PCEs to "PEER"s if nvlink is enabled 962 if (pKernelNvlink && !knvlinkIsForcedConfig(pGpu, pKernelNvlink)) 963 { 964 status = kceMapPceLceForNvlinkPeers_HAL(pGpu, pKCe, 965 pTopoParams->pceAvailableMaskPerHshub, 966 pLocalPceLceMap, 967 pExposeCeMask); 968 } 969 else 970 { 971 status = NV_WARN_NOTHING_TO_DO; 972 } 973 974 // Special C2C cases for LCE 2 and 3 975 statusC2C = kceMapPceLceForC2C_HAL(pGpu, pKCe, 976 pTopoParams->pceAvailableMaskPerHshub, 977 pLocalPceLceMap, pExposeCeMask); 978 979 // Assign PCEs for GRCE case 980 kceMapPceLceForGRCE_HAL(pGpu, pKCe, 981 pTopoParams->pceAvailableMaskPerHshub, 982 pLocalPceLceMap, pExposeCeMask, pLocalGrceMap, pTopoParams->fbhubPceMask); 983 984 if ((status == NV_WARN_NOTHING_TO_DO && statusC2C == NV_WARN_NOTHING_TO_DO) || 985 (status == NV_ERR_NOT_SUPPORTED && statusC2C == NV_ERR_NOT_SUPPORTED)) 986 { 987 // If there's no NVLink peers available, still expose an additional async LCE 988 status = kceMapAsyncLceDefault_HAL(pGpu, pKCe, 989 pTopoParams->pceAvailableMaskPerHshub, 990 pLocalPceLceMap, 991 pExposeCeMask, 992 NV_CE_NUM_PCES_NO_LINK_CASE); 993 } 994 995 NV_PRINTF(LEVEL_INFO, "status = %d, statusC2C = %d\n", status, statusC2C); 996 997 returnSuccess: 998 return NV_OK; 999 } 1000 1001 NV_STATUS kceGetP2PCes_GH100(KernelCE *pKCe, OBJGPU *pGpu, NvU32 gpuMask, NvU32 *nvlinkP2PCeMask) 1002 { 1003 // 1004 // Currently Bug 4103154 requires an updated algorithm described below 1005 // to assign the proper LCE. Cases without MODS enabled can default back 1006 // to the previous version. 1007 // 1008 return kceGetP2PCes_GV100(pKCe, pGpu, gpuMask, nvlinkP2PCeMask); 1009 1010 NvU32 gpuCount = gpumgrGetSubDeviceCount(gpuMask); 1011 NvU32 minP2PLce = (NV_CE_EVEN_ASYNC_LCE_MASK | NV_CE_ODD_ASYNC_LCE_MASK) & NV_CE_MAX_LCE_MASK; 1012 NvU32 i; 1013 KernelNvlink *pKernelNvlink = GPU_GET_KERNEL_NVLINK(pGpu); 1014 1015 if (pKernelNvlink == NULL) 1016 { 1017 return NV_WARN_NOTHING_TO_DO; 1018 } 1019 1020 if (knvlinkIsGpuConnectedToNvswitch(pGpu, pKernelNvlink)) 1021 { 1022 return kceGetP2PCes_GV100(pKCe, pGpu, gpuMask, nvlinkP2PCeMask); 1023 } 1024 1025 LOWESTBITIDX_32(minP2PLce); 1026 *nvlinkP2PCeMask = 0; 1027 1028 if (gpuCount == 1) 1029 { 1030 *nvlinkP2PCeMask |= NVBIT(minP2PLce); 1031 for (i = minP2PLce; i < gpuGetNumCEs(pGpu); i++) 1032 { 1033 *nvlinkP2PCeMask |= NVBIT(i); 1034 1035 } 1036 } 1037 else if (gpuCount > 2) 1038 { 1039 // if gpuCount > 2, this is an invalid request. Print warning and return NV_OK 1040 NV_PRINTF(LEVEL_INFO, "GPU %d invalid request for gpuCount %d\n", gpuGetInstance(pGpu), gpuCount); 1041 return NV_ERR_INVALID_STATE; 1042 } 1043 else 1044 { 1045 OBJGPU *pRemoteGpu = NULL; 1046 KernelCE *pKCeLoop = NULL; 1047 NvU32 peerLinkMask = 0; 1048 NvU32 gpuInstance = 0; 1049 NvU32 phyLinkId, status, targetPceMask, numPces; 1050 1051 // 1052 // The LCE returned should be the LCE which has the most PCEs mapped 1053 // on the given HSHUB. This HSHUB should be determined by 1054 // tracking where the majority of links are connected. 1055 // 1056 NvU32 linksPerHshub[NV_CE_MAX_HSHUBS] = {0}; 1057 NvU32 maxLinksConnectedHshub = 0; 1058 NvU32 maxConnectedHshubId = NV_CE_MAX_HSHUBS; 1059 NvU32 lceAssignedMask = 0; 1060 KernelCE *maxLcePerHshub[NV_CE_MAX_HSHUBS] = {0}; 1061 1062 NV2080_CTRL_INTERNAL_HSHUB_GET_HSHUB_ID_FOR_LINKS_PARAMS params; 1063 1064 if (pKernelNvlink != NULL) 1065 { 1066 // Get the remote GPU 1067 while ((pRemoteGpu = gpumgrGetNextGpu(gpuMask, &gpuInstance)) != NULL) 1068 { 1069 if (pRemoteGpu != pGpu) 1070 break; 1071 } 1072 1073 NV_ASSERT_OR_RETURN(pRemoteGpu != NULL, NV_ERR_INVALID_STATE); 1074 gpuInstance = gpuGetInstance(pRemoteGpu); 1075 1076 peerLinkMask = knvlinkGetLinkMaskToPeer(pGpu, pKernelNvlink, pRemoteGpu); 1077 } 1078 1079 portMemSet(¶ms, 0, sizeof(params)); 1080 params.linkMask = peerLinkMask; 1081 1082 status = knvlinkExecGspRmRpc(pGpu, pKernelNvlink, 1083 NV2080_CTRL_CMD_INTERNAL_HSHUB_GET_HSHUB_ID_FOR_LINKS, 1084 (void *)¶ms, sizeof(params)); 1085 NV_ASSERT_OK_OR_RETURN(status); 1086 1087 1088 FOR_EACH_INDEX_IN_MASK(32, phyLinkId, peerLinkMask) 1089 { 1090 NvU32 hshubId = params.hshubIds[phyLinkId]; 1091 linksPerHshub[hshubId]++; 1092 1093 if (linksPerHshub[hshubId] > maxLinksConnectedHshub) 1094 { 1095 maxLinksConnectedHshub = linksPerHshub[hshubId]; 1096 maxConnectedHshubId = hshubId; 1097 } 1098 } 1099 FOR_EACH_INDEX_IN_MASK_END; 1100 1101 // 1102 // Iterate through all Async LCEs to track which HSHUB should 1103 // be using which LCE. This is decided based on the majority. If 1104 // there is a tie, then LCE with the lower index is preferred. 1105 // 1106 KCE_ITER_ALL_BEGIN(pGpu, pKCeLoop, minP2PLce) 1107 NvU32 localMaxPcePerHshub = 0; 1108 KernelCE *localMaxLcePerHshub; 1109 NvU32 localMaxHshub = NV_CE_MAX_HSHUBS; 1110 1111 // if LCE is stubbed or LCE is already assigned to another peer 1112 if (pKCeLoop->bStubbed) 1113 { 1114 continue; 1115 } 1116 1117 // LCE is already assigned to this peer 1118 if ((pKCeLoop->nvlinkPeerMask & NVBIT(gpuInstance)) != 0) 1119 { 1120 maxLcePerHshub[maxConnectedHshubId] = pKCeLoop; 1121 break; 1122 } 1123 // LCE is already assigned to another peer 1124 else if (pKCeLoop->nvlinkPeerMask != 0) 1125 { 1126 continue; 1127 } 1128 1129 NV2080_CTRL_CE_GET_CE_PCE_MASK_PARAMS params = {0}; 1130 1131 params.ceEngineType = NV2080_ENGINE_TYPE_COPY(pKCeLoop->publicID); 1132 status = knvlinkExecGspRmRpc(pGpu, pKernelNvlink, 1133 NV2080_CTRL_CMD_CE_GET_CE_PCE_MASK, 1134 (void *)¶ms, sizeof(params)); 1135 NV_ASSERT_OK_OR_RETURN(status); 1136 1137 // 1138 // An LCE may be utilized across several HSHUBs. Loop through all HSHUBs 1139 // in order to decide which HSHUB holds the majority of this specific LCE. 1140 // To help with this, create a mask of PCEs only on the HSHUB which the peer 1141 // is most connected to by shifting the HSHUB PCE mask 1142 // 1143 1144 for (i = NV_CE_HSHUBNVL_ID_0; i < NV_CE_MAX_HSHUBS; i++) 1145 { 1146 targetPceMask = params.pceMask & ((NVBIT(NV_CE_PCE_PER_HSHUB) - 1) << ((i - NV_CE_HSHUBNVL_ID_0) * NV_CE_PCE_PER_HSHUB)); 1147 numPces = nvPopCount32(targetPceMask); 1148 if (numPces > localMaxPcePerHshub && !(lceAssignedMask & NVBIT(pKCeLoop->publicID))) 1149 { 1150 localMaxPcePerHshub = numPces; 1151 localMaxLcePerHshub = pKCeLoop; 1152 localMaxHshub = i; 1153 } 1154 } 1155 1156 if (localMaxHshub < NV_CE_MAX_HSHUBS) 1157 { 1158 maxLcePerHshub[localMaxHshub] = localMaxLcePerHshub; 1159 lceAssignedMask |= NVBIT(localMaxLcePerHshub->publicID); 1160 } 1161 1162 KCE_ITER_END 1163 1164 if (maxLcePerHshub[maxConnectedHshubId] != NULL) 1165 { 1166 NV_PRINTF(LEVEL_INFO, 1167 "GPU %d Assigning Peer %d to preferred LCE %d\n", 1168 gpuGetInstance(pGpu), gpuInstance, 1169 maxLcePerHshub[maxConnectedHshubId]->publicID); 1170 } 1171 else 1172 { 1173 // 1174 // In the event that the preferred HSHUB's primary LCE is not available, 1175 // choose the first available LCE which was found and set that index as 1176 // the new preferred hshub. 1177 // 1178 for (i = 0; i < NV_CE_MAX_HSHUBS; i++) 1179 { 1180 if (maxLcePerHshub[i] != NULL) 1181 { 1182 NV_PRINTF(LEVEL_INFO, 1183 "GPU %d Assigning Peer %d to first available LCE %d\n", 1184 gpuGetInstance(pGpu), gpuInstance, 1185 maxLcePerHshub[i]->publicID); 1186 maxConnectedHshubId = i; 1187 break; 1188 } 1189 } 1190 } 1191 1192 if (maxConnectedHshubId < NV_CE_MAX_HSHUBS) 1193 { 1194 maxLcePerHshub[maxConnectedHshubId]->nvlinkPeerMask = NVBIT(gpuInstance); 1195 *nvlinkP2PCeMask = NVBIT(maxLcePerHshub[maxConnectedHshubId]->publicID); 1196 } 1197 } 1198 1199 return NV_OK; 1200 } 1201