1 /* 2 * SPDX-FileCopyrightText: Copyright (c) 2021-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 3 * SPDX-License-Identifier: MIT 4 * 5 * Permission is hereby granted, free of charge, to any person obtaining a 6 * copy of this software and associated documentation files (the "Software"), 7 * to deal in the Software without restriction, including without limitation 8 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 9 * and/or sell copies of the Software, and to permit persons to whom the 10 * Software is furnished to do so, subject to the following conditions: 11 * 12 * The above copyright notice and this permission notice shall be included in 13 * all copies or substantial portions of the Software. 14 * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 21 * DEALINGS IN THE SOFTWARE. 22 */ 23 24 #include "gpu/gpu.h" 25 #include "gpu/ce/kernel_ce.h" 26 #include "gpu/nvlink/kernel_nvlink.h" 27 #include "gpu/ce/kernel_ce_private.h" 28 #include "gpu/bif/kernel_bif.h" 29 #include "platform/chipset/chipset.h" 30 31 #include "published/hopper/gh100/dev_ce.h" 32 #include "published/hopper/gh100/dev_xtl_ep_pcfg_gpu.h" 33 34 #define NV_CE_INVALID_TOPO_IDX 0xFFFF 35 36 // Defines for PCE-LCE mapping algorithm 37 #define NV_CE_MAX_HSHUBS 5 38 #define NV_CE_LCE_MASK_INIT 0xFFFFFFFF 39 #define NV_CE_GRCE_ALLOWED_LCE_MASK 0x03 40 #define NV_CE_MAX_GRCE 2 41 #define NV_CE_EVEN_ASYNC_LCE_MASK 0x55555550 42 #define NV_CE_ODD_ASYNC_LCE_MASK 0xAAAAAAA0 43 #define NV_CE_MAX_LCE_MASK 0x3FF 44 #define NV_CE_PCE_PER_HSHUB 4 45 #define NV_CE_NUM_FBPCE 4 46 #define NV_CE_NUM_PCES_NO_LINK_CASE 12 47 #define NV_CE_MAX_PCE_PER_GRCE 2 48 49 /* 50 * Table for setting the PCE2LCE mapping for WAR configs that cannot be implemented 51 * using the algorithm because the config does not conform to the algorithm's set 52 * of requirements/assumptions 53 */ 54 static NVLINK_CE_AUTO_CONFIG_TABLE nvLinkCeAutoConfigTable_GH100[] = 55 { 56 // 57 // #systmem #max #peers Symmetric Switch PCE-LCE GRCE exposeCe 58 // links (links/peer) Config? Config Map Config Mask 59 60 // Default minimal configuration - NOTE: do not add entrys before this 61 // Default is for CG1 62 {0x0, 0x0, 0x0, NV_FALSE, NV_FALSE, {0x4,0x6,0x4,0x6,0x4,0x6,0x4,0x6, 63 0x4,0x4,0x4,0x6,0x4,0x6,0x4,0x6}, {0x4,0x6}, 0x53}, 64 {0x0, 0x0, 0x0, NV_TRUE, NV_FALSE, {0x4,0x6,0x4,0x6,0x4,0x6,0x4,0x6, 65 0x4,0x4,0x4,0x6,0x4,0x6,0x4,0x6}, {0x4,0x6}, 0x53}, 66 67 // Switch cases - Ranger mapping 68 {0x0, 0x12, 0x1, NV_TRUE, NV_TRUE, {0x4,0x6,0x4,0x6,0x4,0x6,0x4,0x6, 69 0x4,0x4,0x4,0x6,0x4,0x6,0x4,0x6}, {0x4,0x6}, 0x53}, 70 {0x0, 0x6, 0x1, NV_TRUE, NV_TRUE, {0x4,0x6,0x4,0x6,0x4,0x6,0x4,0x6, 71 0x4,0x4,0x4,0x6,0x4,0x6,0x4,0x6}, {0x4,0x6}, 0x53}, 72 // CG4 mapping 73 {0x0, 0x6, 0x3, NV_TRUE, NV_FALSE, {0x4,0x4,0x4,0x4,0x8,0x8,0x8,0x4, 74 0x4,0x6,0x4,0x6,0x4,0x6,0x4,0x6}, {0x4,0x6}, 0x153}, 75 {0x0, 0x6, 0x3, NV_FALSE, NV_FALSE, {0x4,0x4,0x4,0x4,0x8,0x8,0x8,0x4, 76 0x4,0x6,0x4,0x6,0x4,0x6,0x4,0x6}, {0x4,0x6}, 0x153}, 77 {0x0, 0x6, 0x2, NV_TRUE, NV_FALSE, {0x4,0x4,0x4,0x4,0xF,0xF,0xF,0x4, 78 0x4,0x6,0x4,0x6,0x4,0x6,0x4,0x6}, {0x4,0x6}, 0x53}, 79 {0x0, 0x6, 0x2, NV_FALSE, NV_FALSE, {0x4,0x4,0x4,0x4,0xF,0xF,0xF,0x4, 80 0x4,0x6,0x4,0x6,0x4,0x6,0x4,0x6}, {0x4,0x6}, 0x53}, 81 {0x0, 0x6, 0x1, NV_TRUE, NV_FALSE, {0x4,0x4,0x4,0x4,0xF,0xF,0xF,0x4, 82 0x4,0xF,0x4,0xF,0x4,0xF,0x4,0xF}, {0x4,0xF}, 0x13}, 83 {0x0, 0x6, 0x1, NV_FALSE, NV_FALSE, {0x4,0x4,0x4,0x4,0xF,0xF,0xF,0x4, 84 0x4,0xF,0x4,0xF,0x4,0xF,0x4,0xF}, {0x4,0xF}, 0x13}, 85 {0x0, 0x5, 0x3, NV_TRUE, NV_FALSE, {0x4,0x4,0x4,0x4,0x8,0x8,0x8,0x4, 86 0x4,0x6,0x4,0x6,0x4,0x6,0x4,0x6}, {0x4,0x6}, 0x153}, 87 {0x0, 0x5, 0x3, NV_FALSE, NV_FALSE, {0x4,0x4,0x4,0x4,0x8,0x8,0x8,0x4, 88 0x4,0x6,0x4,0x6,0x4,0x6,0x4,0x6}, {0x4,0x6}, 0x153}, 89 {0x0, 0x5, 0x2, NV_TRUE, NV_FALSE, {0x4,0x4,0x4,0x4,0xF,0xF,0xF,0x4, 90 0x4,0x6,0x4,0x6,0x4,0x6,0x4,0x6}, {0x4,0x6}, 0x53}, 91 {0x0, 0x5, 0x2, NV_FALSE, NV_FALSE, {0x4,0x4,0x4,0x4,0xF,0xF,0xF,0x4, 92 0x4,0x6,0x4,0x6,0x4,0x6,0x4,0x6}, {0x4,0x6}, 0x53}, 93 {0x0, 0x5, 0x1, NV_TRUE, NV_FALSE, {0x4,0x4,0x4,0x4,0xF,0xF,0xF,0x4, 94 0x4,0xF,0x4,0xF,0x4,0xF,0x4,0xF}, {0x4,0xF}, 0x13}, 95 {0x0, 0x5, 0x1, NV_FALSE, NV_FALSE, {0x4,0x4,0x4,0x4,0xF,0xF,0xF,0x4, 96 0x4,0xF,0x4,0xF,0x4,0xF,0x4,0xF}, {0x4,0xF}, 0x13}, 97 {0x0, 0x4, 0x3, NV_TRUE, NV_FALSE, {0x4,0x4,0x4,0x4,0x8,0x8,0x8,0x4, 98 0x4,0x6,0x4,0x6,0x4,0x6,0x4,0x6}, {0x4,0x6}, 0x153}, 99 {0x0, 0x4, 0x3, NV_FALSE, NV_FALSE, {0x4,0x4,0x4,0x4,0x8,0x8,0x8,0x4, 100 0x4,0x6,0x4,0x6,0x4,0x6,0x4,0x6}, {0x4,0x6}, 0x153}, 101 {0x0, 0x4, 0x2, NV_TRUE, NV_FALSE, {0x4,0x4,0x4,0x4,0xF,0xF,0xF,0x4, 102 0x4,0x6,0x4,0x6,0x4,0x6,0x4,0x6}, {0x4,0x6}, 0x53}, 103 {0x0, 0x4, 0x2, NV_FALSE, NV_FALSE, {0x4,0x4,0x4,0x4,0xF,0xF,0xF,0x4, 104 0x4,0x6,0x4,0x6,0x4,0x6,0x4,0x6}, {0x4,0x6}, 0x53}, 105 {0x0, 0x4, 0x1, NV_TRUE, NV_FALSE, {0x4,0x4,0x4,0x4,0xF,0xF,0xF,0x4, 106 0x4,0xF,0x4,0xF,0x4,0xF,0x4,0xF}, {0x4,0xF}, 0x13}, 107 {0x0, 0x4, 0x1, NV_FALSE, NV_FALSE, {0x4,0x4,0x4,0x4,0xF,0xF,0xF,0x4, 108 0x4,0xF,0x4,0xF,0x4,0xF,0x4,0xF}, {0x4,0xF}, 0x13}, 109 {0x0, 0x3, 0x3, NV_TRUE, NV_FALSE, {0x4,0x4,0x4,0x4,0x8,0x8,0x8,0x4, 110 0x4,0x6,0x4,0x6,0x4,0x6,0x4,0x6}, {0x4,0x6}, 0x153}, 111 {0x0, 0x3, 0x3, NV_FALSE, NV_FALSE, {0x4,0x4,0x4,0x4,0x8,0x8,0x8,0x4, 112 0x4,0x6,0x4,0x6,0x4,0x6,0x4,0x6}, {0x4,0x6}, 0x153}, 113 {0x0, 0x3, 0x2, NV_TRUE, NV_FALSE, {0x4,0x4,0x4,0x4,0xF,0xF,0xF,0x4, 114 0x4,0x6,0x4,0x6,0x4,0x6,0x4,0x6}, {0x4,0x6}, 0x53}, 115 {0x0, 0x3, 0x2, NV_FALSE, NV_FALSE, {0x4,0x4,0x4,0x4,0xF,0xF,0xF,0x4, 116 0x4,0x6,0x4,0x6,0x4,0x6,0x4,0x6}, {0x4,0x6}, 0x53}, 117 {0x0, 0x3, 0x1, NV_TRUE, NV_FALSE, {0x4,0x4,0x4,0x4,0xF,0xF,0xF,0x4, 118 0x4,0xF,0x4,0xF,0x4,0xF,0x4,0xF}, {0x4,0xF}, 0x13}, 119 {0x0, 0x3, 0x1, NV_FALSE, NV_FALSE, {0x4,0x4,0x4,0x4,0xF,0xF,0xF,0x4, 120 0x4,0xF,0x4,0xF,0x4,0xF,0x4,0xF}, {0x4,0xF}, 0x13}, 121 {0x0, 0x2, 0x3, NV_TRUE, NV_FALSE, {0x4,0x4,0x4,0x4,0x8,0x8,0x8,0x4, 122 0x4,0x6,0x4,0x6,0x4,0x6,0x4,0x6}, {0x4,0x6}, 0x153}, 123 {0x0, 0x2, 0x3, NV_FALSE, NV_FALSE, {0x4,0x4,0x4,0x4,0x8,0x8,0x8,0x4, 124 0x4,0x6,0x4,0x6,0x4,0x6,0x4,0x6}, {0x4,0x6}, 0x153}, 125 {0x0, 0x2, 0x2, NV_TRUE, NV_FALSE, {0x4,0x4,0x4,0x4,0xF,0xF,0xF,0x4, 126 0x4,0x6,0x4,0x6,0x4,0x6,0x4,0x6}, {0x4,0x6}, 0x53}, 127 {0x0, 0x2, 0x2, NV_FALSE, NV_FALSE, {0x4,0x4,0x4,0x4,0xF,0xF,0xF,0x4, 128 0x4,0x6,0x4,0x6,0x4,0x6,0x4,0x6}, {0x4,0x6}, 0x53}, 129 {0x0, 0x2, 0x1, NV_TRUE, NV_FALSE, {0x4,0x4,0x4,0x4,0xF,0xF,0xF,0x4, 130 0x4,0xF,0x4,0xF,0x4,0xF,0x4,0xF}, {0x4,0xF}, 0x13}, 131 {0x0, 0x2, 0x1, NV_FALSE, NV_FALSE, {0x4,0x4,0x4,0x4,0xF,0xF,0xF,0x4, 132 0x4,0xF,0x4,0xF,0x4,0xF,0x4,0xF}, {0x4,0xF}, 0x13}, 133 {0x0, 0x1, 0x3, NV_TRUE, NV_FALSE, {0x4,0x4,0x4,0x4,0x8,0x8,0x8,0x4, 134 0x4,0x6,0x4,0x6,0x4,0x6,0x4,0x6}, {0x4,0x6}, 0x153}, 135 {0x0, 0x1, 0x3, NV_FALSE, NV_FALSE, {0x4,0x4,0x4,0x4,0x8,0x8,0x8,0x4, 136 0x4,0x6,0x4,0x6,0x4,0x6,0x4,0x6}, {0x4,0x6}, 0x153}, 137 {0x0, 0x1, 0x2, NV_TRUE, NV_FALSE, {0x4,0x4,0x4,0x4,0xF,0xF,0xF,0x4, 138 0x4,0x6,0x4,0x6,0x4,0x6,0x4,0x6}, {0x4,0x6}, 0x53}, 139 {0x0, 0x1, 0x2, NV_FALSE, NV_FALSE, {0x4,0x4,0x4,0x4,0xF,0xF,0xF,0x4, 140 0x4,0x6,0x4,0x6,0x4,0x6,0x4,0x6}, {0x4,0x6}, 0x53}, 141 {0x0, 0x1, 0x1, NV_TRUE, NV_FALSE, {0x4,0x4,0x4,0x4,0xF,0xF,0xF,0x4, 142 0x4,0xF,0x4,0xF,0x4,0xF,0x4,0xF}, {0x4,0xF}, 0x13}, 143 {0x0, 0x1, 0x1, NV_FALSE, NV_FALSE, {0x4,0x4,0x4,0x4,0xF,0xF,0xF,0x4, 144 0x4,0xF,0x4,0xF,0x4,0xF,0x4,0xF}, {0x4,0xF}, 0x13} 145 }; 146 147 /*! 148 * @brief Returns the size of the PCE2LCE register array 149 * 150 * 151 * @param[in] pGpu OBJGPU pointer 152 * @param[in] pCe OBJCE pointer 153 * 154 * @return NV_CE_PCE2LCE_CONFIG__SIZE_1 155 * 156 */ 157 NvU32 158 kceGetPce2lceConfigSize1_GH100 159 ( 160 KernelCE *pKCe 161 ) 162 { 163 return NV_CE_PCE2LCE_CONFIG__SIZE_1; 164 } 165 166 /** 167 * @brief This function takes in a link mask and returns the minimum number 168 * of PCE connections required. This is decided based on a round up approach 169 * where each PCE can handle 1.5 links. 170 */ 171 NvU32 172 kceGetNumPceRequired 173 ( 174 NvU32 numLinks 175 ) 176 { 177 switch(numLinks) 178 { 179 case 6: 180 return 4; 181 case 5: 182 case 4: 183 return 3; 184 case 3: 185 return 2; 186 case 2: 187 case 1: 188 default: 189 return 1; 190 } 191 } 192 193 /* 194 * Look up entry in NVLINK_CE_AUTO_CONFIG_TABLE 195 * 196 * @param[in] pGpu OBJGPU pointer 197 * @param[in] pCe OBJCE pointer 198 * @param[in] pCurrentTopo NVLINK_TOPOLOGY_INFO pointer 199 * @param[in] pAutoConfigTable NVLINK_CE_AUTO_CONFIG_TABLE pointer 200 * @param[in] autoConfigNumEntries NvU32 num entries within pAutoConfigTable 201 * @param[out] pIdx NvU32 pointer 202 * @param[out] pExposeCeMask NvU32 pointer 203 * 204 * Returns: NV_TRUE if entry is found 205 * NV_FALSE otheriwse 206 */ 207 NvBool 208 kceGetAutoConfigTableEntry_GH100 209 ( 210 OBJGPU *pGpu, 211 KernelCE *pKCe, 212 NVLINK_TOPOLOGY_PARAMS *pCurrentTopo, 213 NVLINK_CE_AUTO_CONFIG_TABLE *pTable, 214 NvU32 autoConfigNumEntries, 215 NvU32 *pIdx, 216 NvU32 *pExposeCeMask 217 ) 218 { 219 NvU32 i; 220 221 // 222 // The auto config table entries will only be applicable 223 // from this function in SHH cases. Rather than 224 // introduced a new entry in the table to note SHH, 225 // in order to preserve backwards compatibility this 226 // function will only attempt to map if we are confirmed 227 // to be in SHH path. 228 // 229 if (!gpuIsSelfHosted(pGpu)) 230 { 231 return NV_FALSE; 232 } 233 234 for (i = 0; i < autoConfigNumEntries; i++) 235 { 236 if ((pTable[i].sysmemLinks == pCurrentTopo->sysmemLinks ) && 237 (pTable[i].maxLinksPerPeer == pCurrentTopo->maxLinksPerPeer) && 238 (pTable[i].bSymmetric == pCurrentTopo->bSymmetric ) && 239 (pTable[i].bSwitchConfig == pCurrentTopo->bSwitchConfig ) && 240 (pTable[i].numPeers == pCurrentTopo->numPeers )) 241 { 242 *pIdx = i; 243 *pExposeCeMask = pTable[i].exposeCeMask; 244 return NV_TRUE; 245 } 246 } 247 return NV_FALSE; 248 } 249 250 /** 251 * @brief This function returns the pceIndex for a particular link ID 252 * Must always be called with the hshub ID for the calling link ID 253 * 254 * @param[in] pGpu OBJGPU pointer 255 * @param[in] pKCe KernelCE pointer 256 * @param[in] pceAvailableMaskPerHshub Pointer to CEs available per HSHUB 257 * @param[out] pceIndex Pointer to caller pceIndex 258 * @param[out] pHshubId Pointer to caller HSHUB ID 259 */ 260 static void 261 _ceGetAlgorithmPceIndex 262 ( 263 OBJGPU *pGpu, 264 KernelCE *pKCe, 265 NvU32 *pceAvailableMaskPerHshub, 266 NvU32 *pceIndex, 267 NvU8 *pHshubId 268 ) 269 { 270 NvU8 pHshubIdRequested; 271 NvU32 i; 272 273 if ((pceIndex != NULL) && *pceIndex >= kceGetPce2lceConfigSize1_HAL(pKCe)) 274 { 275 NV_PRINTF(LEVEL_ERROR, "Invalid PCE request. pceIndex = %d pceCnt = %d\n", *pceIndex, kceGetPce2lceConfigSize1_HAL(pKCe)); 276 return; 277 } 278 279 if (!(NVBIT32(*pceIndex) & pceAvailableMaskPerHshub[*pHshubId])) 280 { 281 // 282 // 1. We couldn't find an applicable strided PCE in given HSHUB 283 // So, we'll assign the next consecutive PCE on the same HSHUB 284 // 285 *pceIndex = CE_GET_LOWEST_AVAILABLE_IDX(pceAvailableMaskPerHshub[*pHshubId]); 286 if (!(NVBIT32(*pceIndex) & pceAvailableMaskPerHshub[*pHshubId])) 287 { 288 // 2. If this is not a valid PCE on given HSHUB, assign PCE from alternative HSHUB 289 pHshubIdRequested = *pHshubId; 290 for (i = pHshubIdRequested + 1; i != pHshubIdRequested; i++) { 291 if (i > 4) { 292 i = 1; 293 continue; 294 } 295 296 *pceIndex = CE_GET_LOWEST_AVAILABLE_IDX(pceAvailableMaskPerHshub[i]); 297 if (NVBIT32(*pceIndex) & pceAvailableMaskPerHshub[i]) { 298 break; 299 } 300 } 301 302 if (i == pHshubIdRequested) 303 { 304 // If we've reached this point, then we have no more available PCEs to assign 305 NV_PRINTF(LEVEL_ERROR, "No more available PCEs to assign!\n"); 306 NV_ASSERT(0); 307 } 308 } 309 } 310 return; 311 } 312 313 /** 314 * @brief This function assigns LCE 2 and 3 mappings for C2C cases. 315 * 316 * @param[in] pGpu OBJGPU pointer 317 * @param[in] pKCe KernelCE pointer 318 * @param[in] pceAvailableMaskPerHshub Pointer to CEs available per HSHUB 319 * @param[out] pLocalPceLceMap Pointer to PCE-LCE array 320 * @param[out] pLocalExposeCeMask Pointer to LCE Mask 321 */ 322 NV_STATUS 323 kceMapPceLceForC2C_GH100 324 ( 325 OBJGPU *pGpu, 326 KernelCE *pKCe, 327 NvU32 *pceAvailableMaskPerHshub, 328 NvU32 *pLocalPceLceMap, 329 NvU32 *pLocalExposeCeMask 330 ) 331 { 332 NV_STATUS status = NV_OK; 333 KernelBif *pKernelBif = GPU_GET_KERNEL_BIF(pGpu); 334 NvU32 pceIndex, i, hshubId, lceMask, lceIndex; 335 NvU32 numNvLinkPeers = 0; 336 NvU32 selectPcePerHshub = 2; 337 NvBool c2cEnabled = pKernelBif->getProperty(pKernelBif, PDB_PROP_KBIF_IS_C2C_LINK_UP); 338 339 numNvLinkPeers = pKCe->nvlinkNumPeers; 340 if (gpuIsCCFeatureEnabled(pGpu) || (c2cEnabled && numNvLinkPeers == 0 && IS_MIG_IN_USE(pGpu))) 341 { 342 lceMask = NVBIT32(2) | NVBIT32(3); 343 *pLocalExposeCeMask |= lceMask; 344 345 lceIndex = CE_GET_LOWEST_AVAILABLE_IDX(lceMask); 346 pceIndex = NVBIT32(0); 347 pLocalPceLceMap[pceIndex] = lceIndex; 348 lceMask &= (~(NVBIT32(lceIndex))); 349 350 lceIndex = CE_GET_LOWEST_AVAILABLE_IDX(lceMask); 351 pceIndex = NVBIT32(1); 352 pLocalPceLceMap[pceIndex] = lceIndex; 353 } 354 else if (c2cEnabled && numNvLinkPeers == 0) 355 { 356 lceMask = NVBIT32(2); 357 *pLocalExposeCeMask |= lceMask; 358 lceIndex = CE_GET_LOWEST_AVAILABLE_IDX(lceMask); 359 360 for (hshubId = 2; hshubId < NV_CE_MAX_HSHUBS; hshubId++) 361 { 362 for (i = 0; i < selectPcePerHshub; i++) 363 { 364 pceIndex = CE_GET_LOWEST_AVAILABLE_IDX(pceAvailableMaskPerHshub[hshubId]); 365 if (pceIndex < kceGetPce2lceConfigSize1_HAL(pKCe)) 366 { 367 pceAvailableMaskPerHshub[hshubId] &= (~(NVBIT32(pceIndex))); 368 pLocalPceLceMap[pceIndex] = lceIndex; 369 } 370 } 371 } 372 373 lceMask = NVBIT32(4); 374 *pLocalExposeCeMask |= lceMask; 375 lceIndex = CE_GET_LOWEST_AVAILABLE_IDX(lceMask); 376 377 for (hshubId = 2; hshubId < NV_CE_MAX_HSHUBS; hshubId++) 378 { 379 for (i = 0; i < selectPcePerHshub; i++) 380 { 381 pceIndex = CE_GET_LOWEST_AVAILABLE_IDX(pceAvailableMaskPerHshub[hshubId]); 382 if (pceIndex < kceGetPce2lceConfigSize1_HAL(pKCe)) 383 { 384 pceAvailableMaskPerHshub[hshubId] &= (~(NVBIT32(pceIndex))); 385 pLocalPceLceMap[pceIndex] = lceIndex; 386 } 387 } 388 } 389 } 390 else 391 { 392 status = NV_WARN_NOTHING_TO_DO; 393 } 394 395 return status; 396 } 397 398 /** 399 * @brief This function checks for root port gen speed or GPU 400 * gen speed to determine if we should apply genX+ mapping 401 * or genX- mapping 402 * 403 * @param[in] pGpu OBJGPU pointer 404 * @param[in] pKCe KernelCE pointer 405 * @param[in] checkGen gen X for query 406 */ 407 NvBool 408 kceIsGenXorHigherSupported_GH100 409 ( 410 OBJGPU *pGpu, 411 KernelCE *pKCe, 412 NvU32 checkGen 413 ) 414 { 415 OBJSYS *pSys = SYS_GET_INSTANCE(); 416 OBJCL *pCl = SYS_GET_CL(pSys); 417 NvU8 genSpeed = 0; 418 NvU32 busSpeed = 0; 419 NV_STATUS status = NV_OK; 420 NvBool bIsGenXorHigher = NV_FALSE; 421 422 status = clPcieGetRootGenSpeed(pGpu, pCl, &genSpeed); 423 if (status != NV_OK) 424 { 425 NV_PRINTF(LEVEL_ERROR, "Could not get root gen speed - check for GPU gen speed!\n"); 426 // Check for GPU gen speed 427 if (GPU_BUS_CFG_CYCLE_RD32(pGpu, NV_EP_PCFG_GPU_LINK_CONTROL_STATUS, &busSpeed) != NV_OK) 428 { 429 NV_PRINTF(LEVEL_ERROR, "Unable to read NV_EP_PCFG_GPU_LINK_CONTROL_STATUS from config space.\n"); 430 return bIsGenXorHigher; 431 } 432 genSpeed = GPU_DRF_VAL(_EP_PCFG_GPU, _LINK_CONTROL_STATUS, _CURRENT_LINK_SPEED, busSpeed); 433 } 434 NV_PRINTF(LEVEL_INFO, "Gen Speed = %d\n", genSpeed); 435 436 if ((genSpeed >= checkGen)) 437 { 438 bIsGenXorHigher = NV_TRUE; 439 } 440 441 return bIsGenXorHigher; 442 } 443 444 /** 445 * @brief This function assigns PCE-LCE mappings for GRCE LCEs 0 and 1. 446 * This function additionally takes care of mappings for LCE 2 and 3 447 * in the default case. 448 * 449 * @param[in] pGpu OBJGPU pointer 450 * @param[in] pKCe KernelCE pointer 451 * @param[in] pceAvailableMaskPerHshub Pointer to CEs available per HSHUB 452 * @param[out] pLocalPceLceMap Pointer to PCE-LCE array 453 * @param[out] pLocalExposeCeMask Pointer to LCE Mask 454 */ 455 void 456 kceMapPceLceForGRCE_GH100 457 ( 458 OBJGPU *pGpu, 459 KernelCE *pKCe, 460 NvU32 *pceAvailableMaskPerHshub, 461 NvU32 *pLocalPceLceMap, 462 NvU32 *pLocalExposeCeMask, 463 NvU32 *pLocalGrceMap, 464 NvU32 fbPceMask 465 ) 466 { 467 KernelBif *pKernelBif = GPU_GET_KERNEL_BIF(pGpu); 468 NvU32 grceIdx, pceIndex, i; 469 NvU32 lceIndex = 0; 470 NvU32 lceMask = 0; 471 NvU32 numNvLinkPeers = 0; 472 NvU32 grceMappings[NV_CE_NUM_FBPCE] = {12, 14, 13, 15}; 473 NvBool gen5OrHigher = kceIsGenXorHigherSupported_HAL(pGpu, pKCe, 5); 474 NvBool c2cEnabled = pKernelBif->getProperty(pKernelBif, PDB_PROP_KBIF_IS_C2C_LINK_UP); 475 476 numNvLinkPeers = pKCe->nvlinkNumPeers; 477 478 if (gpuIsCCFeatureEnabled(pGpu) || (c2cEnabled && numNvLinkPeers == 0)) 479 { 480 lceMask = NVBIT32(0) | NVBIT32(1); 481 *pLocalExposeCeMask |= lceMask; 482 483 for (grceIdx = 0; grceIdx < NV_CE_MAX_GRCE; grceIdx++) 484 { 485 for (i = 0; i < NV_CE_MAX_PCE_PER_GRCE; i++) 486 { 487 pceIndex = grceMappings[grceIdx * 2 + i]; 488 489 // 490 // floorswept PCE or 491 // PCIe <= Gen4 experience high latency and requires a 492 // different mapping for LCE2 and LCE3 compared to Gen5. 493 // In PCIe <= Gen4 cases, only link 1 PCE to LCE by 494 // skipping every other PCE in the grceMappings array. 495 // 496 if (pceIndex == 0 || (!gen5OrHigher && (i % 2 == 1))) 497 continue; 498 499 lceIndex = CE_GET_LOWEST_AVAILABLE_IDX(lceMask); 500 pLocalPceLceMap[pceIndex] = lceIndex; 501 } 502 503 lceMask &= (~(NVBIT32(lceIndex))); 504 } 505 } 506 else 507 { 508 // Default case which will result in sharing LCE 2 and 3 with LCE 0 and 1 509 lceMask = NVBIT32(2) | NVBIT32(3); 510 *pLocalExposeCeMask |= lceMask; 511 for (grceIdx = 0; grceIdx < NV_CE_MAX_GRCE; grceIdx++) 512 { 513 for (i = 0; i < NV_CE_MAX_PCE_PER_GRCE; i++) 514 { 515 pceIndex = grceMappings[grceIdx * 2 + i]; 516 517 // floorswept PCE or account for PCIe latency in Gen <= 4 518 if (pceIndex == 0 || (!gen5OrHigher && (i % 2 == 1))) 519 continue; 520 521 lceIndex = CE_GET_LOWEST_AVAILABLE_IDX(lceMask); 522 pLocalPceLceMap[pceIndex] = lceIndex; 523 } 524 525 // update lceMask now that all PCEs are assigned to this LCE 526 lceMask &= (~(NVBIT32(lceIndex))); 527 } 528 529 // GRCE Cases 530 lceMask = kceGetGrceSupportedLceMask_HAL(pGpu, pKCe); 531 *pLocalExposeCeMask |= lceMask; 532 533 for (grceIdx = 0; grceIdx < NV_CE_MAX_GRCE; grceIdx++) 534 { 535 for (i = 0; i < NV_CE_MAX_PCE_PER_GRCE; i++) 536 { 537 pceIndex = grceMappings[grceIdx * 2 + i]; 538 fbPceMask &= (~(NVBIT32(pceIndex))); 539 540 // floorswept PCE 541 if (pceIndex == 0 || (!gen5OrHigher && (i % 2 == 1))) 542 continue; 543 544 // Sharing use case 545 if ((NVBIT32(pLocalPceLceMap[pceIndex])) & *pLocalExposeCeMask) 546 { 547 // GRCE is shared - set the status and shared LCE # in register field 548 lceIndex = pLocalPceLceMap[pceIndex]; 549 pLocalGrceMap[grceIdx] = DRF_NUM(_CE, _GRCE_CONFIG, _SHARED, 1) | 550 DRF_NUM(_CE, _GRCE_CONFIG, _SHARED_LCE, lceIndex); 551 } 552 else 553 { 554 // GRCE got its own FBHUB PCE 555 // Store the LCE in the associated PCE for GRCE 556 lceIndex = CE_GET_LOWEST_AVAILABLE_IDX(lceMask); 557 pLocalPceLceMap[pceIndex] = lceIndex; 558 // Reflect non-sharing status in register field 559 pLocalGrceMap[grceIdx] = DRF_NUM(_CE, _GRCE_CONFIG, _SHARED, 0) | 560 DRF_DEF(_CE, _GRCE_CONFIG, _SHARED_LCE, _NONE); 561 } 562 } 563 564 // update lceMask now that all PCEs are assigned to this LCE 565 lceMask &= (~(NVBIT32(lceIndex))); 566 } 567 } 568 } 569 570 /** 571 * @brief This function assigns PCE-LCE mappings for NVLink peers 572 * Based on HSHUBs that the links associated with a peer connect to, 573 * algorithm will attempt to assign a PCE from associated HSHUB taking into 574 * account striding as well. 575 * 576 * @param[in] pGpu OBJGPU pointer 577 * @param[in] pKCe KernelCE pointer 578 * @param[in] pceAvailableMaskPerHshub Pointer to CEs available per HSHUB 579 * @param[out] pLocalPceLceMap Pointer to PCE-LCE array 580 * @param[out] pLocalExposeCeMask Pointer to LCE Mask 581 * 582 * Returns NV_OK if successful in assigning PCEs and LCEs for each of the NVLink peers 583 */ 584 NV_STATUS 585 kceMapPceLceForNvlinkPeers_GH100 586 ( 587 OBJGPU *pGpu, 588 KernelCE *pKCe, 589 NvU32 *pceAvailableMaskPerHshub, 590 NvU32 *pLocalPceLceMap, 591 NvU32 *pLocalExposeCeMask 592 ) 593 { 594 KernelNvlink *pKernelNvlink = GPU_GET_KERNEL_NVLINK(pGpu); 595 OBJSYS *pSys = SYS_GET_INSTANCE(); 596 NV_STATUS status = NV_OK; 597 NvU32 lceMask = 0; 598 NvU32 pceMask = 0; 599 NvU32 peerLinkMask = 0; 600 KernelCE *pKCeLce = NULL; 601 NvBool bPeerAssigned = NV_FALSE; 602 NvU32 peerAvailableLceMask = NV_CE_LCE_MASK_INIT; 603 OBJGPU *pRemoteGpu; 604 NvU32 numPcePerLink; 605 NvU32 lceIndex, pceIndex; 606 NvU8 hshubId = 0, i; 607 NvU32 linkId, gpuMask, gpuInstance = 0, j; 608 609 NV2080_CTRL_INTERNAL_HSHUB_GET_HSHUB_ID_FOR_LINKS_PARAMS params; 610 611 if (pKernelNvlink == NULL) 612 { 613 return NV_WARN_NOTHING_TO_DO; 614 } 615 616 peerAvailableLceMask = kceGetNvlinkPeerSupportedLceMask_HAL(pGpu, pKCe, peerAvailableLceMask); 617 pKCe->nvlinkNumPeers = 0; 618 619 if (knvlinkIsGpuConnectedToNvswitch(pGpu, pKernelNvlink)) 620 { 621 // 622 // On NVSwitch systems, we only create 1 aperture for all p2p connections. 623 // For PCE2LCE mapping, we should only assign 1 LCE for this connection. 624 // 625 // Since we mark the loopback connections in peerLinkMasks with the appropriate 626 // links (see _nvlinkUpdateSwitchLinkMasks), we can use that to calculate 627 // the PCE2LCE config. 628 // 629 gpuMask = NVBIT32(pGpu->gpuInstance); 630 } 631 else 632 { 633 // On direct connected systems, we'll loop over each GPU in the system 634 // and assign a peer LCE for each connection 635 (void)gpumgrGetGpuAttachInfo(NULL, &gpuMask); 636 } 637 638 while ((pRemoteGpu = gpumgrGetNextGpu(gpuMask, &gpuInstance)) != NULL) 639 { 640 NvU32 numLinksToPeer = knvlinkGetNumLinksToPeer(pGpu, pKernelNvlink, 641 pRemoteGpu); 642 NvU32 maxLceCnt = NV_CE_MAX_LCE_MASK; 643 644 if (numLinksToPeer == 0) 645 { 646 continue; 647 } 648 649 pceMask = 0; 650 lceMask = 0; 651 652 if (peerAvailableLceMask == 0) 653 { 654 // 655 // peerAvailableLceMask is initialized to even async LCEs at the 656 // top of the function. 657 // As a result, if at any point in the loop, this mask == 0, 658 // it implies we have used up all even async LCEs and should move to 659 // using odd async LCEs. 660 // 661 peerAvailableLceMask = kceGetNvlinkPeerSupportedLceMask_HAL(pGpu, pKCe, peerAvailableLceMask); 662 } 663 664 // Each peer gets 1 LCE 665 lceIndex = CE_GET_LOWEST_AVAILABLE_IDX(peerAvailableLceMask); 666 HIGHESTBITIDX_32(maxLceCnt); 667 if (lceIndex < maxLceCnt) 668 { 669 lceMask |= NVBIT32(lceIndex); 670 // Clear out the chosen LCE 671 peerAvailableLceMask &= (~(NVBIT32(lceIndex))); 672 } 673 674 pKCe->nvlinkNumPeers++; 675 676 peerLinkMask = knvlinkGetLinkMaskToPeer(pGpu, pKernelNvlink, pRemoteGpu); 677 if (peerLinkMask == 0) 678 { 679 NV_PRINTF(LEVEL_INFO, "GPU%d has nvlink disabled. Skip programming\n", pRemoteGpu->gpuInstance); 680 continue; 681 } 682 683 portMemSet(¶ms, 0, sizeof(params)); 684 params.linkMask = peerLinkMask; 685 686 status = knvlinkExecGspRmRpc(pGpu, pKernelNvlink, 687 NV2080_CTRL_CMD_INTERNAL_HSHUB_GET_HSHUB_ID_FOR_LINKS, 688 (void *)¶ms, sizeof(params)); 689 NV_ASSERT_OK_OR_RETURN(status); 690 691 // Iterate through links by HSHUB 692 NvU32 linksPerHshub[NV_CE_MAX_HSHUBS] = {0}; 693 694 FOR_EACH_INDEX_IN_MASK(32, linkId, peerLinkMask) 695 { 696 hshubId = params.hshubIds[linkId]; 697 // Update link count for this hshub 698 linksPerHshub[hshubId]++; 699 } 700 FOR_EACH_INDEX_IN_MASK_END; 701 702 for (i = 0; i < NV_CE_MAX_HSHUBS; i++) 703 { 704 if (linksPerHshub[i] == 0) 705 continue; 706 707 pceIndex = CE_GET_LOWEST_AVAILABLE_IDX(pceAvailableMaskPerHshub[i]); 708 numPcePerLink = kceGetNumPceRequired(linksPerHshub[i]); 709 710 for (j = 0; j < numPcePerLink; j++) 711 { 712 _ceGetAlgorithmPceIndex(pGpu, pKCe, pceAvailableMaskPerHshub, &pceIndex, &i); 713 pceMask |= NVBIT32(pceIndex); 714 // Clear out the assigned PCE 715 pceAvailableMaskPerHshub[i] &= (~(NVBIT32(pceIndex))); 716 } 717 718 } 719 720 // Now, assign the PCE-LCE association for the current peer 721 if (pceMask != 0) 722 { 723 // We just need at least one peer to set this to TRUE 724 bPeerAssigned = NV_TRUE; 725 726 FOR_EACH_INDEX_IN_MASK(32, pceIndex, pceMask) 727 { 728 pLocalPceLceMap[pceIndex] = lceIndex; 729 NV_PRINTF(LEVEL_INFO, "GPU%d <-> GPU%d PCE Index: %d LCE Index: %d\n", 730 pGpu->gpuInstance, pRemoteGpu->gpuInstance, pceIndex, lceIndex); 731 } 732 FOR_EACH_INDEX_IN_MASK_END; 733 734 // Store lceMask in the exposeCeMask before moving on 735 *pLocalExposeCeMask |= lceMask; 736 } 737 738 pKCeLce = GPU_GET_KCE(pGpu, lceIndex); 739 pKCeLce->nvlinkPeerMask |= NVBIT(pRemoteGpu->gpuInstance); 740 741 // 742 // Bug 200659256 - Looping over GPUs rather than peers (CL 28776130) 743 // does not handle multi-GPUs/Peer as is the case on switch systems. 744 // We must only take this loop once on switch systems to account for this. 745 // If we need to support multiple peer connections with switch systems 746 // in the future, this code must be revisited 747 // 748 if (pSys->getProperty(pSys, PDB_PROP_SYS_FABRIC_IS_EXTERNALLY_MANAGED)) 749 { 750 break; 751 } 752 753 } 754 755 if (bPeerAssigned == NV_FALSE) 756 { 757 status = NV_WARN_NOTHING_TO_DO; 758 } 759 760 return status; 761 } 762 763 /** 764 * @brief Some clients rely on LCE 4 also being turned on when there 765 * are no NVLink peers. This function sets up the default links. 766 * 767 * @param[in] pGpu OBJGPU pointer 768 * @param[in] pKCe KernelCE pointer 769 * @param[in] pceAvailableMaskPerHshub Pointer to CEs available per HSHUB 770 * @param[out] pLocalPceLceMap Pointer to PCE-LCE array 771 * @param[out] pLocalExposeCeMask Pointer to LCE Mask 772 * 773 * Returns NV_OK if successful in assigning PCEs to a default async LCE (>= 4) 774 */ 775 NV_STATUS 776 kceMapAsyncLceDefault_GH100 777 ( 778 OBJGPU *pGpu, 779 KernelCE *pKCe, 780 NvU32 *pceAvailableMaskPerHshub, 781 NvU32 *pLocalPceLceMap, 782 NvU32 *pLocalExposeCeMask, 783 NvU32 numDefaultPces 784 ) 785 { 786 NvU32 peerAvailableLceMask = NV_CE_LCE_MASK_INIT; 787 NvU32 lceMask = 0; 788 NvU32 pceMask = 0; 789 NvU32 lceIndex, pceIndex, hshubId, i; 790 NvU32 maxLceCnt = NV_CE_MAX_LCE_MASK; 791 792 peerAvailableLceMask = kceGetNvlinkPeerSupportedLceMask_HAL(pGpu, pKCe, peerAvailableLceMask); 793 hshubId = 1; 794 795 // 796 // If no peers were found, then no async LCEs (>= 4) will be turned on. 797 // However, some clients rely on LCE 4 being present even without any 798 // NVLink peers being found. So, turn on the 1st available async LCE (>= 4) 799 // Reference bug 3042556 800 // 801 lceIndex = CE_GET_LOWEST_AVAILABLE_IDX(peerAvailableLceMask); 802 HIGHESTBITIDX_32(maxLceCnt); 803 if (lceIndex < maxLceCnt) 804 { 805 lceMask |= NVBIT32(lceIndex); 806 // Clear out the chosen LCE 807 peerAvailableLceMask &= (~(NVBIT32(lceIndex))); 808 } 809 810 // Assign PCEs to this LCE based on input request 811 for (i = 0; i < numDefaultPces; i++) 812 { 813 if (i % NV_CE_PCE_PER_HSHUB == 0) 814 hshubId++; 815 816 pceIndex = CE_GET_LOWEST_AVAILABLE_IDX(pceAvailableMaskPerHshub[hshubId]); 817 if (pceIndex < kceGetPce2lceConfigSize1_HAL(pKCe)) 818 { 819 pceMask |= NVBIT32(pceIndex); 820 pceAvailableMaskPerHshub[hshubId] &= (~(NVBIT32(pceIndex))); 821 } 822 } 823 824 FOR_EACH_INDEX_IN_MASK(32, pceIndex, pceMask) 825 { 826 pLocalPceLceMap[pceIndex] = lceIndex; 827 NV_PRINTF(LEVEL_INFO, "GPU%d <-> GPU%d PCE Index: %d LCE Index: %d\n", 828 pGpu->gpuInstance, pGpu->gpuInstance, pceIndex, lceIndex); 829 } 830 FOR_EACH_INDEX_IN_MASK_END; 831 832 // Store lceMask in the exposeCeMask before moving on 833 *pLocalExposeCeMask |= lceMask; 834 835 return NV_OK; 836 837 } 838 839 NV_STATUS 840 kceGetMappings_GH100 841 ( 842 OBJGPU *pGpu, 843 KernelCE *pKCe, 844 NVLINK_TOPOLOGY_PARAMS *pTopoParams, 845 NvU32 *pLocalPceLceMap, 846 NvU32 *pLocalGrceMap, 847 NvU32 *pExposeCeMask 848 ) 849 { 850 NV_STATUS status = NV_OK; 851 NV_STATUS statusC2C = NV_OK; 852 KernelNvlink *pKernelNvlink = GPU_GET_KERNEL_NVLINK(pGpu); 853 NvU32 topoIdx = NV_CE_INVALID_TOPO_IDX; 854 NvBool bEntryExists = NV_FALSE; 855 NvU32 pce2lceConfigSize1 = kceGetPce2lceConfigSize1_HAL(pKCe); 856 NvU32 grceConfigSize1 = kceGetGrceConfigSize1_HAL(pKCe); 857 NvU32 pceIdx, grceIdx; 858 859 // 860 // In the self hosted case, utilize table entries 861 // with pre defined mappings. Calling from the parent would result in 862 // using the incorrect autoconfig table so instead set the necessary 863 // values here if config is found in the table. 864 // 865 if (gpuIsSelfHosted(pGpu) && !(pGpu->getProperty(pGpu, PDB_PROP_GPU_SKIP_TABLE_CE_MAP))) 866 { 867 bEntryExists = kceGetAutoConfigTableEntry_HAL(pGpu, pKCe, pTopoParams, nvLinkCeAutoConfigTable_GH100, 868 NV_ARRAY_ELEMENTS(nvLinkCeAutoConfigTable_GH100), 869 &topoIdx, pExposeCeMask); 870 if (bEntryExists) 871 { 872 // Since entry exists, fill local variables with the associated table entry 873 for (pceIdx = 0; pceIdx < pce2lceConfigSize1; pceIdx++) 874 { 875 pLocalPceLceMap[pceIdx] = nvLinkCeAutoConfigTable_GH100[topoIdx].pceLceMap[pceIdx]; 876 } 877 for (grceIdx = 0; grceIdx < grceConfigSize1; grceIdx++) 878 { 879 pLocalGrceMap[grceIdx] = nvLinkCeAutoConfigTable_GH100[topoIdx].grceConfig[grceIdx]; 880 } 881 882 pTopoParams->maxTopoIdx = topoIdx; 883 pTopoParams->sysmemLinks = nvLinkCeAutoConfigTable_GH100[topoIdx].sysmemLinks; 884 pTopoParams->maxLinksPerPeer = nvLinkCeAutoConfigTable_GH100[topoIdx].maxLinksPerPeer; 885 pTopoParams->numPeers = nvLinkCeAutoConfigTable_GH100[topoIdx].numPeers; 886 pTopoParams->bSymmetric = nvLinkCeAutoConfigTable_GH100[topoIdx].bSymmetric; 887 pTopoParams->bSwitchConfig = nvLinkCeAutoConfigTable_GH100[topoIdx].bSwitchConfig; 888 889 return NV_OK; 890 } 891 892 } 893 894 //Prepare the per-HSHUB/FBHUB available PCE mask 895 kceGetAvailableHubPceMask(pGpu, pKCe, pTopoParams); 896 897 // Assign PCEs to "PEER"s if nvlink is enabled 898 if (pKernelNvlink && !knvlinkIsForcedConfig(pGpu, pKernelNvlink)) 899 { 900 status = kceMapPceLceForNvlinkPeers_HAL(pGpu, pKCe, 901 pTopoParams->pceAvailableMaskPerHshub, 902 pLocalPceLceMap, 903 pExposeCeMask); 904 } 905 else 906 { 907 status = NV_WARN_NOTHING_TO_DO; 908 } 909 910 // Special C2C cases for LCE 2 and 3 911 statusC2C = kceMapPceLceForC2C_HAL(pGpu, pKCe, 912 pTopoParams->pceAvailableMaskPerHshub, 913 pLocalPceLceMap, pExposeCeMask); 914 915 // Assign PCEs for GRCE case 916 kceMapPceLceForGRCE_HAL(pGpu, pKCe, 917 pTopoParams->pceAvailableMaskPerHshub, 918 pLocalPceLceMap, pExposeCeMask, pLocalGrceMap, pTopoParams->fbhubPceMask); 919 920 if ((status == NV_WARN_NOTHING_TO_DO && statusC2C == NV_WARN_NOTHING_TO_DO) || 921 (status == NV_ERR_NOT_SUPPORTED && statusC2C == NV_ERR_NOT_SUPPORTED)) 922 { 923 // If there's no NVLink peers available, still expose an additional async LCE 924 status = kceMapAsyncLceDefault_HAL(pGpu, pKCe, 925 pTopoParams->pceAvailableMaskPerHshub, 926 pLocalPceLceMap, 927 pExposeCeMask, 928 NV_CE_NUM_PCES_NO_LINK_CASE); 929 } 930 931 NV_PRINTF(LEVEL_INFO, "status = %d, statusC2C = %d\n", status, statusC2C); 932 return NV_OK; 933 } 934