1 /*
2 * SPDX-FileCopyrightText: Copyright (c) 2021-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
3 * SPDX-License-Identifier: MIT
4 *
5 * Permission is hereby granted, free of charge, to any person obtaining a
6 * copy of this software and associated documentation files (the "Software"),
7 * to deal in the Software without restriction, including without limitation
8 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
9 * and/or sell copies of the Software, and to permit persons to whom the
10 * Software is furnished to do so, subject to the following conditions:
11 *
12 * The above copyright notice and this permission notice shall be included in
13 * all copies or substantial portions of the Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
21 * DEALINGS IN THE SOFTWARE.
22 */
23
24 #include "gpu/gpu.h"
25 #include "gpu/ce/kernel_ce.h"
26 #include "gpu/nvlink/kernel_nvlink.h"
27 #include "gpu/ce/kernel_ce_private.h"
28 #include "gpu/bif/kernel_bif.h"
29 #include "platform/chipset/chipset.h"
30
31 #include "published/hopper/gh100/dev_ce.h"
32 #include "published/hopper/gh100/dev_xtl_ep_pcfg_gpu.h"
33
34 #define NV_CE_INVALID_TOPO_IDX 0xFFFF
35
36 // Defines for PCE-LCE mapping algorithm
37 #define NV_CE_MAX_HSHUBS 5
38 #define NV_CE_LCE_MASK_INIT 0xFFFFFFFF
39 #define NV_CE_GRCE_ALLOWED_LCE_MASK 0x03
40 #define NV_CE_MAX_GRCE 2
41 #define NV_CE_EVEN_ASYNC_LCE_MASK 0x55555550
42 #define NV_CE_ODD_ASYNC_LCE_MASK 0xAAAAAAA0
43 #define NV_CE_MAX_LCE_MASK 0x3FF
44 #define NV_CE_PCE_PER_HSHUB 4
45 #define NV_CE_NUM_FBPCE 4
46 #define NV_CE_NUM_PCES_NO_LINK_CASE 12
47 #define NV_CE_MAX_PCE_PER_GRCE 2
48 #define NV_CE_HSHUBNVL_ID_0 2
49
50 /*
51 * Table for setting the PCE2LCE mapping for WAR configs that cannot be implemented
52 * using the algorithm because the config does not conform to the algorithm's set
53 * of requirements/assumptions
54 */
55 static NVLINK_CE_AUTO_CONFIG_TABLE nvLinkCeAutoConfigTable_GH100[] =
56 {
57 //
58 // #systmem #max #peers Symmetric Switch PCE-LCE GRCE exposeCe
59 // links (links/peer) Config? Config Map Config Mask
60
61 // Default minimal configuration - NOTE: do not add entrys before this
62 // Default is for CG1
63 {0x0, 0x0, 0x0, NV_FALSE, NV_FALSE, {0x4,0x6,0x4,0x6,0x4,0x6,0x4,0x6,
64 0x4,0x4,0x4,0x6,0x4,0x6,0x4,0x6}, {0x4,0x6}, 0x53},
65 {0x0, 0x0, 0x0, NV_TRUE, NV_FALSE, {0x4,0x6,0x4,0x6,0x4,0x6,0x4,0x6,
66 0x4,0x4,0x4,0x6,0x4,0x6,0x4,0x6}, {0x4,0x6}, 0x53},
67
68 // Switch cases - Ranger mapping
69 {0x0, 0x12, 0x1, NV_TRUE, NV_TRUE, {0x4,0x6,0x4,0x6,0x4,0x6,0x4,0x6,
70 0x4,0x4,0x4,0x6,0x4,0x6,0x4,0x6}, {0x4,0x6}, 0x53},
71 {0x0, 0x6, 0x1, NV_TRUE, NV_TRUE, {0x4,0x6,0x4,0x6,0x4,0x6,0x4,0x6,
72 0x4,0x4,0x4,0x6,0x4,0x6,0x4,0x6}, {0x4,0x6}, 0x53},
73 // CG4 mapping
74 {0x0, 0x6, 0x3, NV_TRUE, NV_FALSE, {0x4,0x4,0x4,0x4,0x8,0x8,0x8,0x4,
75 0x4,0x6,0x4,0x6,0x4,0x6,0x4,0x6}, {0x4,0x6}, 0x153},
76 {0x0, 0x6, 0x3, NV_FALSE, NV_FALSE, {0x4,0x4,0x4,0x4,0x8,0x8,0x8,0x4,
77 0x4,0x6,0x4,0x6,0x4,0x6,0x4,0x6}, {0x4,0x6}, 0x153},
78 {0x0, 0x6, 0x2, NV_TRUE, NV_FALSE, {0x4,0x4,0x4,0x4,0xF,0xF,0xF,0x4,
79 0x4,0x6,0x4,0x6,0x4,0x6,0x4,0x6}, {0x4,0x6}, 0x53},
80 {0x0, 0x6, 0x2, NV_FALSE, NV_FALSE, {0x4,0x4,0x4,0x4,0xF,0xF,0xF,0x4,
81 0x4,0x6,0x4,0x6,0x4,0x6,0x4,0x6}, {0x4,0x6}, 0x53},
82 {0x0, 0x6, 0x1, NV_TRUE, NV_FALSE, {0x4,0x4,0x4,0x4,0xF,0xF,0xF,0x4,
83 0x4,0xF,0x4,0xF,0x4,0xF,0x4,0xF}, {0x4,0xF}, 0x13},
84 {0x0, 0x6, 0x1, NV_FALSE, NV_FALSE, {0x4,0x4,0x4,0x4,0xF,0xF,0xF,0x4,
85 0x4,0xF,0x4,0xF,0x4,0xF,0x4,0xF}, {0x4,0xF}, 0x13},
86 {0x0, 0x5, 0x3, NV_TRUE, NV_FALSE, {0x4,0x4,0x4,0x4,0x8,0x8,0x8,0x4,
87 0x4,0x6,0x4,0x6,0x4,0x6,0x4,0x6}, {0x4,0x6}, 0x153},
88 {0x0, 0x5, 0x3, NV_FALSE, NV_FALSE, {0x4,0x4,0x4,0x4,0x8,0x8,0x8,0x4,
89 0x4,0x6,0x4,0x6,0x4,0x6,0x4,0x6}, {0x4,0x6}, 0x153},
90 {0x0, 0x5, 0x2, NV_TRUE, NV_FALSE, {0x4,0x4,0x4,0x4,0xF,0xF,0xF,0x4,
91 0x4,0x6,0x4,0x6,0x4,0x6,0x4,0x6}, {0x4,0x6}, 0x53},
92 {0x0, 0x5, 0x2, NV_FALSE, NV_FALSE, {0x4,0x4,0x4,0x4,0xF,0xF,0xF,0x4,
93 0x4,0x6,0x4,0x6,0x4,0x6,0x4,0x6}, {0x4,0x6}, 0x53},
94 {0x0, 0x5, 0x1, NV_TRUE, NV_FALSE, {0x4,0x4,0x4,0x4,0xF,0xF,0xF,0x4,
95 0x4,0xF,0x4,0xF,0x4,0xF,0x4,0xF}, {0x4,0xF}, 0x13},
96 {0x0, 0x5, 0x1, NV_FALSE, NV_FALSE, {0x4,0x4,0x4,0x4,0xF,0xF,0xF,0x4,
97 0x4,0xF,0x4,0xF,0x4,0xF,0x4,0xF}, {0x4,0xF}, 0x13},
98 {0x0, 0x4, 0x3, NV_TRUE, NV_FALSE, {0x4,0x4,0x4,0x4,0x8,0x8,0x8,0x4,
99 0x4,0x6,0x4,0x6,0x4,0x6,0x4,0x6}, {0x4,0x6}, 0x153},
100 {0x0, 0x4, 0x3, NV_FALSE, NV_FALSE, {0x4,0x4,0x4,0x4,0x8,0x8,0x8,0x4,
101 0x4,0x6,0x4,0x6,0x4,0x6,0x4,0x6}, {0x4,0x6}, 0x153},
102 {0x0, 0x4, 0x2, NV_TRUE, NV_FALSE, {0x4,0x4,0x4,0x4,0xF,0xF,0xF,0x4,
103 0x4,0x6,0x4,0x6,0x4,0x6,0x4,0x6}, {0x4,0x6}, 0x53},
104 {0x0, 0x4, 0x2, NV_FALSE, NV_FALSE, {0x4,0x4,0x4,0x4,0xF,0xF,0xF,0x4,
105 0x4,0x6,0x4,0x6,0x4,0x6,0x4,0x6}, {0x4,0x6}, 0x53},
106 {0x0, 0x4, 0x1, NV_TRUE, NV_FALSE, {0x4,0x4,0x4,0x4,0xF,0xF,0xF,0x4,
107 0x4,0xF,0x4,0xF,0x4,0xF,0x4,0xF}, {0x4,0xF}, 0x13},
108 {0x0, 0x4, 0x1, NV_FALSE, NV_FALSE, {0x4,0x4,0x4,0x4,0xF,0xF,0xF,0x4,
109 0x4,0xF,0x4,0xF,0x4,0xF,0x4,0xF}, {0x4,0xF}, 0x13},
110 {0x0, 0x3, 0x3, NV_TRUE, NV_FALSE, {0x4,0x4,0x4,0x4,0x8,0x8,0x8,0x4,
111 0x4,0x6,0x4,0x6,0x4,0x6,0x4,0x6}, {0x4,0x6}, 0x153},
112 {0x0, 0x3, 0x3, NV_FALSE, NV_FALSE, {0x4,0x4,0x4,0x4,0x8,0x8,0x8,0x4,
113 0x4,0x6,0x4,0x6,0x4,0x6,0x4,0x6}, {0x4,0x6}, 0x153},
114 {0x0, 0x3, 0x2, NV_TRUE, NV_FALSE, {0x4,0x4,0x4,0x4,0xF,0xF,0xF,0x4,
115 0x4,0x6,0x4,0x6,0x4,0x6,0x4,0x6}, {0x4,0x6}, 0x53},
116 {0x0, 0x3, 0x2, NV_FALSE, NV_FALSE, {0x4,0x4,0x4,0x4,0xF,0xF,0xF,0x4,
117 0x4,0x6,0x4,0x6,0x4,0x6,0x4,0x6}, {0x4,0x6}, 0x53},
118 {0x0, 0x3, 0x1, NV_TRUE, NV_FALSE, {0x4,0x4,0x4,0x4,0xF,0xF,0xF,0x4,
119 0x4,0xF,0x4,0xF,0x4,0xF,0x4,0xF}, {0x4,0xF}, 0x13},
120 {0x0, 0x3, 0x1, NV_FALSE, NV_FALSE, {0x4,0x4,0x4,0x4,0xF,0xF,0xF,0x4,
121 0x4,0xF,0x4,0xF,0x4,0xF,0x4,0xF}, {0x4,0xF}, 0x13},
122 {0x0, 0x2, 0x3, NV_TRUE, NV_FALSE, {0x4,0x4,0x4,0x4,0x8,0x8,0x8,0x4,
123 0x4,0x6,0x4,0x6,0x4,0x6,0x4,0x6}, {0x4,0x6}, 0x153},
124 {0x0, 0x2, 0x3, NV_FALSE, NV_FALSE, {0x4,0x4,0x4,0x4,0x8,0x8,0x8,0x4,
125 0x4,0x6,0x4,0x6,0x4,0x6,0x4,0x6}, {0x4,0x6}, 0x153},
126 {0x0, 0x2, 0x2, NV_TRUE, NV_FALSE, {0x4,0x4,0x4,0x4,0xF,0xF,0xF,0x4,
127 0x4,0x6,0x4,0x6,0x4,0x6,0x4,0x6}, {0x4,0x6}, 0x53},
128 {0x0, 0x2, 0x2, NV_FALSE, NV_FALSE, {0x4,0x4,0x4,0x4,0xF,0xF,0xF,0x4,
129 0x4,0x6,0x4,0x6,0x4,0x6,0x4,0x6}, {0x4,0x6}, 0x53},
130 {0x0, 0x2, 0x1, NV_TRUE, NV_FALSE, {0x4,0x4,0x4,0x4,0xF,0xF,0xF,0x4,
131 0x4,0xF,0x4,0xF,0x4,0xF,0x4,0xF}, {0x4,0xF}, 0x13},
132 {0x0, 0x2, 0x1, NV_FALSE, NV_FALSE, {0x4,0x4,0x4,0x4,0xF,0xF,0xF,0x4,
133 0x4,0xF,0x4,0xF,0x4,0xF,0x4,0xF}, {0x4,0xF}, 0x13},
134 {0x0, 0x1, 0x3, NV_TRUE, NV_FALSE, {0x4,0x4,0x4,0x4,0x8,0x8,0x8,0x4,
135 0x4,0x6,0x4,0x6,0x4,0x6,0x4,0x6}, {0x4,0x6}, 0x153},
136 {0x0, 0x1, 0x3, NV_FALSE, NV_FALSE, {0x4,0x4,0x4,0x4,0x8,0x8,0x8,0x4,
137 0x4,0x6,0x4,0x6,0x4,0x6,0x4,0x6}, {0x4,0x6}, 0x153},
138 {0x0, 0x1, 0x2, NV_TRUE, NV_FALSE, {0x4,0x4,0x4,0x4,0xF,0xF,0xF,0x4,
139 0x4,0x6,0x4,0x6,0x4,0x6,0x4,0x6}, {0x4,0x6}, 0x53},
140 {0x0, 0x1, 0x2, NV_FALSE, NV_FALSE, {0x4,0x4,0x4,0x4,0xF,0xF,0xF,0x4,
141 0x4,0x6,0x4,0x6,0x4,0x6,0x4,0x6}, {0x4,0x6}, 0x53},
142 {0x0, 0x1, 0x1, NV_TRUE, NV_FALSE, {0x4,0x4,0x4,0x4,0xF,0xF,0xF,0x4,
143 0x4,0xF,0x4,0xF,0x4,0xF,0x4,0xF}, {0x4,0xF}, 0x13},
144 {0x0, 0x1, 0x1, NV_FALSE, NV_FALSE, {0x4,0x4,0x4,0x4,0xF,0xF,0xF,0x4,
145 0x4,0xF,0x4,0xF,0x4,0xF,0x4,0xF}, {0x4,0xF}, 0x13}
146 };
147
/*!
 * @brief Returns the size of the PCE2LCE register array
 *
 * @param[in] pKCe  KernelCE pointer
 *
 * @return NV_CE_PCE2LCE_CONFIG__SIZE_1
 */
158 NvU32
kceGetPce2lceConfigSize1_GH100(KernelCE * pKCe)159 kceGetPce2lceConfigSize1_GH100
160 (
161 KernelCE *pKCe
162 )
163 {
164 return NV_CE_PCE2LCE_CONFIG__SIZE_1;
165 }
166
/**
 * @brief This function takes in a link count and returns the minimum number
 * of PCE connections required. This is roughly a round-up approach where
 * each PCE can handle 1.5 links; see the function body for the exact
 * per-link-count values used.
 */
172 NvU32
kceGetNumPceRequired(NvU32 numLinks)173 kceGetNumPceRequired
174 (
175 NvU32 numLinks
176 )
177 {
178 switch(numLinks)
179 {
180 case 6:
181 return 4;
182 case 5:
183 case 4:
184 return 3;
185 case 3:
186 return 2;
187 case 2:
188 case 1:
189 default:
190 return 1;
191 }
192 }
193
/*
 * Look up entry in NVLINK_CE_AUTO_CONFIG_TABLE
 *
 * @param[in]  pGpu                 OBJGPU pointer
 * @param[in]  pKCe                 KernelCE pointer
 * @param[in]  pCurrentTopo         NVLINK_TOPOLOGY_PARAMS pointer
 * @param[in]  pTable               NVLINK_CE_AUTO_CONFIG_TABLE pointer
 * @param[in]  autoConfigNumEntries NvU32 num entries within pTable
 * @param[out] pIdx                 NvU32 pointer
 * @param[out] pExposeCeMask        NvU32 pointer
 *
 * Returns: NV_TRUE if entry is found
 *          NV_FALSE otherwise
 */
208 NvBool
kceGetAutoConfigTableEntry_GH100(OBJGPU * pGpu,KernelCE * pKCe,NVLINK_TOPOLOGY_PARAMS * pCurrentTopo,NVLINK_CE_AUTO_CONFIG_TABLE * pTable,NvU32 autoConfigNumEntries,NvU32 * pIdx,NvU32 * pExposeCeMask)209 kceGetAutoConfigTableEntry_GH100
210 (
211 OBJGPU *pGpu,
212 KernelCE *pKCe,
213 NVLINK_TOPOLOGY_PARAMS *pCurrentTopo,
214 NVLINK_CE_AUTO_CONFIG_TABLE *pTable,
215 NvU32 autoConfigNumEntries,
216 NvU32 *pIdx,
217 NvU32 *pExposeCeMask
218 )
219 {
220 NvU32 i;
221
222 //
223 // The auto config table entries will only be applicable
224 // from this function in SHH cases. Rather than
225 // introduced a new entry in the table to note SHH,
226 // in order to preserve backwards compatibility this
227 // function will only attempt to map if we are confirmed
228 // to be in SHH path.
229 //
230 if (!gpuIsSelfHosted(pGpu))
231 {
232 return NV_FALSE;
233 }
234
235 for (i = 0; i < autoConfigNumEntries; i++)
236 {
237 if ((pTable[i].sysmemLinks == pCurrentTopo->sysmemLinks ) &&
238 (pTable[i].maxLinksPerPeer == pCurrentTopo->maxLinksPerPeer) &&
239 (pTable[i].bSymmetric == pCurrentTopo->bSymmetric ) &&
240 (pTable[i].bSwitchConfig == pCurrentTopo->bSwitchConfig ) &&
241 (pTable[i].numPeers == pCurrentTopo->numPeers ))
242 {
243 *pIdx = i;
244 *pExposeCeMask = pTable[i].exposeCeMask;
245 return NV_TRUE;
246 }
247 }
248 return NV_FALSE;
249 }
250
251 /**
252 * @brief This function returns the pceIndex for a particular link ID
253 * Must always be called with the hshub ID for the calling link ID
254 *
255 * @param[in] pGpu OBJGPU pointer
256 * @param[in] pKCe KernelCE pointer
257 * @param[in] pceAvailableMaskPerHshub Pointer to CEs available per HSHUB
258 * @param[out] pceIndex Pointer to caller pceIndex
259 * @param[out] pHshubId Pointer to caller HSHUB ID
260 */
261 static void
_ceGetAlgorithmPceIndex(OBJGPU * pGpu,KernelCE * pKCe,NvU32 * pceAvailableMaskPerHshub,NvU32 * pceIndex,NvU8 * pHshubId)262 _ceGetAlgorithmPceIndex
263 (
264 OBJGPU *pGpu,
265 KernelCE *pKCe,
266 NvU32 *pceAvailableMaskPerHshub,
267 NvU32 *pceIndex,
268 NvU8 *pHshubId
269 )
270 {
271 NvU8 pHshubIdRequested;
272 NvU32 i;
273
274 if ((pceIndex != NULL) && *pceIndex >= kceGetPce2lceConfigSize1_HAL(pKCe))
275 {
276 NV_PRINTF(LEVEL_ERROR, "Invalid PCE request. pceIndex = %d pceCnt = %d\n", *pceIndex, kceGetPce2lceConfigSize1_HAL(pKCe));
277 return;
278 }
279
280 if (!(NVBIT32(*pceIndex) & pceAvailableMaskPerHshub[*pHshubId]))
281 {
282 //
283 // 1. We couldn't find an applicable strided PCE in given HSHUB
284 // So, we'll assign the next consecutive PCE on the same HSHUB
285 //
286 *pceIndex = CE_GET_LOWEST_AVAILABLE_IDX(pceAvailableMaskPerHshub[*pHshubId]);
287 if (!(NVBIT32(*pceIndex) & pceAvailableMaskPerHshub[*pHshubId]))
288 {
289 // 2. If this is not a valid PCE on given HSHUB, assign PCE from alternative HSHUB
290 pHshubIdRequested = *pHshubId;
291 for (i = pHshubIdRequested + 1; i != pHshubIdRequested; i++) {
292 if (i > 4) {
293 i = 1;
294 continue;
295 }
296
297 *pceIndex = CE_GET_LOWEST_AVAILABLE_IDX(pceAvailableMaskPerHshub[i]);
298 if (NVBIT32(*pceIndex) & pceAvailableMaskPerHshub[i]) {
299 break;
300 }
301 }
302
303 if (i == pHshubIdRequested)
304 {
305 // If we've reached this point, then we have no more available PCEs to assign
306 NV_PRINTF(LEVEL_ERROR, "No more available PCEs to assign!\n");
307 NV_ASSERT(0);
308 }
309 }
310 }
311 return;
312 }
313
314 /**
315 * @brief This function assigns LCE 2 and 3 mappings for C2C cases.
316 *
317 * @param[in] pGpu OBJGPU pointer
318 * @param[in] pKCe KernelCE pointer
319 * @param[in] pceAvailableMaskPerHshub Pointer to CEs available per HSHUB
320 * @param[out] pLocalPceLceMap Pointer to PCE-LCE array
321 * @param[out] pLocalExposeCeMask Pointer to LCE Mask
322 */
323 NV_STATUS
kceMapPceLceForC2C_GH100(OBJGPU * pGpu,KernelCE * pKCe,NvU32 * pceAvailableMaskPerHshub,NvU32 * pLocalPceLceMap,NvU32 * pLocalExposeCeMask)324 kceMapPceLceForC2C_GH100
325 (
326 OBJGPU *pGpu,
327 KernelCE *pKCe,
328 NvU32 *pceAvailableMaskPerHshub,
329 NvU32 *pLocalPceLceMap,
330 NvU32 *pLocalExposeCeMask
331 )
332 {
333 NV_STATUS status = NV_OK;
334 KernelBif *pKernelBif = GPU_GET_KERNEL_BIF(pGpu);
335 NvU32 pceIndex, i, hshubId, lceMask, lceIndex;
336 NvU32 numNvLinkPeers = 0;
337 NvU32 selectPcePerHshub = 2;
338 NvBool c2cEnabled = pKernelBif->getProperty(pKernelBif, PDB_PROP_KBIF_IS_C2C_LINK_UP);
339
340 numNvLinkPeers = pKCe->nvlinkNumPeers;
341 if (gpuIsCCFeatureEnabled(pGpu) || (c2cEnabled && numNvLinkPeers == 0 && IS_MIG_IN_USE(pGpu)))
342 {
343 lceMask = NVBIT32(2) | NVBIT32(3);
344 *pLocalExposeCeMask |= lceMask;
345
346 lceIndex = CE_GET_LOWEST_AVAILABLE_IDX(lceMask);
347 pceIndex = NVBIT32(0);
348 pLocalPceLceMap[pceIndex] = lceIndex;
349 lceMask &= (~(NVBIT32(lceIndex)));
350
351 lceIndex = CE_GET_LOWEST_AVAILABLE_IDX(lceMask);
352 pceIndex = NVBIT32(1);
353 pLocalPceLceMap[pceIndex] = lceIndex;
354 }
355 else if (c2cEnabled && numNvLinkPeers == 0)
356 {
357 lceMask = NVBIT32(2);
358 *pLocalExposeCeMask |= lceMask;
359 lceIndex = CE_GET_LOWEST_AVAILABLE_IDX(lceMask);
360
361 for (hshubId = 2; hshubId < NV_CE_MAX_HSHUBS; hshubId++)
362 {
363 for (i = 0; i < selectPcePerHshub; i++)
364 {
365 pceIndex = CE_GET_LOWEST_AVAILABLE_IDX(pceAvailableMaskPerHshub[hshubId]);
366 if (pceIndex < kceGetPce2lceConfigSize1_HAL(pKCe))
367 {
368 pceAvailableMaskPerHshub[hshubId] &= (~(NVBIT32(pceIndex)));
369 pLocalPceLceMap[pceIndex] = lceIndex;
370 }
371 }
372 }
373
374 lceMask = NVBIT32(4);
375 *pLocalExposeCeMask |= lceMask;
376 lceIndex = CE_GET_LOWEST_AVAILABLE_IDX(lceMask);
377
378 for (hshubId = 2; hshubId < NV_CE_MAX_HSHUBS; hshubId++)
379 {
380 for (i = 0; i < selectPcePerHshub; i++)
381 {
382 pceIndex = CE_GET_LOWEST_AVAILABLE_IDX(pceAvailableMaskPerHshub[hshubId]);
383 if (pceIndex < kceGetPce2lceConfigSize1_HAL(pKCe))
384 {
385 pceAvailableMaskPerHshub[hshubId] &= (~(NVBIT32(pceIndex)));
386 pLocalPceLceMap[pceIndex] = lceIndex;
387 }
388 }
389 }
390 }
391 else
392 {
393 status = NV_WARN_NOTHING_TO_DO;
394 }
395
396 return status;
397 }
398
399 /**
400 * @brief This function checks for root port gen speed or GPU
401 * gen speed to determine if we should apply genX+ mapping
402 * or genX- mapping
403 *
404 * @param[in] pGpu OBJGPU pointer
405 * @param[in] pKCe KernelCE pointer
406 * @param[in] checkGen gen X for query
407 */
408 NvBool
kceIsGenXorHigherSupported_GH100(OBJGPU * pGpu,KernelCE * pKCe,NvU32 checkGen)409 kceIsGenXorHigherSupported_GH100
410 (
411 OBJGPU *pGpu,
412 KernelCE *pKCe,
413 NvU32 checkGen
414 )
415 {
416 OBJSYS *pSys = SYS_GET_INSTANCE();
417 OBJCL *pCl = SYS_GET_CL(pSys);
418 NvU8 genSpeed = 0;
419 NvU32 busSpeed = 0;
420 NV_STATUS status = NV_OK;
421 NvBool bIsGenXorHigher = NV_FALSE;
422
423 status = clPcieGetRootGenSpeed(pGpu, pCl, &genSpeed);
424 if (status != NV_OK)
425 {
426 NV_PRINTF(LEVEL_ERROR, "Could not get root gen speed - check for GPU gen speed!\n");
427 // Check for GPU gen speed
428 if (GPU_BUS_CFG_CYCLE_RD32(pGpu, NV_EP_PCFG_GPU_LINK_CONTROL_STATUS, &busSpeed) != NV_OK)
429 {
430 NV_PRINTF(LEVEL_ERROR, "Unable to read NV_EP_PCFG_GPU_LINK_CONTROL_STATUS from config space.\n");
431 return bIsGenXorHigher;
432 }
433 genSpeed = GPU_DRF_VAL(_EP_PCFG_GPU, _LINK_CONTROL_STATUS, _CURRENT_LINK_SPEED, busSpeed);
434 }
435 NV_PRINTF(LEVEL_INFO, "Gen Speed = %d\n", genSpeed);
436
437 if ((genSpeed >= checkGen))
438 {
439 bIsGenXorHigher = NV_TRUE;
440 }
441
442 return bIsGenXorHigher;
443 }
444
445 /**
446 * @brief This function assigns PCE-LCE mappings for GRCE LCEs 0 and 1.
447 * This function additionally takes care of mappings for LCE 2 and 3
448 * in the default case.
449 *
450 * @param[in] pGpu OBJGPU pointer
451 * @param[in] pKCe KernelCE pointer
452 * @param[in] pceAvailableMaskPerHshub Pointer to CEs available per HSHUB
453 * @param[out] pLocalPceLceMap Pointer to PCE-LCE array
454 * @param[out] pLocalExposeCeMask Pointer to LCE Mask
455 */
456 void
kceMapPceLceForGRCE_GH100(OBJGPU * pGpu,KernelCE * pKCe,NvU32 * pceAvailableMaskPerHshub,NvU32 * pLocalPceLceMap,NvU32 * pLocalExposeCeMask,NvU32 * pLocalGrceMap,NvU32 fbPceMask)457 kceMapPceLceForGRCE_GH100
458 (
459 OBJGPU *pGpu,
460 KernelCE *pKCe,
461 NvU32 *pceAvailableMaskPerHshub,
462 NvU32 *pLocalPceLceMap,
463 NvU32 *pLocalExposeCeMask,
464 NvU32 *pLocalGrceMap,
465 NvU32 fbPceMask
466 )
467 {
468 KernelBif *pKernelBif = GPU_GET_KERNEL_BIF(pGpu);
469 NvU32 grceIdx, pceIndex, i;
470 NvU32 lceIndex = 0;
471 NvU32 lceMask = 0;
472 NvU32 numNvLinkPeers = 0;
473 NvU32 grceMappings[NV_CE_NUM_FBPCE] = {12, 14, 13, 15};
474 NvBool gen5OrHigher = kceIsGenXorHigherSupported_HAL(pGpu, pKCe, 5);
475 NvBool c2cEnabled = pKernelBif->getProperty(pKernelBif, PDB_PROP_KBIF_IS_C2C_LINK_UP);
476
477 numNvLinkPeers = pKCe->nvlinkNumPeers;
478
479 if (gpuIsCCFeatureEnabled(pGpu) || (c2cEnabled && numNvLinkPeers == 0))
480 {
481 lceMask = NVBIT32(0) | NVBIT32(1);
482 *pLocalExposeCeMask |= lceMask;
483
484 for (grceIdx = 0; grceIdx < NV_CE_MAX_GRCE; grceIdx++)
485 {
486 for (i = 0; i < NV_CE_MAX_PCE_PER_GRCE; i++)
487 {
488 pceIndex = grceMappings[grceIdx * 2 + i];
489
490 //
491 // floorswept PCE or
492 // PCIe <= Gen4 experience high latency and requires a
493 // different mapping for LCE2 and LCE3 compared to Gen5.
494 // In PCIe <= Gen4 cases, only link 1 PCE to LCE by
495 // skipping every other PCE in the grceMappings array.
496 //
497 if (pceIndex == 0 || (!gen5OrHigher && (i % 2 == 1)))
498 continue;
499
500 lceIndex = CE_GET_LOWEST_AVAILABLE_IDX(lceMask);
501 pLocalPceLceMap[pceIndex] = lceIndex;
502 }
503
504 lceMask &= (~(NVBIT32(lceIndex)));
505 }
506 }
507 else
508 {
509 // Default case which will result in sharing LCE 2 and 3 with LCE 0 and 1
510 lceMask = NVBIT32(2) | NVBIT32(3);
511 *pLocalExposeCeMask |= lceMask;
512 for (grceIdx = 0; grceIdx < NV_CE_MAX_GRCE; grceIdx++)
513 {
514 for (i = 0; i < NV_CE_MAX_PCE_PER_GRCE; i++)
515 {
516 pceIndex = grceMappings[grceIdx * 2 + i];
517
518 // floorswept PCE or account for PCIe latency in Gen <= 4
519 if (pceIndex == 0 || (!gen5OrHigher && (i % 2 == 1)))
520 continue;
521
522 lceIndex = CE_GET_LOWEST_AVAILABLE_IDX(lceMask);
523 pLocalPceLceMap[pceIndex] = lceIndex;
524 }
525
526 // update lceMask now that all PCEs are assigned to this LCE
527 lceMask &= (~(NVBIT32(lceIndex)));
528 }
529
530 // GRCE Cases
531 lceMask = kceGetGrceSupportedLceMask_HAL(pGpu, pKCe);
532 *pLocalExposeCeMask |= lceMask;
533
534 for (grceIdx = 0; grceIdx < NV_CE_MAX_GRCE; grceIdx++)
535 {
536 for (i = 0; i < NV_CE_MAX_PCE_PER_GRCE; i++)
537 {
538 pceIndex = grceMappings[grceIdx * 2 + i];
539 fbPceMask &= (~(NVBIT32(pceIndex)));
540
541 // floorswept PCE
542 if (pceIndex == 0 || (!gen5OrHigher && (i % 2 == 1)))
543 continue;
544
545 // Sharing use case
546 if ((NVBIT32(pLocalPceLceMap[pceIndex])) & *pLocalExposeCeMask)
547 {
548 // GRCE is shared - set the status and shared LCE # in register field
549 lceIndex = pLocalPceLceMap[pceIndex];
550 pLocalGrceMap[grceIdx] = DRF_NUM(_CE, _GRCE_CONFIG, _SHARED, 1) |
551 DRF_NUM(_CE, _GRCE_CONFIG, _SHARED_LCE, lceIndex);
552 }
553 else
554 {
555 // GRCE got its own FBHUB PCE
556 // Store the LCE in the associated PCE for GRCE
557 lceIndex = CE_GET_LOWEST_AVAILABLE_IDX(lceMask);
558 pLocalPceLceMap[pceIndex] = lceIndex;
559 // Reflect non-sharing status in register field
560 pLocalGrceMap[grceIdx] = DRF_NUM(_CE, _GRCE_CONFIG, _SHARED, 0) |
561 DRF_DEF(_CE, _GRCE_CONFIG, _SHARED_LCE, _NONE);
562 }
563 }
564
565 // update lceMask now that all PCEs are assigned to this LCE
566 lceMask &= (~(NVBIT32(lceIndex)));
567 }
568 }
569 }
570
571 /**
572 * @brief This function performs PCE-LCE mappings in the CC case where
573 * a 1-1 mapping is applied.
574 *
575 * @param[in] pGpu OBJGPU pointer
576 * @param[in] pKCe KernelCE pointer
577 * @param[in] pceAvailableMaskPerHshub Pointer to CEs available per HSHUB
578 * @param[out] pLocalPceLceMap Pointer to PCE-LCE array
579 * @param[out] pLocalExposeCeMask Pointer to LCE Mask
580 */
581 static void
kceMapPceLceForCC(OBJGPU * pGpu,KernelCE * pKCe,NvU32 * pceAvailableMaskPerHshub,NvU32 * pLocalPceLceMap,NvU32 * pLocalGrceMap,NvU32 * pLocalExposeCeMask)582 kceMapPceLceForCC
583 (
584 OBJGPU *pGpu,
585 KernelCE *pKCe,
586 NvU32 *pceAvailableMaskPerHshub,
587 NvU32 *pLocalPceLceMap,
588 NvU32 *pLocalGrceMap,
589 NvU32 *pLocalExposeCeMask
590 )
591 {
592 NvU32 pceIndex = 0;
593 NvU32 maxLceIdx, lceMask, lceIndex, grceIdx;
594
595 // Apply 1-1 mapping for async LCEs
596 lceMask = NV_CE_MAX_LCE_MASK & (~kceGetGrceSupportedLceMask_HAL(pGpu, pKCe));
597 maxLceIdx = lceMask;
598 HIGHESTBITIDX_32(maxLceIdx);
599 *pLocalExposeCeMask |= lceMask;
600 lceIndex = CE_GET_LOWEST_AVAILABLE_IDX(lceMask);
601 for(; lceIndex <= maxLceIdx; lceIndex++)
602 {
603 pLocalPceLceMap[pceIndex] = lceIndex;
604 lceMask &= (~(NVBIT32(lceIndex)));
605 pceIndex++;
606 }
607
608
609 // Map GRCEs as non sharing. At this point, no PCEs have been mapped
610 lceMask = kceGetGrceSupportedLceMask_HAL(pGpu, pKCe);
611 *pLocalExposeCeMask |= lceMask;
612 for (grceIdx = 0; grceIdx < NV_CE_MAX_GRCE; grceIdx++)
613 {
614 lceIndex = CE_GET_LOWEST_AVAILABLE_IDX(lceMask);
615 if (((NVBIT32(lceIndex) & NV_CE_MAX_LCE_MASK) != 0) && (pceIndex < kceGetPce2lceConfigSize1_HAL(pKCe)))
616 {
617 pLocalPceLceMap[pceIndex] = lceIndex;
618 lceMask &= (~(NVBIT32(lceIndex)));
619 pLocalGrceMap[grceIdx] = DRF_NUM(_CE, _GRCE_CONFIG, _SHARED, 0) |
620 DRF_DEF(_CE, _GRCE_CONFIG, _SHARED_LCE, _NONE);
621 pceIndex++;
622 }
623 }
624 }
625
626 /**
627 * @brief This function assigns PCE-LCE mappings for NVLink peers
628 * Based on HSHUBs that the links associated with a peer connect to,
629 * algorithm will attempt to assign a PCE from associated HSHUB taking into
630 * account striding as well.
631 *
632 * @param[in] pGpu OBJGPU pointer
633 * @param[in] pKCe KernelCE pointer
634 * @param[in] pceAvailableMaskPerHshub Pointer to CEs available per HSHUB
635 * @param[out] pLocalPceLceMap Pointer to PCE-LCE array
636 * @param[out] pLocalExposeCeMask Pointer to LCE Mask
637 *
638 * Returns NV_OK if successful in assigning PCEs and LCEs for each of the NVLink peers
639 */
640 NV_STATUS
kceMapPceLceForNvlinkPeers_GH100(OBJGPU * pGpu,KernelCE * pKCe,NvU32 * pceAvailableMaskPerHshub,NvU32 * pLocalPceLceMap,NvU32 * pLocalExposeCeMask)641 kceMapPceLceForNvlinkPeers_GH100
642 (
643 OBJGPU *pGpu,
644 KernelCE *pKCe,
645 NvU32 *pceAvailableMaskPerHshub,
646 NvU32 *pLocalPceLceMap,
647 NvU32 *pLocalExposeCeMask
648 )
649 {
650 KernelNvlink *pKernelNvlink = GPU_GET_KERNEL_NVLINK(pGpu);
651 OBJSYS *pSys = SYS_GET_INSTANCE();
652 NV_STATUS status = NV_OK;
653 NvU32 lceMask = 0;
654 NvU32 pceMask = 0;
655 NvU32 peerLinkMask = 0;
656 KernelCE *pKCeLce = NULL;
657 NvBool bPeerAssigned = NV_FALSE;
658 NvU32 peerAvailableLceMask = NV_CE_LCE_MASK_INIT;
659 OBJGPU *pRemoteGpu;
660 NvU32 numPcePerLink;
661 NvU32 lceIndex, pceIndex;
662 NvU8 hshubId = 0, i;
663 NvU32 linkId, gpuMask, gpuInstance = 0, j;
664
665 NV2080_CTRL_INTERNAL_HSHUB_GET_HSHUB_ID_FOR_LINKS_PARAMS params;
666
667 if (pKernelNvlink == NULL)
668 {
669 return NV_WARN_NOTHING_TO_DO;
670 }
671
672 peerAvailableLceMask = kceGetNvlinkPeerSupportedLceMask_HAL(pGpu, pKCe, peerAvailableLceMask);
673 pKCe->nvlinkNumPeers = 0;
674
675 if (knvlinkIsGpuConnectedToNvswitch(pGpu, pKernelNvlink))
676 {
677 //
678 // On NVSwitch systems, we only create 1 aperture for all p2p connections.
679 // For PCE2LCE mapping, we should only assign 1 LCE for this connection.
680 //
681 // Since we mark the loopback connections in peerLinkMasks with the appropriate
682 // links (see _nvlinkUpdateSwitchLinkMasks), we can use that to calculate
683 // the PCE2LCE config.
684 //
685 gpuMask = NVBIT32(pGpu->gpuInstance);
686 }
687 else
688 {
689 // On direct connected systems, we'll loop over each GPU in the system
690 // and assign a peer LCE for each connection
691 (void)gpumgrGetGpuAttachInfo(NULL, &gpuMask);
692 }
693
694 while ((pRemoteGpu = gpumgrGetNextGpu(gpuMask, &gpuInstance)) != NULL)
695 {
696 NvU32 numLinksToPeer = knvlinkGetNumLinksToPeer(pGpu, pKernelNvlink,
697 pRemoteGpu);
698 NvU32 maxLceCnt = NV_CE_MAX_LCE_MASK;
699
700 if (numLinksToPeer == 0)
701 {
702 continue;
703 }
704
705 pceMask = 0;
706 lceMask = 0;
707
708 if (peerAvailableLceMask == 0)
709 {
710 //
711 // peerAvailableLceMask is initialized to even async LCEs at the
712 // top of the function.
713 // As a result, if at any point in the loop, this mask == 0,
714 // it implies we have used up all even async LCEs and should move to
715 // using odd async LCEs.
716 //
717 peerAvailableLceMask = kceGetNvlinkPeerSupportedLceMask_HAL(pGpu, pKCe, peerAvailableLceMask);
718 }
719
720 // Each peer gets 1 LCE
721 lceIndex = CE_GET_LOWEST_AVAILABLE_IDX(peerAvailableLceMask);
722 HIGHESTBITIDX_32(maxLceCnt);
723 if (lceIndex < maxLceCnt)
724 {
725 lceMask |= NVBIT32(lceIndex);
726 // Clear out the chosen LCE
727 peerAvailableLceMask &= (~(NVBIT32(lceIndex)));
728 }
729
730 pKCe->nvlinkNumPeers++;
731
732 peerLinkMask = knvlinkGetLinkMaskToPeer(pGpu, pKernelNvlink, pRemoteGpu);
733 if (peerLinkMask == 0)
734 {
735 NV_PRINTF(LEVEL_INFO, "GPU%d has nvlink disabled. Skip programming\n", pRemoteGpu->gpuInstance);
736 continue;
737 }
738
739 portMemSet(¶ms, 0, sizeof(params));
740 params.linkMask = peerLinkMask;
741
742 status = knvlinkExecGspRmRpc(pGpu, pKernelNvlink,
743 NV2080_CTRL_CMD_INTERNAL_HSHUB_GET_HSHUB_ID_FOR_LINKS,
744 (void *)¶ms, sizeof(params));
745 NV_ASSERT_OK_OR_RETURN(status);
746
747 // Iterate through links by HSHUB
748 NvU32 linksPerHshub[NV_CE_MAX_HSHUBS] = {0};
749
750 FOR_EACH_INDEX_IN_MASK(32, linkId, peerLinkMask)
751 {
752 hshubId = params.hshubIds[linkId];
753 // Update link count for this hshub
754 linksPerHshub[hshubId]++;
755 }
756 FOR_EACH_INDEX_IN_MASK_END;
757
758 for (i = 0; i < NV_CE_MAX_HSHUBS; i++)
759 {
760 if (linksPerHshub[i] == 0)
761 continue;
762
763 pceIndex = CE_GET_LOWEST_AVAILABLE_IDX(pceAvailableMaskPerHshub[i]);
764 numPcePerLink = kceGetNumPceRequired(linksPerHshub[i]);
765
766 for (j = 0; j < numPcePerLink; j++)
767 {
768 _ceGetAlgorithmPceIndex(pGpu, pKCe, pceAvailableMaskPerHshub, &pceIndex, &i);
769 pceMask |= NVBIT32(pceIndex);
770 // Clear out the assigned PCE
771 pceAvailableMaskPerHshub[i] &= (~(NVBIT32(pceIndex)));
772 }
773
774 }
775
776 // Now, assign the PCE-LCE association for the current peer
777 if (pceMask != 0)
778 {
779 // We just need at least one peer to set this to TRUE
780 bPeerAssigned = NV_TRUE;
781
782 FOR_EACH_INDEX_IN_MASK(32, pceIndex, pceMask)
783 {
784 pLocalPceLceMap[pceIndex] = lceIndex;
785 NV_PRINTF(LEVEL_INFO, "GPU%d <-> GPU%d PCE Index: %d LCE Index: %d\n",
786 pGpu->gpuInstance, pRemoteGpu->gpuInstance, pceIndex, lceIndex);
787 }
788 FOR_EACH_INDEX_IN_MASK_END;
789
790 // Store lceMask in the exposeCeMask before moving on
791 *pLocalExposeCeMask |= lceMask;
792 }
793
794 pKCeLce = GPU_GET_KCE(pGpu, lceIndex);
795 pKCeLce->nvlinkPeerMask |= NVBIT(pRemoteGpu->gpuInstance);
796
797 //
798 // Bug 200659256 - Looping over GPUs rather than peers (CL 28776130)
799 // does not handle multi-GPUs/Peer as is the case on switch systems.
800 // We must only take this loop once on switch systems to account for this.
801 // If we need to support multiple peer connections with switch systems
802 // in the future, this code must be revisited
803 //
804 if (pSys->getProperty(pSys, PDB_PROP_SYS_FABRIC_IS_EXTERNALLY_MANAGED))
805 {
806 break;
807 }
808
809 }
810
811 if (bPeerAssigned == NV_FALSE)
812 {
813 status = NV_WARN_NOTHING_TO_DO;
814 }
815
816 return status;
817 }
818
819 /**
820 * @brief Some clients rely on LCE 4 also being turned on when there
821 * are no NVLink peers. This function sets up the default links.
822 *
823 * @param[in] pGpu OBJGPU pointer
824 * @param[in] pKCe KernelCE pointer
825 * @param[in] pceAvailableMaskPerHshub Pointer to CEs available per HSHUB
826 * @param[out] pLocalPceLceMap Pointer to PCE-LCE array
827 * @param[out] pLocalExposeCeMask Pointer to LCE Mask
828 *
829 * Returns NV_OK if successful in assigning PCEs to a default async LCE (>= 4)
830 */
831 NV_STATUS
kceMapAsyncLceDefault_GH100(OBJGPU * pGpu,KernelCE * pKCe,NvU32 * pceAvailableMaskPerHshub,NvU32 * pLocalPceLceMap,NvU32 * pLocalExposeCeMask,NvU32 numDefaultPces)832 kceMapAsyncLceDefault_GH100
833 (
834 OBJGPU *pGpu,
835 KernelCE *pKCe,
836 NvU32 *pceAvailableMaskPerHshub,
837 NvU32 *pLocalPceLceMap,
838 NvU32 *pLocalExposeCeMask,
839 NvU32 numDefaultPces
840 )
841 {
842 NvU32 peerAvailableLceMask = NV_CE_LCE_MASK_INIT;
843 NvU32 lceMask = 0;
844 NvU32 pceMask = 0;
845 NvU32 lceIndex, pceIndex, hshubId, i;
846 NvU32 maxLceCnt = NV_CE_MAX_LCE_MASK;
847
848 peerAvailableLceMask = kceGetNvlinkPeerSupportedLceMask_HAL(pGpu, pKCe, peerAvailableLceMask);
849 hshubId = 1;
850
851 //
852 // If no peers were found, then no async LCEs (>= 4) will be turned on.
853 // However, some clients rely on LCE 4 being present even without any
854 // NVLink peers being found. So, turn on the 1st available async LCE (>= 4)
855 // Reference bug 3042556
856 //
857 lceIndex = CE_GET_LOWEST_AVAILABLE_IDX(peerAvailableLceMask);
858 HIGHESTBITIDX_32(maxLceCnt);
859 if (lceIndex < maxLceCnt)
860 {
861 lceMask |= NVBIT32(lceIndex);
862 // Clear out the chosen LCE
863 peerAvailableLceMask &= (~(NVBIT32(lceIndex)));
864 }
865
866 // Assign PCEs to this LCE based on input request
867 for (i = 0; i < numDefaultPces; i++)
868 {
869 if (i % NV_CE_PCE_PER_HSHUB == 0)
870 hshubId++;
871
872 pceIndex = CE_GET_LOWEST_AVAILABLE_IDX(pceAvailableMaskPerHshub[hshubId]);
873 if (pceIndex < kceGetPce2lceConfigSize1_HAL(pKCe))
874 {
875 pceMask |= NVBIT32(pceIndex);
876 pceAvailableMaskPerHshub[hshubId] &= (~(NVBIT32(pceIndex)));
877 }
878 }
879
880 FOR_EACH_INDEX_IN_MASK(32, pceIndex, pceMask)
881 {
882 pLocalPceLceMap[pceIndex] = lceIndex;
883 NV_PRINTF(LEVEL_INFO, "GPU%d <-> GPU%d PCE Index: %d LCE Index: %d\n",
884 pGpu->gpuInstance, pGpu->gpuInstance, pceIndex, lceIndex);
885 }
886 FOR_EACH_INDEX_IN_MASK_END;
887
888 // Store lceMask in the exposeCeMask before moving on
889 *pLocalExposeCeMask |= lceMask;
890
891 return NV_OK;
892
893 }
894
895 NV_STATUS
kceGetMappings_GH100(OBJGPU * pGpu,KernelCE * pKCe,NVLINK_TOPOLOGY_PARAMS * pTopoParams,NvU32 * pLocalPceLceMap,NvU32 * pLocalGrceMap,NvU32 * pExposeCeMask)896 kceGetMappings_GH100
897 (
898 OBJGPU *pGpu,
899 KernelCE *pKCe,
900 NVLINK_TOPOLOGY_PARAMS *pTopoParams,
901 NvU32 *pLocalPceLceMap,
902 NvU32 *pLocalGrceMap,
903 NvU32 *pExposeCeMask
904 )
905 {
906 NV_STATUS status = NV_OK;
907 NV_STATUS statusC2C = NV_OK;
908 KernelNvlink *pKernelNvlink = GPU_GET_KERNEL_NVLINK(pGpu);
909 NvU32 topoIdx = NV_CE_INVALID_TOPO_IDX;
910 NvBool bEntryExists = NV_FALSE;
911 NvU32 pce2lceConfigSize1 = kceGetPce2lceConfigSize1_HAL(pKCe);
912 NvU32 grceConfigSize1 = kceGetGrceConfigSize1_HAL(pKCe);
913 NvU32 pceIdx, grceIdx;
914
915 //
916 // In the self hosted case, utilize table entries
917 // with pre defined mappings. Calling from the parent would result in
918 // using the incorrect autoconfig table so instead set the necessary
919 // values here if config is found in the table.
920 //
921 if (gpuIsSelfHosted(pGpu) && !(pGpu->getProperty(pGpu, PDB_PROP_GPU_SKIP_TABLE_CE_MAP)))
922 {
923 bEntryExists = kceGetAutoConfigTableEntry_HAL(pGpu, pKCe, pTopoParams, nvLinkCeAutoConfigTable_GH100,
924 NV_ARRAY_ELEMENTS(nvLinkCeAutoConfigTable_GH100),
925 &topoIdx, pExposeCeMask);
926 if (bEntryExists)
927 {
928 // Since entry exists, fill local variables with the associated table entry
929 for (pceIdx = 0; pceIdx < pce2lceConfigSize1; pceIdx++)
930 {
931 pLocalPceLceMap[pceIdx] = nvLinkCeAutoConfigTable_GH100[topoIdx].pceLceMap[pceIdx];
932 }
933 for (grceIdx = 0; grceIdx < grceConfigSize1; grceIdx++)
934 {
935 pLocalGrceMap[grceIdx] = nvLinkCeAutoConfigTable_GH100[topoIdx].grceConfig[grceIdx];
936 }
937
938 pTopoParams->maxTopoIdx = topoIdx;
939 pTopoParams->sysmemLinks = nvLinkCeAutoConfigTable_GH100[topoIdx].sysmemLinks;
940 pTopoParams->maxLinksPerPeer = nvLinkCeAutoConfigTable_GH100[topoIdx].maxLinksPerPeer;
941 pTopoParams->numPeers = nvLinkCeAutoConfigTable_GH100[topoIdx].numPeers;
942 pTopoParams->bSymmetric = nvLinkCeAutoConfigTable_GH100[topoIdx].bSymmetric;
943 pTopoParams->bSwitchConfig = nvLinkCeAutoConfigTable_GH100[topoIdx].bSwitchConfig;
944
945 return NV_OK;
946 }
947
948 }
949
950 // In CC case a 1-1 mapping should be applied and other mappings are not required
951 if (gpuIsCCFeatureEnabled(pGpu))
952 {
953 kceMapPceLceForCC(pGpu, pKCe, pTopoParams->pceAvailableMaskPerConnectingHub,
954 pLocalPceLceMap, pLocalGrceMap, pExposeCeMask);
955 goto returnSuccess;
956 }
957
958 //Prepare the per-HSHUB/FBHUB available PCE mask
959 kceGetAvailableHubPceMask(pGpu, pKCe, pTopoParams);
960
961 // Assign PCEs to "PEER"s if nvlink is enabled
962 if (pKernelNvlink && !knvlinkIsForcedConfig(pGpu, pKernelNvlink))
963 {
964 status = kceMapPceLceForNvlinkPeers_HAL(pGpu, pKCe,
965 pTopoParams->pceAvailableMaskPerConnectingHub,
966 pLocalPceLceMap,
967 pExposeCeMask);
968 }
969 else
970 {
971 status = NV_WARN_NOTHING_TO_DO;
972 }
973
974 // Special C2C cases for LCE 2 and 3
975 statusC2C = kceMapPceLceForC2C_HAL(pGpu, pKCe,
976 pTopoParams->pceAvailableMaskPerConnectingHub,
977 pLocalPceLceMap, pExposeCeMask);
978
979 // Assign PCEs for GRCE case
980 kceMapPceLceForGRCE_HAL(pGpu, pKCe,
981 pTopoParams->pceAvailableMaskPerConnectingHub,
982 pLocalPceLceMap, pExposeCeMask, pLocalGrceMap, pTopoParams->fbhubPceMask);
983
984 if ((status == NV_WARN_NOTHING_TO_DO && statusC2C == NV_WARN_NOTHING_TO_DO) ||
985 (status == NV_ERR_NOT_SUPPORTED && statusC2C == NV_ERR_NOT_SUPPORTED))
986 {
987 // If there's no NVLink peers available, still expose an additional async LCE
988 status = kceMapAsyncLceDefault_HAL(pGpu, pKCe,
989 pTopoParams->pceAvailableMaskPerConnectingHub,
990 pLocalPceLceMap,
991 pExposeCeMask,
992 NV_CE_NUM_PCES_NO_LINK_CASE);
993 }
994
995 NV_PRINTF(LEVEL_INFO, "status = %d, statusC2C = %d\n", status, statusC2C);
996
997 returnSuccess:
998 return NV_OK;
999 }
1000
kceGetP2PCes_GH100(KernelCE * pKCe,OBJGPU * pGpu,NvU32 gpuMask,NvU32 * nvlinkP2PCeMask)1001 NV_STATUS kceGetP2PCes_GH100(KernelCE *pKCe, OBJGPU *pGpu, NvU32 gpuMask, NvU32 *nvlinkP2PCeMask)
1002 {
1003 //
1004 // Currently Bug 4103154 requires an updated algorithm described below
1005 // in the else case to assign the proper LCE for the direct connected systems.
1006 //
1007
1008 NvU32 gpuCount = gpumgrGetSubDeviceCount(gpuMask);
1009 NvU32 minP2PLce = (NV_CE_EVEN_ASYNC_LCE_MASK | NV_CE_ODD_ASYNC_LCE_MASK) & NV_CE_MAX_LCE_MASK;
1010 NvU32 i;
1011 KernelNvlink *pKernelNvlink = GPU_GET_KERNEL_NVLINK(pGpu);
1012 NvBool bSwitchConfig = NV_FALSE;
1013
1014 if (pKernelNvlink == NULL)
1015 {
1016 return NV_WARN_NOTHING_TO_DO;
1017 }
1018
1019 bSwitchConfig = knvlinkIsGpuConnectedToNvswitch(pGpu, pKernelNvlink);
1020
1021 LOWESTBITIDX_32(minP2PLce);
1022 *nvlinkP2PCeMask = 0;
1023
1024 if ((gpuCount == 1) && !bSwitchConfig)
1025 {
1026 *nvlinkP2PCeMask |= NVBIT(minP2PLce);
1027 for (i = minP2PLce; i < gpuGetNumCEs(pGpu); i++)
1028 {
1029 *nvlinkP2PCeMask |= NVBIT(i);
1030
1031 }
1032 }
1033 //
1034 // For cases where we have an nvswitch connected, we will assign
1035 // the LCE with max PCEs
1036 //
1037 else if (bSwitchConfig)
1038 {
1039 KernelCE *pKCeMaxPces = NULL;
1040 KernelCE *pTargetCe = NULL;
1041 KernelCE *pKCeLoop = NULL;
1042 NvU32 gpuInstance = 0;
1043 NvU32 maxPces = 0;
1044
1045 KCE_ITER_BEGIN(pGpu, pKCe, pKCeLoop, minP2PLce)
1046
1047 if (pKCeLoop->bStubbed)
1048 {
1049 continue;
1050 }
1051
1052 NV2080_CTRL_CE_GET_CE_PCE_MASK_PARAMS params = {0};
1053
1054 // We will use LCE with most PCEs
1055 params.ceEngineType = NV2080_ENGINE_TYPE_COPY(pKCeLoop->publicID);
1056 NV_STATUS rmStatus = knvlinkExecGspRmRpc(pGpu, pKernelNvlink,
1057 NV2080_CTRL_CMD_CE_GET_CE_PCE_MASK,
1058 (void *)¶ms, sizeof(params));
1059 NV_ASSERT_OK_OR_RETURN(rmStatus);
1060 NvU32 numPces = nvPopCount32(params.pceMask);
1061
1062 if (numPces > maxPces)
1063 {
1064 pKCeMaxPces = pKCeLoop;
1065 maxPces = numPces;
1066 }
1067 KCE_ITER_END
1068
1069 // For GPU connected to nvswitch, optimal LCE is always LCE with max PCE
1070
1071 if (pKCeMaxPces != NULL)
1072 {
1073 pTargetCe = pKCeMaxPces;
1074 }
1075
1076 if (pTargetCe != NULL)
1077 {
1078 // assign LCE to peer
1079 if (pTargetCe->nvlinkPeerMask == 0)
1080 {
1081 pTargetCe->nvlinkPeerMask = NVBIT(gpuInstance);
1082 }
1083
1084 NV_PRINTF(LEVEL_INFO,
1085 "GPU %d Assigning Peer %d to LCE %d\n",
1086 gpuGetInstance(pGpu), gpuInstance,
1087 pTargetCe->publicID);
1088
1089 *nvlinkP2PCeMask = NVBIT(pTargetCe->publicID);
1090 }
1091 }
1092 else if (gpuCount > 2)
1093 {
1094 // if gpuCount > 2, this is an invalid request. Print warning and return NV_OK
1095 NV_PRINTF(LEVEL_INFO, "GPU %d invalid request for gpuCount %d\n", gpuGetInstance(pGpu), gpuCount);
1096 return NV_ERR_INVALID_STATE;
1097 }
1098 else
1099 {
1100 OBJGPU *pRemoteGpu = NULL;
1101 KernelCE *pKCeLoop = NULL;
1102 NvU32 peerLinkMask = 0;
1103 NvU32 gpuInstance = 0;
1104 NvU32 phyLinkId, status, targetPceMask, numPces;
1105
1106 //
1107 // The LCE returned should be the LCE which has the most PCEs mapped
1108 // on the given HSHUB. This HSHUB should be determined by
1109 // tracking where the majority of links are connected.
1110 //
1111 NvU32 linksPerHshub[NV_CE_MAX_HSHUBS] = {0};
1112 NvU32 maxLinksConnectedHshub = 0;
1113 NvU32 maxConnectedHshubId = NV_CE_MAX_HSHUBS;
1114 NvU32 lceAssignedMask = 0;
1115 KernelCE *maxLcePerHshub[NV_CE_MAX_HSHUBS] = {0};
1116
1117 NV2080_CTRL_INTERNAL_HSHUB_GET_HSHUB_ID_FOR_LINKS_PARAMS params;
1118
1119 if (pKernelNvlink != NULL)
1120 {
1121 // Get the remote GPU
1122 while ((pRemoteGpu = gpumgrGetNextGpu(gpuMask, &gpuInstance)) != NULL)
1123 {
1124 if (pRemoteGpu != pGpu)
1125 break;
1126 }
1127
1128 NV_ASSERT_OR_RETURN(pRemoteGpu != NULL, NV_ERR_INVALID_STATE);
1129 gpuInstance = gpuGetInstance(pRemoteGpu);
1130
1131 peerLinkMask = knvlinkGetLinkMaskToPeer(pGpu, pKernelNvlink, pRemoteGpu);
1132 }
1133
1134 portMemSet(¶ms, 0, sizeof(params));
1135 params.linkMask = peerLinkMask;
1136
1137 status = knvlinkExecGspRmRpc(pGpu, pKernelNvlink,
1138 NV2080_CTRL_CMD_INTERNAL_HSHUB_GET_HSHUB_ID_FOR_LINKS,
1139 (void *)¶ms, sizeof(params));
1140 NV_ASSERT_OK_OR_RETURN(status);
1141
1142 FOR_EACH_INDEX_IN_MASK(32, phyLinkId, peerLinkMask)
1143 {
1144 NvU32 hshubId = params.hshubIds[phyLinkId];
1145 linksPerHshub[hshubId]++;
1146
1147 if (linksPerHshub[hshubId] > maxLinksConnectedHshub)
1148 {
1149 maxLinksConnectedHshub = linksPerHshub[hshubId];
1150 maxConnectedHshubId = hshubId;
1151 }
1152 }
1153 FOR_EACH_INDEX_IN_MASK_END;
1154
1155 //
1156 // Iterate through all Async LCEs to track which HSHUB should
1157 // be using which LCE. This is decided based on the majority. If
1158 // there is a tie, then LCE with the lower index is preferred.
1159 //
1160 KCE_ITER_BEGIN(pGpu, pKCe, pKCeLoop, minP2PLce)
1161 NvU32 localMaxPcePerHshub = 0;
1162 KernelCE *localMaxLcePerHshub;
1163 NvU32 localMaxHshub = NV_CE_MAX_HSHUBS;
1164
1165 // if LCE is stubbed or LCE is already assigned to another peer
1166 if (pKCeLoop->bStubbed)
1167 {
1168 continue;
1169 }
1170
1171 // LCE is already assigned to this peer
1172 if ((pKCeLoop->nvlinkPeerMask & NVBIT(gpuInstance)) != 0)
1173 {
1174 maxLcePerHshub[maxConnectedHshubId] = pKCeLoop;
1175 break;
1176 }
1177 // LCE is already assigned to another peer
1178 else if (pKCeLoop->nvlinkPeerMask != 0)
1179 {
1180 continue;
1181 }
1182
1183 NV2080_CTRL_CE_GET_CE_PCE_MASK_PARAMS params = {0};
1184
1185 params.ceEngineType = NV2080_ENGINE_TYPE_COPY(pKCeLoop->publicID);
1186 status = knvlinkExecGspRmRpc(pGpu, pKernelNvlink,
1187 NV2080_CTRL_CMD_CE_GET_CE_PCE_MASK,
1188 (void *)¶ms, sizeof(params));
1189 NV_ASSERT_OK_OR_RETURN(status);
1190
1191 //
1192 // An LCE may be utilized across several HSHUBs. Loop through all HSHUBs
1193 // in order to decide which HSHUB holds the majority of this specific LCE.
1194 // To help with this, create a mask of PCEs only on the HSHUB which the peer
1195 // is most connected to by shifting the HSHUB PCE mask
1196 //
1197
1198 for (i = NV_CE_HSHUBNVL_ID_0; i < NV_CE_MAX_HSHUBS; i++)
1199 {
1200 targetPceMask = params.pceMask & ((NVBIT(NV_CE_PCE_PER_HSHUB) - 1) << ((i - NV_CE_HSHUBNVL_ID_0) * NV_CE_PCE_PER_HSHUB));
1201 numPces = nvPopCount32(targetPceMask);
1202 if (numPces > localMaxPcePerHshub && !(lceAssignedMask & NVBIT(pKCeLoop->publicID)))
1203 {
1204 localMaxPcePerHshub = numPces;
1205 localMaxLcePerHshub = pKCeLoop;
1206 localMaxHshub = i;
1207 }
1208 }
1209
1210 if (localMaxHshub < NV_CE_MAX_HSHUBS)
1211 {
1212 maxLcePerHshub[localMaxHshub] = localMaxLcePerHshub;
1213 lceAssignedMask |= NVBIT(localMaxLcePerHshub->publicID);
1214 }
1215
1216 KCE_ITER_END
1217
1218 if (maxLcePerHshub[maxConnectedHshubId] != NULL)
1219 {
1220 NV_PRINTF(LEVEL_INFO,
1221 "GPU %d Assigning Peer %d to preferred LCE %d\n",
1222 gpuGetInstance(pGpu), gpuInstance,
1223 maxLcePerHshub[maxConnectedHshubId]->publicID);
1224 }
1225 else
1226 {
1227 //
1228 // In the event that the preferred HSHUB's primary LCE is not available,
1229 // use the first available LCE and set that index as the new preferred hshub
1230 //
1231 maxLcePerHshub[maxConnectedHshubId] = GPU_GET_KCE(pGpu, maxConnectedHshubId);
1232
1233 NV_PRINTF(LEVEL_INFO,
1234 "GPU %d Assigning Peer %d to first available LCE %d\n",
1235 gpuGetInstance(pGpu), gpuInstance,
1236 maxLcePerHshub[maxConnectedHshubId]->publicID);
1237 }
1238
1239 if (maxConnectedHshubId < NV_CE_MAX_HSHUBS)
1240 {
1241 maxLcePerHshub[maxConnectedHshubId]->nvlinkPeerMask = NVBIT(gpuInstance);
1242 *nvlinkP2PCeMask = NVBIT(maxLcePerHshub[maxConnectedHshubId]->publicID);
1243 }
1244 }
1245
1246 return NV_OK;
1247 }
1248