1 /*
2 * SPDX-FileCopyrightText: Copyright (c) 2021-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
3 * SPDX-License-Identifier: MIT
4 *
5 * Permission is hereby granted, free of charge, to any person obtaining a
6 * copy of this software and associated documentation files (the "Software"),
7 * to deal in the Software without restriction, including without limitation
8 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
9 * and/or sell copies of the Software, and to permit persons to whom the
10 * Software is furnished to do so, subject to the following conditions:
11 *
12 * The above copyright notice and this permission notice shall be included in
13 * all copies or substantial portions of the Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
21 * DEALINGS IN THE SOFTWARE.
22 */
23
24 #include "gpu_mgr/gpu_mgr.h"
25 #include "gpu/gpu.h"
26 #include "gpu/gpu_child_class_defs.h"
27 #include "gpu_mgr/gpu_mgr_sli.h"
28 #include "gpu/bif/kernel_bif.h"
29 #include "jt.h"
30 #include "published/maxwell/gm107/dev_bus.h"
31 #include "published/maxwell/gm107/dev_nv_xve.h"
32 #include "published/maxwell/gm107/dev_nv_xve1.h"
33 #include "published/maxwell/gm107/dev_fuse.h"
34 #include "published/maxwell/gm107/dev_pri_ringstation_sys.h"
35
36 /*!
37 * @brief Returns SR-IOV capabilities
38 *
39 * @param[in] pGpu OBJGPU pointer
40 * @param[out] pParams Pointer for get_sriov_caps params
41 *
42 * @returns NV_OK always
43 */
44 NV_STATUS
gpuGetSriovCaps_GM107(OBJGPU * pGpu,NV0080_CTRL_GPU_GET_SRIOV_CAPS_PARAMS * pParams)45 gpuGetSriovCaps_GM107
46 (
47 OBJGPU *pGpu,
48 NV0080_CTRL_GPU_GET_SRIOV_CAPS_PARAMS *pParams
49 )
50 {
51 pParams->bSriovEnabled = NV_FALSE;
52 return NV_OK;
53 }
54
55 /*!
56 * @brief Read fuse for display supported status.
57 * Some chips not marked displayless do not support display
58 */
59 NvBool
gpuFuseSupportsDisplay_GM107(OBJGPU * pGpu)60 gpuFuseSupportsDisplay_GM107
61 (
62 OBJGPU *pGpu
63 )
64 {
65 return GPU_FLD_TEST_DRF_DEF(pGpu, _FUSE, _STATUS_OPT_DISPLAY, _DATA, _ENABLE);
66 }
67
68 /*!
69 * @brief gpuReadBusConfigRegEx_GM107
70 *
71 * param[in] pGpu The GPU object pointer
72 * param[in] index NvU32
73 * param[in] *data NvU32 *
74 * param[in] pThreadState thread state node pointer
75 */
76 NV_STATUS
gpuReadBusConfigRegEx_GM107(OBJGPU * pGpu,NvU32 index,NvU32 * data,THREAD_STATE_NODE * pThreadState)77 gpuReadBusConfigRegEx_GM107
78 (
79 OBJGPU *pGpu,
80 NvU32 index,
81 NvU32 *data,
82 THREAD_STATE_NODE *pThreadState
83 )
84 {
85
86 if (index > (PCIE_CONFIG_SPACE_SIZE - sizeof(NvU32)))
87 {
88 NV_PRINTF(LEVEL_ERROR,
89 "Offset 0x%08x exceeds range!\n",
90 index);
91 NV_ASSERT(0);
92 return NV_ERR_GENERIC;
93 }
94
95 *data = GPU_REG_RD32_EX(pGpu, DEVICE_BASE(NV_PCFG) + index, pThreadState);
96
97 return NV_OK;
98 }
99
100 /*!
101 * @brief gpuReadBusConfigReg_GM107()
102 *
103 * param[in] pGpu The GPU object pointer
104 * param[in] index NvU32
105 * param[in] *data NvU32 *
106 */
107 NV_STATUS
gpuReadBusConfigReg_GM107(OBJGPU * pGpu,NvU32 index,NvU32 * data)108 gpuReadBusConfigReg_GM107
109 (
110 OBJGPU *pGpu,
111 NvU32 index,
112 NvU32 *data
113 )
114 {
115 return gpuReadBusConfigRegEx_HAL(pGpu, index, data, NULL);
116 }
117
118 /*!
119 * @brief gpuWriteBusConfigReg_GM107
120 *
121 * param[in] pGpu The GPU object pointer
122 * param[in] index NvU32
123 * param[in] value NvU32
124 */
125 NV_STATUS
gpuWriteBusConfigReg_GM107(OBJGPU * pGpu,NvU32 index,NvU32 value)126 gpuWriteBusConfigReg_GM107
127 (
128 OBJGPU *pGpu,
129 NvU32 index,
130 NvU32 value
131 )
132 {
133
134 if (index > (PCIE_CONFIG_SPACE_SIZE - sizeof(NvU32)))
135 {
136 NV_PRINTF(LEVEL_ERROR,
137 "Offset 0x%08x exceeds range!\n",
138 index);
139 NV_ASSERT(0);
140 return NV_ERR_GENERIC;
141 }
142
143 GPU_REG_WR32(pGpu, DEVICE_BASE(NV_PCFG) + index, value);
144
145 return NV_OK;
146 }
147
148 NV_STATUS
gpuReadFunctionConfigReg_GM107(OBJGPU * pGpu,NvU32 function,NvU32 index,NvU32 * data)149 gpuReadFunctionConfigReg_GM107
150 (
151 OBJGPU *pGpu,
152 NvU32 function,
153 NvU32 index,
154 NvU32 *data
155 )
156 {
157 NvU32 retval;
158
159 if (index > (PCIE_CONFIG_SPACE_SIZE - sizeof(NvU32)))
160 {
161 NV_PRINTF(LEVEL_ERROR, "Offset 0x%08x exceeds range!\n", index);
162
163 return NV_ERR_GENERIC;
164 }
165
166 switch (function)
167 {
168 case 0:
169 {
170 retval = GPU_REG_RD32(pGpu, DEVICE_BASE(NV_PCFG) + index);
171 break;
172 }
173
174 case 1:
175 {
176 if (IS_FMODEL(pGpu))
177 {
178 //
179 // Function 1 is not modeled on fmodel
180 //
181 *data = 0;
182 return NV_OK;
183 }
184 else
185 {
186 retval = GPU_REG_RD32(pGpu, DEVICE_BASE(NV_PCFG1) + index);
187 }
188 break;
189 }
190
191 default:
192 NV_PRINTF(LEVEL_ERROR,
193 "attempt to read cfg space of non-existant function %x\n",
194 function);
195 return NV_ERR_GENERIC;
196 }
197
198 *data = retval;
199 return NV_OK;
200 }
201
202
203 NV_STATUS
gpuWriteFunctionConfigReg_GM107(OBJGPU * pGpu,NvU32 function,NvU32 index,NvU32 data)204 gpuWriteFunctionConfigReg_GM107
205 (
206 OBJGPU *pGpu,
207 NvU32 function,
208 NvU32 index,
209 NvU32 data
210 )
211 {
212 return gpuWriteFunctionConfigRegEx_HAL(pGpu, function, index, data,
213 NULL /* threadstate */);
214 }
215
216 NV_STATUS
gpuWriteFunctionConfigRegEx_GM107(OBJGPU * pGpu,NvU32 function,NvU32 index,NvU32 data,THREAD_STATE_NODE * pThreadState)217 gpuWriteFunctionConfigRegEx_GM107
218 (
219 OBJGPU *pGpu,
220 NvU32 function,
221 NvU32 index,
222 NvU32 data,
223 THREAD_STATE_NODE *pThreadState
224 )
225 {
226 if (index > (PCIE_CONFIG_SPACE_SIZE - sizeof(NvU32)))
227 {
228 NV_PRINTF(LEVEL_ERROR, "Offset 0x%08x exceeds range!\n", index);
229
230 return NV_ERR_INVALID_ARGUMENT;
231 }
232
233 switch (function)
234 {
235 case 0:
236 {
237 GPU_REG_WR32_EX(pGpu, DEVICE_BASE(NV_PCFG) + index, data, pThreadState);
238 break;
239 }
240
241 case 1:
242 {
243 //
244 // Function 1 is not modeled on fmodel
245 //
246 if (!IS_FMODEL(pGpu))
247 {
248 GPU_REG_WR32_EX(pGpu, DEVICE_BASE(NV_PCFG1) + index, data, pThreadState);
249 }
250 break;
251 }
252
253 default:
254 NV_PRINTF(LEVEL_ERROR,
255 "attempt to read cfg space of non-existant function %x\n",
256 function);
257 return NV_ERR_INVALID_ARGUMENT;
258 }
259
260 return NV_OK;
261 }
262
263 /*!
264 * @brief Perform gpu-dependent error handling for error during register read sanity check
265 *
266 * @param[in] pGpu GPU object pointer
267 * @param[in] addr Value address
268 * @param[in] value Value read during check
269 */
void
gpuHandleSanityCheckRegReadError_GM107
(
    OBJGPU *pGpu,
    NvU32 addr,
    NvU32 value
)
{
#if NV_PRINTF_ENABLED
    //
    // Read the interrupt status using the direct OS reg read call so we don't recurse
    // if we happen to see GPU_READ_PRI_ERROR_CODE there as well (bug 799876)
    //
    NvU32 intr = osGpuReadReg032(pGpu, NV_PBUS_INTR_0);

    // To be sure, filter this down further by checking the related pri interrupts:
    if (FLD_TEST_DRF(_PBUS, _INTR_0, _PRI_SQUASH, _PENDING, intr) ||
        FLD_TEST_DRF(_PBUS, _INTR_0, _PRI_FECSERR, _PENDING, intr) ||
        FLD_TEST_DRF(_PBUS, _INTR_0, _PRI_TIMEOUT, _PENDING, intr))
    {
#if NV_PRINTF_STRINGS_ALLOWED
        // Translate the PRI error code embedded in the bad read value into a
        // human-readable name; falls back to the string below if unrecognized.
        const char *errorString = "Unknown SYS_PRI_ERROR_CODE";

        gpuGetSanityCheckRegReadError_HAL(pGpu, value,
                                          &errorString);
        NV_PRINTF(LEVEL_ERROR,
                  "Possible bad register read: addr: 0x%x, regvalue: 0x%x, error code: %s\n",
                  addr, value, errorString);
#else // NV_PRINTF_STRINGS_ALLOWED
        // Strings are compiled out; report raw values only.
        NV_PRINTF(LEVEL_ERROR,
                  "Possible bad register read: addr: 0x%x, regvalue: 0x%x\n",
                  addr, value);
#endif // NV_PRINTF_STRINGS_ALLOWED
    }
#endif // NV_PRINTF_ENABLED
}
306
307 void
gpuGetIdInfo_GM107(OBJGPU * pGpu)308 gpuGetIdInfo_GM107(OBJGPU *pGpu)
309 {
310 NvU32 data;
311
312 if (NV_OK != GPU_BUS_CFG_RD32(pGpu, NV_XVE_REV_ID, &data))
313 {
314 NV_PRINTF(LEVEL_ERROR, "unable to read NV_XVE_REV_ID\n");
315 return;
316 }
317
318 // we only need the FIB and MASK values
319 pGpu->idInfo.PCIRevisionID = (data & ~GPU_DRF_SHIFTMASK(NV_XVE_REV_ID_CLASS_CODE));
320
321 if (NV_OK != GPU_BUS_CFG_RD32(pGpu, NV_XVE_SUBSYSTEM, &data))
322 {
323 NV_PRINTF(LEVEL_ERROR, "unable to read NV_XVE_SUBSYSTEM\n");
324 return;
325 }
326 pGpu->idInfo.PCISubDeviceID = data;
327
328 if (NV_OK != GPU_BUS_CFG_RD32(pGpu, NV_XVE_ID, &data))
329 {
330 NV_PRINTF(LEVEL_ERROR, "unable to read NV_XVE_ID\n");
331 return;
332 }
333
334 pGpu->idInfo.PCIDeviceID = data;
335
336 }
337
338 // GM200 used on all later GPUs
339
340 //
341 // Lists the order of GPU children for engine state transitions (StateInit, StateLoad,
342 // StateUnload and StateDestroy). This list controls only the engine order. Engine
343 // presence is defined by gpuGetChildrenPresent_HAL. Engines in this list that aren't in the
344 // gpuGetChildrenPresent_HAL list are ignored.
345 //
346 // List entries contain {CLASS-ID, flags} pairs.
347 //
348 // Valid flags are:
349 // GCO_ALL - entry is used for all list types
350 // GCO_LIST_INIT - entry is used for init ordering (DO NOT USE)
351 // GCO_LIST_LOAD - entry is used for load and postload ordering (DO NOT USE)
352 // GCO_LIST_UNLOAD - entry is used for unload and preunload ordering (DO NOT USE)
353 // GCO_LIST_DESTROY - entry is used for destroy order (DO NOT USE)
354 //
355 // For UNLOAD and DESTROY the list is played back in reverse from LOAD and INIT.
356 //
357 // IMPORTANT:
//   <1> GCO_ALL is the recommended flag to use for all engine types. Engines should
//       always have a consistent order. If there are complicated dependencies that cannot
//       be resolved using this list, please use callbacks (such as fifoAddSchedulingHandler)
//   <2> DO NOT FORK THIS LIST. The goal is to have a single ordered list across all
//       chips. Inconsistent ordering makes it challenging to modify shared code to work
//       across all variations.
364 //
// Engine state-transition ordering table shared by GM200 and all later GPUs;
// see the block comment above for the flag semantics.
static const GPUCHILDORDER
gpuChildOrderList_GM200[] =
{
    {classId(OBJVBIOS), GCO_ALL},
    {classId(ConfidentialCompute), GCO_ALL},
    {classId(Pxuc), GCO_ALL},
    {classId(OBJBIF), GCO_ALL},
    {classId(KernelBif), GCO_ALL},
    {classId(Nne), GCO_ALL},
    {classId(NvDebugDump), GCO_ALL},
    {classId(ClockManager), GCO_ALL},
    {classId(Pmgr), GCO_ALL},
    {classId(OBJVOLT), GCO_ALL},
    {classId(OBJMC), GCO_ALL},
    {classId(KernelMc), GCO_ALL},
    {classId(PrivRing), GCO_ALL},
    {classId(SwIntr), GCO_ALL},
    {classId(Intr), GCO_ALL},
    {classId(OBJTMR), GCO_ALL},
    {classId(Therm), GCO_ALL},
    {classId(OBJHSHUBMANAGER), GCO_ALL},
    {classId(Hshub), GCO_ALL},
    {classId(MemorySystem), GCO_ALL},
    {classId(KernelMemorySystem), GCO_ALL},
    {classId(MemoryManager), GCO_ALL},
    {classId(Nvlink), GCO_ALL},
    {classId(KernelNvlink), GCO_ALL},
    {classId(OBJHDACODEC), GCO_ALL},
    {classId(OBJGMMU), GCO_ALL},
    {classId(KernelGmmu), GCO_ALL},
    {classId(OBJVMMU), GCO_ALL},
    {classId(KernelSec2), GCO_ALL},
    {classId(KernelGsp), GCO_ALL},
    {classId(OBJBUS), GCO_ALL},
    {classId(KernelBus), GCO_ALL},
    {classId(OBJLSFM), GCO_LIST_LOAD | GCO_LIST_UNLOAD | GCO_LIST_DESTROY}, // LOAD LSFM must be before ACR and any managed Falcon.
    {classId(OBJACR), GCO_LIST_LOAD | GCO_LIST_UNLOAD | GCO_LIST_DESTROY},
    {classId(Pmu), GCO_LIST_LOAD | GCO_LIST_UNLOAD | GCO_LIST_DESTROY},
    {classId(KernelPmu), GCO_LIST_LOAD | GCO_LIST_UNLOAD | GCO_LIST_DESTROY},
    {classId(Gsp), GCO_ALL},
    {classId(OBJFSP), GCO_ALL},
    {classId(KernelFsp), GCO_ALL},
    {classId(OBJFBFLCN), GCO_ALL},
    {classId(Lpwr ), GCO_LIST_LOAD | GCO_LIST_UNLOAD | GCO_LIST_DESTROY},
    {classId(Perf), GCO_LIST_LOAD | GCO_LIST_UNLOAD | GCO_LIST_DESTROY}, // LOAD Perf is after PMU for perfmon_sampling to work
    {classId(KernelPerf), GCO_LIST_LOAD | GCO_LIST_UNLOAD | GCO_LIST_DESTROY},
    {classId(OBJDISP), GCO_LIST_DESTROY},
    {classId(KernelDisplay), GCO_LIST_DESTROY},
    {classId(OBJHDA), GCO_LIST_DESTROY},
    {classId(Fan), GCO_LIST_DESTROY},
    {classId(VirtMemAllocator), GCO_ALL},
    {classId(OBJDISP), GCO_LIST_INIT},
    {classId(KernelDisplay), GCO_LIST_INIT},
    {classId(OBJHDA), GCO_LIST_INIT},
    {classId(Fan), GCO_LIST_INIT},
    {classId(GraphicsManager), GCO_ALL},
    {classId(MIGManager), GCO_ALL},
    {classId(KernelMIGManager), GCO_ALL},
    {classId(KernelGraphicsManager), GCO_ALL},
    {classId(Graphics), GCO_ALL}, // INIT GR has to be initialized before LSFM because
                                  // the ucode image pointers needed by LSFM are only
                                  // known after GR has loaded the netlist.

    {classId(KernelGraphics), GCO_ALL},
    {classId(OBJLSFM), GCO_LIST_INIT},
    {classId(OBJACR), GCO_LIST_INIT},
    {classId(Pmu), GCO_LIST_INIT},
    {classId(KernelPmu), GCO_LIST_INIT},
    {classId(Lpwr ), GCO_LIST_INIT},
    {classId(Perf), GCO_LIST_INIT},
    {classId(KernelPerf), GCO_LIST_INIT},
    {classId(OBJBSP), GCO_ALL},
    {classId(OBJCIPHER), GCO_ALL},
    {classId(OBJDISP), GCO_LIST_LOAD | GCO_LIST_UNLOAD}, // LOAD Display is *after* cipher so that hdcp keys can be loaded .
    {classId(KernelDisplay), GCO_LIST_LOAD | GCO_LIST_UNLOAD}, // LOAD Display is *after* cipher so that hdcp keys can be loaded .
    {classId(OBJHDA), GCO_LIST_LOAD | GCO_LIST_UNLOAD},
    {classId(Fan), GCO_LIST_LOAD | GCO_LIST_UNLOAD},
    {classId(OBJCE), GCO_ALL},
    {classId(KernelCE), GCO_ALL},
    {classId(OBJMSENC), GCO_ALL},
    {classId(OBJNVJPG), GCO_ALL},
    {classId(OBJOFA), GCO_ALL},
    {classId(OBJSEC2), GCO_ALL},
    {classId(KernelFifo), GCO_ALL},
    {classId(OBJFIFO), GCO_ALL},
    {classId(OBJDPAUX), GCO_ALL},
    {classId(OBJINFOROM), GCO_ALL},
    {classId(OBJUVM), GCO_ALL},
    {classId(OBJGPULOG), GCO_LIST_INIT | GCO_LIST_LOAD},
    {classId(OBJGPUMON), GCO_ALL},
    {classId(OBJGPULOG), GCO_LIST_UNLOAD | GCO_LIST_DESTROY},
    {classId(KernelHwpm), GCO_ALL},
    {classId(OBJHWPM), GCO_ALL},
    {classId(OBJSWENG), GCO_ALL},
    {classId(OBJGRIDDISPLAYLESS), GCO_ALL},
    {classId(KernelCcu), GCO_ALL},
};
462
463 const GPUCHILDORDER *
gpuGetChildrenOrder_GM200(OBJGPU * pGpu,NvU32 * pNumEntries)464 gpuGetChildrenOrder_GM200(OBJGPU *pGpu, NvU32 *pNumEntries)
465 {
466 *pNumEntries = NV_ARRAY_ELEMENTS(gpuChildOrderList_GM200);
467 return gpuChildOrderList_GM200;
468 }
469
470 //
471 // List of GPU children that present for the chip. List entries contain
472 // {CLASS-ID, # of instances} pairs, e.g.: {CE, 2} is 2 instance of OBJCE. This
473 // list controls only engine presence. Order is defined by
474 // gpuGetChildrenOrder_HAL.
475 //
476 // IMPORTANT: This function is to be deleted. Engine removal should instead be
477 // handled by <eng>ConstructEngine returning NV_ERR_NOT_SUPPORTED. PLEASE DO NOT
478 // FORK THIS LIST!
479 //
480 // List entries contain {CLASS-ID, # of instances} pairs.
481 //
// {CLASS-ID, instance count} pairs for engines present on GM200; ordering is
// irrelevant here (see gpuGetChildrenOrder_HAL for ordering).
static const GPUCHILDPRESENT gpuChildrenPresent_GM200[] =
{
    GPU_CHILD_PRESENT(OBJTMR, 1),
    GPU_CHILD_PRESENT(KernelMIGManager, 1),
    GPU_CHILD_PRESENT(KernelGraphicsManager, 1),
    GPU_CHILD_PRESENT(KernelRc, 1),
    GPU_CHILD_PRESENT(Intr, 1),
    GPU_CHILD_PRESENT(NvDebugDump, 1),
    GPU_CHILD_PRESENT(OBJGPUMON, 1),
    GPU_CHILD_PRESENT(OBJSWENG, 1),
    GPU_CHILD_PRESENT(KernelBif, 1),
    GPU_CHILD_PRESENT(KernelBus, 1),
    GPU_CHILD_PRESENT(KernelCE, 3),
    GPU_CHILD_PRESENT(KernelDisplay, 1),
    GPU_CHILD_PRESENT(VirtMemAllocator, 1),
    GPU_CHILD_PRESENT(KernelMemorySystem, 1),
    GPU_CHILD_PRESENT(MemoryManager, 1),
    GPU_CHILD_PRESENT(KernelFifo, 1),
    GPU_CHILD_PRESENT(KernelGmmu, 1),
    GPU_CHILD_PRESENT(KernelGraphics, 1),
    GPU_CHILD_PRESENT(KernelHwpm, 1),
    GPU_CHILD_PRESENT(KernelMc, 1),
    GPU_CHILD_PRESENT(SwIntr, 1),
    GPU_CHILD_PRESENT(KernelPerf, 1),
    GPU_CHILD_PRESENT(KernelPmu, 1),
};
508
509 const GPUCHILDPRESENT *
gpuGetChildrenPresent_GM200(OBJGPU * pGpu,NvU32 * pNumEntries)510 gpuGetChildrenPresent_GM200(OBJGPU *pGpu, NvU32 *pNumEntries)
511 {
512 *pNumEntries = NV_ARRAY_ELEMENTS(gpuChildrenPresent_GM200);
513 return gpuChildrenPresent_GM200;
514 }
515
516 /*!
 * @brief checks for each type of bridge to determine what is available,
518 * then selects the SLI bridge to use.
519 *
520 * @param[In] gpuCount The number of GPUs to be checked for SLI links.
521 * @param[In] gpuMaskArg A mask of the GPUs that are to be tested for SLI links.
522 * @param[Out] pSliLinkOutputMask a mask of the GPUs that are attached to the type of
523 * SLI link that is being used.
 * @param[Out] pSliLinkCircular   a boolean indicating if the SLI link is circular.
525 * @param[Out] pSliLinkEndsMask a mask indicating the endpoints of the SLI link,
526 * if there are any.
 */
void
gpuDetectSliLinkFromGpus_GK104
(
    OBJGPU *pGpu,
    NvU32 gpuCount,
    NvU32 gpuMaskArg,
    NvU32 *pSliLinkOutputMask,
    NvBool *pSliLinkCircular,
    NvU32 *pSliLinkEndsMask,
    NvU32 *pVidLinkCount
)
{
    NvU32 i;
    // Per-bridge-type results; initializer lists assume SLI_MAX_BRIDGE_TYPES == 2.
    NvU32 sliLinkOutputMask[SLI_MAX_BRIDGE_TYPES] = {0, 0};
    NvBool bSliLinkCircular[SLI_MAX_BRIDGE_TYPES] = {NV_FALSE, NV_FALSE};
    NvU32 sliLinkEndsMask[SLI_MAX_BRIDGE_TYPES] = {0, 0};
    NvU32 vidLinkCount[SLI_MAX_BRIDGE_TYPES] = {0, 0};
    OBJSYS *pSys = SYS_GET_INSTANCE();
    OBJGPUMGR *pGpuMgr = SYS_GET_GPUMGR(pSys);
    OBJGPU *pGpuLoop;
    OBJGPU *pGpuSaved;
    NvU32 gpuMask;
    // Array to store the link detection HAL flag of GpuDetectVidLinkFromGpus_HAL and GpuDetectNvlinkLinkFromGpus_HAL.
    NvU32 linkHalImpl[SLI_MAX_BRIDGE_TYPES];
    NvBool bFoundBridge = NV_FALSE;

    // set the return values assuming we will not find an SLI link.
    *pSliLinkOutputMask = 0;
    *pSliLinkCircular = NV_FALSE;
    *pSliLinkEndsMask = 0;

    // Default bridge type until a real bridge is detected below.
    pGpuMgr->gpuBridgeType = SLI_BT_VIDLINK;

    //
    // Link detection HAL should have same HAL implementation as HAL flag.
    // This checks for mismatched HAL implementation flag.
    //
    NV_ASSERT_OR_RETURN_VOID(gpuGetSliLinkDetectionHalFlag_HAL(pGpu) == GPU_LINK_DETECTION_HAL_GK104);

    // Seed the scan with the first GPU in the mask; nothing to do if the mask is empty.
    i = 0;
    gpuMask = gpuMaskArg;
    pGpuLoop = gpumgrGetNextGpu(gpuMask, &i);
    if (pGpuLoop != NULL)
    {
        pGpuSaved = pGpuLoop;
        linkHalImpl[SLI_BT_NVLINK] = gpuGetNvlinkLinkDetectionHalFlag_HAL(pGpuLoop);

    }
    else
    {
        return;
    }

    // run thru the GPUs and see if they are all using the same HAL functions.
    // if they are different, we can't use the function to check for a bridge
    pGpuLoop = gpumgrGetNextGpu(gpuMask, &i);
    while (NULL != pGpuLoop)
    {
        if (linkHalImpl[SLI_BT_NVLINK] != gpuGetNvlinkLinkDetectionHalFlag_HAL(pGpuLoop))
        {
            // Mismatch across GPUs: stub out NVLink detection entirely.
            linkHalImpl[SLI_BT_NVLINK] = GPU_LINK_DETECTION_HAL_STUB;
        }
        pGpuLoop = gpumgrGetNextGpu(gpuMask, &i);
    }

    // All GPUs agree on the NVLink detection HAL; run the actual detection.
    if (linkHalImpl[SLI_BT_NVLINK] != GPU_LINK_DETECTION_HAL_STUB)
    {
        gpuDetectNvlinkLinkFromGpus_HAL(pGpuSaved, gpuCount, gpuMaskArg,
                                        &sliLinkOutputMask[SLI_BT_NVLINK],
                                        &bSliLinkCircular[SLI_BT_NVLINK],
                                        &sliLinkEndsMask[SLI_BT_NVLINK],
                                        &vidLinkCount[SLI_BT_NVLINK]);
    }

    //
    // Determine which type of bridge we are going to support.
    // Currently we only support a single type of SLI bridge in the system.
    //
    for (i = 0; i < SLI_MAX_BRIDGE_TYPES; ++i)
    {
        if (sliLinkOutputMask[i] != 0)
        {
            if (bFoundBridge)
            {
                // A second bridge type was detected — unsupported configuration.
                NV_PRINTF(LEVEL_ERROR, "More than one type of SLI bridge detected!\n");
                NV_ASSERT(0);
                break;
            }
            else
            {
                // First (and only expected) bridge type: publish its results.
                pGpuMgr->gpuBridgeType = (NvU8)i;
                *pSliLinkOutputMask = sliLinkOutputMask[i];
                *pSliLinkCircular = bSliLinkCircular[i];
                *pSliLinkEndsMask = sliLinkEndsMask[i];
                *pVidLinkCount = vidLinkCount[i];
                bFoundBridge = NV_TRUE;
            }
        }
    }
}
628
629 /*!
630 * @brief Get error that arises during sanity check on a register read value
631 *
632 * @param[in] pGpu GPU object pointer
633 * @param[in] value Register value
634 * @param[out] pErrorString Error string pointer
635 *
636 * @return void
637 */
638 void
gpuGetSanityCheckRegReadError_GM107(OBJGPU * pGpu,NvU32 value,const char ** pErrorString)639 gpuGetSanityCheckRegReadError_GM107
640 (
641 OBJGPU *pGpu,
642 NvU32 value,
643 const char **pErrorString
644 )
645 {
646 #define PRI_ERROR(err) \
647 if (DRF_VAL(_PPRIV, _SYS_PRI_ERROR, _CODE, value) == err) \
648 { \
649 *pErrorString = #err; \
650 }
651 PRI_ERROR(NV_PPRIV_SYS_PRI_ERROR_CODE_HOST_FECS_ERR);
652 PRI_ERROR(NV_PPRIV_SYS_PRI_ERROR_CODE_HOST_PRI_TIMEOUT);
653 PRI_ERROR(NV_PPRIV_SYS_PRI_ERROR_CODE_HOST_FB_ACK_TIMEOUT);
654 PRI_ERROR(NV_PPRIV_SYS_PRI_ERROR_CODE_FECS_PRI_TIMEOUT);
655 PRI_ERROR(NV_PPRIV_SYS_PRI_ERROR_CODE_FECS_PRI_DECODE);
656 PRI_ERROR(NV_PPRIV_SYS_PRI_ERROR_CODE_FECS_PRI_RESET);
657 PRI_ERROR(NV_PPRIV_SYS_PRI_ERROR_CODE_FECS_PRI_FLOORSWEEP);
658 PRI_ERROR(NV_PPRIV_SYS_PRI_ERROR_CODE_FECS_PRI_STUCK_ACK);
659 PRI_ERROR(NV_PPRIV_SYS_PRI_ERROR_CODE_FECS_PRI_0_EXPECTED_ACK);
660 PRI_ERROR(NV_PPRIV_SYS_PRI_ERROR_CODE_FECS_PRI_FENCE_ERROR);
661 PRI_ERROR(NV_PPRIV_SYS_PRI_ERROR_CODE_FECS_PRI_SUBID_ERROR);
662 PRI_ERROR(NV_PPRIV_SYS_PRI_ERROR_CODE_FECS_PRI_ORPHAN);
663 PRI_ERROR(NV_PPRIV_SYS_PRI_ERROR_CODE_FECS_DEAD_RING);
664 PRI_ERROR(NV_PPRIV_SYS_PRI_ERROR_CODE_FECS_TRAP);
665 PRI_ERROR(NV_PPRIV_SYS_PRI_ERROR_CODE_FECS_PRI_CLIENT_ERR);
666 }
667