1 /*
2 * SPDX-FileCopyrightText: Copyright (c) 2005-2017 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
3 * SPDX-License-Identifier: MIT
4 *
5 * Permission is hereby granted, free of charge, to any person obtaining a
6 * copy of this software and associated documentation files (the "Software"),
7 * to deal in the Software without restriction, including without limitation
8 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
9 * and/or sell copies of the Software, and to permit persons to whom the
10 * Software is furnished to do so, subject to the following conditions:
11 *
12 * The above copyright notice and this permission notice shall be included in
13 * all copies or substantial portions of the Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
21 * DEALINGS IN THE SOFTWARE.
22 */
23
24
25
26 #include "nvidia-3d.h"
27 #include "nvidia-3d-surface.h"
28 #include "nvidia-3d-types-priv.h"
29
30 #include "nvidia-3d-fermi.h"
31 #include "nvidia-3d-kepler.h"
32 #include "nvidia-3d-maxwell.h"
33 #include "nvidia-3d-pascal.h"
34 #include "nvidia-3d-volta.h"
35 #include "nvidia-3d-turing.h"
36 #include "nvidia-3d-hopper.h"
37
38 #include "nvidia-push-init.h" // nvPushGetSupportedClassIndex()
39 #include "nvidia-push-utils.h" // nvPushIsAmodel()
40
41 #include <class/clcb97.h> // HOPPER_A
42 #include <class/clc997.h> // ADA_A
43 #include <class/clc797.h> // AMPERE_B
44 #include <class/clc697.h> // AMPERE_A
45 #include <class/clc597.h> // TURING_A
46 #include <class/clc397.h> // VOLTA_A
47 #include <class/clc197.h> // PASCAL_B
48 #include <class/clc097.h> // PASCAL_A
49 #include <class/clb197.h> // MAXWELL_B
50 #include <class/clb097.h> // MAXWELL_A
51
52 #include <ctrl/ctrl2080/ctrl2080gr.h>
53 #include <nvos.h>
54
55 #include "g_maxwell_shader_info.h"
56 #include "g_pascal_shader_info.h"
57 #include "g_volta_shader_info.h"
58 #include "g_turing_shader_info.h"
59 #include "g_ampere_shader_info.h"
60 #include "g_hopper_shader_info.h"
61
62 #define _NV3D_CHANNEL_PROGRAMS_ENTRY(_archLower, _archCamel, _archUpper) \
63 [NV3D_SHADER_ARCH_ ## _archUpper ] = { \
64 .num = NUM_PROGRAMS, \
65 .info = _archCamel ## ProgramInfo, \
66 .maxLocalBytes = _archCamel ## ShaderMaxLocalBytes, \
67 .maxStackBytes = _archCamel ## ShaderMaxStackBytes, \
68 .code.decompressedSize = _archCamel ## ProgramHeapSize, \
69 .code.compressedStart = \
70 ({ extern const unsigned char \
71 _binary_ ## _archLower ## _shaders_xz_start[]; \
72 _binary_ ## _archLower ## _shaders_xz_start; }), \
73 .code.compressedEnd = \
74 ({ extern const unsigned char \
75 _binary_ ## _archLower ## _shaders_xz_end[]; \
76 _binary_ ## _archLower ## _shaders_xz_end; }), \
77 .constants.info = _archCamel ## ConstBufInfo, \
78 .constants.count = \
79 (NvU32)ARRAY_LEN(_archCamel ## ConstBufInfo), \
80 .constants.size = _archCamel ## ConstBufSize, \
81 .constants.sizeAlign = _archCamel ## ConstBufSizeAlign, \
82 }
83
PickProgramsRec(const Nv3dDeviceRec * p3dDevice)84 static Nv3dChannelProgramsRec PickProgramsRec(
85 const Nv3dDeviceRec *p3dDevice)
86 {
87 const Nv3dChannelProgramsRec programsTable[NV3D_SHADER_ARCH_COUNT] = {
88
89 _NV3D_CHANNEL_PROGRAMS_ENTRY(maxwell, Maxwell, MAXWELL),
90 _NV3D_CHANNEL_PROGRAMS_ENTRY(pascal, Pascal, PASCAL),
91 _NV3D_CHANNEL_PROGRAMS_ENTRY(volta, Volta, VOLTA),
92 _NV3D_CHANNEL_PROGRAMS_ENTRY(turing, Turing, TURING),
93 _NV3D_CHANNEL_PROGRAMS_ENTRY(ampere, Ampere, AMPERE),
94 _NV3D_CHANNEL_PROGRAMS_ENTRY(hopper, Hopper, HOPPER),
95 };
96
97 return programsTable[p3dDevice->shaderArch];
98 }
99
100 #undef _NV3D_CHANNEL_PROGRAMS_ENTRY
101
102
QueryThreadsAndWarpsOneSd(Nv3dDevicePtr p3dDevice,NvU32 sd,NvU32 * pMaxWarps,NvU32 * pThreadsPerWarp)103 static NvBool QueryThreadsAndWarpsOneSd(
104 Nv3dDevicePtr p3dDevice,
105 NvU32 sd,
106 NvU32 *pMaxWarps,
107 NvU32 *pThreadsPerWarp)
108 {
109 NvPushDevicePtr pPushDevice = p3dDevice->pPushDevice;
110 const NvPushImports *pImports = pPushDevice->pImports;
111 NvU32 ret;
112
113 NV2080_CTRL_GR_GET_INFO_PARAMS grInfoParams = { 0 };
114 struct {
115 NV2080_CTRL_GR_INFO numSMs;
116 NV2080_CTRL_GR_INFO maxWarpsPerSM;
117 NV2080_CTRL_GR_INFO threadsPerWarp;
118 } grInfo;
119
120 NVMISC_MEMSET(&grInfo, 0, sizeof(grInfo));
121
122 grInfo.numSMs.index =
123 NV2080_CTRL_GR_INFO_INDEX_THREAD_STACK_SCALING_FACTOR;
124 grInfo.maxWarpsPerSM.index =
125 NV2080_CTRL_GR_INFO_INDEX_MAX_WARPS_PER_SM;
126 grInfo.threadsPerWarp.index =
127 NV2080_CTRL_GR_INFO_INDEX_MAX_THREADS_PER_WARP;
128
129 grInfoParams.grInfoListSize =
130 sizeof(grInfo) / sizeof(NV2080_CTRL_GR_INFO);
131
132 grInfoParams.grInfoList = NV_PTR_TO_NvP64(&grInfo);
133
134 ret = pImports->rmApiControl(pPushDevice,
135 pPushDevice->subDevice[sd].handle,
136 NV2080_CTRL_CMD_GR_GET_INFO,
137 &grInfoParams,
138 sizeof(grInfoParams));
139
140 if (ret != NVOS_STATUS_SUCCESS) {
141 return FALSE;
142 }
143
144 *pMaxWarps = grInfo.numSMs.data * grInfo.maxWarpsPerSM.data;
145 *pThreadsPerWarp = grInfo.threadsPerWarp.data;
146
147 return TRUE;
148 }
149
GetMaxThreadsAndWarps(Nv3dDevicePtr p3dDevice)150 static NvBool GetMaxThreadsAndWarps(Nv3dDevicePtr p3dDevice)
151 {
152 NvU32 sd;
153
154 p3dDevice->maxThreadsPerWarp = 0;
155 p3dDevice->maxWarps = 0;
156
157 for (sd = 0; sd < p3dDevice->pPushDevice->numSubDevices; sd++) {
158
159 NvU32 maxWarps, threadsPerWarp;
160
161 if (!QueryThreadsAndWarpsOneSd(p3dDevice, sd,
162 &maxWarps, &threadsPerWarp)) {
163 return FALSE;
164 }
165
166 p3dDevice->maxThreadsPerWarp =
167 NV_MAX(p3dDevice->maxThreadsPerWarp, threadsPerWarp);
168
169 p3dDevice->maxWarps = NV_MAX(p3dDevice->maxWarps, maxWarps);
170 }
171
172 return TRUE;
173 }
174
175 /*!
176 * Get the SM version reported by resman.
177 *
178 * \params pPushDevice The nvidia-push device corresponding to the GPU.
179 *
180 * \return The SM version of this device.
181 */
GetSmVersion(NvPushDevicePtr pPushDevice)182 static NvU32 GetSmVersion(
183 NvPushDevicePtr pPushDevice)
184 {
185 NvU32 sd, smVersion = NV2080_CTRL_GR_INFO_SM_VERSION_NONE;
186
187 if (nvPushIsAModel(pPushDevice)) {
188 /*
189 * On amodel resman cannot tell us the SM version, so we pick
190 * the SM version based on NVAModelConfig.
191 */
192 static const NvU32 table[] = {
193 [NV_AMODEL_MAXWELL] = NV2080_CTRL_GR_INFO_SM_VERSION_5_0,
194 [NV_AMODEL_PASCAL] = NV2080_CTRL_GR_INFO_SM_VERSION_6_0,
195 [NV_AMODEL_VOLTA] = NV2080_CTRL_GR_INFO_SM_VERSION_7_0,
196 [NV_AMODEL_TURING] = NV2080_CTRL_GR_INFO_SM_VERSION_7_5,
197 [NV_AMODEL_AMPERE] = NV2080_CTRL_GR_INFO_SM_VERSION_8_2,
198 [NV_AMODEL_ADA] = NV2080_CTRL_GR_INFO_SM_VERSION_8_9,
199 [NV_AMODEL_HOPPER] = NV2080_CTRL_GR_INFO_SM_VERSION_9_0,
200 };
201
202 if (pPushDevice->amodelConfig >= ARRAY_LEN(table)) {
203 return NV2080_CTRL_GR_INFO_SM_VERSION_NONE;
204 }
205
206 return table[pPushDevice->amodelConfig];
207 }
208
209 /*
210 * Query the SM version from resman. This query is per-subDevice,
211 * but we use SM version per-device, so assert that the SM version
212 * matches across subDevices.
213 */
214 for (sd = 0; sd < pPushDevice->numSubDevices; sd++) {
215
216 const NvPushImports *pImports = pPushDevice->pImports;
217 NV2080_CTRL_GR_GET_INFO_PARAMS params = { };
218 NV2080_CTRL_GR_INFO smVersionParams = { };
219 NvU32 ret;
220
221 smVersionParams.index = NV2080_CTRL_GR_INFO_INDEX_SM_VERSION;
222 params.grInfoListSize = 1;
223 params.grInfoList = NV_PTR_TO_NvP64(&smVersionParams);
224
225 ret = pImports->rmApiControl(pPushDevice,
226 pPushDevice->subDevice[sd].handle,
227 NV2080_CTRL_CMD_GR_GET_INFO,
228 ¶ms,
229 sizeof(params));
230
231 if (ret != NVOS_STATUS_SUCCESS) {
232 return NV2080_CTRL_GR_INFO_SM_VERSION_NONE;
233 }
234
235 if (sd == 0) {
236 smVersion = smVersionParams.data;
237 } else {
238 nvAssert(smVersion == smVersionParams.data);
239 }
240 }
241
242 return smVersion;
243 }
244
245 /*!
246 * Get the SPA version to use with the 3D Class.
247 *
248 * Note that resman only reports the SM version (the "hardware
249 * revision"), not the SPA version (the ISA version). So we use a
250 * table to map from SM version to SPA version.
251 *
252 * \params pPushDevice The nvidia-push device corresponding to the GPU.
253 * \params pSpaVersion The spaVersion to assign.
254 *
255 * \return TRUE if the SPA version could be assigned.
256 */
GetSpaVersion(NvPushDevicePtr pPushDevice,Nv3dDeviceSpaVersionRec * pSpaVersion)257 static NvBool GetSpaVersion(
258 NvPushDevicePtr pPushDevice,
259 Nv3dDeviceSpaVersionRec *pSpaVersion)
260 {
261 static const struct {
262 NvU32 smVersion;
263 Nv3dDeviceSpaVersionRec spaVersion;
264 } table[] = {
265 /* Maxwell */
266 { NV2080_CTRL_GR_INFO_SM_VERSION_5_0, { 5,0 } },
267 { NV2080_CTRL_GR_INFO_SM_VERSION_5_2, { 5,2 } },
268 { NV2080_CTRL_GR_INFO_SM_VERSION_5_3, { 5,3 } },
269
270 /* Pascal */
271 { NV2080_CTRL_GR_INFO_SM_VERSION_6_0, { 5,5 } },
272 { NV2080_CTRL_GR_INFO_SM_VERSION_6_1, { 5,5 } },
273 { NV2080_CTRL_GR_INFO_SM_VERSION_6_2, { 5,6 } },
274
275 /* Volta */
276 { NV2080_CTRL_GR_INFO_SM_VERSION_7_0, { 7,0 } },
277 { NV2080_CTRL_GR_INFO_SM_VERSION_7_2, { 7,2 } },
278
279 /* Turing */
280 { NV2080_CTRL_GR_INFO_SM_VERSION_7_3, { 7,3 } },
281 { NV2080_CTRL_GR_INFO_SM_VERSION_7_5, { 7,5 } },
282
283 /* Ampere */
284 { NV2080_CTRL_GR_INFO_SM_VERSION_8_2, { 8,2 } },
285 { NV2080_CTRL_GR_INFO_SM_VERSION_8_6, { 8,6 } },
286 { NV2080_CTRL_GR_INFO_SM_VERSION_8_7, { 8,6 } },
287 { NV2080_CTRL_GR_INFO_SM_VERSION_8_8, { 8,6 } },
288
289 /* Ada */
290 { NV2080_CTRL_GR_INFO_SM_VERSION_8_9, { 8,9 } },
291
292 /* Hopper */
293 { NV2080_CTRL_GR_INFO_SM_VERSION_9_0, { 9,0 } },
294
295 };
296
297 const NvU32 smVersion = GetSmVersion(pPushDevice);
298 NvU32 i;
299
300 for (i = 0; i < ARRAY_LEN(table); i++) {
301 if (table[i].smVersion == smVersion) {
302 *pSpaVersion = table[i].spaVersion;
303 return TRUE;
304 }
305 }
306
307 return FALSE;
308 }
309
310 static const Nv3dHal _nv3dHalMaxwell = {
311 _nv3dSetSpaVersionKepler, /* setSpaVersion */
312 _nv3dInitChannelMaxwell, /* initChannel */
313 _nv3dUploadDataInlineKepler, /* uploadDataInline */
314 _nv3dSetProgramOffsetFermi, /* setProgramOffset */
315 _nv3dAssignNv3dTextureMaxwell, /* assignNv3dTexture */
316 _nv3dSetVertexStreamEndFermi, /* setVertexStreamEnd */
317 };
318
319 static const Nv3dHal _nv3dHalPascal = {
320 _nv3dSetSpaVersionKepler, /* setSpaVersion */
321 _nv3dInitChannelPascal, /* initChannel */
322 _nv3dUploadDataInlineKepler, /* uploadDataInline */
323 _nv3dSetProgramOffsetFermi, /* setProgramOffset */
324 _nv3dAssignNv3dTexturePascal, /* assignNv3dTexture */
325 _nv3dSetVertexStreamEndFermi, /* setVertexStreamEnd */
326 };
327
328 static const Nv3dHal _nv3dHalVolta = {
329 _nv3dSetSpaVersionKepler, /* setSpaVersion */
330 _nv3dInitChannelPascal, /* initChannel */
331 _nv3dUploadDataInlineKepler, /* uploadDataInline */
332 _nv3dSetProgramOffsetVolta, /* setProgramOffset */
333 _nv3dAssignNv3dTexturePascal, /* assignNv3dTexture */
334 _nv3dSetVertexStreamEndFermi, /* setVertexStreamEnd */
335 };
336
337 static const Nv3dHal _nv3dHalTuring = {
338 _nv3dSetSpaVersionKepler, /* setSpaVersion */
339 _nv3dInitChannelTuring, /* initChannel */
340 _nv3dUploadDataInlineKepler, /* uploadDataInline */
341 _nv3dSetProgramOffsetVolta, /* setProgramOffset */
342 _nv3dAssignNv3dTexturePascal, /* assignNv3dTexture */
343 _nv3dSetVertexStreamEndTuring, /* setVertexStreamEnd */
344 };
345
346 static const Nv3dHal _nv3dHalAmpere = {
347 _nv3dSetSpaVersionKepler, /* setSpaVersion */
348 _nv3dInitChannelTuring, /* initChannel */
349 _nv3dUploadDataInlineKepler, /* uploadDataInline */
350 _nv3dSetProgramOffsetVolta, /* setProgramOffset */
351 _nv3dAssignNv3dTexturePascal, /* assignNv3dTexture */
352 _nv3dSetVertexStreamEndTuring, /* setVertexStreamEnd */
353 };
354
355 static const Nv3dHal _nv3dHalHopper = {
356 _nv3dSetSpaVersionKepler, /* setSpaVersion */
357 _nv3dInitChannelHopper, /* initChannel */
358 _nv3dUploadDataInlineKepler, /* uploadDataInline */
359 _nv3dSetProgramOffsetVolta, /* setProgramOffset */
360 _nv3dAssignNv3dTextureHopper, /* assignNv3dTexture */
361 _nv3dSetVertexStreamEndTuring, /* setVertexStreamEnd */
362 };
363
nv3dAllocDevice(const Nv3dAllocDeviceParams * pParams,Nv3dDevicePtr p3dDevice)364 NvBool nv3dAllocDevice(
365 const Nv3dAllocDeviceParams *pParams,
366 Nv3dDevicePtr p3dDevice)
367 {
368 static const struct {
369 NvPushSupportedClass base;
370 const Nv3dDeviceCapsRec caps;
371 const Nv3dHal *hal;
372 enum Nv3dShaderArch shaderArch;
373 } table[] = {
374
375 #define ENTRY(_classNumber, \
376 _arch, \
377 _amodelArch, \
378 _hasSetBindlessTexture, \
379 _hasProgramRegion, \
380 _maxDim, \
381 _hal) \
382 { \
383 .base.classNumber = _classNumber, \
384 .base.amodelConfig = NV_AMODEL_ ## _amodelArch, \
385 .caps.hasSetBindlessTexture = _hasSetBindlessTexture, \
386 .caps.hasProgramRegion = _hasProgramRegion, \
387 .caps.maxDim = _maxDim, \
388 .hal = &_nv3dHal ## _hal, \
389 .shaderArch = NV3D_SHADER_ARCH_ ## _arch,\
390 }
391
392 /*
393 * hal--------------------------------------------------+
394 * maxDim----------------------------------------+ |
395 * hasProgramRegion---------------------------+ | |
396 * hasSetBindlessTexture-------------------+ | | |
397 * amodel arch----------------+ | | | |
398 * shader arch---+ | | | | |
399 * classNumber | | | | | |
400 * | | | | | | |
401 */
402 ENTRY(HOPPER_A, HOPPER, HOPPER, 0, 0, 32768, Hopper),
403 ENTRY(ADA_A, AMPERE, ADA, 0, 0, 32768, Ampere),
404 ENTRY(AMPERE_B, AMPERE, AMPERE, 0, 0, 32768, Ampere),
405 ENTRY(AMPERE_A, AMPERE, AMPERE, 0, 0, 32768, Ampere),
406 ENTRY(TURING_A, TURING, TURING, 0, 0, 32768, Turing),
407 ENTRY(VOLTA_A, VOLTA, VOLTA, 0, 0, 32768, Volta),
408 ENTRY(PASCAL_B, PASCAL, PASCAL, 1, 1, 32768, Pascal),
409 ENTRY(PASCAL_A, PASCAL, PASCAL, 1, 1, 32768, Pascal),
410 ENTRY(MAXWELL_B, MAXWELL, MAXWELL, 1, 1, 16384, Maxwell),
411 ENTRY(MAXWELL_A, MAXWELL, MAXWELL, 1, 1, 16384, Maxwell),
412 };
413
414 int i;
415
416 NVMISC_MEMSET(p3dDevice, 0, sizeof(*p3dDevice));
417
418 /* find the first supported 3D HAL */
419
420 i = nvPushGetSupportedClassIndex(pParams->pPushDevice,
421 table,
422 sizeof(table[0]),
423 ARRAY_LEN(table));
424 if (i == -1) {
425 goto fail;
426 }
427
428 if (!GetSpaVersion(pParams->pPushDevice, &p3dDevice->spaVersion)) {
429 goto fail;
430 }
431
432 p3dDevice->pPushDevice = pParams->pPushDevice;
433 p3dDevice->caps = table[i].caps;
434 p3dDevice->classNumber = table[i].base.classNumber;
435 p3dDevice->hal = table[i].hal;
436 p3dDevice->shaderArch = table[i].shaderArch;
437
438 if (!GetMaxThreadsAndWarps(p3dDevice)) {
439 goto fail;
440 }
441
442 return TRUE;
443
444 fail:
445 nv3dFreeDevice(p3dDevice);
446 return FALSE;
447 }
448
/*!
 * Release an Nv3dDeviceRec.
 *
 * The record only caches queried information, so there is nothing to free
 * yet; the whole structure is simply reset to zero.
 *
 * \param p3dDevice  Device record to clear.
 */
void nv3dFreeDevice(Nv3dDevicePtr p3dDevice)
{
    NVMISC_MEMSET(p3dDevice, 0, sizeof(*p3dDevice));
}
457
ComputeProgramLocalMemorySize(const Nv3dChannelRec * p3dChannel)458 static NvU64 ComputeProgramLocalMemorySize(
459 const Nv3dChannelRec *p3dChannel)
460 {
461 const Nv3dDeviceRec *p3dDevice = p3dChannel->p3dDevice;
462
463 // LocalMemorySizePerSM needs to be a multiple of 512
464 // Note that maxLocalBytes and/or maxStackBytes might be zero.
465 const NvU64 defaultSizePerWarp =
466 NV_ALIGN_UP(p3dChannel->programs.maxLocalBytes *
467 p3dDevice->maxThreadsPerWarp +
468 p3dChannel->programs.maxStackBytes, 512);
469
470 // shader local memory lower bits must be a multiple of 128kB
471 return NV_ALIGN_UP(defaultSizePerWarp * p3dDevice->maxWarps, 128*1024);
472 }
473
nv3dAllocChannelState(const Nv3dAllocChannelStateParams * pParams,Nv3dChannelPtr p3dChannel)474 NvBool nv3dAllocChannelState(
475 const Nv3dAllocChannelStateParams *pParams,
476 Nv3dChannelPtr p3dChannel)
477 {
478 NVMISC_MEMSET(p3dChannel, 0, sizeof(*p3dChannel));
479
480 p3dChannel->p3dDevice = pParams->p3dDevice;
481
482 p3dChannel->numTextures = pParams->numTextures;
483 p3dChannel->numTextureBindings = pParams->numTextureBindings;
484
485 p3dChannel->hasFrameBoundaries = pParams->hasFrameBoundaries;
486
487 p3dChannel->programs = PickProgramsRec(pParams->p3dDevice);
488
489 p3dChannel->programLocalMemorySize =
490 ComputeProgramLocalMemorySize(p3dChannel);
491
492 _nv3dAssignSurfaceOffsets(pParams, p3dChannel);
493
494 return TRUE;
495 }
496
/*!
 * Tear down the software state of an Nv3dChannelRec.
 *
 * Asserts that every surface.handle[] slot is 0 and that pPushChannel is
 * NULL (presumably meaning the surface and push channel were released
 * beforehand -- confirm against the callers), then zeroes the record.
 *
 * \param p3dChannel  Channel record to clear.
 */
void nv3dFreeChannelState(Nv3dChannelPtr p3dChannel)
{
    int subDev = 0;

    while (subDev < NV_MAX_SUBDEVICES) {
        nvAssert(p3dChannel->surface.handle[subDev] == 0);
        subDev++;
    }

    nvAssert(p3dChannel->pPushChannel == NULL);

    NVMISC_MEMSET(p3dChannel, 0, sizeof(*p3dChannel));
}
507
508