1 /*
2 * SPDX-FileCopyrightText: Copyright (c) 2017 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
3 * SPDX-License-Identifier: MIT
4 *
5 * Permission is hereby granted, free of charge, to any person obtaining a
6 * copy of this software and associated documentation files (the "Software"),
7 * to deal in the Software without restriction, including without limitation
8 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
9 * and/or sell copies of the Software, and to permit persons to whom the
10 * Software is furnished to do so, subject to the following conditions:
11 *
12 * The above copyright notice and this permission notice shall be included in
13 * all copies or substantial portions of the Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
21 * DEALINGS IN THE SOFTWARE.
22 */
23
24 #include "nvidia-3d.h"
25 #include "nvidia-3d-surface.h"
26 #include "nvidia-push-utils.h" /* nvPushIsAmodel() */
27
28 #include <nvos.h>
29
/*
 * Release the per-subdevice surface memory allocations, walking the
 * subdevices in reverse order, and zero the stored handles.
 */
static void FreeSurface(
    Nv3dChannelRec *p3dChannel)
{
    NvPushDevicePtr pPushDevice = p3dChannel->p3dDevice->pPushDevice;
    int sd = ARRAY_LEN(pPushDevice->subDevice);

    while (--sd >= 0) {
        const NvU32 hMemory = p3dChannel->surface.handle[sd];

        if (hMemory == 0) {
            continue;
        }

        NvU32 status = pPushDevice->pImports->rmApiFree(
            pPushDevice,
            pPushDevice->subDevice[sd].deviceHandle,
            hMemory);
        nvAssert(status == NVOS_STATUS_SUCCESS);
        (void)status;
        p3dChannel->surface.handle[sd] = 0;
    }
}
50
AllocSurface(Nv3dChannelRec * p3dChannel,NvU64 size)51 static NvBool AllocSurface(
52 Nv3dChannelRec *p3dChannel,
53 NvU64 size)
54 {
55 NvPushDevicePtr pPushDevice = p3dChannel->p3dDevice->pPushDevice;
56 const NvPushImports *pImports = pPushDevice->pImports;
57 int sd;
58
59 for (sd = 0;
60 sd < ARRAY_LEN(pPushDevice->subDevice) &&
61 pPushDevice->subDevice[sd].deviceHandle != 0;
62 sd++) {
63
64 NVOS32_PARAMETERS params = {
65 .hRoot = pPushDevice->clientHandle,
66 .hObjectParent = pPushDevice->subDevice[sd].deviceHandle,
67 .function = NVOS32_FUNCTION_ALLOC_SIZE,
68 .data.AllocSize.owner = pPushDevice->clientHandle,
69 .data.AllocSize.type = NVOS32_TYPE_SHADER_PROGRAM,
70 .data.AllocSize.size = size,
71 .data.AllocSize.attr =
72 DRF_DEF(OS32, _ATTR, _LOCATION, _VIDMEM) |
73 DRF_DEF(OS32, _ATTR, _PHYSICALITY, _ALLOW_NONCONTIGUOUS) |
74 DRF_DEF(OS32, _ATTR, _COHERENCY, _WRITE_COMBINE),
75 .data.AllocSize.attr2 =
76 DRF_DEF(OS32, _ATTR2, _GPU_CACHEABLE, _YES),
77 .data.AllocSize.flags = 0,
78 .data.AllocSize.alignment = 4096,
79 };
80
81 NvU32 ret = pImports->rmApiVidHeapControl(pPushDevice, ¶ms);
82
83 if (ret != NVOS_STATUS_SUCCESS) {
84 FreeSurface(p3dChannel);
85 return FALSE;
86 }
87
88 p3dChannel->surface.handle[sd] = params.data.AllocSize.hMemory;
89 }
90
91 return TRUE;
92 }
93
/*
 * Unmap the surface from the GPU virtual address space on every
 * subdevice that has an allocation, in reverse subdevice order.
 */
static void UnmapSurface(
    const Nv3dChannelRec *p3dChannel,
    NvU64 gpuAddress)
{
    NvPushDevicePtr pPushDevice = p3dChannel->p3dDevice->pPushDevice;
    const NvPushImports *pImports = pPushDevice->pImports;
    int sd = ARRAY_LEN(p3dChannel->surface.handle);

    while (--sd >= 0) {
        const NvU32 hMemory = p3dChannel->surface.handle[sd];

        if (hMemory == 0) {
            continue;
        }

        NvU32 status = pImports->rmApiUnmapMemoryDma(
            pPushDevice,
            pPushDevice->subDevice[sd].deviceHandle,
            pPushDevice->subDevice[sd].gpuVASpaceCtxDma,
            hMemory,
            0,
            gpuAddress);
        nvAssert(status == NVOS_STATUS_SUCCESS);
        (void)status;
    }
}
116
/*
 * Map the per-subdevice surface allocations into the GPU virtual
 * address space, forcing every subdevice to use the virtual address
 * that the first mapping returned.
 *
 * Returns the common GPU virtual address, or 0 on failure (any
 * mappings made for earlier subdevices are undone first).
 */
static NvU64 MapSurface(
    const Nv3dChannelRec *p3dChannel,
    NvU64 size)
{
    NvPushDevicePtr pPushDevice = p3dChannel->p3dDevice->pPushDevice;
    const NvPushImports *pImports = pPushDevice->pImports;
    NvU64 commonAddress = 0;
    int sd;

    for (sd = 0;
         sd < ARRAY_LEN(p3dChannel->surface.handle) &&
         p3dChannel->surface.handle[sd] != 0;
         sd++) {

        NvU32 mapFlags = DRF_DEF(OS46, _FLAGS, _CACHE_SNOOP, _ENABLE);
        NvU64 address;

        if (sd == 0) {
            /* RM chooses the virtual address for the first subdevice. */
            address = 0;
        } else {
            /* Later subdevices must land at that same virtual address. */
            nvAssert(commonAddress != 0);
            mapFlags = FLD_SET_DRF(OS46, _FLAGS, _DMA_OFFSET_FIXED, _TRUE,
                                   mapFlags);
            address = commonAddress;
        }

        NvU32 status = pImports->rmApiMapMemoryDma(pPushDevice,
                                          pPushDevice->subDevice[sd].deviceHandle,
                                          pPushDevice->subDevice[sd].gpuVASpaceCtxDma,
                                          p3dChannel->surface.handle[sd],
                                          0,
                                          size,
                                          mapFlags,
                                          &address);
        if (status != NVOS_STATUS_SUCCESS) {
            if (sd != 0) {
                /* Undo the mappings made for earlier subdevices. */
                UnmapSurface(p3dChannel, commonAddress);
            }
            return 0;
        }

        if (sd == 0) {
            commonAddress = address;
        } else {
            nvAssert(commonAddress == address);
        }
    }

    return commonAddress;
}
168
nv3dAllocChannelSurface(Nv3dChannelPtr p3dChannel)169 NvBool nv3dAllocChannelSurface(Nv3dChannelPtr p3dChannel)
170 {
171 const NvU64 size = p3dChannel->surface.totalSize;
172 NvU64 gpuAddress;
173
174 if (!AllocSurface(p3dChannel, size)) {
175 return FALSE;
176 }
177
178 gpuAddress = MapSurface(p3dChannel, size);
179
180 if (gpuAddress == 0) {
181 FreeSurface(p3dChannel);
182 return FALSE;
183 }
184
185 p3dChannel->surface.gpuAddress = gpuAddress;
186
187 return TRUE;
188 }
189
/*
 * Tear down the channel's surface: unmap it (after idling the
 * channel) and free the per-subdevice allocations.  Safe to call on
 * a channel whose surface was never allocated.
 */
void nv3dFreeChannelSurface(Nv3dChannelPtr p3dChannel)
{
    if (p3dChannel->p3dDevice == NULL) {
        /* Nothing was ever set up for this channel. */
        return;
    }

    const NvU64 gpuAddress = p3dChannel->surface.gpuAddress;

    if (gpuAddress != 0) {
        /*
         * The surface is mapped into our channel: idle the channel
         * first, so that no in-flight methods still reference the
         * gpuAddress when the mapping goes away.
         */
        nvPushIdleChannel(p3dChannel->pPushChannel);

        UnmapSurface(p3dChannel, gpuAddress);
        p3dChannel->surface.gpuAddress = 0;
    }

    FreeSurface(p3dChannel);
}
211
212 /*
213 * The Nv3dChannelRec's surface contains:
214 *
215 * programLocalMemory
216 * programCode
217 * programConstants
218 * Nv3dTexture[numTextures]
219 * bindlessTextureConstantBuffer (optionally)
220 * Nv3dConstantBuffer[numConstantBuffers]
221 * vertexStreams
222 *
223 * Where all items are aligned to NV3D_TEXTURE_PITCH_ALIGNMENT.
224 *
225 * Compute all the offsets into the surface, and the total surface
226 * size.
227 *
228 * XXX TODO: use correct alignment for all items, rather than
229 * NV3D_TEXTURE_PITCH_ALIGNMENT.
230 */
void _nv3dAssignSurfaceOffsets(
    const Nv3dAllocChannelStateParams *pParams,
    Nv3dChannelPtr p3dChannel)
{
    const NvU32 programPrefetchPadding = 2048;

    NvU64 cur = 0;
    enum Nv3dVertexAttributeStreamType stream;

    /*
     * Program local memory requires at least 4k alignment, so it goes
     * at offset 0, the start of the surface.
     */
    p3dChannel->surface.programLocalMemoryOffset = cur;
    cur = NV_ALIGN_UP(cur + p3dChannel->programLocalMemorySize,
                      NV3D_TEXTURE_PITCH_ALIGNMENT);

    p3dChannel->surface.programOffset = cur;
    cur = NV_ALIGN_UP(cur + p3dChannel->programs.code.decompressedSize,
                      NV3D_TEXTURE_PITCH_ALIGNMENT);

    p3dChannel->surface.programConstantsOffset = cur;
    cur = NV_ALIGN_UP(cur + p3dChannel->programs.constants.size,
                      NV3D_TEXTURE_PITCH_ALIGNMENT);

    p3dChannel->surface.textureOffset = cur;
    cur = NV_ALIGN_UP(cur + (sizeof(Nv3dTexture) * pParams->numTextures),
                      NV3D_TEXTURE_PITCH_ALIGNMENT);

    p3dChannel->surface.bindlessTextureConstantBufferOffset = cur;
    cur = NV_ALIGN_UP(cur + NV3D_CONSTANT_BUFFER_SIZE,
                      NV3D_TEXTURE_PITCH_ALIGNMENT);

    p3dChannel->surface.constantBufferOffset = cur;
    cur = NV_ALIGN_UP(cur + (NV3D_CONSTANT_BUFFER_SIZE *
                             pParams->numConstantBuffers),
                      NV3D_TEXTURE_PITCH_ALIGNMENT);

    /*
     * TODO: not all nvidia-3d host drivers will require the vertex stream
     * memory; maybe host drivers should opt in?
     */
    for (stream = NV3D_VERTEX_ATTRIBUTE_STREAM_FIRST;
         stream < NV3D_VERTEX_ATTRIBUTE_STREAM_COUNT;
         stream++) {

        p3dChannel->surface.vertexStreamOffset[stream] = cur;
        cur = NV_ALIGN_UP(cur + NV3D_VERTEX_ATTRIBUTE_STREAM_SIZE,
                          NV3D_TEXTURE_PITCH_ALIGNMENT);
    }

    /*
     * Make sure the total surface size is large enough to cover any
     * potential prefetch region past the program code.
     */
    p3dChannel->surface.totalSize =
        NV_MAX(p3dChannel->surface.programOffset + programPrefetchPadding,
               cur);
}
295