1 /*
2  * SPDX-FileCopyrightText: Copyright (c) 2017 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
3  * SPDX-License-Identifier: MIT
4  *
5  * Permission is hereby granted, free of charge, to any person obtaining a
6  * copy of this software and associated documentation files (the "Software"),
7  * to deal in the Software without restriction, including without limitation
8  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
9  * and/or sell copies of the Software, and to permit persons to whom the
10  * Software is furnished to do so, subject to the following conditions:
11  *
12  * The above copyright notice and this permission notice shall be included in
13  * all copies or substantial portions of the Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
21  * DEALINGS IN THE SOFTWARE.
22  */
23 
24 #include "nvidia-3d.h"
25 #include "nvidia-3d-surface.h"
26 #include "nvidia-push-utils.h" /* nvPushIsAmodel() */
27 
28 #include <nvos.h>
29 
/*
 * Release every per-subdevice memory handle recorded in
 * p3dChannel->surface.handle[].
 *
 * Subdevices are visited from the last array slot down to the first.
 * Slots whose handle is zero are skipped.  Each handle that is freed
 * is reset to zero afterwards, so calling this again is harmless.
 */
static void FreeSurface(
    Nv3dChannelRec *p3dChannel)
{
    NvPushDevicePtr pDevice = p3dChannel->p3dDevice->pPushDevice;
    int idx = ARRAY_LEN(pDevice->subDevice);

    while (idx-- > 0) {
        NvU32 status;

        if (p3dChannel->surface.handle[idx] == 0) {
            /* Nothing was allocated for this subdevice. */
            continue;
        }

        status = pDevice->pImports->rmApiFree(
                     pDevice,
                     pDevice->subDevice[idx].deviceHandle,
                     p3dChannel->surface.handle[idx]);

        /* A failed free is only reported through the assert. */
        nvAssert(status == NVOS_STATUS_SUCCESS);
        (void)status;

        p3dChannel->surface.handle[idx] = 0;
    }
}
50 
AllocSurface(Nv3dChannelRec * p3dChannel,NvU64 size)51 static NvBool AllocSurface(
52     Nv3dChannelRec *p3dChannel,
53     NvU64 size)
54 {
55     NvPushDevicePtr pPushDevice = p3dChannel->p3dDevice->pPushDevice;
56     const NvPushImports *pImports = pPushDevice->pImports;
57     int sd;
58 
59     for (sd = 0;
60          sd < ARRAY_LEN(pPushDevice->subDevice) &&
61             pPushDevice->subDevice[sd].deviceHandle != 0;
62          sd++) {
63 
64         NVOS32_PARAMETERS params = {
65             .hRoot = pPushDevice->clientHandle,
66             .hObjectParent = pPushDevice->subDevice[sd].deviceHandle,
67             .function = NVOS32_FUNCTION_ALLOC_SIZE,
68             .data.AllocSize.owner = pPushDevice->clientHandle,
69             .data.AllocSize.type = NVOS32_TYPE_SHADER_PROGRAM,
70             .data.AllocSize.size = size,
71             .data.AllocSize.attr =
72                 DRF_DEF(OS32, _ATTR, _LOCATION, _VIDMEM) |
73                 DRF_DEF(OS32, _ATTR, _PHYSICALITY, _ALLOW_NONCONTIGUOUS) |
74                 DRF_DEF(OS32, _ATTR, _COHERENCY, _WRITE_COMBINE),
75             .data.AllocSize.attr2 =
76                 DRF_DEF(OS32, _ATTR2, _GPU_CACHEABLE, _YES),
77             .data.AllocSize.flags = 0,
78             .data.AllocSize.alignment = 4096,
79         };
80 
81         NvU32 ret = pImports->rmApiVidHeapControl(pPushDevice, &params);
82 
83         if (ret != NVOS_STATUS_SUCCESS) {
84             FreeSurface(p3dChannel);
85             return FALSE;
86         }
87 
88         p3dChannel->surface.handle[sd] = params.data.AllocSize.hMemory;
89     }
90 
91     return TRUE;
92 }
93 
/*
 * Remove the mapping at 'gpuAddress' on every subdevice that has a
 * non-zero entry in p3dChannel->surface.handle[].
 *
 * Subdevices are visited from the last array slot down to the first.
 * The handles themselves are left untouched; releasing them is the
 * job of FreeSurface().
 */
static void UnmapSurface(
    const Nv3dChannelRec *p3dChannel,
    NvU64 gpuAddress)
{
    NvPushDevicePtr pDevice = p3dChannel->p3dDevice->pPushDevice;
    const NvPushImports *pImports = pDevice->pImports;
    int idx = ARRAY_LEN(p3dChannel->surface.handle);

    while (idx-- > 0) {
        NvU32 status;

        if (p3dChannel->surface.handle[idx] == 0) {
            continue;
        }

        status = pImports->rmApiUnmapMemoryDma(
                     pDevice,
                     pDevice->subDevice[idx].deviceHandle,
                     pDevice->subDevice[idx].gpuVASpaceCtxDma,
                     p3dChannel->surface.handle[idx],
                     0,
                     gpuAddress);

        /* A failed unmap is only reported through the assert. */
        nvAssert(status == NVOS_STATUS_SUCCESS);
        (void)status;
    }
}
116 
/*
 * Map the per-subdevice surface allocations into each subdevice's GPU
 * virtual address space, all at the same virtual address.
 *
 * Subdevices are processed in ascending order, stopping at the first
 * zero entry in p3dChannel->surface.handle[].  The first subdevice
 * lets RM choose the address; every later subdevice is mapped at that
 * same address using the DMA_OFFSET_FIXED flag.
 *
 * Returns the shared GPU virtual address on success, or 0 on failure
 * (also 0 if there is no allocated handle to map).  On failure, only
 * the mappings that were actually established by this call are torn
 * down; the subdevice whose mapping failed, and any subdevices after
 * it, were never mapped and therefore must not be unmapped.
 */
static NvU64 MapSurface(
    const Nv3dChannelRec *p3dChannel,
    NvU64 size)
{
    NvPushDevicePtr pPushDevice = p3dChannel->p3dDevice->pPushDevice;
    const NvPushImports *pImports = pPushDevice->pImports;
    NvU64 gpuAddress = 0;
    int sd;

    for (sd = 0;
         sd < ARRAY_LEN(p3dChannel->surface.handle) &&
            p3dChannel->surface.handle[sd] != 0;
         sd++) {
        NvU32 flags = DRF_DEF(OS46, _FLAGS, _CACHE_SNOOP, _ENABLE);
        NvU64 thisGpuAddress;

        if (sd == 0) {
            /* For the first device, RM assigns a virtual address. */
            thisGpuAddress = 0;
        } else {
            /* For subsequent devices, use the same virtual address. */
            flags = FLD_SET_DRF(OS46, _FLAGS, _DMA_OFFSET_FIXED, _TRUE, flags);
            nvAssert(gpuAddress != 0);
            thisGpuAddress = gpuAddress;
        }

        NvU32 ret = pImports->rmApiMapMemoryDma(pPushDevice,
                                                pPushDevice->subDevice[sd].deviceHandle,
                                                pPushDevice->subDevice[sd].gpuVASpaceCtxDma,
                                                p3dChannel->surface.handle[sd],
                                                0,
                                                size,
                                                flags,
                                                &thisGpuAddress);
        if (ret != NVOS_STATUS_SUCCESS) {
            int mapped;

            /*
             * Unwind only subdevices [0, sd): those are the ones with a
             * live mapping at gpuAddress.  Subdevice 'sd' failed to map
             * and later ones were never attempted, even though their
             * memory handles are non-zero.
             */
            for (mapped = sd - 1; mapped >= 0; mapped--) {
                NvU32 unmapRet = pImports->rmApiUnmapMemoryDma(
                                     pPushDevice,
                                     pPushDevice->subDevice[mapped].deviceHandle,
                                     pPushDevice->subDevice[mapped].gpuVASpaceCtxDma,
                                     p3dChannel->surface.handle[mapped],
                                     0,
                                     gpuAddress);
                nvAssert(unmapRet == NVOS_STATUS_SUCCESS);
                (void)unmapRet;
            }
            return 0;
        }

        if (sd == 0) {
            gpuAddress = thisGpuAddress;
        } else {
            /* A fixed-offset mapping must land where it was requested. */
            nvAssert(gpuAddress == thisGpuAddress);
        }
    }

    return gpuAddress;
}
168 
nv3dAllocChannelSurface(Nv3dChannelPtr p3dChannel)169 NvBool nv3dAllocChannelSurface(Nv3dChannelPtr p3dChannel)
170 {
171     const NvU64 size = p3dChannel->surface.totalSize;
172     NvU64 gpuAddress;
173 
174     if (!AllocSurface(p3dChannel, size)) {
175         return FALSE;
176     }
177 
178     gpuAddress = MapSurface(p3dChannel, size);
179 
180     if (gpuAddress == 0) {
181         FreeSurface(p3dChannel);
182         return FALSE;
183     }
184 
185     p3dChannel->surface.gpuAddress = gpuAddress;
186 
187     return TRUE;
188 }
189 
/*
 * Unmap (if mapped) and free the channel's surface.
 *
 * Does nothing when p3dChannel->p3dDevice is NULL.  Otherwise, a
 * non-zero surface.gpuAddress is treated as "currently mapped": the
 * push channel is idled, the mapping is removed, and gpuAddress is
 * reset to 0.  The memory handles are then released in all cases.
 */
void nv3dFreeChannelSurface(Nv3dChannelPtr p3dChannel)
{
    if (p3dChannel->p3dDevice != NULL) {

        if (p3dChannel->surface.gpuAddress != 0) {
            /*
             * Methods already queued in the channel may still refer
             * to gpuAddress, so wait for the channel to go idle
             * before the address is unmapped.
             */
            nvPushIdleChannel(p3dChannel->pPushChannel);

            UnmapSurface(p3dChannel, p3dChannel->surface.gpuAddress);
            p3dChannel->surface.gpuAddress = 0;
        }

        FreeSurface(p3dChannel);
    }
}
211 
212 /*
213  * The Nv3dChannelRec's surface contains:
214  *
215  *   programLocalMemory
216  *   programCode
217  *   programConstants
218  *   Nv3dTexture[numTextures]
 *   bindlessTextureConstantBuffer (space is always reserved)
220  *   Nv3dConstantBuffer[numConstantBuffers]
221  *   vertexStreams
222  *
223  * Where all items are aligned to NV3D_TEXTURE_PITCH_ALIGNMENT.
224  *
225  * Compute all the offsets into the surface, and the total surface
226  * size.
227  *
228  * XXX TODO: use correct alignment for all items, rather than
229  * NV3D_TEXTURE_PITCH_ALIGNMENT.
230  */
void _nv3dAssignSurfaceOffsets(
    const Nv3dAllocChannelStateParams *pParams,
    Nv3dChannelPtr p3dChannel)
{
    /* Bytes past the start of the program code that must stay in bounds. */
    const NvU32 programPrefetchPadding = 2048;

    enum Nv3dVertexAttributeStreamType stream;
    NvU64 cursor = 0;
    NvU64 minimumSize;

    /*
     * Each region below starts at the current cursor; the cursor then
     * moves past the region and is rounded up to
     * NV3D_TEXTURE_PITCH_ALIGNMENT for the next one.
     */

    /*
     * Program local memory goes first: it requires at least 4k
     * alignment, which the start of the surface provides.
     */
    p3dChannel->surface.programLocalMemoryOffset = cursor;
    cursor = NV_ALIGN_UP(cursor + p3dChannel->programLocalMemorySize,
                         NV3D_TEXTURE_PITCH_ALIGNMENT);

    /* Program code. */
    p3dChannel->surface.programOffset = cursor;
    cursor = NV_ALIGN_UP(cursor + p3dChannel->programs.code.decompressedSize,
                         NV3D_TEXTURE_PITCH_ALIGNMENT);

    /* Program constants. */
    p3dChannel->surface.programConstantsOffset = cursor;
    cursor = NV_ALIGN_UP(cursor + p3dChannel->programs.constants.size,
                         NV3D_TEXTURE_PITCH_ALIGNMENT);

    /* One Nv3dTexture per requested texture. */
    p3dChannel->surface.textureOffset = cursor;
    cursor = NV_ALIGN_UP(cursor + (sizeof(Nv3dTexture) * pParams->numTextures),
                         NV3D_TEXTURE_PITCH_ALIGNMENT);

    /* A single constant buffer's worth of space for bindless textures. */
    p3dChannel->surface.bindlessTextureConstantBufferOffset = cursor;
    cursor = NV_ALIGN_UP(cursor + NV3D_CONSTANT_BUFFER_SIZE,
                         NV3D_TEXTURE_PITCH_ALIGNMENT);

    /* The requested number of constant buffers. */
    p3dChannel->surface.constantBufferOffset = cursor;
    cursor = NV_ALIGN_UP(cursor + (NV3D_CONSTANT_BUFFER_SIZE *
                                   pParams->numConstantBuffers),
                         NV3D_TEXTURE_PITCH_ALIGNMENT);

    /*
     * TODO: not all nvidia-3d host drivers will require the vertex stream
     * memory; maybe host drivers should opt in?
     */
    for (stream = NV3D_VERTEX_ATTRIBUTE_STREAM_FIRST;
         stream < NV3D_VERTEX_ATTRIBUTE_STREAM_COUNT;
         stream++) {

        p3dChannel->surface.vertexStreamOffset[stream] = cursor;
        cursor = NV_ALIGN_UP(cursor + NV3D_VERTEX_ATTRIBUTE_STREAM_SIZE,
                             NV3D_TEXTURE_PITCH_ALIGNMENT);
    }

    /*
     * The surface must extend at least programPrefetchPadding bytes
     * beyond the start of the program code, so that any potential
     * prefetch region is covered.
     */
    minimumSize = p3dChannel->surface.programOffset + programPrefetchPadding;

    p3dChannel->surface.totalSize = NV_MAX(minimumSize, cursor);
}
295