1 /*
2 * SPDX-FileCopyrightText: Copyright (c) 2021 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
3 * SPDX-License-Identifier: MIT
4 *
5 * Permission is hereby granted, free of charge, to any person obtaining a
6 * copy of this software and associated documentation files (the "Software"),
7 * to deal in the Software without restriction, including without limitation
8 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
9 * and/or sell copies of the Software, and to permit persons to whom the
10 * Software is furnished to do so, subject to the following conditions:
11 *
12 * The above copyright notice and this permission notice shall be included in
13 * all copies or substantial portions of the Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
21 * DEALINGS IN THE SOFTWARE.
22 */
23
24 /*
25 * This file contains macros and inline functions used to actually program
26 * methods.
27 */
28
29 #ifndef __NVIDIA_PUSH_METHODS_H__
30 #define __NVIDIA_PUSH_METHODS_H__
31
32 #include "nvidia-push-types.h"
33
34 #include "class/cla16f.h"
35
36 #ifdef __cplusplus
37 extern "C" {
38 #endif
39
/*!
 * Write one dword of method data into the segment and advance the
 * segment's buffer pointer past it.
 */
static inline void __nvPushSetMethodDataSegment(NvPushChannelSegmentPtr s, const NvU32 data)
{
    (s->buffer++)->u = data;
}
45
/*!
 * Write one dword of method data into the channel's main segment.
 */
static inline void nvPushSetMethodData(NvPushChannelPtr p, const NvU32 data)
{
    __nvPushSetMethodDataSegment(&p->main, data);
}
50
51 #if NV_PUSH_ALLOW_FLOAT
/*!
 * Write one float of method data into the segment and advance the
 * segment's buffer pointer past it.
 */
static inline void __nvPushSetMethodDataSegmentF(NvPushChannelSegmentPtr s, const float data)
{
    (s->buffer++)->f = data;
}
57
/*!
 * Write one float of method data into the channel's main segment.
 */
static inline void nvPushSetMethodDataF(NvPushChannelPtr p, const float data)
{
    __nvPushSetMethodDataSegmentF(&p->main, data);
}
62 #endif
63
/*!
 * Write a 64-bit value into the segment as two dwords, most significant
 * 32 bits first.
 */
static inline void __nvPushSetMethodDataSegmentU64(NvPushChannelSegmentPtr s, const NvU64 data)
{
    __nvPushSetMethodDataSegment(s, NvU64_HI32(data));
    __nvPushSetMethodDataSegment(s, NvU64_LO32(data));
}
69
/*!
 * Write a 64-bit value into the channel's main segment, most significant
 * 32 bits first.
 */
static inline void nvPushSetMethodDataU64(NvPushChannelPtr p, const NvU64 data)
{
    __nvPushSetMethodDataSegmentU64(&p->main, data);
}
74
75 /* Little-endian: least significant bits first. */
/*!
 * Write a 64-bit value into the segment as two dwords in little-endian
 * order: least significant 32 bits first.
 */
static inline void __nvPushSetMethodDataSegmentU64LE(NvPushChannelSegmentPtr s, const NvU64 data)
{
    __nvPushSetMethodDataSegment(s, NvU64_LO32(data));
    __nvPushSetMethodDataSegment(s, NvU64_HI32(data));
}
81
/*!
 * Write a 64-bit value into the channel's main segment, least significant
 * 32 bits first.
 */
static inline void nvPushSetMethodDataU64LE(NvPushChannelPtr p, const NvU64 data)
{
    __nvPushSetMethodDataSegmentU64LE(&p->main, data);
}
86
87 void __nvPushMoveDWORDS(NvU32* dst, const NvU32* src, int dwords);
88
89 static inline void
nvDmaMoveDWORDS(NvPushChannelUnion * dst,const NvU32 * src,int dwords)90 nvDmaMoveDWORDS(NvPushChannelUnion *dst, const NvU32* src, int dwords)
91 {
92 // The 'dst' argument is an array of NvPushChannelUnion; it is safe
93 // to treat this as an array of NvU32, as long as NvU32 and
94 // NvPushChannelUnion are the same size.
95 ct_assert(sizeof(NvU32) == sizeof(NvPushChannelUnion));
96 __nvPushMoveDWORDS((NvU32 *)dst, src, dwords);
97 }
98
/*!
 * Copy 'dwords' dwords of inline data into the channel's main segment and
 * advance the segment's buffer pointer past them.
 */
static inline void nvPushInlineData(NvPushChannelPtr p, const void *data,
                                    size_t dwords)
{
    NvPushChannelSegmentPtr seg = &p->main;

    nvDmaMoveDWORDS(seg->buffer, (const NvU32 *)data, dwords);
    seg->buffer += dwords;
}
105
106 /*!
107 * Return the maximum method count: the maximum number of dwords that can be
108 * specified in the nvPushMethod() family of macros.
109 */
nvPushMaxMethodCount(const NvPushChannelRec * p)110 static inline NvU32 nvPushMaxMethodCount(const NvPushChannelRec *p)
111 {
112 /*
113 * The number of methods that can be specified in one NVA16F_DMA_METHOD
114 * header is limited by the bit field size of NVA16F_DMA_METHOD_COUNT: 28:16
115 * (i.e., maximum representable value 8191).
116 */
117 const NvU32 maxFromMethodCountMask = DRF_MASK(NVA16F_DMA_METHOD_COUNT);
118
119 /*
120 * Further, the method count must be smaller than half the total pushbuffer
121 * size minus one, to correctly distinguish empty and full pushbuffers. See
122 * nvPushHeader() for details.
123 */
124 const NvU32 pushBufferSizeInBytes = p->main.sizeInBytes;
125 const NvU32 pushBufferSizeInDWords = pushBufferSizeInBytes / 4;
126 const NvU32 pushBufferHalfSizeInDWords = pushBufferSizeInDWords / 2;
127
128 /*
129 * Subtract two from pushBufferHalfSizeInDWords:
130 *
131 * -1 to distinguish pushbuffer empty from full (see above).
132 *
133 * -1 to be smaller than, rather than equal to, the above constraints.
134 */
135 const NvU32 maxFromPushBufferSize = pushBufferHalfSizeInDWords - 2;
136
137 return NV_MIN(maxFromMethodCountMask, maxFromPushBufferSize);
138 }
139
140 // These macros verify that the values used in the methods fits
141 // into the defined ranges.
/*
 * Assert that the named field value NV<d><r><f><n> (e.g. an _ENABLE/_TRUE
 * style define) fits within the bit mask of field NV<d><r><f>.
 */
#define ASSERT_DRF_DEF(d, r, f, n) \
    nvAssert(!(~DRF_MASK(NV ## d ## r ## f) & (NV ## d ## r ## f ## n)))
/*
 * Assert that the numeric value 'n' fits within the bit mask of field
 * NV<d><r><f>.
 */
#define ASSERT_DRF_NUM(d, r, f, n) \
    nvAssert(!(~DRF_MASK(NV ## d ## r ## f) & (n)))
146
147 #if defined(DEBUG)
148 #include "class/clc36f.h" /* VOLTA_CHANNEL_GPFIFO_A */
149
150 /*
151 * When pushing GPFIFO methods (NVA16F_SEMAPHORE[ABCD]), all four
152 * methods must be pushed together. If the four methods are not
153 * pushed together, nvidia-push might wrap, injecting its progress
154 * tracking semaphore release methods in the middle, and perturb the
155 * NVA16F_SEMAPHOREA_OFFSET_UPPER and NVA16F_SEMAPHOREB_OFFSET_LOWER
156 * channel state.
157 *
158 * Return whether the methods described by the arguments include some,
159 * but not all, of A, B, C, and D. I.e., if the range starts at B, C,
160 * or D, or if the range ends at A, B, or C.
161 *
162 * Perform a similar check for Volta+ semaphore methods
163 * NVC36F_SEM_ADDR_LO..NVC36F_SEM_EXECUTE. Note that we always check for both
164 * sets of methods, regardless of the GPU we're actually running on. This is
165 * okay since:
166 * a) the NVC36F_SEM_ADDR_LO..NVC36F_SEM_EXECUTE method offsets were not used
167 * for anything from (a16f..c36f].
168 * b) the SEMAPHORE[ABCD] methods still exist on the newer classes (they
169 * haven't been reused for anything else)
170 */
__nvPushStartSplitsSemaphore(NvU32 method,NvU32 count,NvU32 secOp)171 static inline NvBool __nvPushStartSplitsSemaphore(
172 NvU32 method,
173 NvU32 count,
174 NvU32 secOp)
175 {
176 ct_assert(NVA16F_SEMAPHOREA < NVA16F_SEMAPHORED);
177 ct_assert(NVC36F_SEM_ADDR_LO < NVC36F_SEM_EXECUTE);
178
179 /*
180 * compute start and end as inclusive; if not incrementing, we
181 * assume end==start
182 */
183 const NvU32 start = method;
184 const NvU32 end = (secOp == NVA16F_DMA_SEC_OP_INC_METHOD) ?
185 (method + ((count - 1) * 4)) : method;
186
187 return ((start > NVA16F_SEMAPHOREA) && (start <= NVA16F_SEMAPHORED)) ||
188 ((end >= NVA16F_SEMAPHOREA) && (end < NVA16F_SEMAPHORED)) ||
189 ((start > NVC36F_SEM_ADDR_LO) && (start <= NVC36F_SEM_EXECUTE)) ||
190 ((end >= NVC36F_SEM_ADDR_LO) && (end < NVC36F_SEM_EXECUTE));
191 }
192 #endif /* DEBUG */
193
194 /*
195 * Note that _count+1 must be less than half the total pushbuffer size. This is
196 * required by GPFIFO because we can't reliably tell when we can write all the
197 * way to the end of the pushbuffer if we wrap (see bug 232454). This
198 * assumption ensures that there will be enough space once GET reaches PUT.
199 */
/*
 * Emit the method header '_header' into segment '_segment' of channel
 * '_push_buffer', after ensuring there is room for the header plus
 * '_count' data dwords (calling __nvPushMakeRoom() to wrap if needed),
 * and account for the consumed space in the segment's freeDwords.
 */
#define nvPushHeader(_push_buffer, _segment, _count, _header) do { \
    NvPushChannelSegmentPtr _pSegment = &(_push_buffer)->_segment; \
    nvAssert(((_count)+1) < ((_pSegment)->sizeInBytes / 8)); \
    if ((_pSegment)->freeDwords < ((_count)+1)) \
        __nvPushMakeRoom((_push_buffer), (_count) + 1); \
    __nvPushSetMethodDataSegment((_pSegment), (_header)); \
    (_pSegment)->freeDwords -= ((_count)+1); \
} while(0)
208
/*
 * Build and push a method header for '_count' dwords starting at method
 * '_offset' on subchannel '_subch', using SEC_OP '_opcode' (_INC_METHOD,
 * _NON_INC_METHOD, or _ONE_INC).
 *
 * Wrapped in do { } while(0), matching nvPushHeader(), so that the macro
 * expands to a single statement and remains safe when invoked with a
 * trailing semicolon inside an unbraced if/else body.
 */
#define __nvPushStart(_push_buffer, _segment, _subch, _offset, _count, _opcode) \
do { \
    /* GPFIFO semaphore methods must not be split across a header. */ \
    nvAssert(!__nvPushStartSplitsSemaphore( \
                 (_offset), \
                 (_count), \
                 NVA16F_DMA_SEC_OP ## _opcode)); \
    ASSERT_DRF_DEF(A16F, _DMA, _SEC_OP, _opcode); \
    ASSERT_DRF_NUM(A16F, _DMA, _METHOD_COUNT, _count); \
    ASSERT_DRF_NUM(A16F, _DMA, _METHOD_SUBCHANNEL, _subch); \
    ASSERT_DRF_NUM(A16F, _DMA, _METHOD_ADDRESS, (_offset) >> 2); \
    nvPushHeader((_push_buffer), _segment, (_count), \
        DRF_DEF(A16F, _DMA, _SEC_OP, _opcode) | \
        DRF_NUM(A16F, _DMA, _METHOD_COUNT, _count) | \
        DRF_NUM(A16F, _DMA, _METHOD_SUBCHANNEL, _subch) | \
        DRF_NUM(A16F, _DMA, _METHOD_ADDRESS, (_offset) >> 2)); \
} while(0)
225
226 // The GPU can encode a 13-bit constant method/data pair in a single DWORD.
227 #define nvPushImmedValSegment(_push_buffer, _segment, _subch, _offset, _data) { \
228 ASSERT_DRF_NUM(A16F, _DMA, _IMMD_DATA, _data); \
229 ASSERT_DRF_NUM(A16F, _DMA, _METHOD_SUBCHANNEL, _subch); \
230 ASSERT_DRF_NUM(A16F, _DMA, _METHOD_ADDRESS, (_offset) >> 2); \
231 if ((_push_buffer)->_segment.freeDwords < 1) \
232 __nvPushMakeRoom((_push_buffer), 1); \
233 __nvPushSetMethodDataSegment(&(_push_buffer)->_segment, \
234 DRF_DEF(A16F, _DMA, _SEC_OP, _IMMD_DATA_METHOD) | \
235 DRF_NUM(A16F, _DMA, _IMMD_DATA, _data) | \
236 DRF_NUM(A16F, _DMA, _METHOD_SUBCHANNEL, _subch) | \
237 DRF_NUM(A16F, _DMA, _METHOD_ADDRESS, (_offset) >> 2)); \
238 (_push_buffer)->_segment.freeDwords--; \
239 }
240
/* Push an immediate method/data pair into the channel's main segment. */
#define nvPushImmedVal(_push_buffer, _subch, _offset, _data) \
    nvPushImmedValSegment(_push_buffer, main, _subch, _offset, _data)

/*
 * Push an immediate method with a symbolic value: the data dword is the
 * token-pasted define <_offset>_V_<_val>.
 */
#define nvPushImmed(_push_buffer, _subch, _offset, _val) \
    nvPushImmedVal(_push_buffer, _subch, _offset, _offset##_V_##_val)
246
// Method headers, pushed into the channel's main segment.  The suffixes
// select the SEC_OP encoding:
//   nvPushMethod        (_INC_METHOD):     method address advances for each
//                                          data dword.
//   nvPushMethodNoIncr  (_NON_INC_METHOD): all data dwords go to the same
//                                          method address.
//   nvPushMethodOneIncr (_ONE_INC):        address advances once, then stays
//                                          fixed (per the NVA16F class
//                                          definition -- confirm against
//                                          cla16f.h).
#define nvPushMethod(_push_buffer, _subch, _offset, _count) \
    __nvPushStart(_push_buffer, main, _subch, _offset, _count, _INC_METHOD)
#define nvPushMethodNoIncr(_push_buffer, _subch, _offset, _count) \
    __nvPushStart(_push_buffer, main, _subch, _offset, _count, _NON_INC_METHOD)
#define nvPushMethodOneIncr(_push_buffer, _subch, _offset, _count) \
    __nvPushStart(_push_buffer, main, _subch, _offset, _count, _ONE_INC)
254
255 #ifdef __cplusplus
256 };
257 #endif
258
259 #endif /* __NVIDIA_PUSH_METHODS_H__ */
260