1 /*
2  * SPDX-FileCopyrightText: Copyright (c) 2021 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
3  * SPDX-License-Identifier: MIT
4  *
5  * Permission is hereby granted, free of charge, to any person obtaining a
6  * copy of this software and associated documentation files (the "Software"),
7  * to deal in the Software without restriction, including without limitation
8  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
9  * and/or sell copies of the Software, and to permit persons to whom the
10  * Software is furnished to do so, subject to the following conditions:
11  *
12  * The above copyright notice and this permission notice shall be included in
13  * all copies or substantial portions of the Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
21  * DEALINGS IN THE SOFTWARE.
22  */
23 
24 /*
25  * This file contains macros and inline functions used to actually program
26  * methods.
27  */
28 
29 #ifndef __NVIDIA_PUSH_METHODS_H__
30 #define __NVIDIA_PUSH_METHODS_H__
31 
32 #include "nvidia-push-types.h"
33 
34 #include "class/cla16f.h"
35 
36 #ifdef __cplusplus
37 extern "C" {
38 #endif
39 
/*!
 * Write one dword of method data into the given pushbuffer segment and
 * advance the segment's put pointer past it.
 */
static inline void __nvPushSetMethodDataSegment(NvPushChannelSegmentPtr s, const NvU32 data)
{
    /* Store at the current put position, then bump the pointer. */
    s->buffer++->u = data;
}
45 
/*! Write one dword of method data into the channel's main segment. */
static inline void nvPushSetMethodData(NvPushChannelPtr p, const NvU32 data)
{
    NvPushChannelSegmentPtr mainSeg = &p->main;

    __nvPushSetMethodDataSegment(mainSeg, data);
}
50 
51 #if NV_PUSH_ALLOW_FLOAT
/*!
 * Write one float of method data into the given pushbuffer segment and
 * advance the segment's put pointer past it.  Only available when the
 * build permits floating point (NV_PUSH_ALLOW_FLOAT).
 */
static inline void __nvPushSetMethodDataSegmentF(NvPushChannelSegmentPtr s, const float data)
{
    /* Store through the union's float member, then bump the pointer. */
    s->buffer++->f = data;
}
57 
/*! Write one float of method data into the channel's main segment. */
static inline void nvPushSetMethodDataF(NvPushChannelPtr p, const float data)
{
    NvPushChannelSegmentPtr mainSeg = &p->main;

    __nvPushSetMethodDataSegmentF(mainSeg, data);
}
62 #endif
63 
/*!
 * Write a 64-bit value into the segment as two dwords, most significant
 * 32 bits first.
 */
static inline void __nvPushSetMethodDataSegmentU64(NvPushChannelSegmentPtr s, const NvU64 data)
{
    const NvU32 hi = NvU64_HI32(data);
    const NvU32 lo = NvU64_LO32(data);

    __nvPushSetMethodDataSegment(s, hi);
    __nvPushSetMethodDataSegment(s, lo);
}
69 
/*!
 * Write a 64-bit value into the channel's main segment, most significant
 * 32 bits first.
 */
static inline void nvPushSetMethodDataU64(NvPushChannelPtr p, const NvU64 data)
{
    NvPushChannelSegmentPtr mainSeg = &p->main;

    __nvPushSetMethodDataSegmentU64(mainSeg, data);
}
74 
75 /* Little-endian: least significant bits first. */
__nvPushSetMethodDataSegmentU64LE(NvPushChannelSegmentPtr s,const NvU64 data)76 static inline void __nvPushSetMethodDataSegmentU64LE(NvPushChannelSegmentPtr s, const NvU64 data)
77 {
78     __nvPushSetMethodDataSegment(s, NvU64_LO32(data));
79     __nvPushSetMethodDataSegment(s, NvU64_HI32(data));
80 }
81 
/*!
 * Write a 64-bit value into the channel's main segment, least significant
 * 32 bits first.
 */
static inline void nvPushSetMethodDataU64LE(NvPushChannelPtr p, const NvU64 data)
{
    NvPushChannelSegmentPtr mainSeg = &p->main;

    __nvPushSetMethodDataSegmentU64LE(mainSeg, data);
}
86 
87 void __nvPushMoveDWORDS(NvU32* dst, const NvU32* src, int dwords);
88 
89 static inline void
nvDmaMoveDWORDS(NvPushChannelUnion * dst,const NvU32 * src,int dwords)90 nvDmaMoveDWORDS(NvPushChannelUnion *dst, const NvU32* src, int dwords)
91 {
92     // The 'dst' argument is an array of NvPushChannelUnion; it is safe
93     // to treat this as an array of NvU32, as long as NvU32 and
94     // NvPushChannelUnion are the same size.
95     ct_assert(sizeof(NvU32) == sizeof(NvPushChannelUnion));
96     __nvPushMoveDWORDS((NvU32 *)dst, src, dwords);
97 }
98 
/*!
 * Copy 'dwords' dwords of inline data into the channel's main segment and
 * advance the put pointer past them.
 *
 * NOTE(review): freeDwords is not checked here — presumably the space was
 * already reserved by the preceding method header push; confirm with
 * callers.
 */
static inline void nvPushInlineData(NvPushChannelPtr p, const void *data,
                                    size_t dwords)
{
    NvPushChannelSegmentPtr mainSeg = &p->main;

    nvDmaMoveDWORDS(mainSeg->buffer, (const NvU32 *)data, dwords);
    mainSeg->buffer += dwords;
}
105 
106 /*!
107  * Return the maximum method count: the maximum number of dwords that can be
108  * specified in the nvPushMethod() family of macros.
109  */
nvPushMaxMethodCount(const NvPushChannelRec * p)110 static inline NvU32 nvPushMaxMethodCount(const NvPushChannelRec *p)
111 {
112     /*
113      * The number of methods that can be specified in one NVA16F_DMA_METHOD
114      * header is limited by the bit field size of NVA16F_DMA_METHOD_COUNT: 28:16
115      * (i.e., maximum representable value 8191).
116      */
117     const NvU32 maxFromMethodCountMask = DRF_MASK(NVA16F_DMA_METHOD_COUNT);
118 
119     /*
120      * Further, the method count must be smaller than half the total pushbuffer
121      * size minus one, to correctly distinguish empty and full pushbuffers.  See
122      * nvPushHeader() for details.
123      */
124     const NvU32 pushBufferSizeInBytes = p->main.sizeInBytes;
125     const NvU32 pushBufferSizeInDWords = pushBufferSizeInBytes / 4;
126     const NvU32 pushBufferHalfSizeInDWords = pushBufferSizeInDWords / 2;
127 
128     /*
129      * Subtract two from pushBufferHalfSizeInDWords:
130      *
131      * -1 to distinguish pushbuffer empty from full (see above).
132      *
133      * -1 to be smaller than, rather than equal to, the above constraints.
134      */
135     const NvU32 maxFromPushBufferSize = pushBufferHalfSizeInDWords - 2;
136 
137     return NV_MIN(maxFromMethodCountMask, maxFromPushBufferSize);
138 }
139 
/*
 * Sanity-check that a value fits into the bit field named by the DRF
 * tuple (device, register, field): assert that no bits outside
 * DRF_MASK(NV<d><r><f>) are set.
 *
 * ASSERT_DRF_DEF checks a named field value (the NV<d><r><f><n> define);
 * ASSERT_DRF_NUM checks a raw numeric value (n).
 */
#define ASSERT_DRF_DEF(d, r, f, n) \
    nvAssert(!(~DRF_MASK(NV ## d ## r ## f) & (NV ## d ## r ## f ## n)))
#define ASSERT_DRF_NUM(d, r, f, n) \
    nvAssert(!(~DRF_MASK(NV ## d ## r ## f) & (n)))
146 
147 #if defined(DEBUG)
148 #include "class/clc36f.h"    /* VOLTA_CHANNEL_GPFIFO_A */
149 
150 /*
151  * When pushing GPFIFO methods (NVA16F_SEMAPHORE[ABCD]), all four
152  * methods must be pushed together.  If the four methods are not
153  * pushed together, nvidia-push might wrap, injecting its progress
154  * tracking semaphore release methods in the middle, and perturb the
155  * NVA16F_SEMAPHOREA_OFFSET_UPPER and NVA16F_SEMAPHOREB_OFFSET_LOWER
156  * channel state.
157  *
158  * Return whether the methods described by the arguments include some,
159  * but not all, of A, B, C, and D.  I.e., if the range starts at B, C,
160  * or D, or if the range ends at A, B, or C.
161  *
162  * Perform a similar check for Volta+ semaphore methods
163  * NVC36F_SEM_ADDR_LO..NVC36F_SEM_EXECUTE.  Note that we always check for both
164  * sets of methods, regardless of the GPU we're actually running on.  This is
165  * okay since:
166  * a) the NVC36F_SEM_ADDR_LO..NVC36F_SEM_EXECUTE method offsets were not used
167  *    for anything from (a16f..c36f].
168  * b) the SEMAPHORE[ABCD] methods still exist on the newer classes (they
169  *    haven't been reused for anything else)
170  */
__nvPushStartSplitsSemaphore(NvU32 method,NvU32 count,NvU32 secOp)171 static inline NvBool __nvPushStartSplitsSemaphore(
172     NvU32 method,
173     NvU32 count,
174     NvU32 secOp)
175 {
176     ct_assert(NVA16F_SEMAPHOREA < NVA16F_SEMAPHORED);
177     ct_assert(NVC36F_SEM_ADDR_LO < NVC36F_SEM_EXECUTE);
178 
179     /*
180      * compute start and end as inclusive; if not incrementing, we
181      * assume end==start
182      */
183     const NvU32 start = method;
184     const NvU32 end = (secOp == NVA16F_DMA_SEC_OP_INC_METHOD) ?
185         (method + ((count - 1) * 4)) : method;
186 
187     return ((start >  NVA16F_SEMAPHOREA)  && (start <= NVA16F_SEMAPHORED))  ||
188            ((end   >= NVA16F_SEMAPHOREA)  && (end   <  NVA16F_SEMAPHORED))  ||
189            ((start >  NVC36F_SEM_ADDR_LO) && (start <= NVC36F_SEM_EXECUTE)) ||
190            ((end   >= NVC36F_SEM_ADDR_LO) && (end   <  NVC36F_SEM_EXECUTE));
191 }
192 #endif /* DEBUG */
193 
194 /*
195  * Note that _count+1 must be less than half the total pushbuffer size.  This is
196  * required by GPFIFO because we can't reliably tell when we can write all the
197  * way to the end of the pushbuffer if we wrap (see bug 232454).  This
198  * assumption ensures that there will be enough space once GET reaches PUT.
199  */
200 #define nvPushHeader(_push_buffer, _segment, _count, _header) do {    \
201     NvPushChannelSegmentPtr _pSegment = &(_push_buffer)->_segment;    \
202     nvAssert(((_count)+1) < ((_pSegment)->sizeInBytes / 8));          \
203     if ((_pSegment)->freeDwords < ((_count)+1))                       \
204         __nvPushMakeRoom((_push_buffer), (_count) + 1);               \
205     __nvPushSetMethodDataSegment((_pSegment), (_header));             \
206     (_pSegment)->freeDwords -= ((_count)+1);                          \
207 } while(0)
208 
/*
 * Emit a method header for _count data dwords at method offset _offset on
 * subchannel _subch, using the NVA16F_DMA_SEC_OP encoding selected by
 * _opcode (e.g. _INC_METHOD, _NON_INC_METHOD, _ONE_INC).
 *
 * Wrapped in do { } while(0) — rather than a bare compound statement — so
 * the expansion is a single statement that is safe inside unbraced
 * if/else bodies, matching the nvPushHeader() convention above.
 *
 * The debug-only checks validate that each value fits its header bit
 * field and that the method range does not split a semaphore method
 * group (see __nvPushStartSplitsSemaphore()).
 */
#define __nvPushStart(_push_buffer, _segment, _subch, _offset, _count, _opcode) \
do {                                                                  \
    nvAssert(!__nvPushStartSplitsSemaphore(                           \
                                      (_offset),                      \
                                      (_count),                       \
                                      NVA16F_DMA_SEC_OP ## _opcode)); \
    ASSERT_DRF_DEF(A16F, _DMA, _SEC_OP, _opcode);                     \
    ASSERT_DRF_NUM(A16F, _DMA, _METHOD_COUNT, _count);                \
    ASSERT_DRF_NUM(A16F, _DMA, _METHOD_SUBCHANNEL, _subch);           \
    ASSERT_DRF_NUM(A16F, _DMA, _METHOD_ADDRESS, (_offset) >> 2);      \
    nvPushHeader((_push_buffer), _segment, (_count),                  \
        DRF_DEF(A16F, _DMA, _SEC_OP,               _opcode)  |        \
        DRF_NUM(A16F, _DMA, _METHOD_COUNT,         _count)   |        \
        DRF_NUM(A16F, _DMA, _METHOD_SUBCHANNEL,    _subch)   |        \
        DRF_NUM(A16F, _DMA, _METHOD_ADDRESS,    (_offset) >> 2));     \
} while(0)
225 
/*
 * The GPU can encode a 13-bit constant method/data pair in a single DWORD
 * (SEC_OP _IMMD_DATA_METHOD): the data rides in the header, so no
 * separate data dword is pushed and only one dword of space is needed.
 *
 * Wrapped in do { } while(0) — rather than a bare compound statement — so
 * the expansion is a single statement that is safe inside unbraced
 * if/else bodies, matching the nvPushHeader() convention above.
 */
#define nvPushImmedValSegment(_push_buffer, _segment, _subch, _offset, _data) do { \
    ASSERT_DRF_NUM(A16F, _DMA, _IMMD_DATA, _data);                    \
    ASSERT_DRF_NUM(A16F, _DMA, _METHOD_SUBCHANNEL, _subch);           \
    ASSERT_DRF_NUM(A16F, _DMA, _METHOD_ADDRESS, (_offset) >> 2);      \
    if ((_push_buffer)->_segment.freeDwords < 1)                      \
        __nvPushMakeRoom((_push_buffer), 1);                          \
    __nvPushSetMethodDataSegment(&(_push_buffer)->_segment,           \
        DRF_DEF(A16F, _DMA, _SEC_OP,     _IMMD_DATA_METHOD)  |        \
        DRF_NUM(A16F, _DMA, _IMMD_DATA,             _data)   |        \
        DRF_NUM(A16F, _DMA, _METHOD_SUBCHANNEL,    _subch)   |        \
        DRF_NUM(A16F, _DMA, _METHOD_ADDRESS,    (_offset) >> 2));     \
    (_push_buffer)->_segment.freeDwords--;                            \
} while(0)
240 
/* Push an immediate method with a raw data value into the main segment. */
#define nvPushImmedVal(_push_buffer, _subch, _offset, _data) \
    nvPushImmedValSegment(_push_buffer, main, _subch, _offset, _data)

/*
 * Push an immediate method with a named value: _val is pasted onto the
 * method name to form the <_offset>_V_<_val> define from the class header.
 */
#define nvPushImmed(_push_buffer, _subch, _offset, _val) \
    nvPushImmedVal(_push_buffer, _subch, _offset, _offset##_V_##_val)
246 
/*
 * Method headers (main segment).  The variants select the
 * NVA16F_DMA_SEC_OP encoding:
 *
 *   nvPushMethod        - _INC_METHOD: the method address advances by 4
 *                         for each data dword (see the range computation
 *                         in __nvPushStartSplitsSemaphore()).
 *   nvPushMethodNoIncr  - _NON_INC_METHOD: all data dwords target the
 *                         same method address.
 *   nvPushMethodOneIncr - _ONE_INC: presumably the address increments
 *                         once and then stays fixed — confirm against
 *                         the cla16f class manual.
 */
#define nvPushMethod(_push_buffer, _subch, _offset, _count) \
    __nvPushStart(_push_buffer, main, _subch, _offset, _count, _INC_METHOD)
#define nvPushMethodNoIncr(_push_buffer, _subch, _offset, _count) \
    __nvPushStart(_push_buffer, main, _subch, _offset, _count, _NON_INC_METHOD)
#define nvPushMethodOneIncr(_push_buffer, _subch, _offset, _count) \
    __nvPushStart(_push_buffer, main, _subch, _offset, _count, _ONE_INC)
254 
255 #ifdef __cplusplus
256 };
257 #endif
258 
259 #endif /* __NVIDIA_PUSH_METHODS_H__ */
260