1 /*
2  * Southern Islands Register documentation
3  *
4  * Copyright (C) 2011  Advanced Micro Devices, Inc.
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a
7  * copy of this software and associated documentation files (the "Software"),
8  * to deal in the Software without restriction, including without limitation
9  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
10  * and/or sell copies of the Software, and to permit persons to whom the
11  * Software is furnished to do so, subject to the following conditions:
12  *
13  * The above copyright notice and this permission notice shall be included
14  * in all copies or substantial portions of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
17  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
19  * THE COPYRIGHT HOLDER(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
20  * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
21  * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
22  */
23 
24 #ifndef SID_H
25 #define SID_H
26 
27 #include "amdgfxregs.h"
28 
/* si values: start (OFFSET) and end (END) of each register space.
 * The size of a space is END - OFFSET.
 */
#define SI_CONFIG_REG_OFFSET 0x00008000
#define SI_CONFIG_REG_END    0x0000B000

#define SI_SH_REG_OFFSET 0x0000B000
#define SI_SH_REG_END    0x0000C000

#define SI_CONTEXT_REG_OFFSET 0x00028000
#define SI_CONTEXT_REG_END    0x00030000

#define CIK_UCONFIG_REG_OFFSET 0x00030000
#define CIK_UCONFIG_REG_END    0x00040000

#define SI_UCONFIG_PERF_REG_OFFSET 0x00034000
#define SI_UCONFIG_PERF_REG_END    0x00038000

/* For register shadowing: span covered by each register space. */
#define SI_SH_REG_SPACE_SIZE           (SI_SH_REG_END - SI_SH_REG_OFFSET)
#define SI_CONTEXT_REG_SPACE_SIZE      (SI_CONTEXT_REG_END - SI_CONTEXT_REG_OFFSET)
#define SI_UCONFIG_REG_SPACE_SIZE      (CIK_UCONFIG_REG_END - CIK_UCONFIG_REG_OFFSET)
#define SI_UCONFIG_PERF_REG_SPACE_SIZE (SI_UCONFIG_PERF_REG_END - SI_UCONFIG_PERF_REG_OFFSET)

/* Shadow buffer layout: SH, then CONTEXT, then UCONFIG, packed back to back.
 * Each offset is the previous offset plus the previous space's size; the
 * buffer size is the end of the UCONFIG part.
 */
#define SI_SHADOWED_SH_REG_OFFSET      0
#define SI_SHADOWED_CONTEXT_REG_OFFSET (SI_SHADOWED_SH_REG_OFFSET + SI_SH_REG_SPACE_SIZE)
#define SI_SHADOWED_UCONFIG_REG_OFFSET (SI_SHADOWED_CONTEXT_REG_OFFSET + SI_CONTEXT_REG_SPACE_SIZE)
#define SI_SHADOWED_REG_BUFFER_SIZE    (SI_SHADOWED_UCONFIG_REG_OFFSET + SI_UCONFIG_REG_SPACE_SIZE)
52 
/* Field encoders: EVENT_TYPE leaves its argument at bit 0, EVENT_INDEX
 * shifts its argument up to bit 8.
 */
#define EVENT_TYPE(x) ((x) << 0)

/* EVENT_INDEX values:
 *   0 - any non-TS event
 *   1 - ZPASS_DONE
 *   2 - SAMPLE_PIPELINESTAT
 *   3 - SAMPLE_STREAMOUTSTAT*
 *   4 - *S_PARTIAL_FLUSH
 *   5 - TS events
 */
#define EVENT_INDEX(x) ((x) << 8)

/* Event type codes. */
#define EVENT_TYPE_CACHE_FLUSH                  0x06
#define EVENT_TYPE_PS_PARTIAL_FLUSH             0x10
#define EVENT_TYPE_CACHE_FLUSH_AND_INV_TS_EVENT 0x14
#define EVENT_TYPE_ZPASS_DONE                   0x15
#define EVENT_TYPE_CACHE_FLUSH_AND_INV_EVENT    0x16
#define EVENT_TYPE_SO_VGTSTREAMOUT_FLUSH        0x1f
#define EVENT_TYPE_SAMPLE_STREAMOUTSTATS        0x20
69 
/* EVENT_WRITE_EOP (SI-VI) & RELEASE_MEM (GFX9): single-bit action flags */
#define EVENT_TCL1_VOL_ACTION_ENA 0x00001000 /* bit 12 */
#define EVENT_TC_VOL_ACTION_ENA   0x00002000 /* bit 13 */
#define EVENT_TC_WB_ACTION_ENA    0x00008000 /* bit 15 */
#define EVENT_TCL1_ACTION_ENA     0x00010000 /* bit 16 */
#define EVENT_TC_ACTION_ENA       0x00020000 /* bit 17 */
#define EVENT_TC_NC_ACTION_ENA    0x00080000 /* bit 19, GFX9+ */
#define EVENT_TC_WC_ACTION_ENA    0x00100000 /* bit 20, GFX9+ */
#define EVENT_TC_MD_ACTION_ENA    0x00200000 /* bit 21, GFX9+ */
79 
/* Predication operation codes. */
#define PREDICATION_OP_CLEAR     0x0
#define PREDICATION_OP_ZPASS     0x1
#define PREDICATION_OP_PRIMCOUNT 0x2
#define PREDICATION_OP_BOOL64    0x3
#define PREDICATION_OP_BOOL32    0x4

/* Shifts its argument up to bit 16. */
#define PRED_OP(x) ((x) << 16)

/* Bit 31. Built from an unsigned 1: shifting a signed int 1 into the sign bit
 * is undefined behavior in C and yields a negative value that sign-extends
 * when widened to 64 bits.
 */
#define PREDICATION_CONTINUE (1u << 31)

/* Bit 12: hint selection. */
#define PREDICATION_HINT_WAIT        (0 << 12)
#define PREDICATION_HINT_NOWAIT_DRAW (1 << 12)

/* Bit 8: draw-visibility selection. */
#define PREDICATION_DRAW_NOT_VISIBLE (0 << 8)
#define PREDICATION_DRAW_VISIBLE     (1 << 8)
95 
/* Mask covering the low three bits (0x7).
 * NOTE(review): going by the name, presumably applied to a texel pitch to test
 * its alignment - confirm against the users of this macro.
 */
#define R600_TEXEL_PITCH_ALIGNMENT_MASK 0x7
97 
/* None of the "registers" defined in this packet section exist in hardware.
 * These definitions only describe the packet encoding that the IB parser
 * understands, and serve as accurate documentation.
 *
 * The PKT3_* values below are opcodes; PKT3() places an opcode into a packet
 * header via PKT3_IT_OPCODE_S().
 */
#define PKT3_NOP                            0x10
#define PKT3_SET_BASE                       0x11
#define PKT3_CLEAR_STATE                    0x12
#define PKT3_INDEX_BUFFER_SIZE              0x13
#define PKT3_DISPATCH_DIRECT                0x15
#define PKT3_DISPATCH_INDIRECT              0x16
#define PKT3_OCCLUSION_QUERY                0x1F /* new for CIK */
#define PKT3_SET_PREDICATION                0x20
#define PKT3_COND_EXEC                      0x22
#define PKT3_PRED_EXEC                      0x23
#define PKT3_DRAW_INDIRECT                  0x24
#define PKT3_DRAW_INDEX_INDIRECT            0x25
#define PKT3_INDEX_BASE                     0x26
#define PKT3_DRAW_INDEX_2                   0x27
#define PKT3_CONTEXT_CONTROL                0x28
/* CC0_* field encoders (listed with PKT3_CONTEXT_CONTROL): each one masks its
 * argument to a single bit and shifts it to the bit position noted.
 */
#define CC0_LOAD_GLOBAL_CONFIG(x)     ((((unsigned)(x)) & 0x1) << 0)  /* bit 0 */
#define CC0_LOAD_PER_CONTEXT_STATE(x) ((((unsigned)(x)) & 0x1) << 1)  /* bit 1 */
#define CC0_LOAD_GLOBAL_UCONFIG(x)    ((((unsigned)(x)) & 0x1) << 15) /* bit 15 */
#define CC0_LOAD_GFX_SH_REGS(x)       ((((unsigned)(x)) & 0x1) << 16) /* bit 16 */
#define CC0_LOAD_CS_SH_REGS(x)        ((((unsigned)(x)) & 0x1) << 24) /* bit 24 */
#define CC0_LOAD_CE_RAM(x)            ((((unsigned)(x)) & 0x1) << 28) /* bit 28 */
#define CC0_UPDATE_LOAD_ENABLES(x)    ((((unsigned)(x)) & 0x1) << 31) /* bit 31 */

/* CC1_* field encoders: same one-bit scheme as CC0_*. */
#define CC1_SHADOW_GLOBAL_CONFIG(x)     ((((unsigned)(x)) & 0x1) << 0)  /* bit 0 */
#define CC1_SHADOW_PER_CONTEXT_STATE(x) ((((unsigned)(x)) & 0x1) << 1)  /* bit 1 */
#define CC1_SHADOW_GLOBAL_UCONFIG(x)    ((((unsigned)(x)) & 0x1) << 15) /* bit 15 */
#define CC1_SHADOW_GFX_SH_REGS(x)       ((((unsigned)(x)) & 0x1) << 16) /* bit 16 */
#define CC1_SHADOW_CS_SH_REGS(x)        ((((unsigned)(x)) & 0x1) << 24) /* bit 24 */
#define CC1_UPDATE_SHADOW_ENABLES(x)    ((((unsigned)(x)) & 0x1) << 31) /* bit 31 */
#define PKT3_INDEX_TYPE          0x2A /* not on GFX9 */
#define PKT3_DRAW_INDIRECT_MULTI 0x2C
/* Listed with DRAW_INDIRECT_MULTI: a location value plus two one-bit
 * enables placed at bits 30 and 31.
 */
#define R_2C3_DRAW_INDEX_LOC           0x2C3
#define S_2C3_COUNT_INDIRECT_ENABLE(x) ((((unsigned)(x)) & 0x1) << 30)
#define S_2C3_DRAW_INDEX_ENABLE(x)     ((((unsigned)(x)) & 0x1) << 31)

#define PKT3_DRAW_INDEX_AUTO       0x2D
#define PKT3_DRAW_INDEX_IMMD       0x2E /* not on CIK */
#define PKT3_NUM_INSTANCES         0x2F
#define PKT3_DRAW_INDEX_MULTI_AUTO 0x30
#define PKT3_INDIRECT_BUFFER_SI    0x32 /* not on CIK */
#define PKT3_INDIRECT_BUFFER_CONST 0x33

#define PKT3_STRMOUT_BUFFER_UPDATE 0x34
/* Listed with STRMOUT_BUFFER_UPDATE. The encoders mask their argument to the
 * field width before shifting: OFFSET_SOURCE is 2 bits at bit 1, DATA_TYPE is
 * 1 bit at bit 7, SELECT_BUFFER is 2 bits at bit 8.
 */
#define STRMOUT_STORE_BUFFER_FILLED_SIZE    1
#define STRMOUT_OFFSET_SOURCE(x)            ((((unsigned)(x)) & 0x3) << 1)
#define STRMOUT_OFFSET_FROM_PACKET          0
#define STRMOUT_OFFSET_FROM_VGT_FILLED_SIZE 1
#define STRMOUT_OFFSET_FROM_MEM             2
#define STRMOUT_OFFSET_NONE                 3
#define STRMOUT_DATA_TYPE(x)                ((((unsigned)(x)) & 0x1) << 7)
#define STRMOUT_SELECT_BUFFER(x)            ((((unsigned)(x)) & 0x3) << 8)

#define PKT3_DRAW_INDEX_OFFSET_2       0x35
#define PKT3_WRITE_DATA                0x37
#define PKT3_DRAW_INDEX_INDIRECT_MULTI 0x38
#define PKT3_MEM_SEMAPHORE             0x39
#define PKT3_MPEG_INDEX                0x3A /* not on CIK */

#define PKT3_WAIT_REG_MEM 0x3C
/* Listed with WAIT_REG_MEM: three comparison codes, a 2-bit MEM_SPACE field
 * at bit 4 and the PFP flag at bit 8.
 */
#define WAIT_REG_MEM_EQUAL            3
#define WAIT_REG_MEM_NOT_EQUAL        4
#define WAIT_REG_MEM_GREATER_OR_EQUAL 5
#define WAIT_REG_MEM_MEM_SPACE(x)     ((((unsigned)(x)) & 0x3) << 4)
#define WAIT_REG_MEM_PFP              (1 << 8)

#define PKT3_MEM_WRITE           0x3D /* not on CIK */
#define PKT3_INDIRECT_BUFFER_CIK 0x3F /* new on CIK */
163 
#define PKT3_COPY_DATA 0x40

/* Selector encoders: SRC_SEL keeps the low 4 bits of its argument in place,
 * DST_SEL masks its argument to 4 bits and shifts it to bit 8.
 */
#define COPY_DATA_SRC_SEL(x) ((x) & 0xf)
#define COPY_DATA_DST_SEL(x) ((((unsigned)(x)) & 0xf) << 8)

/* Selector values, sorted by value. The tag records which selector list the
 * value is grouped under: [src], [dst], or [src+dst] for both.
 */
#define COPY_DATA_REG          0 /* [src] */
#define COPY_DATA_SRC_MEM      1 /* [src] only valid as source */
#define COPY_DATA_DST_MEM_GRBM 1 /* [dst] sync across GRBM, deprecated */
#define COPY_DATA_TC_L2        2 /* [src+dst] */
#define COPY_DATA_GDS          3 /* [src+dst] */
#define COPY_DATA_PERF         4 /* [src+dst] */
#define COPY_DATA_IMM          5 /* [src] */
#define COPY_DATA_DST_MEM      5 /* [dst] */
#define COPY_DATA_TIMESTAMP    9 /* [src] */

/* Single-bit flags. */
#define COPY_DATA_COUNT_SEL  (1 << 16)
#define COPY_DATA_WR_CONFIRM (1 << 20)
#define COPY_DATA_ENGINE_PFP (1 << 30)
#define PKT3_PFP_SYNC_ME                       0x42
#define PKT3_SURFACE_SYNC                      0x43 /* deprecated on CIK, use ACQUIRE_MEM */
#define PKT3_ME_INITIALIZE                     0x44 /* not on CIK */
#define PKT3_COND_WRITE                        0x45
#define PKT3_EVENT_WRITE                       0x46
#define PKT3_EVENT_WRITE_EOP                   0x47 /* not on GFX9 */
#define PKT3_EVENT_WRITE_EOS                   0x48 /* not on GFX9 */

/* EOP field encoders: DST_SEL at bit 16, INT_SEL at bit 24, DATA_SEL at
 * bit 29. The argument is converted to unsigned before shifting because
 * EOP_DATA_SEL(EOP_DATA_SEL_GDS) is 5 << 29, which does not fit in a signed
 * int (undefined behavior, and a negative value in practice).
 */
#define EOP_DST_SEL(x)                         ((unsigned)(x) << 16)
#define EOP_DST_SEL_MEM                        0
#define EOP_DST_SEL_TC_L2                      1
#define EOP_INT_SEL(x)                         ((unsigned)(x) << 24)
#define EOP_INT_SEL_NONE                       0
#define EOP_INT_SEL_SEND_DATA_AFTER_WR_CONFIRM 3
#define EOP_DATA_SEL(x)                        ((unsigned)(x) << 29)
#define EOP_DATA_SEL_DISCARD                   0
#define EOP_DATA_SEL_VALUE_32BIT               1
#define EOP_DATA_SEL_VALUE_64BIT               2
#define EOP_DATA_SEL_TIMESTAMP                 3
#define EOP_DATA_SEL_GDS                       5
/* Combines dw_offset (low bits, not masked) with num_dwords shifted to bit 16. */
#define EOP_DATA_GDS(dw_offset, num_dwords)    ((dw_offset) | ((unsigned)(num_dwords) << 16))

/* EOS data selector, placed at bit 29; unsigned for the same reason as above. */
#define EOS_DATA_SEL(x)                        ((unsigned)(x) << 29)
#define EOS_DATA_SEL_APPEND_COUNT              0
#define EOS_DATA_SEL_GDS                       1
#define EOS_DATA_SEL_VALUE_32BIT               2
207 
208 /* CP DMA bug: Any use of CP_DMA.DST_SEL=TC must be avoided when EOS packets
209  * are used. Use DST_SEL=MC instead. For prefetch, use SRC_SEL=TC and
210  * DST_SEL=MC. Only CIK chips are affected.
211  */
/* NOTE: PKT3_EVENT_WRITE_EOS (0x48, not on GFX9) is defined above, next to
 * PKT3_EVENT_WRITE_EOP, so the CP DMA restriction described here applies
 * wherever EOS packets are emitted.
 */
/* PKT3 opcodes 0x49-0x9F. Trailing notes record the chip generations,
 * rings or firmware versions an opcode is limited to.
 */
#define PKT3_RELEASE_MEM            0x49 /* GFX9+ [any ring] or GFX8 [compute ring only] */
#define PKT3_CONTEXT_REG_RMW        0x51 /* older firmware versions on older chips don't have this */
#define PKT3_ONE_REG_WRITE          0x57 /* not on CIK */
#define PKT3_ACQUIRE_MEM            0x58 /* new for CIK */
#define PKT3_REWIND                 0x59 /* VI+ [any ring] or CIK [compute ring only] */
#define PKT3_LOAD_UCONFIG_REG       0x5E /* GFX7+ */
#define PKT3_LOAD_SH_REG            0x5F
#define PKT3_LOAD_CONTEXT_REG       0x61
#define PKT3_SET_CONFIG_REG         0x68
#define PKT3_SET_CONTEXT_REG        0x69
#define PKT3_SET_SH_REG             0x76
#define PKT3_SET_SH_REG_OFFSET      0x77
#define PKT3_SET_UCONFIG_REG        0x79 /* new for CIK */
#define PKT3_SET_UCONFIG_REG_INDEX  0x7A /* new for GFX9, CP ucode version >= 26 */
#define PKT3_LOAD_CONST_RAM         0x80
#define PKT3_WRITE_CONST_RAM        0x81
#define PKT3_DUMP_CONST_RAM         0x83
#define PKT3_INCREMENT_CE_COUNTER   0x84
#define PKT3_INCREMENT_DE_COUNTER   0x85
#define PKT3_WAIT_ON_CE_COUNTER     0x86
#define PKT3_SET_SH_REG_INDEX       0x9B
#define PKT3_LOAD_CONTEXT_REG_INDEX 0x9F /* new for VI */
236 
/* Packet header fields. Suffix convention, as implemented below:
 *   _S(x)  masks x to the field width and shifts it into position,
 *   _G(x)  extracts the field from a header value,
 *   _C     is the 32-bit mask with the field's bits cleared.
 */

/* Packet type: bits [31:30] */
#define PKT_TYPE_S(x) ((((unsigned)(x)) & 0x3) << 30)
#define PKT_TYPE_G(x) (((x) >> 30) & 0x3)
#define PKT_TYPE_C    0x3FFFFFFF

/* Count: bits [29:16] */
#define PKT_COUNT_S(x) ((((unsigned)(x)) & 0x3FFF) << 16)
#define PKT_COUNT_G(x) (((x) >> 16) & 0x3FFF)
#define PKT_COUNT_C    0xC000FFFF

/* Type-0 base index: bits [15:0] */
#define PKT0_BASE_INDEX_S(x) ((((unsigned)(x)) & 0xFFFF) << 0)
#define PKT0_BASE_INDEX_G(x) (((x) >> 0) & 0xFFFF)
#define PKT0_BASE_INDEX_C    0xFFFF0000

/* Type-3 opcode: bits [15:8] */
#define PKT3_IT_OPCODE_S(x) ((((unsigned)(x)) & 0xFF) << 8)
#define PKT3_IT_OPCODE_G(x) (((x) >> 8) & 0xFF)
#define PKT3_IT_OPCODE_C    0xFFFF00FF

/* Type-3 predicate is bit 0 (argument reduced to its lowest bit);
 * shader type is bit 1.
 */
#define PKT3_PREDICATE(x)     (((x) >> 0) & 0x1)
#define PKT3_SHADER_TYPE_S(x) ((((unsigned)(x)) & 0x1) << 1)

/* Header builders: OR together the type tag and the encoded fields. */
#define PKT0(index, count)                                                                         \
   (PKT_TYPE_S(0) | PKT0_BASE_INDEX_S(index) | PKT_COUNT_S(count))
#define PKT3(op, count, predicate)                                                                 \
   (PKT_TYPE_S(3) | PKT_COUNT_S(count) | PKT3_IT_OPCODE_S(op) | PKT3_PREDICATE(predicate))

/* Padding headers. */
#define PKT2_NOP_PAD PKT_TYPE_S(2)
#define PKT3_NOP_PAD PKT3(PKT3_NOP, 0x3fff, 0) /* header-only version */
257 
#define PKT3_CP_DMA 0x41
/* Packet layout, one numbered entry per dword:
 * 1. header
 * 2. SRC_ADDR_LO [31:0] or DATA [31:0]
 * 3. CP_SYNC [31] | SRC_SEL [30:29] | ENGINE [27] | DST_SEL [21:20] | SRC_ADDR_HI [15:0]
 * 4. DST_ADDR_LO [31:0]
 * 5. DST_ADDR_HI [15:0]
 * 6. COMMAND [29:22] | BYTE_COUNT [20:0]
 */

#define PKT3_DMA_DATA 0x50 /* new for CIK */
/* Packet layout, one numbered entry per dword:
 * 1. header
 * 2. CP_SYNC [31] | SRC_SEL [30:29] | DST_SEL [21:20] | ENGINE [0]
 * 3. SRC_ADDR_LO [31:0] or DATA [31:0]
 * 4. SRC_ADDR_HI [31:0]
 * 5. DST_ADDR_LO [31:0]
 * 6. DST_ADDR_HI [31:0]
 * 7. COMMAND [29:22] | BYTE_COUNT [20:0]
 */
276 
/* SI async DMA packets.
 * SI_DMA_PACKET builds a 32-bit value from three fields, each masked to its
 * width: cmd in bits [31:28], sub_cmd in bits [27:20], n in bits [19:0].
 */
#define SI_DMA_PACKET(cmd, sub_cmd, n)                                                             \
   (((((unsigned)(cmd)) & 0xF) << 28) |                                                            \
    ((((unsigned)(sub_cmd)) & 0xFF) << 20) |                                                       \
    ((((unsigned)(n)) & 0xFFFFF) << 0))

/* SI async DMA packet types */
#define SI_DMA_PACKET_WRITE           0x2
#define SI_DMA_PACKET_COPY            0x3
#define SI_DMA_PACKET_INDIRECT_BUFFER 0x4
#define SI_DMA_PACKET_SEMAPHORE       0x5
#define SI_DMA_PACKET_FENCE           0x6
#define SI_DMA_PACKET_TRAP            0x7
#define SI_DMA_PACKET_SRBM_WRITE      0x9
#define SI_DMA_PACKET_CONSTANT_FILL   0xd
#define SI_DMA_PACKET_NOP             0xf

/* Copy size limits. */
#define SI_DMA_COPY_MAX_BYTE_ALIGNED_SIZE 0xfffe0
/* The documentation says 0xffff8 is the maximum size in dwords, which is
 * 0x3fffe0 in bytes. */
#define SI_DMA_COPY_MAX_DWORD_ALIGNED_SIZE 0x3fffe0

/* Copy mode values. */
#define SI_DMA_COPY_DWORD_ALIGNED 0x00
#define SI_DMA_COPY_BYTE_ALIGNED  0x40
#define SI_DMA_COPY_TILED         0x8
298 
/* CIK async DMA packets.
 * CIK_SDMA_PACKET builds a 32-bit value from three fields, each masked to its
 * width: n in bits [31:16], sub_op in bits [15:8], op in bits [7:0].
 */
#define CIK_SDMA_PACKET(op, sub_op, n)                                                             \
   ((((unsigned)(n)&0xFFFF) << 16) | (((unsigned)(sub_op)&0xFF) << 8) |                            \
    (((unsigned)(op)&0xFF) << 0))
/* CIK async DMA packet types */
#define CIK_SDMA_OPCODE_NOP                        0x0
#define CIK_SDMA_OPCODE_COPY                       0x1
#define CIK_SDMA_COPY_SUB_OPCODE_LINEAR            0x0
#define CIK_SDMA_COPY_SUB_OPCODE_TILED             0x1
#define CIK_SDMA_COPY_SUB_OPCODE_SOA               0x3
#define CIK_SDMA_COPY_SUB_OPCODE_LINEAR_SUB_WINDOW 0x4
#define CIK_SDMA_COPY_SUB_OPCODE_TILED_SUB_WINDOW  0x5
#define CIK_SDMA_COPY_SUB_OPCODE_T2T_SUB_WINDOW    0x6
#define CIK_SDMA_OPCODE_WRITE                      0x2
#define SDMA_WRITE_SUB_OPCODE_LINEAR               0x0
#define SDMA_WRITE_SUB_OPCODE_TILED                0x1
/* Historical misspelling ("WRTIE"); kept as an alias so existing users of the
 * old name keep compiling. New code should use SDMA_WRITE_SUB_OPCODE_TILED.
 */
#define SDMA_WRTIE_SUB_OPCODE_TILED                SDMA_WRITE_SUB_OPCODE_TILED
#define CIK_SDMA_OPCODE_INDIRECT_BUFFER            0x4
#define CIK_SDMA_PACKET_FENCE                      0x5
#define CIK_SDMA_PACKET_TRAP                       0x6
#define CIK_SDMA_PACKET_SEMAPHORE                  0x7
#define CIK_SDMA_PACKET_CONSTANT_FILL              0xb
#define CIK_SDMA_OPCODE_TIMESTAMP                  0xd
#define SDMA_TS_SUB_OPCODE_SET_LOCAL_TIMESTAMP     0x0
#define SDMA_TS_SUB_OPCODE_GET_LOCAL_TIMESTAMP     0x1
#define SDMA_TS_SUB_OPCODE_GET_GLOBAL_TIMESTAMP    0x2
#define CIK_SDMA_PACKET_SRBM_WRITE                 0xe
/* There is apparently an undocumented HW limitation that
   prevents the HW from copying the last 255 bytes of (1 << 22) - 1 */
#define CIK_SDMA_COPY_MAX_SIZE    0x3fff00   /* almost 4 MB */
#define GFX103_SDMA_COPY_MAX_SIZE 0x3fffff00 /* almost 1 GB */
329 
/* One bit per floating-point value class, occupying bits 0 through 9 in the
 * order listed.
 */
enum amd_cmp_class_flags
{
   S_NAN       = 0x001, /* bit 0: signaling NaN */
   Q_NAN       = 0x002, /* bit 1: quiet NaN */
   N_INFINITY  = 0x004, /* bit 2: negative infinity */
   N_NORMAL    = 0x008, /* bit 3: negative normal */
   N_SUBNORMAL = 0x010, /* bit 4: negative subnormal */
   N_ZERO      = 0x020, /* bit 5: negative zero */
   P_ZERO      = 0x040, /* bit 6: positive zero */
   P_SUBNORMAL = 0x080, /* bit 7: positive subnormal */
   P_NORMAL    = 0x100, /* bit 8: positive normal */
   P_INFINITY  = 0x200  /* bit 9: positive infinity */
};
343 
#endif /* SID_H */
345