1 /*
2  * Copyright 2013 Advanced Micro Devices, Inc.
3  * All Rights Reserved.
4  *
5  * Permission is hereby granted, free of charge, to any person obtaining a
6  * copy of this software and associated documentation files (the "Software"),
7  * to deal in the Software without restriction, including without limitation
8  * on the rights to use, copy, modify, merge, publish, distribute, sub
9  * license, and/or sell copies of the Software, and to permit persons to whom
10  * the Software is furnished to do so, subject to the following conditions:
11  *
12  * The above copyright notice and this permission notice (including the next
13  * paragraph) shall be included in all copies or substantial portions of the
14  * Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18  * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
19  * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
20  * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
21  * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
22  * USE OR OTHER DEALINGS IN THE SOFTWARE.
23  */
24 
25 /**
 * This file contains helpers for writing commands to command streams.
27  */
28 
29 #ifndef SI_BUILD_PM4_H
30 #define SI_BUILD_PM4_H
31 
32 #include "si_pipe.h"
33 #include "sid.h"
34 
/* Debugging hook used by the register-setting macros below.
 *
 * Change the condition to 1 to forward every (reg_offset, count) pair to
 * ac_check_shadowed_regs() with the hardcoded GFX10 / CHIP_NAVI14 arguments.
 * In the default configuration the macro expands to nothing, so its
 * arguments are not evaluated.
 */
#if 0
#  include "ac_shadowed_regs.h"
#  define SI_CHECK_SHADOWED_REGS(reg_offset, count) ac_check_shadowed_regs(GFX10, CHIP_NAVI14, reg_offset, count)
#else
#  define SI_CHECK_SHADOWED_REGS(reg_offset, count)
#endif
41 
/* Declare the local cursor state used by radeon_emit() and friends:
 *   __cs              the command buffer being written
 *   __cs_buf          its current.buf dword array
 *   __cs_num          running dword index, starting at current.cdw
 *   __cs_num_initial  value of __cs_num at begin time (see
 *                     radeon_packets_added())
 *
 * This expands to declarations, so it must appear where declarations are
 * allowed; the caller supplies the terminating semicolon.
 */
#define radeon_begin(cs) \
   struct radeon_cmdbuf *__cs = (cs); \
   uint32_t *__cs_buf = __cs->current.buf; \
   unsigned __cs_num = __cs->current.cdw; \
   UNUSED unsigned __cs_num_initial = __cs_num
46 
/* Re-arm the cursor variables declared by an earlier radeon_begin() in the
 * same scope. Asserts that the previous sequence was closed (radeon_end()
 * resets __cs to NULL) and then reloads all cursor state from "cs".
 */
#define radeon_begin_again(cs) do { \
   assert(!__cs); \
   __cs = (cs); \
   __cs_buf = __cs->current.buf; \
   __cs_num_initial = __cs_num = __cs->current.cdw; \
} while (0)
54 
/* Close a radeon_begin() sequence: publish the local dword index back to
 * the command buffer, check that it does not exceed current.max_dw, and
 * reset __cs to NULL so that radeon_begin_again() can verify the sequence
 * was closed.
 */
#define radeon_end() \
   do { \
      __cs->current.cdw = __cs_num; /* commit the written dwords */ \
      assert(__cs->current.cdw <= __cs->current.max_dw); \
      __cs = NULL; /* mark the sequence as closed */ \
   } while (0)
60 
/* Append one dword to the command buffer opened by radeon_begin().
 * The expansion is fully parenthesized so that it behaves as a single
 * expression wherever it is used (e.g. as an operand of ?:).
 */
#define radeon_emit(value)  (__cs_buf[__cs_num++] = (value))

/* True if anything was emitted since radeon_begin()/radeon_begin_again(). */
#define radeon_packets_added()  (__cs_num != __cs_num_initial)
63 
/* Close the sequence like radeon_end() and, if at least one dword was
 * emitted since it was opened, set (sctx)->context_roll to true.
 */
#define radeon_end_update_context_roll(sctx) \
   do { \
      radeon_end(); \
      if (radeon_packets_added()) { \
         (sctx)->context_roll = true; \
      } \
   } while (0)
69 
/* Append "num" dwords read from "values" to the command buffer opened by
 * radeon_begin(). "num" is evaluated once. The byte count is computed from
 * sizeof(uint32_t) rather than a literal 4, so the multiplication happens
 * in size_t.
 */
#define radeon_emit_array(values, num) do { \
   unsigned __n = (num); \
   memcpy(__cs_buf + __cs_num, (values), __n * sizeof(uint32_t)); \
   __cs_num += __n; \
} while (0)
75 
/* Emit the two leading dwords of a PKT3_SET_CONFIG_REG packet covering
 * "num" registers starting at "reg". "reg" must be below
 * SI_CONTEXT_REG_OFFSET. The caller is expected to emit the register
 * value(s) afterwards, as radeon_set_config_reg() does.
 */
#define radeon_set_config_reg_seq(reg, num) \
   do { \
      SI_CHECK_SHADOWED_REGS(reg, num); \
      assert((reg) < SI_CONTEXT_REG_OFFSET); \
      /* packet header */ \
      radeon_emit(PKT3(PKT3_SET_CONFIG_REG, num, 0)); \
      /* register offset relative to SI_CONFIG_REG_OFFSET, shifted right by 2 */ \
      radeon_emit(((reg) - SI_CONFIG_REG_OFFSET) >> 2); \
   } while (0)
82 
/* Set a single config register: a one-register radeon_set_config_reg_seq()
 * header followed by "value".
 */
#define radeon_set_config_reg(reg, value) \
   do { \
      radeon_set_config_reg_seq(reg, 1); \
      radeon_emit(value); \
   } while (0)
87 
/* Emit the two leading dwords of a PKT3_SET_CONTEXT_REG packet covering
 * "num" registers starting at "reg". "reg" must be at or above
 * SI_CONTEXT_REG_OFFSET. The caller is expected to emit the register
 * value(s) afterwards, as radeon_set_context_reg() does.
 */
#define radeon_set_context_reg_seq(reg, num) \
   do { \
      SI_CHECK_SHADOWED_REGS(reg, num); \
      assert((reg) >= SI_CONTEXT_REG_OFFSET); \
      /* packet header */ \
      radeon_emit(PKT3(PKT3_SET_CONTEXT_REG, num, 0)); \
      /* register offset relative to SI_CONTEXT_REG_OFFSET, shifted right by 2 */ \
      radeon_emit(((reg) - SI_CONTEXT_REG_OFFSET) >> 2); \
   } while (0)
94 
/* Set a single context register: a one-register
 * radeon_set_context_reg_seq() header followed by "value".
 */
#define radeon_set_context_reg(reg, value) \
   do { \
      radeon_set_context_reg_seq(reg, 1); \
      radeon_emit(value); \
   } while (0)
99 
/* Set "num" consecutive context registers starting at "reg" from the dword
 * array "values": a radeon_set_context_reg_seq() header followed by the
 * array contents.
 */
#define radeon_set_context_reg_seq_array(reg, num, values) \
   do { \
      radeon_set_context_reg_seq(reg, num); \
      radeon_emit_array(values, num); \
   } while (0)
104 
/* Set a single context register with an index field.
 *
 * Emits a one-register PKT3_SET_CONTEXT_REG packet whose second dword is
 * the register offset (relative to SI_CONTEXT_REG_OFFSET, shifted right by
 * 2) OR'ed with "idx" placed at bit 28, followed by "value".
 *
 * "idx" is converted to uint32_t before shifting so that values with bit 3
 * set do not overflow a signed int, and the shift/OR grouping is spelled
 * out explicitly.
 */
#define radeon_set_context_reg_idx(reg, idx, value) do { \
   SI_CHECK_SHADOWED_REGS(reg, 1); \
   assert((reg) >= SI_CONTEXT_REG_OFFSET); \
   radeon_emit(PKT3(PKT3_SET_CONTEXT_REG, 1, 0)); \
   radeon_emit((((reg) - SI_CONTEXT_REG_OFFSET) >> 2) | ((uint32_t)(idx) << 28)); \
   radeon_emit(value); \
} while (0)
112 
/* Emit the two leading dwords of a PKT3_SET_SH_REG packet covering "num"
 * registers starting at "reg". "reg" must lie in
 * [SI_SH_REG_OFFSET, SI_SH_REG_END). The caller is expected to emit the
 * register value(s) afterwards, as radeon_set_sh_reg() does.
 */
#define radeon_set_sh_reg_seq(reg, num) \
   do { \
      SI_CHECK_SHADOWED_REGS(reg, num); \
      assert((reg) >= SI_SH_REG_OFFSET && (reg) < SI_SH_REG_END); \
      /* packet header */ \
      radeon_emit(PKT3(PKT3_SET_SH_REG, num, 0)); \
      /* register offset relative to SI_SH_REG_OFFSET, shifted right by 2 */ \
      radeon_emit(((reg) - SI_SH_REG_OFFSET) >> 2); \
   } while (0)
119 
/* Set a single SH register: a one-register radeon_set_sh_reg_seq() header
 * followed by "value".
 */
#define radeon_set_sh_reg(reg, value) \
   do { \
      radeon_set_sh_reg_seq(reg, 1); \
      radeon_emit(value); \
   } while (0)
124 
/* Emit the two leading dwords of a PKT3_SET_UCONFIG_REG packet covering
 * "num" registers starting at "reg". "reg" must lie in
 * [CIK_UCONFIG_REG_OFFSET, CIK_UCONFIG_REG_END). "perfctr" is forwarded as
 * the third PKT3() argument. The caller is expected to emit the register
 * value(s) afterwards, as radeon_set_uconfig_reg() does.
 */
#define radeon_set_uconfig_reg_seq(reg, num, perfctr) \
   do { \
      SI_CHECK_SHADOWED_REGS(reg, num); \
      assert((reg) >= CIK_UCONFIG_REG_OFFSET && (reg) < CIK_UCONFIG_REG_END); \
      /* packet header */ \
      radeon_emit(PKT3(PKT3_SET_UCONFIG_REG, num, perfctr)); \
      /* register offset relative to CIK_UCONFIG_REG_OFFSET, shifted right by 2 */ \
      radeon_emit(((reg) - CIK_UCONFIG_REG_OFFSET) >> 2); \
   } while (0)
131 
/* Set a single uconfig register: a one-register
 * radeon_set_uconfig_reg_seq() header with perfctr = false, followed by
 * "value".
 */
#define radeon_set_uconfig_reg(reg, value) \
   do { \
      radeon_set_uconfig_reg_seq(reg, 1, false); \
      radeon_emit(value); \
   } while (0)
136 
/* Same as radeon_set_uconfig_reg(), but the packet header is built with
 * perfctr = true.
 */
#define radeon_set_uconfig_reg_perfctr(reg, value) \
   do { \
      radeon_set_uconfig_reg_seq(reg, 1, true); \
      radeon_emit(value); \
   } while (0)
141 
/* Set a single uconfig register with a non-zero index field.
 *
 * The packet opcode is PKT3_SET_UCONFIG_REG_INDEX, except when chip_class
 * is below GFX9, or is GFX9 with (screen)->info.me_fw_version < 26, in
 * which case PKT3_SET_UCONFIG_REG is used. The second dword is the
 * register offset (relative to CIK_UCONFIG_REG_OFFSET, shifted right by 2)
 * OR'ed with "idx" placed at bit 28; "value" follows.
 *
 * "idx" is converted to uint32_t before shifting so that values with bit 3
 * set do not overflow a signed int, and the shift/OR grouping is spelled
 * out explicitly.
 */
#define radeon_set_uconfig_reg_idx(screen, chip_class, reg, idx, value) do { \
   SI_CHECK_SHADOWED_REGS(reg, 1); \
   assert((reg) >= CIK_UCONFIG_REG_OFFSET && (reg) < CIK_UCONFIG_REG_END); \
   assert((idx) != 0); \
   unsigned __opcode = PKT3_SET_UCONFIG_REG_INDEX; \
   if ((chip_class) < GFX9 || \
       ((chip_class) == GFX9 && (screen)->info.me_fw_version < 26)) \
      __opcode = PKT3_SET_UCONFIG_REG; \
   radeon_emit(PKT3(__opcode, 1, 0)); \
   radeon_emit((((reg) - CIK_UCONFIG_REG_OFFSET) >> 2) | ((uint32_t)(idx) << 28)); \
   radeon_emit(value); \
} while (0)
154 
/**
 * Emit PKT3_SET_CONTEXT_REG only if the register is not tracked yet or its
 * tracked value differs from the new one, then record the new value.
 *
 * @param sctx    pointer whose tracked_regs state is consulted and updated
 * @param offset  register offset passed to radeon_set_context_reg()
 * @param reg     bit index into tracked_regs.reg_saved and element index
 *                into tracked_regs.reg_value
 * @param val     new register value (evaluated once)
 */
#define radeon_opt_set_context_reg(sctx, offset, reg, val) do { \
   unsigned __value = (val); \
   if ((((sctx)->tracked_regs.reg_saved >> (reg)) & 0x1) != 0x1 || \
       (sctx)->tracked_regs.reg_value[reg] != __value) { \
      radeon_set_context_reg(offset, __value); \
      (sctx)->tracked_regs.reg_saved |= 0x1ull << (reg); \
      (sctx)->tracked_regs.reg_value[reg] = __value; \
   } \
} while (0)
165 
/**
 * Set 2 consecutive context registers if either one is not tracked yet or
 * has a tracked value different from the new one, then record both values.
 *
 * @param sctx    pointer whose tracked_regs state is consulted and updated
 * @param offset  starting register offset
 * @param reg     index of the first register in tracked_regs (bit index in
 *                reg_saved, element index in reg_value)
 * @param val1    is written to first register
 * @param val2    is written to second register
 */
#define radeon_opt_set_context_reg2(sctx, offset, reg, val1, val2) do { \
   unsigned __value1 = (val1), __value2 = (val2); \
   if ((((sctx)->tracked_regs.reg_saved >> (reg)) & 0x3) != 0x3 || \
       (sctx)->tracked_regs.reg_value[reg] != __value1 || \
       (sctx)->tracked_regs.reg_value[(reg) + 1] != __value2) { \
      radeon_set_context_reg_seq(offset, 2); \
      radeon_emit(__value1); \
      radeon_emit(__value2); \
      (sctx)->tracked_regs.reg_value[reg] = __value1; \
      (sctx)->tracked_regs.reg_value[(reg) + 1] = __value2; \
      (sctx)->tracked_regs.reg_saved |= 0x3ull << (reg); \
   } \
} while (0)
185 
/**
 * Set 3 consecutive context registers if any of them is not tracked yet or
 * has a tracked value different from the new one, then record all values.
 *
 * @param sctx    pointer whose tracked_regs state is consulted and updated
 * @param offset  starting register offset
 * @param reg     index of the first register in tracked_regs (bit index in
 *                reg_saved, element index in reg_value)
 * @param val1    is written to first register
 * @param val2    is written to second register
 * @param val3    is written to third register
 */
#define radeon_opt_set_context_reg3(sctx, offset, reg, val1, val2, val3) do { \
   unsigned __value1 = (val1), __value2 = (val2), __value3 = (val3); \
   if ((((sctx)->tracked_regs.reg_saved >> (reg)) & 0x7) != 0x7 || \
       (sctx)->tracked_regs.reg_value[reg] != __value1 || \
       (sctx)->tracked_regs.reg_value[(reg) + 1] != __value2 || \
       (sctx)->tracked_regs.reg_value[(reg) + 2] != __value3) { \
      radeon_set_context_reg_seq(offset, 3); \
      radeon_emit(__value1); \
      radeon_emit(__value2); \
      radeon_emit(__value3); \
      (sctx)->tracked_regs.reg_value[reg] = __value1; \
      (sctx)->tracked_regs.reg_value[(reg) + 1] = __value2; \
      (sctx)->tracked_regs.reg_value[(reg) + 2] = __value3; \
      (sctx)->tracked_regs.reg_saved |= 0x7ull << (reg); \
   } \
} while (0)
205 
/**
 * Set 4 consecutive context registers if any of them is not tracked yet or
 * has a tracked value different from the new one, then record all values.
 *
 * @param sctx    pointer whose tracked_regs state is consulted and updated
 * @param offset  starting register offset
 * @param reg     index of the first register in tracked_regs (bit index in
 *                reg_saved, element index in reg_value)
 * @param val1    is written to first register
 * @param val2    is written to second register
 * @param val3    is written to third register
 * @param val4    is written to fourth register
 */
#define radeon_opt_set_context_reg4(sctx, offset, reg, val1, val2, val3, val4) do { \
   unsigned __value1 = (val1), __value2 = (val2), __value3 = (val3), __value4 = (val4); \
   if ((((sctx)->tracked_regs.reg_saved >> (reg)) & 0xf) != 0xf || \
       (sctx)->tracked_regs.reg_value[reg] != __value1 || \
       (sctx)->tracked_regs.reg_value[(reg) + 1] != __value2 || \
       (sctx)->tracked_regs.reg_value[(reg) + 2] != __value3 || \
       (sctx)->tracked_regs.reg_value[(reg) + 3] != __value4) { \
      radeon_set_context_reg_seq(offset, 4); \
      radeon_emit(__value1); \
      radeon_emit(__value2); \
      radeon_emit(__value3); \
      radeon_emit(__value4); \
      (sctx)->tracked_regs.reg_value[reg] = __value1; \
      (sctx)->tracked_regs.reg_value[(reg) + 1] = __value2; \
      (sctx)->tracked_regs.reg_value[(reg) + 2] = __value3; \
      (sctx)->tracked_regs.reg_value[(reg) + 3] = __value4; \
      (sctx)->tracked_regs.reg_saved |= 0xfull << (reg); \
   } \
} while (0)
228 
/**
 * Set "num" consecutive context registers starting at "offset" if the new
 * values differ from the caller-provided saved copy.
 *
 * "value" and "saved_val" are compared as arrays of "num" uint32_t. On a
 * mismatch the registers are emitted from "value" and "saved_val" is
 * overwritten with "value". The "sctx" argument is not used.
 */
#define radeon_opt_set_context_regn(sctx, offset, value, saved_val, num) \
   do { \
      if (memcmp(value, saved_val, sizeof(uint32_t) * (num)) != 0) { \
         radeon_set_context_reg_seq_array(offset, num, value); \
         memcpy(saved_val, value, sizeof(uint32_t) * (num)); \
      } \
   } while (0)
239 
/**
 * Emit a single SH register write only if the register is not tracked yet
 * or its tracked value differs from the new one, then record the new value.
 *
 * @param sctx    pointer whose tracked_regs state is consulted and updated
 * @param offset  register offset passed to radeon_set_sh_reg()
 * @param reg     bit index into tracked_regs.reg_saved and element index
 *                into tracked_regs.reg_value
 * @param val     new register value (evaluated once)
 */
#define radeon_opt_set_sh_reg(sctx, offset, reg, val) do { \
   unsigned __value = (val); \
   if ((((sctx)->tracked_regs.reg_saved >> (reg)) & 0x1) != 0x1 || \
       (sctx)->tracked_regs.reg_value[reg] != __value) { \
      radeon_set_sh_reg(offset, __value); \
      (sctx)->tracked_regs.reg_saved |= BITFIELD64_BIT(reg); \
      (sctx)->tracked_regs.reg_value[reg] = __value; \
   } \
} while (0)
249 
/**
 * Emit a single uconfig register write only if the register is not tracked
 * yet or its tracked value differs from the new one, then record the new
 * value.
 *
 * @param sctx    pointer whose tracked_regs state is consulted and updated
 * @param offset  register offset passed to radeon_set_uconfig_reg()
 * @param reg     bit index into tracked_regs.reg_saved and element index
 *                into tracked_regs.reg_value
 * @param val     new register value (evaluated once)
 */
#define radeon_opt_set_uconfig_reg(sctx, offset, reg, val) do { \
   unsigned __value = (val); \
   if ((((sctx)->tracked_regs.reg_saved >> (reg)) & 0x1) != 0x1 || \
       (sctx)->tracked_regs.reg_value[reg] != __value) { \
      radeon_set_uconfig_reg(offset, __value); \
      (sctx)->tracked_regs.reg_saved |= 0x1ull << (reg); \
      (sctx)->tracked_regs.reg_value[reg] = __value; \
   } \
} while (0)
259 
/* Write "value" to a register below CIK_UCONFIG_REG_OFFSET using a
 * PKT3_COPY_DATA packet with an immediate source (COPY_DATA_IMM) and a
 * COPY_DATA_PERF destination. Six dwords are emitted in total.
 */
#define radeon_set_privileged_config_reg(reg, value) \
   do { \
      assert((reg) < CIK_UCONFIG_REG_OFFSET); \
      /* header + source/destination selection */ \
      radeon_emit(PKT3(PKT3_COPY_DATA, 4, 0)); \
      radeon_emit(COPY_DATA_SRC_SEL(COPY_DATA_IMM) | \
                  COPY_DATA_DST_SEL(COPY_DATA_PERF)); \
      /* immediate data, followed by an unused dword */ \
      radeon_emit(value); \
      radeon_emit(0); \
      /* destination register (shifted right by 2), followed by an unused dword */ \
      radeon_emit((reg) >> 2); \
      radeon_emit(0); \
   } while (0)
270 
/* Emit the low 32 bits of the address "va" as one dword.
 *
 * Asserts that "va" is either 0 or has its upper 32 bits equal to
 * (sscreen)->info.address32_hi. The truncation to 32 bits is explicit, and
 * "va" is widened to 64 bits before the shift so the check stays well
 * defined even if a 32-bit expression is passed.
 */
#define radeon_emit_32bit_pointer(sscreen, va) do { \
   radeon_emit((uint32_t)(va)); \
   assert((va) == 0 || ((uint64_t)(va) >> 32) == (sscreen)->info.address32_hi); \
} while (0)
275 
/* Write the 32-bit pointer of descriptor "desc" into one SH register.
 *
 * The register is sh_base + (desc)->shader_userdata_offset and the value is
 * (desc)->gpu_address, emitted through radeon_emit_32bit_pointer() with
 * (sctx)->screen.
 *
 * The temporary uses a "__" prefix like the other macro locals in this
 * file, so an argument expression that mentions a caller variable named
 * "sh_offset" is not captured by the macro's own declaration.
 */
#define radeon_emit_one_32bit_pointer(sctx, desc, sh_base) do { \
   unsigned __sh_offset = (sh_base) + (desc)->shader_userdata_offset; \
   radeon_set_sh_reg_seq(__sh_offset, 1); \
   radeon_emit_32bit_pointer((sctx)->screen, (desc)->gpu_address); \
} while (0)
281 
282 /* This should be evaluated at compile time if all parameters are constants. */
283 static ALWAYS_INLINE unsigned
si_get_user_data_base(enum chip_class chip_class,enum si_has_tess has_tess,enum si_has_gs has_gs,enum si_has_ngg ngg,enum pipe_shader_type shader)284 si_get_user_data_base(enum chip_class chip_class, enum si_has_tess has_tess,
285                       enum si_has_gs has_gs, enum si_has_ngg ngg,
286                       enum pipe_shader_type shader)
287 {
288    switch (shader) {
289    case PIPE_SHADER_VERTEX:
290       /* VS can be bound as VS, ES, or LS. */
291       if (has_tess) {
292          if (chip_class >= GFX10) {
293             return R_00B430_SPI_SHADER_USER_DATA_HS_0;
294          } else if (chip_class == GFX9) {
295             return R_00B430_SPI_SHADER_USER_DATA_LS_0;
296          } else {
297             return R_00B530_SPI_SHADER_USER_DATA_LS_0;
298          }
299       } else if (chip_class >= GFX10) {
300          if (ngg || has_gs) {
301             return R_00B230_SPI_SHADER_USER_DATA_GS_0;
302          } else {
303             return R_00B130_SPI_SHADER_USER_DATA_VS_0;
304          }
305       } else if (has_gs) {
306          return R_00B330_SPI_SHADER_USER_DATA_ES_0;
307       } else {
308          return R_00B130_SPI_SHADER_USER_DATA_VS_0;
309       }
310 
311    case PIPE_SHADER_TESS_CTRL:
312       if (chip_class == GFX9) {
313          return R_00B430_SPI_SHADER_USER_DATA_LS_0;
314       } else {
315          return R_00B430_SPI_SHADER_USER_DATA_HS_0;
316       }
317 
318    case PIPE_SHADER_TESS_EVAL:
319       /* TES can be bound as ES, VS, or not bound. */
320       if (has_tess) {
321          if (chip_class >= GFX10) {
322             if (ngg || has_gs) {
323                return R_00B230_SPI_SHADER_USER_DATA_GS_0;
324             } else {
325                return R_00B130_SPI_SHADER_USER_DATA_VS_0;
326             }
327          } else if (has_gs) {
328             return R_00B330_SPI_SHADER_USER_DATA_ES_0;
329          } else {
330             return R_00B130_SPI_SHADER_USER_DATA_VS_0;
331          }
332       } else {
333          return 0;
334       }
335 
336    case PIPE_SHADER_GEOMETRY:
337       if (chip_class == GFX9) {
338          return R_00B330_SPI_SHADER_USER_DATA_ES_0;
339       } else {
340          return R_00B230_SPI_SHADER_USER_DATA_GS_0;
341       }
342 
343    default:
344       assert(0);
345       return 0;
346    }
347 }
348 
349 #endif
350