/*
 * Copyright (C) 2016 Rob Clark <robclark@freedesktop.org>
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 *
 * Authors:
 *    Rob Clark <robclark@freedesktop.org>
 */

#ifndef FD5_EMIT_H
#define FD5_EMIT_H

#include "pipe/p_context.h"

#include "fd5_context.h"
#include "fd5_format.h"
#include "fd5_program.h"
#include "fd5_screen.h"
#include "freedreno_batch.h"
#include "freedreno_context.h"
#include "ir3_gallium.h"

struct fd_ringbuffer;
/* grouped together emit-state for prog/vertex/state emit: */
struct fd5_emit {
   struct pipe_debug_callback *debug;
   const struct fd_vertex_state *vtx;
   const struct fd5_program_state *prog;
   const struct pipe_draw_info *info;
   unsigned drawid_offset;
   const struct pipe_draw_indirect_info *indirect;
   const struct pipe_draw_start_count_bias *draw;
   bool binning_pass;
   struct ir3_cache_key key;
   enum fd_dirty_3d_state dirty;

   uint32_t sprite_coord_enable; /* bitmask */
   bool sprite_coord_mode;
   bool rasterflat;
   /* In the binning pass we don't have the real frag shader, so we
    * don't know whether the real draw disqualifies lrz write.  So just
    * figure that out up-front and stash it in the emit.
    */
   bool no_lrz_write;

   /* cached to avoid repeated lookups of same variants: */
   const struct ir3_shader_variant *vs, *fs;
   /* TODO: other shader stages.. */

   unsigned streamout_mask;
};
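
/*
 * A rough usage sketch (simplified from how the draw path in fd5_draw.c
 * typically fills this in before state emit; the fields and calls shown
 * here are illustrative and may differ slightly between Mesa versions):
 *
 *    struct fd5_emit emit = {
 *       .debug = &ctx->debug,
 *       .vtx = &ctx->vtx,
 *       .info = info,
 *       .drawid_offset = drawid_offset,
 *       .indirect = indirect,
 *       .draw = draw,
 *       .key = {
 *          .vs = ctx->prog.vs,
 *          .fs = ctx->prog.fs,
 *       },
 *       .rasterflat = ctx->rasterizer->flatshade,
 *       .sprite_coord_enable = ctx->rasterizer->sprite_coord_enable,
 *       .sprite_coord_mode = ctx->rasterizer->sprite_coord_mode,
 *       .dirty = ctx->dirty,
 *    };
 *
 *    emit.prog = fd5_program_state(
 *       ir3_cache_lookup(ctx->shader_cache, &emit.key, &ctx->debug));
 *
 *    fd5_emit_state(ctx, ring, &emit);
 *    fd5_emit_vertex_bufs(ring, &emit);
 */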

static inline enum a5xx_color_fmt
fd5_emit_format(struct pipe_surface *surf)
{
   if (!surf)
      return 0;
   return fd5_pipe2color(surf->format);
}

static inline const struct ir3_shader_variant *
fd5_emit_get_vp(struct fd5_emit *emit)
{
   if (!emit->vs) {
      /* We use nonbinning VS during binning when TFB is enabled because that
       * is what has all the outputs that might be involved in TFB.
       */
      if (emit->binning_pass &&
          !emit->prog->vs->shader->stream_output.num_outputs)
         emit->vs = emit->prog->bs;
      else
         emit->vs = emit->prog->vs;
   }
   return emit->vs;
}

static inline const struct ir3_shader_variant *
fd5_emit_get_fp(struct fd5_emit *emit)
{
   if (!emit->fs) {
      if (emit->binning_pass) {
         /* use dummy stateobj to simplify binning vs non-binning: */
         static const struct ir3_shader_variant binning_fs = {};
         emit->fs = &binning_fs;
      } else {
         emit->fs = emit->prog->fs;
      }
   }
   return emit->fs;
}
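
/* Note: both getters cache the resolved variant in the emit object, so
 * (as a hypothetical usage sketch) program/state emit code can call them
 * repeatedly without redoing the lookup:
 *
 *    const struct ir3_shader_variant *vp = fd5_emit_get_vp(emit);
 *    const struct ir3_shader_variant *fp = fd5_emit_get_fp(emit);
 */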

static inline void
fd5_cache_flush(struct fd_batch *batch, struct fd_ringbuffer *ring) assert_dt
{
   fd_reset_wfi(batch);
   OUT_PKT4(ring, REG_A5XX_UCHE_CACHE_INVALIDATE_MIN_LO, 5);
   OUT_RING(ring, 0x00000000); /* UCHE_CACHE_INVALIDATE_MIN_LO */
   OUT_RING(ring, 0x00000000); /* UCHE_CACHE_INVALIDATE_MIN_HI */
   OUT_RING(ring, 0x00000000); /* UCHE_CACHE_INVALIDATE_MAX_LO */
   OUT_RING(ring, 0x00000000); /* UCHE_CACHE_INVALIDATE_MAX_HI */
   OUT_RING(ring, 0x00000012); /* UCHE_CACHE_INVALIDATE */
   fd_wfi(batch, ring);
}

static inline void
fd5_set_render_mode(struct fd_context *ctx, struct fd_ringbuffer *ring,
                    enum render_mode_cmd mode)
{
   /* TODO add preemption support, gmem bypass, etc */
   emit_marker5(ring, 7);
   OUT_PKT7(ring, CP_SET_RENDER_MODE, 5);
   OUT_RING(ring, CP_SET_RENDER_MODE_0_MODE(mode));
   OUT_RING(ring, 0x00000000); /* ADDR_LO */
   OUT_RING(ring, 0x00000000); /* ADDR_HI */
   OUT_RING(ring, COND(mode == GMEM, CP_SET_RENDER_MODE_3_GMEM_ENABLE) |
                     COND(mode == BINNING, CP_SET_RENDER_MODE_3_VSC_ENABLE));
   OUT_RING(ring, 0x00000000);
   emit_marker5(ring, 7);
}

static inline void
fd5_event_write(struct fd_batch *batch, struct fd_ringbuffer *ring,
                enum vgt_event_type evt, bool timestamp)
{
   OUT_PKT7(ring, CP_EVENT_WRITE, timestamp ? 4 : 1);
   OUT_RING(ring, CP_EVENT_WRITE_0_EVENT(evt));
   if (timestamp) {
      OUT_RELOC(ring, fd5_context(batch->ctx)->blit_mem, 0, 0,
                0); /* ADDR_LO/HI */
      OUT_RING(ring, 0x00000000);
   }
}

static inline void
fd5_emit_blit(struct fd_batch *batch, struct fd_ringbuffer *ring)
{
   emit_marker5(ring, 7);
   fd5_event_write(batch, ring, BLIT, true);
   emit_marker5(ring, 7);
}

static inline void
fd5_emit_render_cntl(struct fd_context *ctx, bool blit, bool binning) assert_dt
{
   struct fd_ringbuffer *ring =
      binning ? ctx->batch->binning : ctx->batch->draw;

   /* TODO eventually this partially depends on the pfb state, i.e.
    * which of the cbuf(s)/zsbuf has a UBWC flag buffer.  That part
    * we could probably cache and just regenerate when framebuffer
    * state is dirty (or something like that).
    *
    * Other bits seem to depend on query state, such as whether a
    * samples-passed query is active.
    */
   bool samples_passed = (fd5_context(ctx)->samples_passed_queries > 0);
   OUT_PKT4(ring, REG_A5XX_RB_RENDER_CNTL, 1);
   OUT_RING(ring, 0x00000000 | /* RB_RENDER_CNTL */
                     COND(binning, A5XX_RB_RENDER_CNTL_BINNING_PASS) |
                     COND(binning, A5XX_RB_RENDER_CNTL_DISABLE_COLOR_PIPE) |
                     COND(samples_passed, A5XX_RB_RENDER_CNTL_SAMPLES_PASSED) |
                     COND(!blit, 0x8));

   OUT_PKT4(ring, REG_A5XX_GRAS_SC_CNTL, 1);
   OUT_RING(ring, 0x00000008 | /* GRAS_SC_CNTL */
                     COND(binning, A5XX_GRAS_SC_CNTL_BINNING_PASS) |
                     COND(samples_passed, A5XX_GRAS_SC_CNTL_SAMPLES_PASSED));
}

static inline void
fd5_emit_lrz_flush(struct fd_batch *batch, struct fd_ringbuffer *ring)
{
   /* TODO I think the extra writes to GRAS_LRZ_CNTL are probably
    * a workaround and not needed on all a5xx.
    */
   OUT_PKT4(ring, REG_A5XX_GRAS_LRZ_CNTL, 1);
   OUT_RING(ring, A5XX_GRAS_LRZ_CNTL_ENABLE);

   fd5_event_write(batch, ring, LRZ_FLUSH, false);

   OUT_PKT4(ring, REG_A5XX_GRAS_LRZ_CNTL, 1);
   OUT_RING(ring, 0x0);
}

void fd5_emit_vertex_bufs(struct fd_ringbuffer *ring,
                          struct fd5_emit *emit) assert_dt;

void fd5_emit_state(struct fd_context *ctx, struct fd_ringbuffer *ring,
                    struct fd5_emit *emit) assert_dt;

void fd5_emit_cs_state(struct fd_context *ctx, struct fd_ringbuffer *ring,
                       struct ir3_shader_variant *cp) assert_dt;
void fd5_emit_cs_consts(const struct ir3_shader_variant *v,
                        struct fd_ringbuffer *ring, struct fd_context *ctx,
                        const struct pipe_grid_info *info) assert_dt;

void fd5_emit_restore(struct fd_batch *batch,
                      struct fd_ringbuffer *ring) assert_dt;

void fd5_emit_init_screen(struct pipe_screen *pscreen);
void fd5_emit_init(struct pipe_context *pctx);

static inline void
fd5_emit_ib(struct fd_ringbuffer *ring, struct fd_ringbuffer *target)
{
   /* For debug after a lockup, write a unique counter value
    * to scratch6 for each IB, to make it easier to match up
    * register dumps to cmdstream.  The combination of IB and
    * DRAW (scratch7) is enough to "triangulate" the particular
    * draw that caused the lockup.
    */
   emit_marker5(ring, 6);
   __OUT_IB5(ring, target);
   emit_marker5(ring, 6);
}
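
/* Note: emit_marker5() (defined in fd5_emit.c) conceptually just writes an
 * incrementing counter to the given CP scratch register (when marker
 * emission is enabled), so after a hang the scratch6/scratch7 values in a
 * register dump can be matched against the markers in the captured
 * cmdstream to locate the offending IB and draw.
 */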

#endif /* FD5_EMIT_H */