/*
 * Copyright (C) 2016 Rob Clark <robclark@freedesktop.org>
 * Copyright © 2018 Google, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 *
 * Authors:
 *    Rob Clark <robclark@freedesktop.org>
 */

#include <stdio.h>

#include "pipe/p_state.h"
#include "util/format/u_format.h"
#include "util/u_inlines.h"
#include "util/u_memory.h"
#include "util/u_string.h"

#include "freedreno_draw.h"
#include "freedreno_resource.h"
#include "freedreno_state.h"
#include "freedreno_tracepoints.h"

#include "fd6_blitter.h"
#include "fd6_context.h"
#include "fd6_draw.h"
#include "fd6_emit.h"
#include "fd6_format.h"
#include "fd6_gmem.h"
#include "fd6_pack.h"
#include "fd6_program.h"
#include "fd6_resource.h"
#include "fd6_zsa.h"

/**
 * Emits the flags registers, suitable for RB_MRT_FLAG_BUFFER,
 * RB_DEPTH_FLAG_BUFFER, SP_PS_2D_SRC_FLAGS, and RB_BLIT_FLAG_DST.
 */
void
fd6_emit_flag_reference(struct fd_ringbuffer *ring, struct fd_resource *rsc,
                        int level, int layer)
{
   if (fd_resource_ubwc_enabled(rsc, level)) {
      OUT_RELOC(ring, rsc->bo, fd_resource_ubwc_offset(rsc, level, layer), 0,
                0);
      OUT_RING(ring, A6XX_RB_MRT_FLAG_BUFFER_PITCH_PITCH(
                        fdl_ubwc_pitch(&rsc->layout, level)) |
                        A6XX_RB_MRT_FLAG_BUFFER_PITCH_ARRAY_PITCH(
                           rsc->layout.ubwc_layer_size >> 2));
   } else {
      OUT_RING(ring, 0x00000000); /* RB_MRT_FLAG_BUFFER[i].ADDR_LO */
      OUT_RING(ring, 0x00000000); /* RB_MRT_FLAG_BUFFER[i].ADDR_HI */
      OUT_RING(ring, 0x00000000); /* RB_MRT_FLAG_BUFFER[i].PITCH */
   }
}

static void
emit_mrt(struct fd_ringbuffer *ring, struct pipe_framebuffer_state *pfb,
         const struct fd_gmem_stateobj *gmem)
{
   unsigned srgb_cntl = 0;
   unsigned i;

   /* Note, GLES 3.2 says "If the fragment’s layer number is negative, or
    * greater than or equal to the minimum number of layers of any attachment,
    * the effects of the fragment on the framebuffer contents are undefined."
    */
   unsigned max_layer_index = 0;
   enum a6xx_format mrt0_format = 0;

   for (i = 0; i < pfb->nr_cbufs; i++) {
      enum a3xx_color_swap swap = WZYX;
      bool sint = false, uint = false;
      struct fd_resource *rsc = NULL;
      struct fdl_slice *slice = NULL;
      uint32_t stride = 0;
      uint32_t array_stride = 0;
      uint32_t offset;

      if (!pfb->cbufs[i])
         continue;

      struct pipe_surface *psurf = pfb->cbufs[i];
      enum pipe_format pformat = psurf->format;
      rsc = fd_resource(psurf->texture);
      if (!rsc->bo)
         continue;

      uint32_t base = gmem ? gmem->cbuf_base[i] : 0;
      slice = fd_resource_slice(rsc, psurf->u.tex.level);
      uint32_t tile_mode = fd_resource_tile_mode(psurf->texture, psurf->u.tex.level);
      enum a6xx_format format = fd6_color_format(pformat, tile_mode);
      sint = util_format_is_pure_sint(pformat);
      uint = util_format_is_pure_uint(pformat);

      if (util_format_is_srgb(pformat))
         srgb_cntl |= (1 << i);

      offset =
         fd_resource_offset(rsc, psurf->u.tex.level, psurf->u.tex.first_layer);

      stride = fd_resource_pitch(rsc, psurf->u.tex.level);
      array_stride = fd_resource_layer_stride(rsc, psurf->u.tex.level);
      swap = fd6_color_swap(pformat, rsc->layout.tile_mode);

      max_layer_index = psurf->u.tex.last_layer - psurf->u.tex.first_layer;

      debug_assert((offset + slice->size0) <= fd_bo_size(rsc->bo));

      OUT_REG(
         ring,
         A6XX_RB_MRT_BUF_INFO(i, .color_format = format,
                              .color_tile_mode = tile_mode, .color_swap = swap),
         A6XX_RB_MRT_PITCH(i, .a6xx_rb_mrt_pitch = stride),
         A6XX_RB_MRT_ARRAY_PITCH(i, .a6xx_rb_mrt_array_pitch = array_stride),
         A6XX_RB_MRT_BASE(i, .bo = rsc->bo, .bo_offset = offset),
         A6XX_RB_MRT_BASE_GMEM(i, .unknown = base));

      OUT_REG(ring, A6XX_SP_FS_MRT_REG(i, .color_format = format,
                                       .color_sint = sint, .color_uint = uint));

      OUT_PKT4(ring, REG_A6XX_RB_MRT_FLAG_BUFFER(i), 3);
      fd6_emit_flag_reference(ring, rsc, psurf->u.tex.level,
                              psurf->u.tex.first_layer);

      if (i == 0)
         mrt0_format = format;
   }
   if (pfb->zsbuf)
      max_layer_index = pfb->zsbuf->u.tex.last_layer - pfb->zsbuf->u.tex.first_layer;

   OUT_REG(ring, A6XX_GRAS_LRZ_MRT_BUF_INFO_0(.color_format = mrt0_format));

   OUT_REG(ring, A6XX_RB_SRGB_CNTL(.dword = srgb_cntl));
   OUT_REG(ring, A6XX_SP_SRGB_CNTL(.dword = srgb_cntl));

   OUT_REG(ring, A6XX_GRAS_MAX_LAYER_INDEX(max_layer_index));
}

static void
emit_zs(struct fd_ringbuffer *ring, struct pipe_surface *zsbuf,
        const struct fd_gmem_stateobj *gmem)
{
   if (zsbuf) {
      struct fd_resource *rsc = fd_resource(zsbuf->texture);
      enum a6xx_depth_format fmt = fd6_pipe2depth(zsbuf->format);
      uint32_t stride = fd_resource_pitch(rsc, zsbuf->u.tex.level);
      uint32_t array_stride = fd_resource_layer_stride(rsc, zsbuf->u.tex.level);
      uint32_t base = gmem ? gmem->zsbuf_base[0] : 0;
      uint32_t offset =
         fd_resource_offset(rsc, zsbuf->u.tex.level, zsbuf->u.tex.first_layer);

      OUT_REG(
         ring, A6XX_RB_DEPTH_BUFFER_INFO(.depth_format = fmt),
         A6XX_RB_DEPTH_BUFFER_PITCH(.a6xx_rb_depth_buffer_pitch = stride),
         A6XX_RB_DEPTH_BUFFER_ARRAY_PITCH(.a6xx_rb_depth_buffer_array_pitch =
                                             array_stride),
         A6XX_RB_DEPTH_BUFFER_BASE(.bo = rsc->bo, .bo_offset = offset),
         A6XX_RB_DEPTH_BUFFER_BASE_GMEM(.dword = base));

      OUT_REG(ring, A6XX_GRAS_SU_DEPTH_BUFFER_INFO(.depth_format = fmt));

      OUT_PKT4(ring, REG_A6XX_RB_DEPTH_FLAG_BUFFER_BASE, 3);
      fd6_emit_flag_reference(ring, rsc, zsbuf->u.tex.level,
                              zsbuf->u.tex.first_layer);

      if (rsc->lrz) {
         OUT_REG(ring, A6XX_GRAS_LRZ_BUFFER_BASE(.bo = rsc->lrz),
                 A6XX_GRAS_LRZ_BUFFER_PITCH(.pitch = rsc->lrz_pitch),
                 // XXX a6xx seems to use a different buffer here.. not sure
                 // what for..
                 A6XX_GRAS_LRZ_FAST_CLEAR_BUFFER_BASE());
      } else {
         OUT_PKT4(ring, REG_A6XX_GRAS_LRZ_BUFFER_BASE, 5);
         OUT_RING(ring, 0x00000000); /* GRAS_LRZ_BUFFER_BASE_LO */
         OUT_RING(ring, 0x00000000); /* GRAS_LRZ_BUFFER_BASE_HI */
         OUT_RING(ring, 0x00000000); /* GRAS_LRZ_BUFFER_PITCH */
         OUT_RING(ring, 0x00000000); /* GRAS_LRZ_FAST_CLEAR_BUFFER_BASE_LO */
         OUT_RING(ring, 0x00000000); /* GRAS_LRZ_FAST_CLEAR_BUFFER_BASE_HI */
      }

      /* NOTE: blob emits GRAS_LRZ_CNTL plus GRAS_LRZ_BUFFER_BASE
       * plus this CP_EVENT_WRITE at the end in its own IB..
       */
      OUT_PKT7(ring, CP_EVENT_WRITE, 1);
      OUT_RING(ring, CP_EVENT_WRITE_0_EVENT(UNK_25));

      if (rsc->stencil) {
         stride = fd_resource_pitch(rsc->stencil, zsbuf->u.tex.level);
         array_stride = fd_resource_layer_stride(rsc->stencil, zsbuf->u.tex.level);
         uint32_t base = gmem ? gmem->zsbuf_base[1] : 0;
         uint32_t offset =
            fd_resource_offset(rsc->stencil, zsbuf->u.tex.level, zsbuf->u.tex.first_layer);

         OUT_REG(ring, A6XX_RB_STENCIL_INFO(.separate_stencil = true),
                 A6XX_RB_STENCIL_BUFFER_PITCH(.a6xx_rb_stencil_buffer_pitch =
                                                 stride),
                 A6XX_RB_STENCIL_BUFFER_ARRAY_PITCH(
                       .a6xx_rb_stencil_buffer_array_pitch = array_stride),
                 A6XX_RB_STENCIL_BUFFER_BASE(.bo = rsc->stencil->bo, .bo_offset = offset),
                 A6XX_RB_STENCIL_BUFFER_BASE_GMEM(.dword = base));
      } else {
         OUT_REG(ring, A6XX_RB_STENCIL_INFO(0));
      }
   } else {
      OUT_PKT4(ring, REG_A6XX_RB_DEPTH_BUFFER_INFO, 6);
      OUT_RING(ring, A6XX_RB_DEPTH_BUFFER_INFO_DEPTH_FORMAT(DEPTH6_NONE));
      OUT_RING(ring, 0x00000000); /* RB_DEPTH_BUFFER_PITCH */
      OUT_RING(ring, 0x00000000); /* RB_DEPTH_BUFFER_ARRAY_PITCH */
      OUT_RING(ring, 0x00000000); /* RB_DEPTH_BUFFER_BASE_LO */
      OUT_RING(ring, 0x00000000); /* RB_DEPTH_BUFFER_BASE_HI */
      OUT_RING(ring, 0x00000000); /* RB_DEPTH_BUFFER_BASE_GMEM */

      OUT_REG(ring,
              A6XX_GRAS_SU_DEPTH_BUFFER_INFO(.depth_format = DEPTH6_NONE));

      OUT_PKT4(ring, REG_A6XX_GRAS_LRZ_BUFFER_BASE, 5);
      OUT_RING(ring, 0x00000000); /* GRAS_LRZ_BUFFER_BASE_LO */
      OUT_RING(ring, 0x00000000); /* GRAS_LRZ_BUFFER_BASE_HI */
      OUT_RING(ring, 0x00000000); /* GRAS_LRZ_BUFFER_PITCH */
      OUT_RING(ring, 0x00000000); /* GRAS_LRZ_FAST_CLEAR_BUFFER_BASE_LO */
      OUT_RING(ring, 0x00000000); /* GRAS_LRZ_FAST_CLEAR_BUFFER_BASE_HI */

      OUT_REG(ring, A6XX_RB_STENCIL_INFO(0));
   }
}

static bool
use_hw_binning(struct fd_batch *batch)
{
   const struct fd_gmem_stateobj *gmem = batch->gmem_state;

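   /* Each VSC pipe tracks per-tile visibility with a single 32-bit
    * VSC_STATE bitmask (one bit per tile within the pipe, see
    * emit_conditional_ib()), so presumably hw binning cannot be used
    * when a pipe would cover more than 32 tiles:
    */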
   if ((gmem->maxpw * gmem->maxph) > 32)
      return false;

   return fd_binning_enabled && ((gmem->nbins_x * gmem->nbins_y) >= 2) &&
          (batch->num_draws > 0);
}

static void
patch_fb_read_gmem(struct fd_batch *batch)
{
   unsigned num_patches = fd_patch_num_elements(&batch->fb_read_patches);
   if (!num_patches)
      return;

   struct fd_screen *screen = batch->ctx->screen;
   const struct fd_gmem_stateobj *gmem = batch->gmem_state;
   struct pipe_framebuffer_state *pfb = &batch->framebuffer;
   struct pipe_surface *psurf = pfb->cbufs[0];
   struct pipe_resource *prsc = psurf->texture;
   struct fd_resource *rsc = fd_resource(prsc);
   enum pipe_format format = psurf->format;

   uint8_t swiz[4];
   fd6_tex_swiz(psurf->format, swiz, PIPE_SWIZZLE_X, PIPE_SWIZZLE_Y, PIPE_SWIZZLE_Z, PIPE_SWIZZLE_W);

   /* always TILE6_2 mode in GMEM, which also means no swap: */
   uint32_t texconst0 = A6XX_TEX_CONST_0_FMT(fd6_texture_format(format, rsc->layout.tile_mode)) |
          A6XX_TEX_CONST_0_SAMPLES(fd_msaa_samples(prsc->nr_samples)) |
          A6XX_TEX_CONST_0_SWAP(WZYX) |
          A6XX_TEX_CONST_0_TILE_MODE(TILE6_2) |
          COND(util_format_is_srgb(format), A6XX_TEX_CONST_0_SRGB) |
          A6XX_TEX_CONST_0_SWIZ_X(fd6_pipe2swiz(swiz[0])) |
          A6XX_TEX_CONST_0_SWIZ_Y(fd6_pipe2swiz(swiz[1])) |
          A6XX_TEX_CONST_0_SWIZ_Z(fd6_pipe2swiz(swiz[2])) |
          A6XX_TEX_CONST_0_SWIZ_W(fd6_pipe2swiz(swiz[3]));

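   /* Rewrite the dwords of each recorded fb-read texture descriptor that
    * depend on the GMEM layout, re-pointing the descriptor at the tile
    * buffer: TEX_CONST_0 (format/tiling/swizzle), TEX_CONST_2 (pitch/type),
    * and TEX_CONST_4/5 (base address):
    */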
   for (unsigned i = 0; i < num_patches; i++) {
      struct fd_cs_patch *patch = fd_patch_element(&batch->fb_read_patches, i);
      patch->cs[0] = texconst0;
      patch->cs[2] = A6XX_TEX_CONST_2_PITCH(gmem->bin_w * gmem->cbuf_cpp[0]) |
                     A6XX_TEX_CONST_2_TYPE(A6XX_TEX_2D);
      patch->cs[4] = A6XX_TEX_CONST_4_BASE_LO(screen->gmem_base);
      patch->cs[5] = A6XX_TEX_CONST_5_BASE_HI(screen->gmem_base >> 32) |
                     A6XX_TEX_CONST_5_DEPTH(1);
   }
   util_dynarray_clear(&batch->fb_read_patches);
}

static void
patch_fb_read_sysmem(struct fd_batch *batch)
{
   unsigned num_patches = fd_patch_num_elements(&batch->fb_read_patches);
   if (!num_patches)
      return;

   struct pipe_framebuffer_state *pfb = &batch->framebuffer;
   struct pipe_surface *psurf = pfb->cbufs[0];
   if (!psurf)
      return;

   struct fd_resource *rsc = fd_resource(psurf->texture);

   uint32_t block_width, block_height;
   fdl6_get_ubwc_blockwidth(&rsc->layout, &block_width, &block_height);

   struct fdl_view_args args = {
      .iova = fd_bo_get_iova(rsc->bo),

      .base_miplevel = psurf->u.tex.level,
      .level_count = 1,

      .base_array_layer = psurf->u.tex.first_layer,
      .layer_count = 1,

      .format = psurf->format,
      .swiz = {PIPE_SWIZZLE_X, PIPE_SWIZZLE_Y, PIPE_SWIZZLE_Z, PIPE_SWIZZLE_W},

      .type = FDL_VIEW_TYPE_2D,
      .chroma_offsets = {FDL_CHROMA_LOCATION_COSITED_EVEN,
                         FDL_CHROMA_LOCATION_COSITED_EVEN},
   };
   const struct fdl_layout *layouts[3] = {&rsc->layout, NULL, NULL};
   struct fdl6_view view;
   fdl6_view_init(&view, layouts, &args,
                  batch->ctx->screen->info->a6xx.has_z24uint_s8uint);

   for (unsigned i = 0; i < num_patches; i++) {
      struct fd_cs_patch *patch = fd_patch_element(&batch->fb_read_patches, i);

      /* This is cheating a bit, since we can't use OUT_RELOC() here.. but
       * the render target will already have a reloc emitted for RB_MRT state,
       * so we can get away with manually patching in the address here:
       */
      memcpy(patch->cs, view.descriptor, FDL6_TEX_CONST_DWORDS * 4);
   }
   util_dynarray_clear(&batch->fb_read_patches);
}

static void
update_render_cntl(struct fd_batch *batch, struct pipe_framebuffer_state *pfb,
                   bool binning)
{
   struct fd_ringbuffer *ring = batch->gmem;
   struct fd_screen *screen = batch->ctx->screen;
   uint32_t cntl = 0;
   bool depth_ubwc_enable = false;
   uint32_t mrts_ubwc_enable = 0;
   int i;

   if (pfb->zsbuf) {
      struct fd_resource *rsc = fd_resource(pfb->zsbuf->texture);
      depth_ubwc_enable =
         fd_resource_ubwc_enabled(rsc, pfb->zsbuf->u.tex.level);
   }

   for (i = 0; i < pfb->nr_cbufs; i++) {
      if (!pfb->cbufs[i])
         continue;

      struct pipe_surface *psurf = pfb->cbufs[i];
      struct fd_resource *rsc = fd_resource(psurf->texture);
      if (!rsc->bo)
         continue;

      if (fd_resource_ubwc_enabled(rsc, psurf->u.tex.level))
         mrts_ubwc_enable |= 1 << i;
   }

   cntl |= A6XX_RB_RENDER_CNTL_CCUSINGLECACHELINESIZE(2);
   if (binning)
      cntl |= A6XX_RB_RENDER_CNTL_BINNING;

   if (screen->info->a6xx.has_cp_reg_write) {
      OUT_PKT7(ring, CP_REG_WRITE, 3);
      OUT_RING(ring, CP_REG_WRITE_0_TRACKER(TRACK_RENDER_CNTL));
      OUT_RING(ring, REG_A6XX_RB_RENDER_CNTL);
   } else {
      OUT_PKT4(ring, REG_A6XX_RB_RENDER_CNTL, 1);
   }
   OUT_RING(ring, cntl |
                     COND(depth_ubwc_enable, A6XX_RB_RENDER_CNTL_FLAG_DEPTH) |
                     A6XX_RB_RENDER_CNTL_FLAG_MRTS(mrts_ubwc_enable));
}

/* extra size to store VSC_DRAW_STRM_SIZE: */
#define VSC_DRAW_STRM_SIZE(pitch) ((pitch)*32 + 0x100)
#define VSC_PRIM_STRM_SIZE(pitch) ((pitch)*32)
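/* The draw-stream BO holds one stream of `pitch` bytes for each of the 32
 * VSC pipes, with the extra 0x100 bytes at offset 32 * pitch used as the
 * dump area for the VSC_DRAW_STRM_SIZE registers (see the bo_offset passed
 * to A6XX_VSC_DRAW_STRM_SIZE_ADDRESS in update_vsc_pipe() below).
 */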

static void
update_vsc_pipe(struct fd_batch *batch)
{
   struct fd_context *ctx = batch->ctx;
   struct fd6_context *fd6_ctx = fd6_context(ctx);
   const struct fd_gmem_stateobj *gmem = batch->gmem_state;
   struct fd_ringbuffer *ring = batch->gmem;
   int i;

   if (batch->draw_strm_bits / 8 > fd6_ctx->vsc_draw_strm_pitch) {
      if (fd6_ctx->vsc_draw_strm)
         fd_bo_del(fd6_ctx->vsc_draw_strm);
      fd6_ctx->vsc_draw_strm = NULL;
      /* Note: we probably only need to align to 0x40, but aligning more
       * strongly reduces the odds that we will have to realloc again on
       * the next frame:
       */
      fd6_ctx->vsc_draw_strm_pitch = align(batch->draw_strm_bits / 8, 0x4000);
      mesa_logd("pre-resize VSC_DRAW_STRM_PITCH to: 0x%x",
                fd6_ctx->vsc_draw_strm_pitch);
   }

   if (batch->prim_strm_bits / 8 > fd6_ctx->vsc_prim_strm_pitch) {
      if (fd6_ctx->vsc_prim_strm)
         fd_bo_del(fd6_ctx->vsc_prim_strm);
      fd6_ctx->vsc_prim_strm = NULL;
      fd6_ctx->vsc_prim_strm_pitch = align(batch->prim_strm_bits / 8, 0x4000);
      mesa_logd("pre-resize VSC_PRIM_STRM_PITCH to: 0x%x",
                fd6_ctx->vsc_prim_strm_pitch);
   }

   if (!fd6_ctx->vsc_draw_strm) {
      fd6_ctx->vsc_draw_strm = fd_bo_new(
         ctx->screen->dev, VSC_DRAW_STRM_SIZE(fd6_ctx->vsc_draw_strm_pitch),
         FD_BO_NOMAP, "vsc_draw_strm");
   }

   if (!fd6_ctx->vsc_prim_strm) {
      fd6_ctx->vsc_prim_strm = fd_bo_new(
         ctx->screen->dev, VSC_PRIM_STRM_SIZE(fd6_ctx->vsc_prim_strm_pitch),
         FD_BO_NOMAP, "vsc_prim_strm");
   }

   OUT_REG(
      ring, A6XX_VSC_BIN_SIZE(.width = gmem->bin_w, .height = gmem->bin_h),
      A6XX_VSC_DRAW_STRM_SIZE_ADDRESS(.bo = fd6_ctx->vsc_draw_strm,
                                      .bo_offset =
                                         32 * fd6_ctx->vsc_draw_strm_pitch));

   OUT_REG(ring, A6XX_VSC_BIN_COUNT(.nx = gmem->nbins_x, .ny = gmem->nbins_y));

   OUT_PKT4(ring, REG_A6XX_VSC_PIPE_CONFIG_REG(0), 32);
   for (i = 0; i < 32; i++) {
      const struct fd_vsc_pipe *pipe = &gmem->vsc_pipe[i];
      OUT_RING(ring, A6XX_VSC_PIPE_CONFIG_REG_X(pipe->x) |
                        A6XX_VSC_PIPE_CONFIG_REG_Y(pipe->y) |
                        A6XX_VSC_PIPE_CONFIG_REG_W(pipe->w) |
                        A6XX_VSC_PIPE_CONFIG_REG_H(pipe->h));
   }

   OUT_REG(
      ring, A6XX_VSC_PRIM_STRM_ADDRESS(.bo = fd6_ctx->vsc_prim_strm),
      A6XX_VSC_PRIM_STRM_PITCH(.dword = fd6_ctx->vsc_prim_strm_pitch),
      A6XX_VSC_PRIM_STRM_LIMIT(.dword = fd6_ctx->vsc_prim_strm_pitch - 64));

   OUT_REG(
      ring, A6XX_VSC_DRAW_STRM_ADDRESS(.bo = fd6_ctx->vsc_draw_strm),
      A6XX_VSC_DRAW_STRM_PITCH(.dword = fd6_ctx->vsc_draw_strm_pitch),
      A6XX_VSC_DRAW_STRM_LIMIT(.dword = fd6_ctx->vsc_draw_strm_pitch - 64));
}

/*
 * If overflow is detected, either 0x1 (VSC_DRAW_STRM overflow) or 0x3
 * (VSC_PRIM_STRM overflow) plus the size of the overflowed buffer is
 * written to control->vsc_overflow.  This allows the CPU to detect which
 * buffer overflowed (and, since the current size is encoded as well,
 * protects against already-submitted but not-yet-executed batches fooling
 * the CPU into increasing the size again unnecessarily).
 */
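/* For example, assuming vsc_draw_strm_pitch is 0x4000: on a draw-stream
 * overflow the CP writes 0x1 + 0x4000 = 0x4001, and check_vsc_overflow()
 * recovers buffer = 0x4001 & 0x3 = 0x1 and size = 0x4001 & ~0x3 = 0x4000.
 */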
static void
emit_vsc_overflow_test(struct fd_batch *batch)
{
   struct fd_ringbuffer *ring = batch->gmem;
   const struct fd_gmem_stateobj *gmem = batch->gmem_state;
   struct fd6_context *fd6_ctx = fd6_context(batch->ctx);

   debug_assert((fd6_ctx->vsc_draw_strm_pitch & 0x3) == 0);
   debug_assert((fd6_ctx->vsc_prim_strm_pitch & 0x3) == 0);

   /* Check for overflow, write vsc_overflow if detected: */
   for (int i = 0; i < gmem->num_vsc_pipes; i++) {
      OUT_PKT7(ring, CP_COND_WRITE5, 8);
      OUT_RING(ring, CP_COND_WRITE5_0_FUNCTION(WRITE_GE) |
                        CP_COND_WRITE5_0_WRITE_MEMORY);
      OUT_RING(ring, CP_COND_WRITE5_1_POLL_ADDR_LO(
                        REG_A6XX_VSC_DRAW_STRM_SIZE_REG(i)));
      OUT_RING(ring, CP_COND_WRITE5_2_POLL_ADDR_HI(0));
      OUT_RING(ring, CP_COND_WRITE5_3_REF(fd6_ctx->vsc_draw_strm_pitch - 64));
      OUT_RING(ring, CP_COND_WRITE5_4_MASK(~0));
      OUT_RELOC(ring,
                control_ptr(fd6_ctx, vsc_overflow)); /* WRITE_ADDR_LO/HI */
      OUT_RING(ring,
               CP_COND_WRITE5_7_WRITE_DATA(1 + fd6_ctx->vsc_draw_strm_pitch));

      OUT_PKT7(ring, CP_COND_WRITE5, 8);
      OUT_RING(ring, CP_COND_WRITE5_0_FUNCTION(WRITE_GE) |
                        CP_COND_WRITE5_0_WRITE_MEMORY);
      OUT_RING(ring, CP_COND_WRITE5_1_POLL_ADDR_LO(
                        REG_A6XX_VSC_PRIM_STRM_SIZE_REG(i)));
      OUT_RING(ring, CP_COND_WRITE5_2_POLL_ADDR_HI(0));
      OUT_RING(ring, CP_COND_WRITE5_3_REF(fd6_ctx->vsc_prim_strm_pitch - 64));
      OUT_RING(ring, CP_COND_WRITE5_4_MASK(~0));
      OUT_RELOC(ring,
                control_ptr(fd6_ctx, vsc_overflow)); /* WRITE_ADDR_LO/HI */
      OUT_RING(ring,
               CP_COND_WRITE5_7_WRITE_DATA(3 + fd6_ctx->vsc_prim_strm_pitch));
   }

   OUT_PKT7(ring, CP_WAIT_MEM_WRITES, 0);
}

static void
check_vsc_overflow(struct fd_context *ctx)
{
   struct fd6_context *fd6_ctx = fd6_context(ctx);
   struct fd6_control *control = fd_bo_map(fd6_ctx->control_mem);
   uint32_t vsc_overflow = control->vsc_overflow;

   if (!vsc_overflow)
      return;

   /* clear overflow flag: */
   control->vsc_overflow = 0;

   unsigned buffer = vsc_overflow & 0x3;
   unsigned size = vsc_overflow & ~0x3;

   if (buffer == 0x1) {
      /* VSC_DRAW_STRM overflow: */

      if (size < fd6_ctx->vsc_draw_strm_pitch) {
         /* we've already increased the size; this overflow is from a
          * batch submitted before the resize, but executed after it:
          */
         return;
      }

      fd_bo_del(fd6_ctx->vsc_draw_strm);
      fd6_ctx->vsc_draw_strm = NULL;
      fd6_ctx->vsc_draw_strm_pitch *= 2;

      mesa_logd("resized VSC_DRAW_STRM_PITCH to: 0x%x",
                fd6_ctx->vsc_draw_strm_pitch);

   } else if (buffer == 0x3) {
      /* VSC_PRIM_STRM overflow: */

      if (size < fd6_ctx->vsc_prim_strm_pitch) {
         /* we've already increased the size */
         return;
      }

      fd_bo_del(fd6_ctx->vsc_prim_strm);
      fd6_ctx->vsc_prim_strm = NULL;
      fd6_ctx->vsc_prim_strm_pitch *= 2;

      mesa_logd("resized VSC_PRIM_STRM_PITCH to: 0x%x",
                fd6_ctx->vsc_prim_strm_pitch);

   } else {
      /* NOTE: it's possible, for example, for overflow to corrupt the
       * control page.  I mostly just see this hit if I set the initial VSC
       * buffer size extremely small.  Things still seem to recover,
       * but maybe we should pre-emptively realloc vsc_data/vsc_data2
       * and hope for different memory placement?
       */
      mesa_loge("invalid vsc_overflow value: 0x%08x", vsc_overflow);
   }
}

static void
emit_common_init(struct fd_batch *batch)
{
   struct fd_ringbuffer *ring = batch->gmem;
   struct fd_autotune *at = &batch->ctx->autotune;
   struct fd_batch_result *result = batch->autotune_result;

   if (!result)
      return;

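   /* Snapshot the hw sample counter at the start of the render pass (the
    * matching samples_end copy is in emit_common_fini()), so the autotuner
    * can estimate how much was actually rendered:
    */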
   OUT_PKT4(ring, REG_A6XX_RB_SAMPLE_COUNT_CONTROL, 1);
   OUT_RING(ring, A6XX_RB_SAMPLE_COUNT_CONTROL_COPY);

   OUT_PKT4(ring, REG_A6XX_RB_SAMPLE_COUNT_ADDR, 2);
   OUT_RELOC(ring, results_ptr(at, result[result->idx].samples_start));

   fd6_event_write(batch, ring, ZPASS_DONE, false);
}

static void
emit_common_fini(struct fd_batch *batch)
{
   struct fd_ringbuffer *ring = batch->gmem;
   struct fd_autotune *at = &batch->ctx->autotune;
   struct fd_batch_result *result = batch->autotune_result;

   if (!result)
      return;

   OUT_PKT4(ring, REG_A6XX_RB_SAMPLE_COUNT_CONTROL, 1);
   OUT_RING(ring, A6XX_RB_SAMPLE_COUNT_CONTROL_COPY);

   OUT_PKT4(ring, REG_A6XX_RB_SAMPLE_COUNT_ADDR, 2);
   OUT_RELOC(ring, results_ptr(at, result[result->idx].samples_end));

   fd6_event_write(batch, ring, ZPASS_DONE, false);

   // TODO is there a better event to use.. a single ZPASS_DONE_TS would be nice
   OUT_PKT7(ring, CP_EVENT_WRITE, 4);
   OUT_RING(ring, CP_EVENT_WRITE_0_EVENT(CACHE_FLUSH_TS));
   OUT_RELOC(ring, results_ptr(at, fence));
   OUT_RING(ring, result->fence);
}

/*
 * Emit conditional CP_INDIRECT_BRANCH based on VSC_STATE[p], i.e. the IB
 * is skipped for tiles that have no visible geometry.
 */
static void
emit_conditional_ib(struct fd_batch *batch, const struct fd_tile *tile,
                    struct fd_ringbuffer *target)
{
   struct fd_ringbuffer *ring = batch->gmem;

   if (target->cur == target->start)
      return;

   emit_marker6(ring, 6);

   unsigned count = fd_ringbuffer_cmd_count(target);

   BEGIN_RING(ring, 5 + 4 * count); /* ensure conditional doesn't get split */

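   /* Set up the predicate: test bit tile->n (the tile's index within its
    * VSC pipe) of VSC_STATE_REG(tile->p), which the binning pass sets for
    * tiles containing visible geometry; CP_COND_REG_EXEC then skips the
    * following dwords when the test fails:
    */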
   OUT_PKT7(ring, CP_REG_TEST, 1);
   OUT_RING(ring, A6XX_CP_REG_TEST_0_REG(REG_A6XX_VSC_STATE_REG(tile->p)) |
                     A6XX_CP_REG_TEST_0_BIT(tile->n) |
                     A6XX_CP_REG_TEST_0_WAIT_FOR_ME);

   OUT_PKT7(ring, CP_COND_REG_EXEC, 2);
   OUT_RING(ring, CP_COND_REG_EXEC_0_MODE(PRED_TEST));
   OUT_RING(ring, CP_COND_REG_EXEC_1_DWORDS(4 * count));

   for (unsigned i = 0; i < count; i++) {
      uint32_t dwords;
      OUT_PKT7(ring, CP_INDIRECT_BUFFER, 3);
      dwords = fd_ringbuffer_emit_reloc_ring_full(ring, target, i) / 4;
      assert(dwords > 0);
      OUT_RING(ring, dwords);
   }

   emit_marker6(ring, 6);
}

static void
set_scissor(struct fd_ringbuffer *ring, uint32_t x1, uint32_t y1, uint32_t x2,
            uint32_t y2)
{
   OUT_REG(ring, A6XX_GRAS_SC_WINDOW_SCISSOR_TL(.x = x1, .y = y1),
           A6XX_GRAS_SC_WINDOW_SCISSOR_BR(.x = x2, .y = y2));

   OUT_REG(ring, A6XX_GRAS_2D_RESOLVE_CNTL_1(.x = x1, .y = y1),
           A6XX_GRAS_2D_RESOLVE_CNTL_2(.x = x2, .y = y2));
}

static void
set_bin_size(struct fd_ringbuffer *ring, uint32_t w, uint32_t h, uint32_t flag)
{
   OUT_REG(ring, A6XX_GRAS_BIN_CONTROL(.binw = w, .binh = h, .dword = flag));
   OUT_REG(ring, A6XX_RB_BIN_CONTROL(.binw = w, .binh = h, .dword = flag));
   /* no flag for RB_BIN_CONTROL2... */
   OUT_REG(ring, A6XX_RB_BIN_CONTROL2(.binw = w, .binh = h));
}

static void
emit_binning_pass(struct fd_batch *batch) assert_dt
{
   struct fd_ringbuffer *ring = batch->gmem;
   const struct fd_gmem_stateobj *gmem = batch->gmem_state;
   struct fd_screen *screen = batch->ctx->screen;

   debug_assert(!batch->tessellation);

   set_scissor(ring, 0, 0, gmem->width - 1, gmem->height - 1);

   emit_marker6(ring, 7);
   OUT_PKT7(ring, CP_SET_MARKER, 1);
   OUT_RING(ring, A6XX_CP_SET_MARKER_0_MODE(RM6_BINNING));
   emit_marker6(ring, 7);

   OUT_PKT7(ring, CP_SET_VISIBILITY_OVERRIDE, 1);
   OUT_RING(ring, 0x1);

   OUT_PKT7(ring, CP_SET_MODE, 1);
   OUT_RING(ring, 0x1);

   OUT_WFI5(ring);

   OUT_REG(ring, A6XX_VFD_MODE_CNTL(.render_mode = BINNING_PASS));

   update_vsc_pipe(batch);

   OUT_PKT4(ring, REG_A6XX_PC_POWER_CNTL, 1);
   OUT_RING(ring, screen->info->a6xx.magic.PC_POWER_CNTL);

   OUT_PKT4(ring, REG_A6XX_VFD_POWER_CNTL, 1);
   OUT_RING(ring, screen->info->a6xx.magic.PC_POWER_CNTL);

   OUT_PKT7(ring, CP_EVENT_WRITE, 1);
   OUT_RING(ring, UNK_2C);

   OUT_PKT4(ring, REG_A6XX_RB_WINDOW_OFFSET, 1);
   OUT_RING(ring, A6XX_RB_WINDOW_OFFSET_X(0) | A6XX_RB_WINDOW_OFFSET_Y(0));

   OUT_PKT4(ring, REG_A6XX_SP_TP_WINDOW_OFFSET, 1);
   OUT_RING(ring,
            A6XX_SP_TP_WINDOW_OFFSET_X(0) | A6XX_SP_TP_WINDOW_OFFSET_Y(0));

   /* emit IB to binning drawcmds: */
   trace_start_binning_ib(&batch->trace, ring);
   fd6_emit_ib(ring, batch->draw);
   trace_end_binning_ib(&batch->trace, ring);

   fd_reset_wfi(batch);

   OUT_PKT7(ring, CP_SET_DRAW_STATE, 3);
   OUT_RING(ring, CP_SET_DRAW_STATE__0_COUNT(0) |
                     CP_SET_DRAW_STATE__0_DISABLE_ALL_GROUPS |
                     CP_SET_DRAW_STATE__0_GROUP_ID(0));
   OUT_RING(ring, CP_SET_DRAW_STATE__1_ADDR_LO(0));
   OUT_RING(ring, CP_SET_DRAW_STATE__2_ADDR_HI(0));

   OUT_PKT7(ring, CP_EVENT_WRITE, 1);
   OUT_RING(ring, UNK_2D);

   fd6_cache_inv(batch, ring);
   fd6_cache_flush(batch, ring);
   fd_wfi(batch, ring);

   OUT_PKT7(ring, CP_WAIT_FOR_ME, 0);

   trace_start_vsc_overflow_test(&batch->trace, batch->gmem);
   emit_vsc_overflow_test(batch);
   trace_end_vsc_overflow_test(&batch->trace, batch->gmem);

   OUT_PKT7(ring, CP_SET_VISIBILITY_OVERRIDE, 1);
   OUT_RING(ring, 0x0);

   OUT_PKT7(ring, CP_SET_MODE, 1);
   OUT_RING(ring, 0x0);

   OUT_WFI5(ring);

   OUT_REG(ring,
           A6XX_RB_CCU_CNTL(.color_offset = screen->ccu_offset_gmem,
                            .gmem = true,
                            .unk2 = screen->info->a6xx.ccu_cntl_gmem_unk2));
}

static void
emit_msaa(struct fd_ringbuffer *ring, unsigned nr)
{
   enum a3xx_msaa_samples samples = fd_msaa_samples(nr);

   OUT_PKT4(ring, REG_A6XX_SP_TP_RAS_MSAA_CNTL, 2);
   OUT_RING(ring, A6XX_SP_TP_RAS_MSAA_CNTL_SAMPLES(samples));
   OUT_RING(ring, A6XX_SP_TP_DEST_MSAA_CNTL_SAMPLES(samples) |
                     COND(samples == MSAA_ONE,
                          A6XX_SP_TP_DEST_MSAA_CNTL_MSAA_DISABLE));

   OUT_PKT4(ring, REG_A6XX_GRAS_RAS_MSAA_CNTL, 2);
   OUT_RING(ring, A6XX_GRAS_RAS_MSAA_CNTL_SAMPLES(samples));
   OUT_RING(ring, A6XX_GRAS_DEST_MSAA_CNTL_SAMPLES(samples) |
                     COND(samples == MSAA_ONE,
                          A6XX_GRAS_DEST_MSAA_CNTL_MSAA_DISABLE));

   OUT_PKT4(ring, REG_A6XX_RB_RAS_MSAA_CNTL, 2);
   OUT_RING(ring, A6XX_RB_RAS_MSAA_CNTL_SAMPLES(samples));
   OUT_RING(ring,
            A6XX_RB_DEST_MSAA_CNTL_SAMPLES(samples) |
               COND(samples == MSAA_ONE, A6XX_RB_DEST_MSAA_CNTL_MSAA_DISABLE));

   OUT_PKT4(ring, REG_A6XX_RB_MSAA_CNTL, 1);
   OUT_RING(ring, A6XX_RB_MSAA_CNTL_SAMPLES(samples));
}

static void prepare_tile_setup_ib(struct fd_batch *batch);
static void prepare_tile_fini_ib(struct fd_batch *batch);

/* before first tile */
static void
fd6_emit_tile_init(struct fd_batch *batch) assert_dt
{
   struct fd_ringbuffer *ring = batch->gmem;
   struct pipe_framebuffer_state *pfb = &batch->framebuffer;
   const struct fd_gmem_stateobj *gmem = batch->gmem_state;
   struct fd_screen *screen = batch->ctx->screen;

   fd6_emit_restore(batch, ring);

   fd6_emit_lrz_flush(ring);

   if (batch->prologue) {
      trace_start_prologue(&batch->trace, ring);
      fd6_emit_ib(ring, batch->prologue);
      trace_end_prologue(&batch->trace, ring);
   }

   fd6_cache_inv(batch, ring);

   prepare_tile_setup_ib(batch);
   prepare_tile_fini_ib(batch);

   OUT_PKT7(ring, CP_SKIP_IB2_ENABLE_GLOBAL, 1);
   OUT_RING(ring, 0x0);

   /* blob controls "local" in IB2, but I think that is not required */
   OUT_PKT7(ring, CP_SKIP_IB2_ENABLE_LOCAL, 1);
   OUT_RING(ring, 0x1);

   fd_wfi(batch, ring);
   OUT_REG(ring,
           A6XX_RB_CCU_CNTL(.color_offset = screen->ccu_offset_gmem,
                            .gmem = true,
                            .unk2 = screen->info->a6xx.ccu_cntl_gmem_unk2));

   emit_zs(ring, pfb->zsbuf, batch->gmem_state);
   emit_mrt(ring, pfb, batch->gmem_state);
   emit_msaa(ring, pfb->samples);
   patch_fb_read_gmem(batch);

   if (use_hw_binning(batch)) {
      /* enable stream-out during binning pass: */
      OUT_REG(ring, A6XX_VPC_SO_DISABLE(false));

      set_bin_size(ring, gmem->bin_w, gmem->bin_h,
                   A6XX_RB_BIN_CONTROL_RENDER_MODE(BINNING_PASS) |
                   A6XX_RB_BIN_CONTROL_LRZ_FEEDBACK_ZMODE_MASK(0x6));
      update_render_cntl(batch, pfb, true);
      emit_binning_pass(batch);

      /* and disable stream-out for draw pass: */
      OUT_REG(ring, A6XX_VPC_SO_DISABLE(true));

      /*
       * NOTE: even if we detect VSC overflow and disable use of
       * visibility stream in draw pass, it is still safe to execute
       * the rest of these cmds:
       */

      // NOTE a618 not setting .FORCE_LRZ_WRITE_DIS ..
      set_bin_size(ring, gmem->bin_w, gmem->bin_h,
                   A6XX_RB_BIN_CONTROL_FORCE_LRZ_WRITE_DIS |
                   A6XX_RB_BIN_CONTROL_LRZ_FEEDBACK_ZMODE_MASK(0x6));

      OUT_PKT4(ring, REG_A6XX_VFD_MODE_CNTL, 1);
      OUT_RING(ring, 0x0);

      OUT_PKT4(ring, REG_A6XX_PC_POWER_CNTL, 1);
      OUT_RING(ring, screen->info->a6xx.magic.PC_POWER_CNTL);

      OUT_PKT4(ring, REG_A6XX_VFD_POWER_CNTL, 1);
      OUT_RING(ring, screen->info->a6xx.magic.PC_POWER_CNTL);

      OUT_PKT7(ring, CP_SKIP_IB2_ENABLE_GLOBAL, 1);
      OUT_RING(ring, 0x1);
   } else {
      /* no binning pass, so enable stream-out for draw pass: */
      OUT_REG(ring, A6XX_VPC_SO_DISABLE(false));

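      /* 0x6000000 presumably corresponds to LRZ_FEEDBACK_ZMODE_MASK(0x6),
       * written with named fields in the binning path above:
       */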
      set_bin_size(ring, gmem->bin_w, gmem->bin_h, 0x6000000);
   }

   update_render_cntl(batch, pfb, false);

   emit_common_init(batch);
}

static void
set_window_offset(struct fd_ringbuffer *ring, uint32_t x1, uint32_t y1)
{
   OUT_PKT4(ring, REG_A6XX_RB_WINDOW_OFFSET, 1);
   OUT_RING(ring, A6XX_RB_WINDOW_OFFSET_X(x1) | A6XX_RB_WINDOW_OFFSET_Y(y1));

   OUT_PKT4(ring, REG_A6XX_RB_WINDOW_OFFSET2, 1);
   OUT_RING(ring, A6XX_RB_WINDOW_OFFSET2_X(x1) | A6XX_RB_WINDOW_OFFSET2_Y(y1));

   OUT_PKT4(ring, REG_A6XX_SP_WINDOW_OFFSET, 1);
   OUT_RING(ring, A6XX_SP_WINDOW_OFFSET_X(x1) | A6XX_SP_WINDOW_OFFSET_Y(y1));

   OUT_PKT4(ring, REG_A6XX_SP_TP_WINDOW_OFFSET, 1);
   OUT_RING(ring,
            A6XX_SP_TP_WINDOW_OFFSET_X(x1) | A6XX_SP_TP_WINDOW_OFFSET_Y(y1));
}

/* before mem2gmem */
static void
fd6_emit_tile_prep(struct fd_batch *batch, const struct fd_tile *tile)
{
   struct fd_context *ctx = batch->ctx;
   const struct fd_gmem_stateobj *gmem = batch->gmem_state;
   struct fd6_context *fd6_ctx = fd6_context(ctx);
   struct fd_ringbuffer *ring = batch->gmem;

   emit_marker6(ring, 7);
   OUT_PKT7(ring, CP_SET_MARKER, 1);
   OUT_RING(ring, A6XX_CP_SET_MARKER_0_MODE(RM6_GMEM));
   emit_marker6(ring, 7);

   uint32_t x1 = tile->xoff;
   uint32_t y1 = tile->yoff;
   uint32_t x2 = tile->xoff + tile->bin_w - 1;
   uint32_t y2 = tile->yoff + tile->bin_h - 1;

   set_scissor(ring, x1, y1, x2, y2);

   if (use_hw_binning(batch)) {
      const struct fd_vsc_pipe *pipe = &gmem->vsc_pipe[tile->p];

      OUT_PKT7(ring, CP_WAIT_FOR_ME, 0);

      OUT_PKT7(ring, CP_SET_MODE, 1);
      OUT_RING(ring, 0x0);

      OUT_PKT7(ring, CP_SET_BIN_DATA5, 7);
      OUT_RING(ring, CP_SET_BIN_DATA5_0_VSC_SIZE(pipe->w * pipe->h) |
                        CP_SET_BIN_DATA5_0_VSC_N(tile->n));
      OUT_RELOC(ring, fd6_ctx->vsc_draw_strm, /* per-pipe draw-stream address */
                (tile->p * fd6_ctx->vsc_draw_strm_pitch), 0, 0);
      OUT_RELOC(ring,
                fd6_ctx->vsc_draw_strm, /* VSC_DRAW_STRM_ADDRESS + (p * 4) */
                (tile->p * 4) + (32 * fd6_ctx->vsc_draw_strm_pitch), 0, 0);
      OUT_RELOC(ring, fd6_ctx->vsc_prim_strm,
                (tile->p * fd6_ctx->vsc_prim_strm_pitch), 0, 0);

      OUT_PKT7(ring, CP_SET_VISIBILITY_OVERRIDE, 1);
      OUT_RING(ring, 0x0);

      set_window_offset(ring, x1, y1);

      set_bin_size(ring, gmem->bin_w, gmem->bin_h, 0x6000000);

      OUT_PKT7(ring, CP_SET_MODE, 1);
      OUT_RING(ring, 0x0);
   } else {
      set_window_offset(ring, x1, y1);

      OUT_PKT7(ring, CP_SET_VISIBILITY_OVERRIDE, 1);
      OUT_RING(ring, 0x1);

      OUT_PKT7(ring, CP_SET_MODE, 1);
      OUT_RING(ring, 0x0);
   }
}

static void
set_blit_scissor(struct fd_batch *batch, struct fd_ringbuffer *ring)
{
   struct pipe_scissor_state blit_scissor = batch->max_scissor;

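   /* Expand the scissor outward to (presumably) the blit alignment
    * granularity of 16x4 pixel blocks:
    */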
   blit_scissor.minx = ROUND_DOWN_TO(blit_scissor.minx, 16);
   blit_scissor.miny = ROUND_DOWN_TO(blit_scissor.miny, 4);
   blit_scissor.maxx = ALIGN(blit_scissor.maxx, 16);
   blit_scissor.maxy = ALIGN(blit_scissor.maxy, 4);

   OUT_PKT4(ring, REG_A6XX_RB_BLIT_SCISSOR_TL, 2);
   OUT_RING(ring, A6XX_RB_BLIT_SCISSOR_TL_X(blit_scissor.minx) |
                     A6XX_RB_BLIT_SCISSOR_TL_Y(blit_scissor.miny));
   OUT_RING(ring, A6XX_RB_BLIT_SCISSOR_BR_X(blit_scissor.maxx - 1) |
                     A6XX_RB_BLIT_SCISSOR_BR_Y(blit_scissor.maxy - 1));
}

static void
emit_blit(struct fd_batch *batch, struct fd_ringbuffer *ring, uint32_t base,
          struct pipe_surface *psurf, bool stencil)
{
   struct fd_resource *rsc = fd_resource(psurf->texture);
   enum pipe_format pfmt = psurf->format;
   uint32_t offset;
   bool ubwc_enabled;

   debug_assert(psurf->u.tex.first_layer == psurf->u.tex.last_layer);

   /* separate stencil case: */
   if (stencil) {
      rsc = rsc->stencil;
      pfmt = rsc->b.b.format;
   }

   offset =
      fd_resource_offset(rsc, psurf->u.tex.level, psurf->u.tex.first_layer);
   ubwc_enabled = fd_resource_ubwc_enabled(rsc, psurf->u.tex.level);

   uint32_t tile_mode = fd_resource_tile_mode(&rsc->b.b, psurf->u.tex.level);
   enum a6xx_format format = fd6_color_format(pfmt, tile_mode);
   uint32_t stride = fd_resource_pitch(rsc, psurf->u.tex.level);
   uint32_t array_stride = fd_resource_layer_stride(rsc, psurf->u.tex.level);
   enum a3xx_color_swap swap = fd6_color_swap(pfmt, rsc->layout.tile_mode);
   enum a3xx_msaa_samples samples = fd_msaa_samples(rsc->b.b.nr_samples);

   OUT_REG(ring,
           A6XX_RB_BLIT_DST_INFO(.tile_mode = tile_mode, .samples = samples,
                                 .color_format = format, .color_swap = swap,
                                 .flags = ubwc_enabled),
           A6XX_RB_BLIT_DST(.bo = rsc->bo, .bo_offset = offset),
           A6XX_RB_BLIT_DST_PITCH(.a6xx_rb_blit_dst_pitch = stride),
           A6XX_RB_BLIT_DST_ARRAY_PITCH(.a6xx_rb_blit_dst_array_pitch = array_stride));

   OUT_REG(ring, A6XX_RB_BLIT_BASE_GMEM(.dword = base));

   if (ubwc_enabled) {
      OUT_PKT4(ring, REG_A6XX_RB_BLIT_FLAG_DST, 3);
      fd6_emit_flag_reference(ring, rsc, psurf->u.tex.level,
                              psurf->u.tex.first_layer);
   }

   fd6_emit_blit(batch, ring);
}

static void
emit_restore_blit(struct fd_batch *batch, struct fd_ringbuffer *ring,
                  uint32_t base, struct pipe_surface *psurf, unsigned buffer)
{
   bool stencil = (buffer == FD_BUFFER_STENCIL);

   OUT_REG(ring, A6XX_RB_BLIT_INFO(.gmem = true, .unk0 = true,
                                   .depth = (buffer == FD_BUFFER_DEPTH),
                                   .sample_0 = util_format_is_pure_integer(
                                      psurf->format)));

   emit_blit(batch, ring, base, psurf, stencil);
}

static void
emit_clears(struct fd_batch *batch, struct fd_ringbuffer *ring)
{
   struct pipe_framebuffer_state *pfb = &batch->framebuffer;
   const struct fd_gmem_stateobj *gmem = batch->gmem_state;
   enum a3xx_msaa_samples samples = fd_msaa_samples(pfb->samples);

   uint32_t buffers = batch->fast_cleared;

   if (buffers & PIPE_CLEAR_COLOR) {

      for (int i = 0; i < pfb->nr_cbufs; i++) {
         union pipe_color_union *color = &batch->clear_color[i];
         union util_color uc = {0};

         if (!pfb->cbufs[i])
            continue;

         if (!(buffers & (PIPE_CLEAR_COLOR0 << i)))
            continue;

         enum pipe_format pfmt = pfb->cbufs[i]->format;

         // XXX I think RB_CLEAR_COLOR_DWn wants to take into account SWAP??
         union pipe_color_union swapped;
         switch (fd6_color_swap(pfmt, TILE6_LINEAR)) {
         case WZYX:
            swapped.ui[0] = color->ui[0];
            swapped.ui[1] = color->ui[1];
            swapped.ui[2] = color->ui[2];
            swapped.ui[3] = color->ui[3];
            break;
         case WXYZ:
            swapped.ui[2] = color->ui[0];
            swapped.ui[1] = color->ui[1];
            swapped.ui[0] = color->ui[2];
            swapped.ui[3] = color->ui[3];
            break;
         case ZYXW:
            swapped.ui[3] = color->ui[0];
            swapped.ui[0] = color->ui[1];
            swapped.ui[1] = color->ui[2];
            swapped.ui[2] = color->ui[3];
            break;
         case XYZW:
            swapped.ui[3] = color->ui[0];
            swapped.ui[2] = color->ui[1];
            swapped.ui[1] = color->ui[2];
            swapped.ui[0] = color->ui[3];
            break;
         }

         util_pack_color_union(pfmt, &uc, &swapped);

         OUT_PKT4(ring, REG_A6XX_RB_BLIT_DST_INFO, 1);
         OUT_RING(ring,
                  A6XX_RB_BLIT_DST_INFO_TILE_MODE(TILE6_LINEAR) |
                     A6XX_RB_BLIT_DST_INFO_SAMPLES(samples) |
                     A6XX_RB_BLIT_DST_INFO_COLOR_FORMAT(fd6_color_format(pfmt, TILE6_LINEAR)));

         OUT_PKT4(ring, REG_A6XX_RB_BLIT_INFO, 1);
         OUT_RING(ring,
                  A6XX_RB_BLIT_INFO_GMEM | A6XX_RB_BLIT_INFO_CLEAR_MASK(0xf));

         OUT_PKT4(ring, REG_A6XX_RB_BLIT_BASE_GMEM, 1);
         OUT_RING(ring, gmem->cbuf_base[i]);

         OUT_PKT4(ring, REG_A6XX_RB_UNKNOWN_88D0, 1);
         OUT_RING(ring, 0);

         OUT_PKT4(ring, REG_A6XX_RB_BLIT_CLEAR_COLOR_DW0, 4);
         OUT_RING(ring, uc.ui[0]);
         OUT_RING(ring, uc.ui[1]);
         OUT_RING(ring, uc.ui[2]);
         OUT_RING(ring, uc.ui[3]);

         fd6_emit_blit(batch, ring);
      }
   }

   const bool has_depth = pfb->zsbuf;
   const bool has_separate_stencil =
      has_depth && fd_resource(pfb->zsbuf->texture)->stencil;

   /* First clear depth or combined depth/stencil. */
   if ((has_depth && (buffers & PIPE_CLEAR_DEPTH)) ||
       (!has_separate_stencil && (buffers & PIPE_CLEAR_STENCIL))) {
      enum pipe_format pfmt = pfb->zsbuf->format;
      uint32_t clear_value;
      uint32_t mask = 0;

      if (has_separate_stencil) {
         pfmt = util_format_get_depth_only(pfb->zsbuf->format);
         clear_value = util_pack_z(pfmt, batch->clear_depth);
      } else {
         pfmt = pfb->zsbuf->format;
         clear_value =
            util_pack_z_stencil(pfmt, batch->clear_depth, batch->clear_stencil);
      }

      if (buffers & PIPE_CLEAR_DEPTH)
         mask |= 0x1;

      if (!has_separate_stencil && (buffers & PIPE_CLEAR_STENCIL))
         mask |= 0x2;

      OUT_PKT4(ring, REG_A6XX_RB_BLIT_DST_INFO, 1);
      OUT_RING(ring,
               A6XX_RB_BLIT_DST_INFO_TILE_MODE(TILE6_LINEAR) |
                  A6XX_RB_BLIT_DST_INFO_SAMPLES(samples) |
                  A6XX_RB_BLIT_DST_INFO_COLOR_FORMAT(fd6_color_format(pfmt, TILE6_LINEAR)));

      OUT_PKT4(ring, REG_A6XX_RB_BLIT_INFO, 1);
      OUT_RING(ring, A6XX_RB_BLIT_INFO_GMEM |
                        // XXX UNK0 for separate stencil ??
                        A6XX_RB_BLIT_INFO_DEPTH |
                        A6XX_RB_BLIT_INFO_CLEAR_MASK(mask));

      OUT_PKT4(ring, REG_A6XX_RB_BLIT_BASE_GMEM, 1);
      OUT_RING(ring, gmem->zsbuf_base[0]);

      OUT_PKT4(ring, REG_A6XX_RB_UNKNOWN_88D0, 1);
      OUT_RING(ring, 0);

      OUT_PKT4(ring, REG_A6XX_RB_BLIT_CLEAR_COLOR_DW0, 1);
      OUT_RING(ring, clear_value);

      fd6_emit_blit(batch, ring);
   }

   /* Then clear the separate stencil buffer in case of 32 bit depth
    * formats with separate stencil. */
   if (has_separate_stencil && (buffers & PIPE_CLEAR_STENCIL)) {
      OUT_PKT4(ring, REG_A6XX_RB_BLIT_DST_INFO, 1);
      OUT_RING(ring, A6XX_RB_BLIT_DST_INFO_TILE_MODE(TILE6_LINEAR) |
                        A6XX_RB_BLIT_DST_INFO_SAMPLES(samples) |
                        A6XX_RB_BLIT_DST_INFO_COLOR_FORMAT(FMT6_8_UINT));

      OUT_PKT4(ring, REG_A6XX_RB_BLIT_INFO, 1);
      OUT_RING(ring, A6XX_RB_BLIT_INFO_GMEM |
                        // A6XX_RB_BLIT_INFO_UNK0 |
                        A6XX_RB_BLIT_INFO_DEPTH |
                        A6XX_RB_BLIT_INFO_CLEAR_MASK(0x1));

      OUT_PKT4(ring, REG_A6XX_RB_BLIT_BASE_GMEM, 1);
      OUT_RING(ring, gmem->zsbuf_base[1]);

      OUT_PKT4(ring, REG_A6XX_RB_UNKNOWN_88D0, 1);
      OUT_RING(ring, 0);

      OUT_PKT4(ring, REG_A6XX_RB_BLIT_CLEAR_COLOR_DW0, 1);
      OUT_RING(ring, batch->clear_stencil & 0xff);

      fd6_emit_blit(batch, ring);
   }
}

/*
 * transfer from system memory to gmem
 */
static void
emit_restore_blits(struct fd_batch *batch, struct fd_ringbuffer *ring)
{
   const struct fd_gmem_stateobj *gmem = batch->gmem_state;
   struct pipe_framebuffer_state *pfb = &batch->framebuffer;

   if (batch->restore & FD_BUFFER_COLOR) {
      unsigned i;
      for (i = 0; i < pfb->nr_cbufs; i++) {
         if (!pfb->cbufs[i])
            continue;
         if (!(batch->restore & (PIPE_CLEAR_COLOR0 << i)))
            continue;
         emit_restore_blit(batch, ring, gmem->cbuf_base[i], pfb->cbufs[i],
                           FD_BUFFER_COLOR);
      }
   }

   if (batch->restore & (FD_BUFFER_DEPTH | FD_BUFFER_STENCIL)) {
      struct fd_resource *rsc = fd_resource(pfb->zsbuf->texture);

      if (!rsc->stencil || (batch->restore & FD_BUFFER_DEPTH)) {
         emit_restore_blit(batch, ring, gmem->zsbuf_base[0], pfb->zsbuf,
                           FD_BUFFER_DEPTH);
      }
      if (rsc->stencil && (batch->restore & FD_BUFFER_STENCIL)) {
         emit_restore_blit(batch, ring, gmem->zsbuf_base[1], pfb->zsbuf,
                           FD_BUFFER_STENCIL);
      }
   }
}

static void
prepare_tile_setup_ib(struct fd_batch *batch)
{
   if (!(batch->restore || batch->fast_cleared))
      return;

   batch->tile_setup =
      fd_submit_new_ringbuffer(batch->submit, 0x1000, FD_RINGBUFFER_STREAMING);

   set_blit_scissor(batch, batch->tile_setup);

   emit_restore_blits(batch, batch->tile_setup);
   emit_clears(batch, batch->tile_setup);
}

/*
 * transfer from system memory to gmem
 */
static void
fd6_emit_tile_mem2gmem(struct fd_batch *batch, const struct fd_tile *tile)
{
}

/* before IB to rendering cmds: */
static void
fd6_emit_tile_renderprep(struct fd_batch *batch, const struct fd_tile *tile)
{
   if (!batch->tile_setup)
      return;

   trace_start_clear_restore(&batch->trace, batch->gmem, batch->fast_cleared);
   if (batch->fast_cleared || !use_hw_binning(batch)) {
      fd6_emit_ib(batch->gmem, batch->tile_setup);
   } else {
      emit_conditional_ib(batch, tile, batch->tile_setup);
   }
   trace_end_clear_restore(&batch->trace, batch->gmem);
}

static bool
blit_can_resolve(enum pipe_format format)
{
   const struct util_format_description *desc = util_format_description(format);

   /* blit event can only do resolve for simple cases:
    * averaging samples as unsigned integers or choosing only one sample
    */
   if (util_format_is_snorm(format) || util_format_is_srgb(format))
      return false;

   /* can't do formats with larger channel sizes
    * note: this includes all float formats
    * note2: single channel integer formats seem OK
    */
   if (desc->channel[0].size > 10)
      return false;

   switch (format) {
   /* for unknown reasons blit event can't msaa resolve these formats when tiled
    * likely related to these formats having different layout from other cpp=2
    * formats
    */
   case PIPE_FORMAT_R8G8_UNORM:
   case PIPE_FORMAT_R8G8_UINT:
   case PIPE_FORMAT_R8G8_SINT:
   /* TODO: this one should be able to work? */
   case PIPE_FORMAT_Z24_UNORM_S8_UINT:
      return false;
   default:
      break;
   }

   return true;
}

static bool
needs_resolve(struct pipe_surface *psurf)
{
   return psurf->nr_samples &&
          (psurf->nr_samples != psurf->texture->nr_samples);
}

/**
 * Returns the UNKNOWN_8C01 value for handling partial depth/stencil
 * clear/stores to Z24S8.
 */
static uint32_t
fd6_unknown_8c01(enum pipe_format format, unsigned buffers)
{
   if (format == PIPE_FORMAT_Z24_UNORM_S8_UINT) {
      if (buffers == FD_BUFFER_DEPTH)
         return 0x08000041;
      else if (buffers == FD_BUFFER_STENCIL)
         return 0x00084001;
   }
   return 0;
}

static void
emit_resolve_blit(struct fd_batch *batch, struct fd_ringbuffer *ring,
                  uint32_t base, struct pipe_surface *psurf,
                  unsigned buffer) assert_dt
{
   uint32_t info = 0;
   bool stencil = false;

   if (!fd_resource(psurf->texture)->valid)
      return;

   /* if we need to resolve, but cannot with BLIT event, we instead need
    * to generate per-tile CP_BLIT (r2d) commands:
    *
    * The separate-stencil case is special: we might need to use CP_BLIT
    * for depth, but we can still resolve stencil with a BLIT event
    */
   if (needs_resolve(psurf) && !blit_can_resolve(psurf->format) &&
       (buffer != FD_BUFFER_STENCIL)) {
      /* We could potentially use fd6_unknown_8c01() to handle partial z/s
       * resolve to packed z/s, but we would need a corresponding ability in the
       * !resolve case below, so batch_draw_tracking_for_dirty_bits() has us
       * just do a restore of the other channel for partial packed z/s writes.
       */
      fd6_resolve_tile(batch, ring, base, psurf, 0);
      return;
   }

   switch (buffer) {
   case FD_BUFFER_COLOR:
      break;
   case FD_BUFFER_STENCIL:
      info |= A6XX_RB_BLIT_INFO_UNK0;
      stencil = true;
      break;
   case FD_BUFFER_DEPTH:
      info |= A6XX_RB_BLIT_INFO_DEPTH;
      break;
   }

   if (util_format_is_pure_integer(psurf->format) ||
       util_format_is_depth_or_stencil(psurf->format))
      info |= A6XX_RB_BLIT_INFO_SAMPLE_0;

   OUT_PKT4(ring, REG_A6XX_RB_BLIT_INFO, 1);
   OUT_RING(ring, info);

   emit_blit(batch, ring, base, psurf, stencil);
}

/*
 * transfer from gmem to system memory (i.e. normal RAM)
 */

static void
prepare_tile_fini_ib(struct fd_batch *batch) assert_dt
{
   const struct fd_gmem_stateobj *gmem = batch->gmem_state;
   struct pipe_framebuffer_state *pfb = &batch->framebuffer;
   struct fd_ringbuffer *ring;

   batch->tile_fini =
      fd_submit_new_ringbuffer(batch->submit, 0x1000, FD_RINGBUFFER_STREAMING);
   ring = batch->tile_fini;

   set_blit_scissor(batch, ring);

   if (batch->resolve & (FD_BUFFER_DEPTH | FD_BUFFER_STENCIL)) {
      struct fd_resource *rsc = fd_resource(pfb->zsbuf->texture);

      if (!rsc->stencil || (batch->resolve & FD_BUFFER_DEPTH)) {
         emit_resolve_blit(batch, ring, gmem->zsbuf_base[0], pfb->zsbuf,
                           FD_BUFFER_DEPTH);
      }
      if (rsc->stencil && (batch->resolve & FD_BUFFER_STENCIL)) {
         emit_resolve_blit(batch, ring, gmem->zsbuf_base[1], pfb->zsbuf,
                           FD_BUFFER_STENCIL);
      }
   }

   if (batch->resolve & FD_BUFFER_COLOR) {
      unsigned i;
      for (i = 0; i < pfb->nr_cbufs; i++) {
         if (!pfb->cbufs[i])
            continue;
         if (!(batch->resolve & (PIPE_CLEAR_COLOR0 << i)))
            continue;
         emit_resolve_blit(batch, ring, gmem->cbuf_base[i], pfb->cbufs[i],
                           FD_BUFFER_COLOR);
      }
   }
}
1427 
static void
fd6_emit_tile(struct fd_batch *batch, const struct fd_tile *tile)
{
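   /* With HW binning, the tile's draw IB is executed conditionally based
    * on the binning results, so tiles with no visible geometry can skip
    * the draws entirely:
    */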
   if (!use_hw_binning(batch)) {
      fd6_emit_ib(batch->gmem, batch->draw);
   } else {
      emit_conditional_ib(batch, tile, batch->draw);
   }

   if (batch->epilogue)
      fd6_emit_ib(batch->gmem, batch->epilogue);
}

static void
fd6_emit_tile_gmem2mem(struct fd_batch *batch, const struct fd_tile *tile)
{
   struct fd_ringbuffer *ring = batch->gmem;

   if (use_hw_binning(batch)) {
      OUT_PKT7(ring, CP_SET_MARKER, 1);
      OUT_RING(ring, A6XX_CP_SET_MARKER_0_MODE(RM6_ENDVIS));
   }

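   /* Disable all draw-state groups, presumably so that no stale draw
    * state gets replayed into the resolve pass:
    */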
   OUT_PKT7(ring, CP_SET_DRAW_STATE, 3);
   OUT_RING(ring, CP_SET_DRAW_STATE__0_COUNT(0) |
                     CP_SET_DRAW_STATE__0_DISABLE_ALL_GROUPS |
                     CP_SET_DRAW_STATE__0_GROUP_ID(0));
   OUT_RING(ring, CP_SET_DRAW_STATE__1_ADDR_LO(0));
   OUT_RING(ring, CP_SET_DRAW_STATE__2_ADDR_HI(0));

   OUT_PKT7(ring, CP_SKIP_IB2_ENABLE_LOCAL, 1);
   OUT_RING(ring, 0x0);

   emit_marker6(ring, 7);
   OUT_PKT7(ring, CP_SET_MARKER, 1);
   OUT_RING(ring, A6XX_CP_SET_MARKER_0_MODE(RM6_RESOLVE));
   emit_marker6(ring, 7);

   trace_start_resolve(&batch->trace, batch->gmem);
   if (batch->fast_cleared || !use_hw_binning(batch)) {
      fd6_emit_ib(batch->gmem, batch->tile_fini);
   } else {
      emit_conditional_ib(batch, tile, batch->tile_fini);
   }
   trace_end_resolve(&batch->trace, batch->gmem);
}

static void
fd6_emit_tile_fini(struct fd_batch *batch)
{
   struct fd_ringbuffer *ring = batch->gmem;

   emit_common_fini(batch);

   OUT_PKT4(ring, REG_A6XX_GRAS_LRZ_CNTL, 1);
   OUT_RING(ring, A6XX_GRAS_LRZ_CNTL_ENABLE);

   fd6_emit_lrz_flush(ring);

   fd6_event_write(batch, ring, PC_CCU_RESOLVE_TS, true);

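   /* With HW binning, check whether the visibility stream overflowed its
    * buffers, so that they can be grown for subsequent batches:
    */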
   if (use_hw_binning(batch)) {
      check_vsc_overflow(batch->ctx);
   }
}

static void
emit_sysmem_clears(struct fd_batch *batch, struct fd_ringbuffer *ring) assert_dt
{
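   /* In the bypass (sysmem) path there is no per-tile clear; buffers
    * marked fast-cleared are instead cleared up front with 2D blits.
    */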
   struct fd_context *ctx = batch->ctx;
   struct pipe_framebuffer_state *pfb = &batch->framebuffer;

   uint32_t buffers = batch->fast_cleared;

   if (!buffers)
      return;

   trace_start_clear_restore(&batch->trace, ring, buffers);

   if (buffers & PIPE_CLEAR_COLOR) {
      for (int i = 0; i < pfb->nr_cbufs; i++) {
         union pipe_color_union color = batch->clear_color[i];

         if (!pfb->cbufs[i])
            continue;

         if (!(buffers & (PIPE_CLEAR_COLOR0 << i)))
            continue;

         fd6_clear_surface(ctx, ring, pfb->cbufs[i], pfb->width, pfb->height,
                           &color, 0);
      }
   }
   if (buffers & (PIPE_CLEAR_DEPTH | PIPE_CLEAR_STENCIL)) {
      union pipe_color_union value = {};

      const bool has_depth = pfb->zsbuf;
      struct pipe_resource *separate_stencil =
         has_depth && fd_resource(pfb->zsbuf->texture)->stencil
            ? &fd_resource(pfb->zsbuf->texture)->stencil->b.b
            : NULL;

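      /* Depth, and stencil when packed with depth, is cleared through the
       * zsbuf; fd6_unknown_8c01() lets a stencil-only clear of packed
       * z24s8 leave the depth channel intact:
       */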
      if ((buffers & PIPE_CLEAR_DEPTH) ||
          (!separate_stencil && (buffers & PIPE_CLEAR_STENCIL))) {
         value.f[0] = batch->clear_depth;
         value.ui[1] = batch->clear_stencil;
         fd6_clear_surface(ctx, ring, pfb->zsbuf, pfb->width, pfb->height,
                           &value, fd6_unknown_8c01(pfb->zsbuf->format, buffers));
      }

      if (separate_stencil && (buffers & PIPE_CLEAR_STENCIL)) {
         value.ui[0] = batch->clear_stencil;

         struct pipe_surface stencil_surf = *pfb->zsbuf;
         stencil_surf.format = PIPE_FORMAT_S8_UINT;
         stencil_surf.texture = separate_stencil;

         fd6_clear_surface(ctx, ring, &stencil_surf, pfb->width, pfb->height,
                           &value, 0);
      }
   }

   fd6_event_write(batch, ring, PC_CCU_FLUSH_COLOR_TS, true);
   fd_wfi(batch, ring);

   trace_end_clear_restore(&batch->trace, ring);
}

static void
fd6_emit_sysmem_prep(struct fd_batch *batch) assert_dt
{
   struct fd_ringbuffer *ring = batch->gmem;
   struct fd_screen *screen = batch->ctx->screen;

   fd6_emit_restore(batch, ring);
   fd6_emit_lrz_flush(ring);

   if (batch->prologue) {
      if (!batch->nondraw) {
         trace_start_prologue(&batch->trace, ring);
      }
      fd6_emit_ib(ring, batch->prologue);
      if (!batch->nondraw) {
         trace_end_prologue(&batch->trace, ring);
      }
   }

   /* remaining setup below here does not apply to blit/compute: */
   if (batch->nondraw)
      return;

   struct pipe_framebuffer_state *pfb = &batch->framebuffer;

   if (pfb->width > 0 && pfb->height > 0)
      set_scissor(ring, 0, 0, pfb->width - 1, pfb->height - 1);
   else
      set_scissor(ring, 0, 0, 0, 0);

   set_window_offset(ring, 0, 0);

   set_bin_size(ring, 0, 0, 0xc00000); /* 0xc00000 = BYPASS? */

   emit_sysmem_clears(batch, ring);

   emit_marker6(ring, 7);
   OUT_PKT7(ring, CP_SET_MARKER, 1);
   OUT_RING(ring, A6XX_CP_SET_MARKER_0_MODE(RM6_BYPASS));
   emit_marker6(ring, 7);

   OUT_PKT7(ring, CP_SKIP_IB2_ENABLE_GLOBAL, 1);
   OUT_RING(ring, 0x0);

   /* blob controls "local" in IB2, but I think that is not required */
   OUT_PKT7(ring, CP_SKIP_IB2_ENABLE_LOCAL, 1);
   OUT_RING(ring, 0x1);

   fd6_event_write(batch, ring, PC_CCU_INVALIDATE_COLOR, false);
   fd6_cache_inv(batch, ring);

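   /* Point the CCU color cache at the bypass carve-out; the cache lives
    * in GMEM, at a different offset than the one used for tiled rendering:
    */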
   fd_wfi(batch, ring);
   OUT_REG(ring, A6XX_RB_CCU_CNTL(.color_offset = screen->ccu_offset_bypass));

   /* enable stream-out; with sysmem there is only one pass: */
   OUT_REG(ring, A6XX_VPC_SO_DISABLE(false));

   OUT_PKT7(ring, CP_SET_VISIBILITY_OVERRIDE, 1);
   OUT_RING(ring, 0x1);

   emit_zs(ring, pfb->zsbuf, NULL);
   emit_mrt(ring, pfb, NULL);
   emit_msaa(ring, pfb->samples);
   patch_fb_read_sysmem(batch);

   update_render_cntl(batch, pfb, false);

   emit_common_init(batch);
}

static void
fd6_emit_sysmem_fini(struct fd_batch *batch) assert_dt
{
   struct fd_ringbuffer *ring = batch->gmem;

   emit_common_fini(batch);

   if (batch->epilogue)
      fd6_emit_ib(batch->gmem, batch->epilogue);

   OUT_PKT7(ring, CP_SKIP_IB2_ENABLE_GLOBAL, 1);
   OUT_RING(ring, 0x0);

   fd6_emit_lrz_flush(ring);

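   /* Flush the CCU color/depth caches out to memory before the batch
    * ends, since later batches may read the rendered results:
    */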
   fd6_event_write(batch, ring, PC_CCU_FLUSH_COLOR_TS, true);
   fd6_event_write(batch, ring, PC_CCU_FLUSH_DEPTH_TS, true);
   fd_wfi(batch, ring);
}

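/* Hook up the a6xx tiling/bypass entrypoints used by the core gmem code: */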
void
fd6_gmem_init(struct pipe_context *pctx) disable_thread_safety_analysis
{
   struct fd_context *ctx = fd_context(pctx);

   ctx->emit_tile_init = fd6_emit_tile_init;
   ctx->emit_tile_prep = fd6_emit_tile_prep;
   ctx->emit_tile_mem2gmem = fd6_emit_tile_mem2gmem;
   ctx->emit_tile_renderprep = fd6_emit_tile_renderprep;
   ctx->emit_tile = fd6_emit_tile;
   ctx->emit_tile_gmem2mem = fd6_emit_tile_gmem2mem;
   ctx->emit_tile_fini = fd6_emit_tile_fini;
   ctx->emit_sysmem_prep = fd6_emit_sysmem_prep;
   ctx->emit_sysmem_fini = fd6_emit_sysmem_fini;
}