1 /*
2  * Copyright (C) 2016 Rob Clark <robclark@freedesktop.org>
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21  * SOFTWARE.
22  *
23  * Authors:
24  *    Rob Clark <robclark@freedesktop.org>
25  */
26 
27 #include "pipe/p_state.h"
28 #include "util/format/u_format.h"
29 #include "util/u_inlines.h"
30 #include "util/u_memory.h"
31 #include "util/u_string.h"
32 
33 #include "freedreno_draw.h"
34 #include "freedreno_resource.h"
35 #include "freedreno_state.h"
36 
37 #include "fd5_context.h"
38 #include "fd5_draw.h"
39 #include "fd5_emit.h"
40 #include "fd5_format.h"
41 #include "fd5_gmem.h"
42 #include "fd5_program.h"
43 #include "fd5_zsa.h"
44 
45 static void
emit_mrt(struct fd_ringbuffer * ring,unsigned nr_bufs,struct pipe_surface ** bufs,const struct fd_gmem_stateobj * gmem)46 emit_mrt(struct fd_ringbuffer *ring, unsigned nr_bufs,
47          struct pipe_surface **bufs, const struct fd_gmem_stateobj *gmem)
48 {
49    enum a5xx_tile_mode tile_mode;
50    unsigned i;
51 
52    for (i = 0; i < A5XX_MAX_RENDER_TARGETS; i++) {
53       enum a5xx_color_fmt format = 0;
54       enum a3xx_color_swap swap = WZYX;
55       bool srgb = false, sint = false, uint = false;
56       struct fd_resource *rsc = NULL;
57       struct fdl_slice *slice = NULL;
58       uint32_t stride = 0;
59       uint32_t size = 0;
60       uint32_t base = 0;
61       uint32_t offset = 0;
62 
63       if (gmem) {
64          tile_mode = TILE5_2;
65       } else {
66          tile_mode = TILE5_LINEAR;
67       }
68 
69       if ((i < nr_bufs) && bufs[i]) {
70          struct pipe_surface *psurf = bufs[i];
71          enum pipe_format pformat = psurf->format;
72 
73          rsc = fd_resource(psurf->texture);
74 
75          slice = fd_resource_slice(rsc, psurf->u.tex.level);
76          format = fd5_pipe2color(pformat);
77          swap = fd5_pipe2swap(pformat);
78          srgb = util_format_is_srgb(pformat);
79          sint = util_format_is_pure_sint(pformat);
80          uint = util_format_is_pure_uint(pformat);
81 
82          debug_assert(psurf->u.tex.first_layer == psurf->u.tex.last_layer);
83 
84          offset = fd_resource_offset(rsc, psurf->u.tex.level,
85                                      psurf->u.tex.first_layer);
86 
87          if (gmem) {
88             stride = gmem->bin_w * gmem->cbuf_cpp[i];
89             size = stride * gmem->bin_h;
90             base = gmem->cbuf_base[i];
91          } else {
92             stride = fd_resource_pitch(rsc, psurf->u.tex.level);
93             size = slice->size0;
94 
95             tile_mode =
96                fd_resource_tile_mode(psurf->texture, psurf->u.tex.level);
97          }
98       }
99 
100       OUT_PKT4(ring, REG_A5XX_RB_MRT_BUF_INFO(i), 5);
101       OUT_RING(
102          ring,
103          A5XX_RB_MRT_BUF_INFO_COLOR_FORMAT(format) |
104             A5XX_RB_MRT_BUF_INFO_COLOR_TILE_MODE(tile_mode) |
105             A5XX_RB_MRT_BUF_INFO_COLOR_SWAP(swap) |
106             COND(gmem,
107                  0x800) | /* XXX 0x1000 for RECTLIST clear, 0x0 for BLIT.. */
108             COND(srgb, A5XX_RB_MRT_BUF_INFO_COLOR_SRGB));
109       OUT_RING(ring, A5XX_RB_MRT_PITCH(stride));
110       OUT_RING(ring, A5XX_RB_MRT_ARRAY_PITCH(size));
111       if (gmem || (i >= nr_bufs) || !bufs[i]) {
112          OUT_RING(ring, base);       /* RB_MRT[i].BASE_LO */
113          OUT_RING(ring, 0x00000000); /* RB_MRT[i].BASE_HI */
114       } else {
115          debug_assert((offset + size) <= fd_bo_size(rsc->bo));
116          OUT_RELOC(ring, rsc->bo, offset, 0, 0); /* BASE_LO/HI */
117       }
118 
119       OUT_PKT4(ring, REG_A5XX_SP_FS_MRT_REG(i), 1);
120       OUT_RING(ring, A5XX_SP_FS_MRT_REG_COLOR_FORMAT(format) |
121                         COND(sint, A5XX_SP_FS_MRT_REG_COLOR_SINT) |
122                         COND(uint, A5XX_SP_FS_MRT_REG_COLOR_UINT) |
123                         COND(srgb, A5XX_SP_FS_MRT_REG_COLOR_SRGB));
124 
125       /* when we support UBWC, these would be the system memory
126        * addr/pitch/etc:
127        */
128       OUT_PKT4(ring, REG_A5XX_RB_MRT_FLAG_BUFFER(i), 4);
129       OUT_RING(ring, 0x00000000); /* RB_MRT_FLAG_BUFFER[i].ADDR_LO */
130       OUT_RING(ring, 0x00000000); /* RB_MRT_FLAG_BUFFER[i].ADDR_HI */
131       OUT_RING(ring, A5XX_RB_MRT_FLAG_BUFFER_PITCH(0));
132       OUT_RING(ring, A5XX_RB_MRT_FLAG_BUFFER_ARRAY_PITCH(0));
133    }
134 }
135 
136 static void
emit_zs(struct fd_ringbuffer * ring,struct pipe_surface * zsbuf,const struct fd_gmem_stateobj * gmem)137 emit_zs(struct fd_ringbuffer *ring, struct pipe_surface *zsbuf,
138         const struct fd_gmem_stateobj *gmem)
139 {
140    if (zsbuf) {
141       struct fd_resource *rsc = fd_resource(zsbuf->texture);
142       enum a5xx_depth_format fmt = fd5_pipe2depth(zsbuf->format);
143       uint32_t cpp = rsc->layout.cpp;
144       uint32_t stride = 0;
145       uint32_t size = 0;
146 
147       if (gmem) {
148          stride = cpp * gmem->bin_w;
149          size = stride * gmem->bin_h;
150       } else {
151          stride = fd_resource_pitch(rsc, 0);
152          size = fd_resource_slice(rsc, 0)->size0;
153       }
154 
155       OUT_PKT4(ring, REG_A5XX_RB_DEPTH_BUFFER_INFO, 5);
156       OUT_RING(ring, A5XX_RB_DEPTH_BUFFER_INFO_DEPTH_FORMAT(fmt));
157       if (gmem) {
158          OUT_RING(ring, gmem->zsbuf_base[0]); /* RB_DEPTH_BUFFER_BASE_LO */
159          OUT_RING(ring, 0x00000000);          /* RB_DEPTH_BUFFER_BASE_HI */
160       } else {
161          OUT_RELOC(ring, rsc->bo, 0, 0, 0); /* RB_DEPTH_BUFFER_BASE_LO/HI */
162       }
163       OUT_RING(ring, A5XX_RB_DEPTH_BUFFER_PITCH(stride));
164       OUT_RING(ring, A5XX_RB_DEPTH_BUFFER_ARRAY_PITCH(size));
165 
166       OUT_PKT4(ring, REG_A5XX_GRAS_SU_DEPTH_BUFFER_INFO, 1);
167       OUT_RING(ring, A5XX_GRAS_SU_DEPTH_BUFFER_INFO_DEPTH_FORMAT(fmt));
168 
169       OUT_PKT4(ring, REG_A5XX_RB_DEPTH_FLAG_BUFFER_BASE_LO, 3);
170       OUT_RING(ring, 0x00000000); /* RB_DEPTH_FLAG_BUFFER_BASE_LO */
171       OUT_RING(ring, 0x00000000); /* RB_DEPTH_FLAG_BUFFER_BASE_HI */
172       OUT_RING(ring, 0x00000000); /* RB_DEPTH_FLAG_BUFFER_PITCH */
173 
174       if (rsc->lrz) {
175          OUT_PKT4(ring, REG_A5XX_GRAS_LRZ_BUFFER_BASE_LO, 3);
176          OUT_RELOC(ring, rsc->lrz, 0x1000, 0, 0);
177          OUT_RING(ring, A5XX_GRAS_LRZ_BUFFER_PITCH(rsc->lrz_pitch));
178 
179          OUT_PKT4(ring, REG_A5XX_GRAS_LRZ_FAST_CLEAR_BUFFER_BASE_LO, 2);
180          OUT_RELOC(ring, rsc->lrz, 0, 0, 0);
181       } else {
182          OUT_PKT4(ring, REG_A5XX_GRAS_LRZ_BUFFER_BASE_LO, 3);
183          OUT_RING(ring, 0x00000000);
184          OUT_RING(ring, 0x00000000);
185          OUT_RING(ring, 0x00000000); /* GRAS_LRZ_BUFFER_PITCH */
186 
187          OUT_PKT4(ring, REG_A5XX_GRAS_LRZ_FAST_CLEAR_BUFFER_BASE_LO, 2);
188          OUT_RING(ring, 0x00000000);
189          OUT_RING(ring, 0x00000000);
190       }
191 
192       if (rsc->stencil) {
193          if (gmem) {
194             stride = 1 * gmem->bin_w;
195             size = stride * gmem->bin_h;
196          } else {
197             stride = fd_resource_pitch(rsc->stencil, 0);
198             size = fd_resource_slice(rsc->stencil, 0)->size0;
199          }
200 
201          OUT_PKT4(ring, REG_A5XX_RB_STENCIL_INFO, 5);
202          OUT_RING(ring, A5XX_RB_STENCIL_INFO_SEPARATE_STENCIL);
203          if (gmem) {
204             OUT_RING(ring, gmem->zsbuf_base[1]); /* RB_STENCIL_BASE_LO */
205             OUT_RING(ring, 0x00000000);          /* RB_STENCIL_BASE_HI */
206          } else {
207             OUT_RELOC(ring, rsc->stencil->bo, 0, 0,
208                       0); /* RB_STENCIL_BASE_LO/HI */
209          }
210          OUT_RING(ring, A5XX_RB_STENCIL_PITCH(stride));
211          OUT_RING(ring, A5XX_RB_STENCIL_ARRAY_PITCH(size));
212       } else {
213          OUT_PKT4(ring, REG_A5XX_RB_STENCIL_INFO, 1);
214          OUT_RING(ring, 0x00000000); /* RB_STENCIL_INFO */
215       }
216    } else {
217       OUT_PKT4(ring, REG_A5XX_RB_DEPTH_BUFFER_INFO, 5);
218       OUT_RING(ring, A5XX_RB_DEPTH_BUFFER_INFO_DEPTH_FORMAT(DEPTH5_NONE));
219       OUT_RING(ring, 0x00000000); /* RB_DEPTH_BUFFER_BASE_LO */
220       OUT_RING(ring, 0x00000000); /* RB_DEPTH_BUFFER_BASE_HI */
221       OUT_RING(ring, 0x00000000); /* RB_DEPTH_BUFFER_PITCH */
222       OUT_RING(ring, 0x00000000); /* RB_DEPTH_BUFFER_ARRAY_PITCH */
223 
224       OUT_PKT4(ring, REG_A5XX_GRAS_SU_DEPTH_BUFFER_INFO, 1);
225       OUT_RING(ring, A5XX_GRAS_SU_DEPTH_BUFFER_INFO_DEPTH_FORMAT(DEPTH5_NONE));
226 
227       OUT_PKT4(ring, REG_A5XX_RB_DEPTH_FLAG_BUFFER_BASE_LO, 3);
228       OUT_RING(ring, 0x00000000); /* RB_DEPTH_FLAG_BUFFER_BASE_LO */
229       OUT_RING(ring, 0x00000000); /* RB_DEPTH_FLAG_BUFFER_BASE_HI */
230       OUT_RING(ring, 0x00000000); /* RB_DEPTH_FLAG_BUFFER_PITCH */
231 
232       OUT_PKT4(ring, REG_A5XX_RB_STENCIL_INFO, 1);
233       OUT_RING(ring, 0x00000000); /* RB_STENCIL_INFO */
234    }
235 }
236 
237 static bool
use_hw_binning(struct fd_batch * batch)238 use_hw_binning(struct fd_batch *batch)
239 {
240    const struct fd_gmem_stateobj *gmem = batch->gmem_state;
241 
242    if ((gmem->maxpw * gmem->maxph) > 32)
243       return false;
244 
245    if ((gmem->maxpw > 15) || (gmem->maxph > 15))
246       return false;
247 
248    return fd_binning_enabled && ((gmem->nbins_x * gmem->nbins_y) > 2) &&
249           (batch->num_draws > 0);
250 }
251 
252 static void
patch_draws(struct fd_batch * batch,enum pc_di_vis_cull_mode vismode)253 patch_draws(struct fd_batch *batch, enum pc_di_vis_cull_mode vismode)
254 {
255    unsigned i;
256    for (i = 0; i < fd_patch_num_elements(&batch->draw_patches); i++) {
257       struct fd_cs_patch *patch = fd_patch_element(&batch->draw_patches, i);
258       *patch->cs = patch->val | DRAW4(0, 0, 0, vismode);
259    }
260    util_dynarray_clear(&batch->draw_patches);
261 }
262 
263 static void
update_vsc_pipe(struct fd_batch * batch)264 update_vsc_pipe(struct fd_batch *batch) assert_dt
265 {
266    struct fd_context *ctx = batch->ctx;
267    struct fd5_context *fd5_ctx = fd5_context(ctx);
268    const struct fd_gmem_stateobj *gmem = batch->gmem_state;
269    struct fd_ringbuffer *ring = batch->gmem;
270    int i;
271 
272    OUT_PKT4(ring, REG_A5XX_VSC_BIN_SIZE, 3);
273    OUT_RING(ring, A5XX_VSC_BIN_SIZE_WIDTH(gmem->bin_w) |
274                      A5XX_VSC_BIN_SIZE_HEIGHT(gmem->bin_h));
275    OUT_RELOC(ring, fd5_ctx->vsc_size_mem, 0, 0, 0); /* VSC_SIZE_ADDRESS_LO/HI */
276 
277    OUT_PKT4(ring, REG_A5XX_UNKNOWN_0BC5, 2);
278    OUT_RING(ring, 0x00000000); /* UNKNOWN_0BC5 */
279    OUT_RING(ring, 0x00000000); /* UNKNOWN_0BC6 */
280 
281    OUT_PKT4(ring, REG_A5XX_VSC_PIPE_CONFIG_REG(0), 16);
282    for (i = 0; i < 16; i++) {
283       const struct fd_vsc_pipe *pipe = &gmem->vsc_pipe[i];
284       OUT_RING(ring, A5XX_VSC_PIPE_CONFIG_REG_X(pipe->x) |
285                         A5XX_VSC_PIPE_CONFIG_REG_Y(pipe->y) |
286                         A5XX_VSC_PIPE_CONFIG_REG_W(pipe->w) |
287                         A5XX_VSC_PIPE_CONFIG_REG_H(pipe->h));
288    }
289 
290    OUT_PKT4(ring, REG_A5XX_VSC_PIPE_DATA_ADDRESS_LO(0), 32);
291    for (i = 0; i < 16; i++) {
292       if (!ctx->vsc_pipe_bo[i]) {
293          ctx->vsc_pipe_bo[i] = fd_bo_new(
294             ctx->dev, 0x20000, 0, "vsc_pipe[%u]", i);
295       }
296       OUT_RELOC(ring, ctx->vsc_pipe_bo[i], 0, 0,
297                 0); /* VSC_PIPE_DATA_ADDRESS[i].LO/HI */
298    }
299 
300    OUT_PKT4(ring, REG_A5XX_VSC_PIPE_DATA_LENGTH_REG(0), 16);
301    for (i = 0; i < 16; i++) {
302       OUT_RING(ring, fd_bo_size(ctx->vsc_pipe_bo[i]) -
303                         32); /* VSC_PIPE_DATA_LENGTH[i] */
304    }
305 }
306 
307 static void
emit_binning_pass(struct fd_batch * batch)308 emit_binning_pass(struct fd_batch *batch) assert_dt
309 {
310    struct fd_ringbuffer *ring = batch->gmem;
311    const struct fd_gmem_stateobj *gmem = batch->gmem_state;
312 
313    uint32_t x1 = gmem->minx;
314    uint32_t y1 = gmem->miny;
315    uint32_t x2 = gmem->minx + gmem->width - 1;
316    uint32_t y2 = gmem->miny + gmem->height - 1;
317 
318    fd5_set_render_mode(batch->ctx, ring, BINNING);
319 
320    OUT_PKT4(ring, REG_A5XX_RB_CNTL, 1);
321    OUT_RING(ring,
322             A5XX_RB_CNTL_WIDTH(gmem->bin_w) | A5XX_RB_CNTL_HEIGHT(gmem->bin_h));
323 
324    OUT_PKT4(ring, REG_A5XX_GRAS_SC_WINDOW_SCISSOR_TL, 2);
325    OUT_RING(ring, A5XX_GRAS_SC_WINDOW_SCISSOR_TL_X(x1) |
326                      A5XX_GRAS_SC_WINDOW_SCISSOR_TL_Y(y1));
327    OUT_RING(ring, A5XX_GRAS_SC_WINDOW_SCISSOR_BR_X(x2) |
328                      A5XX_GRAS_SC_WINDOW_SCISSOR_BR_Y(y2));
329 
330    OUT_PKT4(ring, REG_A5XX_RB_RESOLVE_CNTL_1, 2);
331    OUT_RING(ring, A5XX_RB_RESOLVE_CNTL_1_X(x1) | A5XX_RB_RESOLVE_CNTL_1_Y(y1));
332    OUT_RING(ring, A5XX_RB_RESOLVE_CNTL_2_X(x2) | A5XX_RB_RESOLVE_CNTL_2_Y(y2));
333 
334    update_vsc_pipe(batch);
335 
336    OUT_PKT4(ring, REG_A5XX_VPC_MODE_CNTL, 1);
337    OUT_RING(ring, A5XX_VPC_MODE_CNTL_BINNING_PASS);
338 
339    fd5_event_write(batch, ring, UNK_2C, false);
340 
341    OUT_PKT4(ring, REG_A5XX_RB_WINDOW_OFFSET, 1);
342    OUT_RING(ring, A5XX_RB_WINDOW_OFFSET_X(0) | A5XX_RB_WINDOW_OFFSET_Y(0));
343 
344    /* emit IB to binning drawcmds: */
345    fd5_emit_ib(ring, batch->binning);
346 
347    fd_reset_wfi(batch);
348 
349    fd5_event_write(batch, ring, UNK_2D, false);
350 
351    fd5_event_write(batch, ring, CACHE_FLUSH_TS, true);
352 
353    // TODO CP_COND_WRITE's for all the vsc buffers (check for overflow??)
354 
355    fd_wfi(batch, ring);
356 
357    OUT_PKT4(ring, REG_A5XX_VPC_MODE_CNTL, 1);
358    OUT_RING(ring, 0x0);
359 }
360 
361 /* before first tile */
362 static void
fd5_emit_tile_init(struct fd_batch * batch)363 fd5_emit_tile_init(struct fd_batch *batch) assert_dt
364 {
365    struct fd_ringbuffer *ring = batch->gmem;
366    struct pipe_framebuffer_state *pfb = &batch->framebuffer;
367 
368    fd5_emit_restore(batch, ring);
369 
370    if (batch->prologue)
371       fd5_emit_ib(ring, batch->prologue);
372 
373    fd5_emit_lrz_flush(batch, ring);
374 
375    OUT_PKT4(ring, REG_A5XX_GRAS_CL_CNTL, 1);
376    OUT_RING(ring, 0x00000080); /* GRAS_CL_CNTL */
377 
378    OUT_PKT7(ring, CP_SKIP_IB2_ENABLE_GLOBAL, 1);
379    OUT_RING(ring, 0x0);
380 
381    OUT_PKT4(ring, REG_A5XX_PC_POWER_CNTL, 1);
382    OUT_RING(ring, 0x00000003); /* PC_POWER_CNTL */
383 
384    OUT_PKT4(ring, REG_A5XX_VFD_POWER_CNTL, 1);
385    OUT_RING(ring, 0x00000003); /* VFD_POWER_CNTL */
386 
387    /* 0x10000000 for BYPASS.. 0x7c13c080 for GMEM: */
388    fd_wfi(batch, ring);
389    OUT_PKT4(ring, REG_A5XX_RB_CCU_CNTL, 1);
390    OUT_RING(ring, 0x7c13c080); /* RB_CCU_CNTL */
391 
392    emit_zs(ring, pfb->zsbuf, batch->gmem_state);
393    emit_mrt(ring, pfb->nr_cbufs, pfb->cbufs, batch->gmem_state);
394 
395    /* Enable stream output for the first pass (likely the binning). */
396    OUT_PKT4(ring, REG_A5XX_VPC_SO_OVERRIDE, 1);
397    OUT_RING(ring, 0);
398 
399    if (use_hw_binning(batch)) {
400       emit_binning_pass(batch);
401 
402       /* Disable stream output after binning, since each VS output should get
403        * streamed out once.
404        */
405       OUT_PKT4(ring, REG_A5XX_VPC_SO_OVERRIDE, 1);
406       OUT_RING(ring, A5XX_VPC_SO_OVERRIDE_SO_DISABLE);
407 
408       fd5_emit_lrz_flush(batch, ring);
409       patch_draws(batch, USE_VISIBILITY);
410    } else {
411       patch_draws(batch, IGNORE_VISIBILITY);
412    }
413 
414    fd5_set_render_mode(batch->ctx, ring, GMEM);
415 
416    /* XXX If we're in gmem mode but not doing HW binning, then after the first
417     * tile we should disable stream output (fd6_gmem.c doesn't do that either).
418     */
419 }
420 
421 /* before mem2gmem */
422 static void
fd5_emit_tile_prep(struct fd_batch * batch,const struct fd_tile * tile)423 fd5_emit_tile_prep(struct fd_batch *batch, const struct fd_tile *tile) assert_dt
424 {
425    struct fd_context *ctx = batch->ctx;
426    const struct fd_gmem_stateobj *gmem = batch->gmem_state;
427    struct fd5_context *fd5_ctx = fd5_context(ctx);
428    struct fd_ringbuffer *ring = batch->gmem;
429 
430    uint32_t x1 = tile->xoff;
431    uint32_t y1 = tile->yoff;
432    uint32_t x2 = tile->xoff + tile->bin_w - 1;
433    uint32_t y2 = tile->yoff + tile->bin_h - 1;
434 
435    OUT_PKT4(ring, REG_A5XX_GRAS_SC_WINDOW_SCISSOR_TL, 2);
436    OUT_RING(ring, A5XX_GRAS_SC_WINDOW_SCISSOR_TL_X(x1) |
437                      A5XX_GRAS_SC_WINDOW_SCISSOR_TL_Y(y1));
438    OUT_RING(ring, A5XX_GRAS_SC_WINDOW_SCISSOR_BR_X(x2) |
439                      A5XX_GRAS_SC_WINDOW_SCISSOR_BR_Y(y2));
440 
441    OUT_PKT4(ring, REG_A5XX_RB_RESOLVE_CNTL_1, 2);
442    OUT_RING(ring, A5XX_RB_RESOLVE_CNTL_1_X(x1) | A5XX_RB_RESOLVE_CNTL_1_Y(y1));
443    OUT_RING(ring, A5XX_RB_RESOLVE_CNTL_2_X(x2) | A5XX_RB_RESOLVE_CNTL_2_Y(y2));
444 
445    if (use_hw_binning(batch)) {
446       const struct fd_vsc_pipe *pipe = &gmem->vsc_pipe[tile->p];
447       struct fd_bo *pipe_bo = ctx->vsc_pipe_bo[tile->p];
448 
449       OUT_PKT7(ring, CP_WAIT_FOR_ME, 0);
450 
451       OUT_PKT7(ring, CP_SET_VISIBILITY_OVERRIDE, 1);
452       OUT_RING(ring, 0x0);
453 
454       OUT_PKT7(ring, CP_SET_BIN_DATA5, 5);
455       OUT_RING(ring, CP_SET_BIN_DATA5_0_VSC_SIZE(pipe->w * pipe->h) |
456                         CP_SET_BIN_DATA5_0_VSC_N(tile->n));
457       OUT_RELOC(ring, pipe_bo, 0, 0, 0);     /* VSC_PIPE[p].DATA_ADDRESS */
458       OUT_RELOC(ring, fd5_ctx->vsc_size_mem, /* VSC_SIZE_ADDRESS + (p * 4) */
459                 (tile->p * 4), 0, 0);
460    } else {
461       OUT_PKT7(ring, CP_SET_VISIBILITY_OVERRIDE, 1);
462       OUT_RING(ring, 0x1);
463    }
464 
465    OUT_PKT4(ring, REG_A5XX_RB_WINDOW_OFFSET, 1);
466    OUT_RING(ring, A5XX_RB_WINDOW_OFFSET_X(x1) | A5XX_RB_WINDOW_OFFSET_Y(y1));
467 }
468 
469 /*
470  * transfer from system memory to gmem
471  */
472 
473 static void
emit_mem2gmem_surf(struct fd_batch * batch,uint32_t base,struct pipe_surface * psurf,enum a5xx_blit_buf buf)474 emit_mem2gmem_surf(struct fd_batch *batch, uint32_t base,
475                    struct pipe_surface *psurf, enum a5xx_blit_buf buf)
476 {
477    struct fd_ringbuffer *ring = batch->gmem;
478    const struct fd_gmem_stateobj *gmem = batch->gmem_state;
479    struct fd_resource *rsc = fd_resource(psurf->texture);
480    uint32_t stride, size;
481 
482    debug_assert(psurf->u.tex.first_layer == psurf->u.tex.last_layer);
483 
484    if (buf == BLIT_S)
485       rsc = rsc->stencil;
486 
487    if ((buf == BLIT_ZS) || (buf == BLIT_S)) {
488       // XXX hack import via BLIT_MRT0 instead of BLIT_ZS, since I don't
489       // know otherwise how to go from linear in sysmem to tiled in gmem.
490       // possibly we want to flip this around gmem2mem and keep depth
491       // tiled in sysmem (and fixup sampler state to assume tiled).. this
492       // might be required for doing depth/stencil in bypass mode?
493       struct fdl_slice *slice = fd_resource_slice(rsc, 0);
494       enum a5xx_color_fmt format =
495          fd5_pipe2color(fd_gmem_restore_format(rsc->b.b.format));
496 
497       OUT_PKT4(ring, REG_A5XX_RB_MRT_BUF_INFO(0), 5);
498       OUT_RING(ring,
499                A5XX_RB_MRT_BUF_INFO_COLOR_FORMAT(format) |
500                   A5XX_RB_MRT_BUF_INFO_COLOR_TILE_MODE(rsc->layout.tile_mode) |
501                   A5XX_RB_MRT_BUF_INFO_COLOR_SWAP(WZYX));
502       OUT_RING(ring, A5XX_RB_MRT_PITCH(fd_resource_pitch(rsc, 0)));
503       OUT_RING(ring, A5XX_RB_MRT_ARRAY_PITCH(slice->size0));
504       OUT_RELOC(ring, rsc->bo, 0, 0, 0); /* BASE_LO/HI */
505 
506       buf = BLIT_MRT0;
507    }
508 
509    stride = gmem->bin_w << fdl_cpp_shift(&rsc->layout);
510    size = stride * gmem->bin_h;
511 
512    OUT_PKT4(ring, REG_A5XX_RB_BLIT_FLAG_DST_LO, 4);
513    OUT_RING(ring, 0x00000000); /* RB_BLIT_FLAG_DST_LO */
514    OUT_RING(ring, 0x00000000); /* RB_BLIT_FLAG_DST_HI */
515    OUT_RING(ring, 0x00000000); /* RB_BLIT_FLAG_DST_PITCH */
516    OUT_RING(ring, 0x00000000); /* RB_BLIT_FLAG_DST_ARRAY_PITCH */
517 
518    OUT_PKT4(ring, REG_A5XX_RB_RESOLVE_CNTL_3, 5);
519    OUT_RING(ring, 0x00000000); /* RB_RESOLVE_CNTL_3 */
520    OUT_RING(ring, base);       /* RB_BLIT_DST_LO */
521    OUT_RING(ring, 0x00000000); /* RB_BLIT_DST_HI */
522    OUT_RING(ring, A5XX_RB_BLIT_DST_PITCH(stride));
523    OUT_RING(ring, A5XX_RB_BLIT_DST_ARRAY_PITCH(size));
524 
525    OUT_PKT4(ring, REG_A5XX_RB_BLIT_CNTL, 1);
526    OUT_RING(ring, A5XX_RB_BLIT_CNTL_BUF(buf));
527 
528    fd5_emit_blit(batch, ring);
529 }
530 
531 static void
fd5_emit_tile_mem2gmem(struct fd_batch * batch,const struct fd_tile * tile)532 fd5_emit_tile_mem2gmem(struct fd_batch *batch, const struct fd_tile *tile)
533 {
534    struct fd_ringbuffer *ring = batch->gmem;
535    const struct fd_gmem_stateobj *gmem = batch->gmem_state;
536    struct pipe_framebuffer_state *pfb = &batch->framebuffer;
537 
538    /*
539     * setup mrt and zs with system memory base addresses:
540     */
541 
542    emit_mrt(ring, pfb->nr_cbufs, pfb->cbufs, NULL);
543    //	emit_zs(ring, pfb->zsbuf, NULL);
544 
545    OUT_PKT4(ring, REG_A5XX_RB_CNTL, 1);
546    OUT_RING(ring, A5XX_RB_CNTL_WIDTH(gmem->bin_w) |
547                      A5XX_RB_CNTL_HEIGHT(gmem->bin_h) | A5XX_RB_CNTL_BYPASS);
548 
549    if (fd_gmem_needs_restore(batch, tile, FD_BUFFER_COLOR)) {
550       unsigned i;
551       for (i = 0; i < pfb->nr_cbufs; i++) {
552          if (!pfb->cbufs[i])
553             continue;
554          if (!(batch->restore & (PIPE_CLEAR_COLOR0 << i)))
555             continue;
556          emit_mem2gmem_surf(batch, gmem->cbuf_base[i], pfb->cbufs[i],
557                             BLIT_MRT0 + i);
558       }
559    }
560 
561    if (fd_gmem_needs_restore(batch, tile,
562                              FD_BUFFER_DEPTH | FD_BUFFER_STENCIL)) {
563       struct fd_resource *rsc = fd_resource(pfb->zsbuf->texture);
564 
565       if (!rsc->stencil || fd_gmem_needs_restore(batch, tile, FD_BUFFER_DEPTH))
566          emit_mem2gmem_surf(batch, gmem->zsbuf_base[0], pfb->zsbuf, BLIT_ZS);
567       if (rsc->stencil && fd_gmem_needs_restore(batch, tile, FD_BUFFER_STENCIL))
568          emit_mem2gmem_surf(batch, gmem->zsbuf_base[1], pfb->zsbuf, BLIT_S);
569    }
570 }
571 
572 /* before IB to rendering cmds: */
573 static void
fd5_emit_tile_renderprep(struct fd_batch * batch,const struct fd_tile * tile)574 fd5_emit_tile_renderprep(struct fd_batch *batch, const struct fd_tile *tile)
575 {
576    struct fd_ringbuffer *ring = batch->gmem;
577    const struct fd_gmem_stateobj *gmem = batch->gmem_state;
578    struct pipe_framebuffer_state *pfb = &batch->framebuffer;
579 
580    OUT_PKT4(ring, REG_A5XX_RB_CNTL, 1);
581    OUT_RING(ring,
582             A5XX_RB_CNTL_WIDTH(gmem->bin_w) | A5XX_RB_CNTL_HEIGHT(gmem->bin_h));
583 
584    emit_zs(ring, pfb->zsbuf, gmem);
585    emit_mrt(ring, pfb->nr_cbufs, pfb->cbufs, gmem);
586 
587    enum a3xx_msaa_samples samples = fd_msaa_samples(pfb->samples);
588 
589    OUT_PKT4(ring, REG_A5XX_TPL1_TP_RAS_MSAA_CNTL, 2);
590    OUT_RING(ring, A5XX_TPL1_TP_RAS_MSAA_CNTL_SAMPLES(samples));
591    OUT_RING(ring, A5XX_TPL1_TP_DEST_MSAA_CNTL_SAMPLES(samples) |
592                      COND(samples == MSAA_ONE,
593                           A5XX_TPL1_TP_DEST_MSAA_CNTL_MSAA_DISABLE));
594 
595    OUT_PKT4(ring, REG_A5XX_RB_RAS_MSAA_CNTL, 2);
596    OUT_RING(ring, A5XX_RB_RAS_MSAA_CNTL_SAMPLES(samples));
597    OUT_RING(ring,
598             A5XX_RB_DEST_MSAA_CNTL_SAMPLES(samples) |
599                COND(samples == MSAA_ONE, A5XX_RB_DEST_MSAA_CNTL_MSAA_DISABLE));
600 
601    OUT_PKT4(ring, REG_A5XX_GRAS_SC_RAS_MSAA_CNTL, 2);
602    OUT_RING(ring, A5XX_GRAS_SC_RAS_MSAA_CNTL_SAMPLES(samples));
603    OUT_RING(ring, A5XX_GRAS_SC_DEST_MSAA_CNTL_SAMPLES(samples) |
604                      COND(samples == MSAA_ONE,
605                           A5XX_GRAS_SC_DEST_MSAA_CNTL_MSAA_DISABLE));
606 }
607 
608 /*
609  * transfer from gmem to system memory (ie. normal RAM)
610  */
611 
612 static void
emit_gmem2mem_surf(struct fd_batch * batch,uint32_t base,struct pipe_surface * psurf,enum a5xx_blit_buf buf)613 emit_gmem2mem_surf(struct fd_batch *batch, uint32_t base,
614                    struct pipe_surface *psurf, enum a5xx_blit_buf buf)
615 {
616    struct fd_ringbuffer *ring = batch->gmem;
617    struct fd_resource *rsc = fd_resource(psurf->texture);
618    struct fdl_slice *slice;
619    bool tiled;
620    uint32_t offset, pitch;
621 
622    if (!rsc->valid)
623       return;
624 
625    if (buf == BLIT_S)
626       rsc = rsc->stencil;
627 
628    slice = fd_resource_slice(rsc, psurf->u.tex.level);
629    offset =
630       fd_resource_offset(rsc, psurf->u.tex.level, psurf->u.tex.first_layer);
631    pitch = fd_resource_pitch(rsc, psurf->u.tex.level);
632 
633    debug_assert(psurf->u.tex.first_layer == psurf->u.tex.last_layer);
634 
635    OUT_PKT4(ring, REG_A5XX_RB_BLIT_FLAG_DST_LO, 4);
636    OUT_RING(ring, 0x00000000); /* RB_BLIT_FLAG_DST_LO */
637    OUT_RING(ring, 0x00000000); /* RB_BLIT_FLAG_DST_HI */
638    OUT_RING(ring, 0x00000000); /* RB_BLIT_FLAG_DST_PITCH */
639    OUT_RING(ring, 0x00000000); /* RB_BLIT_FLAG_DST_ARRAY_PITCH */
640 
641    tiled = fd_resource_tile_mode(psurf->texture, psurf->u.tex.level);
642 
643    OUT_PKT4(ring, REG_A5XX_RB_RESOLVE_CNTL_3, 5);
644    OUT_RING(ring, 0x00000004 | /* XXX RB_RESOLVE_CNTL_3 */
645                      COND(tiled, A5XX_RB_RESOLVE_CNTL_3_TILED));
646    OUT_RELOC(ring, rsc->bo, offset, 0, 0); /* RB_BLIT_DST_LO/HI */
647    OUT_RING(ring, A5XX_RB_BLIT_DST_PITCH(pitch));
648    OUT_RING(ring, A5XX_RB_BLIT_DST_ARRAY_PITCH(slice->size0));
649 
650    OUT_PKT4(ring, REG_A5XX_RB_BLIT_CNTL, 1);
651    OUT_RING(ring, A5XX_RB_BLIT_CNTL_BUF(buf));
652 
653    //	bool msaa_resolve = pfb->samples > 1;
654    bool msaa_resolve = false;
655    OUT_PKT4(ring, REG_A5XX_RB_CLEAR_CNTL, 1);
656    OUT_RING(ring, COND(msaa_resolve, A5XX_RB_CLEAR_CNTL_MSAA_RESOLVE));
657 
658    fd5_emit_blit(batch, ring);
659 }
660 
661 static void
fd5_emit_tile_gmem2mem(struct fd_batch * batch,const struct fd_tile * tile)662 fd5_emit_tile_gmem2mem(struct fd_batch *batch, const struct fd_tile *tile)
663 {
664    const struct fd_gmem_stateobj *gmem = batch->gmem_state;
665    struct pipe_framebuffer_state *pfb = &batch->framebuffer;
666 
667    if (batch->resolve & (FD_BUFFER_DEPTH | FD_BUFFER_STENCIL)) {
668       struct fd_resource *rsc = fd_resource(pfb->zsbuf->texture);
669 
670       if (!rsc->stencil || (batch->resolve & FD_BUFFER_DEPTH))
671          emit_gmem2mem_surf(batch, gmem->zsbuf_base[0], pfb->zsbuf, BLIT_ZS);
672       if (rsc->stencil && (batch->resolve & FD_BUFFER_STENCIL))
673          emit_gmem2mem_surf(batch, gmem->zsbuf_base[1], pfb->zsbuf, BLIT_S);
674    }
675 
676    if (batch->resolve & FD_BUFFER_COLOR) {
677       unsigned i;
678       for (i = 0; i < pfb->nr_cbufs; i++) {
679          if (!pfb->cbufs[i])
680             continue;
681          if (!(batch->resolve & (PIPE_CLEAR_COLOR0 << i)))
682             continue;
683          emit_gmem2mem_surf(batch, gmem->cbuf_base[i], pfb->cbufs[i],
684                             BLIT_MRT0 + i);
685       }
686    }
687 }
688 
689 static void
fd5_emit_tile_fini(struct fd_batch * batch)690 fd5_emit_tile_fini(struct fd_batch *batch) assert_dt
691 {
692    struct fd_ringbuffer *ring = batch->gmem;
693 
694    OUT_PKT7(ring, CP_SKIP_IB2_ENABLE_GLOBAL, 1);
695    OUT_RING(ring, 0x0);
696 
697    fd5_emit_lrz_flush(batch, ring);
698 
699    fd5_cache_flush(batch, ring);
700    fd5_set_render_mode(batch->ctx, ring, BYPASS);
701 }
702 
703 static void
fd5_emit_sysmem_prep(struct fd_batch * batch)704 fd5_emit_sysmem_prep(struct fd_batch *batch) assert_dt
705 {
706    struct fd_ringbuffer *ring = batch->gmem;
707 
708    fd5_emit_restore(batch, ring);
709 
710    fd5_emit_lrz_flush(batch, ring);
711 
712    if (batch->prologue)
713       fd5_emit_ib(ring, batch->prologue);
714 
715    OUT_PKT7(ring, CP_SKIP_IB2_ENABLE_GLOBAL, 1);
716    OUT_RING(ring, 0x0);
717 
718    fd5_event_write(batch, ring, PC_CCU_INVALIDATE_COLOR, false);
719 
720    OUT_PKT4(ring, REG_A5XX_PC_POWER_CNTL, 1);
721    OUT_RING(ring, 0x00000003); /* PC_POWER_CNTL */
722 
723    OUT_PKT4(ring, REG_A5XX_VFD_POWER_CNTL, 1);
724    OUT_RING(ring, 0x00000003); /* VFD_POWER_CNTL */
725 
726    /* 0x10000000 for BYPASS.. 0x7c13c080 for GMEM: */
727    fd_wfi(batch, ring);
728    OUT_PKT4(ring, REG_A5XX_RB_CCU_CNTL, 1);
729    OUT_RING(ring, 0x10000000); /* RB_CCU_CNTL */
730 
731    OUT_PKT4(ring, REG_A5XX_RB_CNTL, 1);
732    OUT_RING(ring, A5XX_RB_CNTL_WIDTH(0) | A5XX_RB_CNTL_HEIGHT(0) |
733                      A5XX_RB_CNTL_BYPASS);
734 
735    /* remaining setup below here does not apply to blit/compute: */
736    if (batch->nondraw)
737       return;
738 
739    struct pipe_framebuffer_state *pfb = &batch->framebuffer;
740 
741    OUT_PKT4(ring, REG_A5XX_GRAS_SC_WINDOW_SCISSOR_TL, 2);
742    OUT_RING(ring, A5XX_GRAS_SC_WINDOW_SCISSOR_TL_X(0) |
743                      A5XX_GRAS_SC_WINDOW_SCISSOR_TL_Y(0));
744    OUT_RING(ring, A5XX_GRAS_SC_WINDOW_SCISSOR_BR_X(pfb->width - 1) |
745                      A5XX_GRAS_SC_WINDOW_SCISSOR_BR_Y(pfb->height - 1));
746 
747    OUT_PKT4(ring, REG_A5XX_RB_RESOLVE_CNTL_1, 2);
748    OUT_RING(ring, A5XX_RB_RESOLVE_CNTL_1_X(0) | A5XX_RB_RESOLVE_CNTL_1_Y(0));
749    OUT_RING(ring, A5XX_RB_RESOLVE_CNTL_2_X(pfb->width - 1) |
750                      A5XX_RB_RESOLVE_CNTL_2_Y(pfb->height - 1));
751 
752    OUT_PKT4(ring, REG_A5XX_RB_WINDOW_OFFSET, 1);
753    OUT_RING(ring, A5XX_RB_WINDOW_OFFSET_X(0) | A5XX_RB_WINDOW_OFFSET_Y(0));
754 
755    /* Enable stream output, since there's no binning pass to put it in. */
756    OUT_PKT4(ring, REG_A5XX_VPC_SO_OVERRIDE, 1);
757    OUT_RING(ring, 0);
758 
759    OUT_PKT7(ring, CP_SET_VISIBILITY_OVERRIDE, 1);
760    OUT_RING(ring, 0x1);
761 
762    patch_draws(batch, IGNORE_VISIBILITY);
763 
764    emit_zs(ring, pfb->zsbuf, NULL);
765    emit_mrt(ring, pfb->nr_cbufs, pfb->cbufs, NULL);
766 
767    OUT_PKT4(ring, REG_A5XX_TPL1_TP_RAS_MSAA_CNTL, 2);
768    OUT_RING(ring, A5XX_TPL1_TP_RAS_MSAA_CNTL_SAMPLES(MSAA_ONE));
769    OUT_RING(ring, A5XX_TPL1_TP_DEST_MSAA_CNTL_SAMPLES(MSAA_ONE) |
770                      A5XX_TPL1_TP_DEST_MSAA_CNTL_MSAA_DISABLE);
771 
772    OUT_PKT4(ring, REG_A5XX_RB_RAS_MSAA_CNTL, 2);
773    OUT_RING(ring, A5XX_RB_RAS_MSAA_CNTL_SAMPLES(MSAA_ONE));
774    OUT_RING(ring, A5XX_RB_DEST_MSAA_CNTL_SAMPLES(MSAA_ONE) |
775                      A5XX_RB_DEST_MSAA_CNTL_MSAA_DISABLE);
776 
777    OUT_PKT4(ring, REG_A5XX_GRAS_SC_RAS_MSAA_CNTL, 2);
778    OUT_RING(ring, A5XX_GRAS_SC_RAS_MSAA_CNTL_SAMPLES(MSAA_ONE));
779    OUT_RING(ring, A5XX_GRAS_SC_DEST_MSAA_CNTL_SAMPLES(MSAA_ONE) |
780                      A5XX_GRAS_SC_DEST_MSAA_CNTL_MSAA_DISABLE);
781 }
782 
783 static void
fd5_emit_sysmem_fini(struct fd_batch * batch)784 fd5_emit_sysmem_fini(struct fd_batch *batch)
785 {
786    struct fd_ringbuffer *ring = batch->gmem;
787 
788    OUT_PKT7(ring, CP_SKIP_IB2_ENABLE_GLOBAL, 1);
789    OUT_RING(ring, 0x0);
790 
791    fd5_emit_lrz_flush(batch, ring);
792 
793    fd5_event_write(batch, ring, PC_CCU_FLUSH_COLOR_TS, true);
794    fd5_event_write(batch, ring, PC_CCU_FLUSH_DEPTH_TS, true);
795 }
796 
797 void
fd5_gmem_init(struct pipe_context * pctx)798 fd5_gmem_init(struct pipe_context *pctx) disable_thread_safety_analysis
799 {
800    struct fd_context *ctx = fd_context(pctx);
801 
802    ctx->emit_tile_init = fd5_emit_tile_init;
803    ctx->emit_tile_prep = fd5_emit_tile_prep;
804    ctx->emit_tile_mem2gmem = fd5_emit_tile_mem2gmem;
805    ctx->emit_tile_renderprep = fd5_emit_tile_renderprep;
806    ctx->emit_tile_gmem2mem = fd5_emit_tile_gmem2mem;
807    ctx->emit_tile_fini = fd5_emit_tile_fini;
808    ctx->emit_sysmem_prep = fd5_emit_sysmem_prep;
809    ctx->emit_sysmem_fini = fd5_emit_sysmem_fini;
810 }
811