1 /*
2  * Copyright (C) 2012 Rob Clark <robclark@freedesktop.org>
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21  * SOFTWARE.
22  *
23  * Authors:
24  *    Rob Clark <robclark@freedesktop.org>
25  */
26 
27 #include "pipe/p_state.h"
28 #include "util/u_inlines.h"
29 #include "util/u_memory.h"
30 #include "util/u_string.h"
31 
32 #include "freedreno_draw.h"
33 #include "freedreno_resource.h"
34 #include "freedreno_state.h"
35 
36 #include "ir2/instr-a2xx.h"
37 #include "fd2_context.h"
38 #include "fd2_draw.h"
39 #include "fd2_emit.h"
40 #include "fd2_gmem.h"
41 #include "fd2_program.h"
42 #include "fd2_util.h"
43 #include "fd2_zsa.h"
44 
45 static uint32_t
fmt2swap(enum pipe_format format)46 fmt2swap(enum pipe_format format)
47 {
48    switch (format) {
49    case PIPE_FORMAT_B8G8R8A8_UNORM:
50    case PIPE_FORMAT_B8G8R8X8_UNORM:
51    case PIPE_FORMAT_B5G6R5_UNORM:
52    case PIPE_FORMAT_B5G5R5A1_UNORM:
53    case PIPE_FORMAT_B5G5R5X1_UNORM:
54    case PIPE_FORMAT_B4G4R4A4_UNORM:
55    case PIPE_FORMAT_B4G4R4X4_UNORM:
56    case PIPE_FORMAT_B2G3R3_UNORM:
57       return 1;
58    default:
59       return 0;
60    }
61 }
62 
63 static bool
use_hw_binning(struct fd_batch * batch)64 use_hw_binning(struct fd_batch *batch)
65 {
66    const struct fd_gmem_stateobj *gmem = batch->gmem_state;
67 
68    /* we hardcoded a limit of 8 "pipes", we can increase this limit
69     * at the cost of a slightly larger command stream
70     * however very few cases will need more than 8
71     * gmem->num_vsc_pipes == 0 means empty batch (TODO: does it still happen?)
72     */
73    if (gmem->num_vsc_pipes > 8 || !gmem->num_vsc_pipes)
74       return false;
75 
76    /* only a20x hw binning is implement
77     * a22x is more like a3xx, but perhaps the a20x works? (TODO)
78     */
79    if (!is_a20x(batch->ctx->screen))
80       return false;
81 
82    return fd_binning_enabled && ((gmem->nbins_x * gmem->nbins_y) > 2);
83 }
84 
/* transfer from gmem to system memory (i.e. normal RAM) */
86 
87 static void
emit_gmem2mem_surf(struct fd_batch * batch,uint32_t base,struct pipe_surface * psurf)88 emit_gmem2mem_surf(struct fd_batch *batch, uint32_t base,
89                    struct pipe_surface *psurf)
90 {
91    struct fd_ringbuffer *ring = batch->tile_fini;
92    struct fd_resource *rsc = fd_resource(psurf->texture);
93    uint32_t offset =
94       fd_resource_offset(rsc, psurf->u.tex.level, psurf->u.tex.first_layer);
95    enum pipe_format format = fd_gmem_restore_format(psurf->format);
96    uint32_t pitch = fdl2_pitch_pixels(&rsc->layout, psurf->u.tex.level);
97 
98    assert((pitch & 31) == 0);
99    assert((offset & 0xfff) == 0);
100 
101    if (!rsc->valid)
102       return;
103 
104    OUT_PKT3(ring, CP_SET_CONSTANT, 2);
105    OUT_RING(ring, CP_REG(REG_A2XX_RB_COLOR_INFO));
106    OUT_RING(ring, A2XX_RB_COLOR_INFO_BASE(base) |
107                      A2XX_RB_COLOR_INFO_FORMAT(fd2_pipe2color(format)));
108 
109    OUT_PKT3(ring, CP_SET_CONSTANT, 5);
110    OUT_RING(ring, CP_REG(REG_A2XX_RB_COPY_CONTROL));
111    OUT_RING(ring, 0x00000000);             /* RB_COPY_CONTROL */
112    OUT_RELOC(ring, rsc->bo, offset, 0, 0); /* RB_COPY_DEST_BASE */
113    OUT_RING(ring, pitch >> 5);             /* RB_COPY_DEST_PITCH */
114    OUT_RING(ring,                          /* RB_COPY_DEST_INFO */
115             A2XX_RB_COPY_DEST_INFO_FORMAT(fd2_pipe2color(format)) |
116                COND(!rsc->layout.tile_mode, A2XX_RB_COPY_DEST_INFO_LINEAR) |
117                A2XX_RB_COPY_DEST_INFO_WRITE_RED |
118                A2XX_RB_COPY_DEST_INFO_WRITE_GREEN |
119                A2XX_RB_COPY_DEST_INFO_WRITE_BLUE |
120                A2XX_RB_COPY_DEST_INFO_WRITE_ALPHA);
121 
122    if (!is_a20x(batch->ctx->screen)) {
123       OUT_WFI(ring);
124 
125       OUT_PKT3(ring, CP_SET_CONSTANT, 3);
126       OUT_RING(ring, CP_REG(REG_A2XX_VGT_MAX_VTX_INDX));
127       OUT_RING(ring, 3); /* VGT_MAX_VTX_INDX */
128       OUT_RING(ring, 0); /* VGT_MIN_VTX_INDX */
129    }
130 
131    fd_draw(batch, ring, DI_PT_RECTLIST, IGNORE_VISIBILITY,
132            DI_SRC_SEL_AUTO_INDEX, 3, 0, INDEX_SIZE_IGN, 0, 0, NULL);
133 }
134 
135 static void
prepare_tile_fini_ib(struct fd_batch * batch)136 prepare_tile_fini_ib(struct fd_batch *batch) assert_dt
137 {
138    struct fd_context *ctx = batch->ctx;
139    struct fd2_context *fd2_ctx = fd2_context(ctx);
140    const struct fd_gmem_stateobj *gmem = batch->gmem_state;
141    struct pipe_framebuffer_state *pfb = &batch->framebuffer;
142    struct fd_ringbuffer *ring;
143 
144    batch->tile_fini =
145       fd_submit_new_ringbuffer(batch->submit, 0x1000, FD_RINGBUFFER_STREAMING);
146    ring = batch->tile_fini;
147 
148    fd2_emit_vertex_bufs(ring, 0x9c,
149                         (struct fd2_vertex_buf[]){
150                            {.prsc = fd2_ctx->solid_vertexbuf, .size = 36},
151                         },
152                         1);
153 
154    OUT_PKT3(ring, CP_SET_CONSTANT, 2);
155    OUT_RING(ring, CP_REG(REG_A2XX_PA_SC_WINDOW_OFFSET));
156    OUT_RING(ring, 0x00000000); /* PA_SC_WINDOW_OFFSET */
157 
158    OUT_PKT3(ring, CP_SET_CONSTANT, 2);
159    OUT_RING(ring, CP_REG(REG_A2XX_VGT_INDX_OFFSET));
160    OUT_RING(ring, 0);
161 
162    if (!is_a20x(ctx->screen)) {
163       OUT_PKT3(ring, CP_SET_CONSTANT, 2);
164       OUT_RING(ring, CP_REG(REG_A2XX_VGT_VERTEX_REUSE_BLOCK_CNTL));
165       OUT_RING(ring, 0x0000028f);
166    }
167 
168    fd2_program_emit(ctx, ring, &ctx->solid_prog);
169 
170    OUT_PKT3(ring, CP_SET_CONSTANT, 2);
171    OUT_RING(ring, CP_REG(REG_A2XX_PA_SC_AA_MASK));
172    OUT_RING(ring, 0x0000ffff);
173 
174    OUT_PKT3(ring, CP_SET_CONSTANT, 2);
175    OUT_RING(ring, CP_REG(REG_A2XX_RB_DEPTHCONTROL));
176    OUT_RING(ring, A2XX_RB_DEPTHCONTROL_EARLY_Z_ENABLE);
177 
178    OUT_PKT3(ring, CP_SET_CONSTANT, 2);
179    OUT_RING(ring, CP_REG(REG_A2XX_PA_SU_SC_MODE_CNTL));
180    OUT_RING(
181       ring,
182       A2XX_PA_SU_SC_MODE_CNTL_PROVOKING_VTX_LAST | /* PA_SU_SC_MODE_CNTL */
183          A2XX_PA_SU_SC_MODE_CNTL_FRONT_PTYPE(PC_DRAW_TRIANGLES) |
184          A2XX_PA_SU_SC_MODE_CNTL_BACK_PTYPE(PC_DRAW_TRIANGLES));
185 
186    OUT_PKT3(ring, CP_SET_CONSTANT, 3);
187    OUT_RING(ring, CP_REG(REG_A2XX_PA_SC_WINDOW_SCISSOR_TL));
188    OUT_RING(ring, xy2d(0, 0));                    /* PA_SC_WINDOW_SCISSOR_TL */
189    OUT_RING(ring, xy2d(pfb->width, pfb->height)); /* PA_SC_WINDOW_SCISSOR_BR */
190 
191    OUT_PKT3(ring, CP_SET_CONSTANT, 2);
192    OUT_RING(ring, CP_REG(REG_A2XX_PA_CL_CLIP_CNTL));
193    OUT_RING(ring, 0x00000000);
194 
195    OUT_PKT3(ring, CP_SET_CONSTANT, 5);
196    OUT_RING(ring, CP_REG(REG_A2XX_PA_CL_VPORT_XSCALE));
197    OUT_RING(ring, fui((float)gmem->bin_w / 2.0)); /* XSCALE */
198    OUT_RING(ring, fui((float)gmem->bin_w / 2.0)); /* XOFFSET */
199    OUT_RING(ring, fui((float)gmem->bin_h / 2.0)); /* YSCALE */
200    OUT_RING(ring, fui((float)gmem->bin_h / 2.0)); /* YOFFSET */
201 
202    OUT_PKT3(ring, CP_SET_CONSTANT, 2);
203    OUT_RING(ring, CP_REG(REG_A2XX_RB_MODECONTROL));
204    OUT_RING(ring, A2XX_RB_MODECONTROL_EDRAM_MODE(EDRAM_COPY));
205 
206    if (batch->resolve & (FD_BUFFER_DEPTH | FD_BUFFER_STENCIL))
207       emit_gmem2mem_surf(batch, gmem->zsbuf_base[0], pfb->zsbuf);
208 
209    if (batch->resolve & FD_BUFFER_COLOR)
210       emit_gmem2mem_surf(batch, gmem->cbuf_base[0], pfb->cbufs[0]);
211 
212    OUT_PKT3(ring, CP_SET_CONSTANT, 2);
213    OUT_RING(ring, CP_REG(REG_A2XX_RB_MODECONTROL));
214    OUT_RING(ring, A2XX_RB_MODECONTROL_EDRAM_MODE(COLOR_DEPTH));
215 
216    if (!is_a20x(ctx->screen)) {
217       OUT_PKT3(ring, CP_SET_CONSTANT, 2);
218       OUT_RING(ring, CP_REG(REG_A2XX_VGT_VERTEX_REUSE_BLOCK_CNTL));
219       OUT_RING(ring, 0x0000003b);
220    }
221 }
222 
223 static void
fd2_emit_tile_gmem2mem(struct fd_batch * batch,const struct fd_tile * tile)224 fd2_emit_tile_gmem2mem(struct fd_batch *batch, const struct fd_tile *tile)
225 {
226    fd2_emit_ib(batch->gmem, batch->tile_fini);
227 }
228 
/* transfer from system memory to gmem */
230 
231 static void
emit_mem2gmem_surf(struct fd_batch * batch,uint32_t base,struct pipe_surface * psurf)232 emit_mem2gmem_surf(struct fd_batch *batch, uint32_t base,
233                    struct pipe_surface *psurf)
234 {
235    struct fd_ringbuffer *ring = batch->gmem;
236    struct fd_resource *rsc = fd_resource(psurf->texture);
237    uint32_t offset =
238       fd_resource_offset(rsc, psurf->u.tex.level, psurf->u.tex.first_layer);
239    enum pipe_format format = fd_gmem_restore_format(psurf->format);
240 
241    OUT_PKT3(ring, CP_SET_CONSTANT, 2);
242    OUT_RING(ring, CP_REG(REG_A2XX_RB_COLOR_INFO));
243    OUT_RING(ring, A2XX_RB_COLOR_INFO_BASE(base) |
244                      A2XX_RB_COLOR_INFO_FORMAT(fd2_pipe2color(format)));
245 
246    /* emit fb as a texture: */
247    OUT_PKT3(ring, CP_SET_CONSTANT, 7);
248    OUT_RING(ring, 0x00010000);
249    OUT_RING(ring, A2XX_SQ_TEX_0_CLAMP_X(SQ_TEX_WRAP) |
250                      A2XX_SQ_TEX_0_CLAMP_Y(SQ_TEX_WRAP) |
251                      A2XX_SQ_TEX_0_CLAMP_Z(SQ_TEX_WRAP) |
252                      A2XX_SQ_TEX_0_PITCH(
253                         fdl2_pitch_pixels(&rsc->layout, psurf->u.tex.level)));
254    OUT_RELOC(ring, rsc->bo, offset,
255              A2XX_SQ_TEX_1_FORMAT(fd2_pipe2surface(format).format) |
256                 A2XX_SQ_TEX_1_CLAMP_POLICY(SQ_TEX_CLAMP_POLICY_OGL),
257              0);
258    OUT_RING(ring, A2XX_SQ_TEX_2_WIDTH(psurf->width - 1) |
259                      A2XX_SQ_TEX_2_HEIGHT(psurf->height - 1));
260    OUT_RING(ring, A2XX_SQ_TEX_3_MIP_FILTER(SQ_TEX_FILTER_BASEMAP) |
261                      A2XX_SQ_TEX_3_SWIZ_X(0) | A2XX_SQ_TEX_3_SWIZ_Y(1) |
262                      A2XX_SQ_TEX_3_SWIZ_Z(2) | A2XX_SQ_TEX_3_SWIZ_W(3) |
263                      A2XX_SQ_TEX_3_XY_MAG_FILTER(SQ_TEX_FILTER_POINT) |
264                      A2XX_SQ_TEX_3_XY_MIN_FILTER(SQ_TEX_FILTER_POINT));
265    OUT_RING(ring, 0x00000000);
266    OUT_RING(ring, A2XX_SQ_TEX_5_DIMENSION(SQ_TEX_DIMENSION_2D));
267 
268    if (!is_a20x(batch->ctx->screen)) {
269       OUT_PKT3(ring, CP_SET_CONSTANT, 3);
270       OUT_RING(ring, CP_REG(REG_A2XX_VGT_MAX_VTX_INDX));
271       OUT_RING(ring, 3); /* VGT_MAX_VTX_INDX */
272       OUT_RING(ring, 0); /* VGT_MIN_VTX_INDX */
273    }
274 
275    fd_draw(batch, ring, DI_PT_RECTLIST, IGNORE_VISIBILITY,
276            DI_SRC_SEL_AUTO_INDEX, 3, 0, INDEX_SIZE_IGN, 0, 0, NULL);
277 }
278 
279 static void
fd2_emit_tile_mem2gmem(struct fd_batch * batch,const struct fd_tile * tile)280 fd2_emit_tile_mem2gmem(struct fd_batch *batch,
281                        const struct fd_tile *tile) assert_dt
282 {
283    struct fd_context *ctx = batch->ctx;
284    struct fd2_context *fd2_ctx = fd2_context(ctx);
285    const struct fd_gmem_stateobj *gmem = batch->gmem_state;
286    struct fd_ringbuffer *ring = batch->gmem;
287    struct pipe_framebuffer_state *pfb = &batch->framebuffer;
288    unsigned bin_w = tile->bin_w;
289    unsigned bin_h = tile->bin_h;
290    float x0, y0, x1, y1;
291 
292    fd2_emit_vertex_bufs(
293       ring, 0x9c,
294       (struct fd2_vertex_buf[]){
295          {.prsc = fd2_ctx->solid_vertexbuf, .size = 36},
296          {.prsc = fd2_ctx->solid_vertexbuf, .size = 24, .offset = 36},
297       },
298       2);
299 
300    /* write texture coordinates to vertexbuf: */
301    x0 = ((float)tile->xoff) / ((float)pfb->width);
302    x1 = ((float)tile->xoff + bin_w) / ((float)pfb->width);
303    y0 = ((float)tile->yoff) / ((float)pfb->height);
304    y1 = ((float)tile->yoff + bin_h) / ((float)pfb->height);
305    OUT_PKT3(ring, CP_MEM_WRITE, 7);
306    OUT_RELOC(ring, fd_resource(fd2_ctx->solid_vertexbuf)->bo, 36, 0, 0);
307    OUT_RING(ring, fui(x0));
308    OUT_RING(ring, fui(y0));
309    OUT_RING(ring, fui(x1));
310    OUT_RING(ring, fui(y0));
311    OUT_RING(ring, fui(x0));
312    OUT_RING(ring, fui(y1));
313 
314    OUT_PKT3(ring, CP_SET_CONSTANT, 2);
315    OUT_RING(ring, CP_REG(REG_A2XX_VGT_INDX_OFFSET));
316    OUT_RING(ring, 0);
317 
318    fd2_program_emit(ctx, ring, &ctx->blit_prog[0]);
319 
320    OUT_PKT0(ring, REG_A2XX_TC_CNTL_STATUS, 1);
321    OUT_RING(ring, A2XX_TC_CNTL_STATUS_L2_INVALIDATE);
322 
323    OUT_PKT3(ring, CP_SET_CONSTANT, 2);
324    OUT_RING(ring, CP_REG(REG_A2XX_RB_DEPTHCONTROL));
325    OUT_RING(ring, A2XX_RB_DEPTHCONTROL_EARLY_Z_ENABLE);
326 
327    OUT_PKT3(ring, CP_SET_CONSTANT, 2);
328    OUT_RING(ring, CP_REG(REG_A2XX_PA_SU_SC_MODE_CNTL));
329    OUT_RING(ring, A2XX_PA_SU_SC_MODE_CNTL_PROVOKING_VTX_LAST |
330                      A2XX_PA_SU_SC_MODE_CNTL_FRONT_PTYPE(PC_DRAW_TRIANGLES) |
331                      A2XX_PA_SU_SC_MODE_CNTL_BACK_PTYPE(PC_DRAW_TRIANGLES));
332 
333    OUT_PKT3(ring, CP_SET_CONSTANT, 2);
334    OUT_RING(ring, CP_REG(REG_A2XX_PA_SC_AA_MASK));
335    OUT_RING(ring, 0x0000ffff);
336 
337    OUT_PKT3(ring, CP_SET_CONSTANT, 2);
338    OUT_RING(ring, CP_REG(REG_A2XX_RB_COLORCONTROL));
339    OUT_RING(ring, A2XX_RB_COLORCONTROL_ALPHA_FUNC(FUNC_ALWAYS) |
340                      A2XX_RB_COLORCONTROL_BLEND_DISABLE |
341                      A2XX_RB_COLORCONTROL_ROP_CODE(12) |
342                      A2XX_RB_COLORCONTROL_DITHER_MODE(DITHER_DISABLE) |
343                      A2XX_RB_COLORCONTROL_DITHER_TYPE(DITHER_PIXEL));
344 
345    OUT_PKT3(ring, CP_SET_CONSTANT, 2);
346    OUT_RING(ring, CP_REG(REG_A2XX_RB_BLEND_CONTROL));
347    OUT_RING(ring, A2XX_RB_BLEND_CONTROL_COLOR_SRCBLEND(FACTOR_ONE) |
348                      A2XX_RB_BLEND_CONTROL_COLOR_COMB_FCN(BLEND2_DST_PLUS_SRC) |
349                      A2XX_RB_BLEND_CONTROL_COLOR_DESTBLEND(FACTOR_ZERO) |
350                      A2XX_RB_BLEND_CONTROL_ALPHA_SRCBLEND(FACTOR_ONE) |
351                      A2XX_RB_BLEND_CONTROL_ALPHA_COMB_FCN(BLEND2_DST_PLUS_SRC) |
352                      A2XX_RB_BLEND_CONTROL_ALPHA_DESTBLEND(FACTOR_ZERO));
353 
354    OUT_PKT3(ring, CP_SET_CONSTANT, 3);
355    OUT_RING(ring, CP_REG(REG_A2XX_PA_SC_WINDOW_SCISSOR_TL));
356    OUT_RING(ring, A2XX_PA_SC_WINDOW_OFFSET_DISABLE |
357                      xy2d(0, 0));      /* PA_SC_WINDOW_SCISSOR_TL */
358    OUT_RING(ring, xy2d(bin_w, bin_h)); /* PA_SC_WINDOW_SCISSOR_BR */
359 
360    OUT_PKT3(ring, CP_SET_CONSTANT, 5);
361    OUT_RING(ring, CP_REG(REG_A2XX_PA_CL_VPORT_XSCALE));
362    OUT_RING(ring, fui((float)bin_w / 2.0));  /* PA_CL_VPORT_XSCALE */
363    OUT_RING(ring, fui((float)bin_w / 2.0));  /* PA_CL_VPORT_XOFFSET */
364    OUT_RING(ring, fui(-(float)bin_h / 2.0)); /* PA_CL_VPORT_YSCALE */
365    OUT_RING(ring, fui((float)bin_h / 2.0));  /* PA_CL_VPORT_YOFFSET */
366 
367    OUT_PKT3(ring, CP_SET_CONSTANT, 2);
368    OUT_RING(ring, CP_REG(REG_A2XX_PA_CL_VTE_CNTL));
369    OUT_RING(ring, A2XX_PA_CL_VTE_CNTL_VTX_XY_FMT |
370                      A2XX_PA_CL_VTE_CNTL_VTX_Z_FMT | // XXX check this???
371                      A2XX_PA_CL_VTE_CNTL_VPORT_X_SCALE_ENA |
372                      A2XX_PA_CL_VTE_CNTL_VPORT_X_OFFSET_ENA |
373                      A2XX_PA_CL_VTE_CNTL_VPORT_Y_SCALE_ENA |
374                      A2XX_PA_CL_VTE_CNTL_VPORT_Y_OFFSET_ENA);
375 
376    OUT_PKT3(ring, CP_SET_CONSTANT, 2);
377    OUT_RING(ring, CP_REG(REG_A2XX_PA_CL_CLIP_CNTL));
378    OUT_RING(ring, 0x00000000);
379 
380    if (fd_gmem_needs_restore(batch, tile, FD_BUFFER_DEPTH | FD_BUFFER_STENCIL))
381       emit_mem2gmem_surf(batch, gmem->zsbuf_base[0], pfb->zsbuf);
382 
383    if (fd_gmem_needs_restore(batch, tile, FD_BUFFER_COLOR))
384       emit_mem2gmem_surf(batch, gmem->cbuf_base[0], pfb->cbufs[0]);
385 
386    OUT_PKT3(ring, CP_SET_CONSTANT, 2);
387    OUT_RING(ring, CP_REG(REG_A2XX_PA_CL_VTE_CNTL));
388    OUT_RING(ring, A2XX_PA_CL_VTE_CNTL_VTX_W0_FMT |
389                      A2XX_PA_CL_VTE_CNTL_VPORT_X_SCALE_ENA |
390                      A2XX_PA_CL_VTE_CNTL_VPORT_X_OFFSET_ENA |
391                      A2XX_PA_CL_VTE_CNTL_VPORT_Y_SCALE_ENA |
392                      A2XX_PA_CL_VTE_CNTL_VPORT_Y_OFFSET_ENA |
393                      A2XX_PA_CL_VTE_CNTL_VPORT_Z_SCALE_ENA |
394                      A2XX_PA_CL_VTE_CNTL_VPORT_Z_OFFSET_ENA);
395 
396    /* TODO blob driver seems to toss in a CACHE_FLUSH after each DRAW_INDX.. */
397 }
398 
399 static void
patch_draws(struct fd_batch * batch,enum pc_di_vis_cull_mode vismode)400 patch_draws(struct fd_batch *batch, enum pc_di_vis_cull_mode vismode)
401 {
402    unsigned i;
403 
404    if (!is_a20x(batch->ctx->screen)) {
405       /* identical to a3xx */
406       for (i = 0; i < fd_patch_num_elements(&batch->draw_patches); i++) {
407          struct fd_cs_patch *patch = fd_patch_element(&batch->draw_patches, i);
408          *patch->cs = patch->val | DRAW(0, 0, 0, vismode, 0);
409       }
410       util_dynarray_clear(&batch->draw_patches);
411       return;
412    }
413 
414    if (vismode == USE_VISIBILITY)
415       return;
416 
417    for (i = 0; i < batch->draw_patches.size / sizeof(uint32_t *); i++) {
418       uint32_t *ptr =
419          *util_dynarray_element(&batch->draw_patches, uint32_t *, i);
420       unsigned cnt = ptr[0] >> 16 & 0xfff; /* 5 with idx buffer, 3 without */
421 
422       /* convert CP_DRAW_INDX_BIN to a CP_DRAW_INDX
423        * replace first two DWORDS with NOP and move the rest down
424        * (we don't want to have to move the idx buffer reloc)
425        */
426       ptr[0] = CP_TYPE3_PKT | (CP_NOP << 8);
427       ptr[1] = 0x00000000;
428 
429       ptr[4] = ptr[2] & ~(1 << 14 | 1 << 15); /* remove cull_enable bits */
430       ptr[2] = CP_TYPE3_PKT | ((cnt - 2) << 16) | (CP_DRAW_INDX << 8);
431       ptr[3] = 0x00000000;
432    }
433 }
434 
435 static void
fd2_emit_sysmem_prep(struct fd_batch * batch)436 fd2_emit_sysmem_prep(struct fd_batch *batch)
437 {
438    struct fd_context *ctx = batch->ctx;
439    struct fd_ringbuffer *ring = batch->gmem;
440    struct pipe_framebuffer_state *pfb = &batch->framebuffer;
441    struct pipe_surface *psurf = pfb->cbufs[0];
442 
443    if (!psurf)
444       return;
445 
446    struct fd_resource *rsc = fd_resource(psurf->texture);
447    uint32_t offset =
448       fd_resource_offset(rsc, psurf->u.tex.level, psurf->u.tex.first_layer);
449    uint32_t pitch = fdl2_pitch_pixels(&rsc->layout, psurf->u.tex.level);
450 
451    assert((pitch & 31) == 0);
452    assert((offset & 0xfff) == 0);
453 
454    fd2_emit_restore(ctx, ring);
455 
456    OUT_PKT3(ring, CP_SET_CONSTANT, 2);
457    OUT_RING(ring, CP_REG(REG_A2XX_RB_SURFACE_INFO));
458    OUT_RING(ring, A2XX_RB_SURFACE_INFO_SURFACE_PITCH(pitch));
459 
460    OUT_PKT3(ring, CP_SET_CONSTANT, 2);
461    OUT_RING(ring, CP_REG(REG_A2XX_RB_COLOR_INFO));
462    OUT_RELOC(ring, rsc->bo, offset,
463              COND(!rsc->layout.tile_mode, A2XX_RB_COLOR_INFO_LINEAR) |
464                 A2XX_RB_COLOR_INFO_SWAP(fmt2swap(psurf->format)) |
465                 A2XX_RB_COLOR_INFO_FORMAT(fd2_pipe2color(psurf->format)),
466              0);
467 
468    OUT_PKT3(ring, CP_SET_CONSTANT, 3);
469    OUT_RING(ring, CP_REG(REG_A2XX_PA_SC_SCREEN_SCISSOR_TL));
470    OUT_RING(ring, A2XX_PA_SC_SCREEN_SCISSOR_TL_WINDOW_OFFSET_DISABLE);
471    OUT_RING(ring, A2XX_PA_SC_SCREEN_SCISSOR_BR_X(pfb->width) |
472                      A2XX_PA_SC_SCREEN_SCISSOR_BR_Y(pfb->height));
473 
474    OUT_PKT3(ring, CP_SET_CONSTANT, 2);
475    OUT_RING(ring, CP_REG(REG_A2XX_PA_SC_WINDOW_OFFSET));
476    OUT_RING(ring,
477             A2XX_PA_SC_WINDOW_OFFSET_X(0) | A2XX_PA_SC_WINDOW_OFFSET_Y(0));
478 
479    patch_draws(batch, IGNORE_VISIBILITY);
480    util_dynarray_clear(&batch->draw_patches);
481    util_dynarray_clear(&batch->shader_patches);
482 }
483 
484 /* before first tile */
485 static void
fd2_emit_tile_init(struct fd_batch * batch)486 fd2_emit_tile_init(struct fd_batch *batch) assert_dt
487 {
488    struct fd_context *ctx = batch->ctx;
489    struct fd_ringbuffer *ring = batch->gmem;
490    struct pipe_framebuffer_state *pfb = &batch->framebuffer;
491    const struct fd_gmem_stateobj *gmem = batch->gmem_state;
492    enum pipe_format format = pipe_surface_format(pfb->cbufs[0]);
493    uint32_t reg;
494 
495    fd2_emit_restore(ctx, ring);
496 
497    prepare_tile_fini_ib(batch);
498 
499    OUT_PKT3(ring, CP_SET_CONSTANT, 4);
500    OUT_RING(ring, CP_REG(REG_A2XX_RB_SURFACE_INFO));
501    OUT_RING(ring, gmem->bin_w); /* RB_SURFACE_INFO */
502    OUT_RING(ring, A2XX_RB_COLOR_INFO_SWAP(fmt2swap(format)) |
503                      A2XX_RB_COLOR_INFO_FORMAT(fd2_pipe2color(format)));
504    reg = A2XX_RB_DEPTH_INFO_DEPTH_BASE(gmem->zsbuf_base[0]);
505    if (pfb->zsbuf)
506       reg |= A2XX_RB_DEPTH_INFO_DEPTH_FORMAT(fd_pipe2depth(pfb->zsbuf->format));
507    OUT_RING(ring, reg); /* RB_DEPTH_INFO */
508 
509    /* fast clear patches */
510    int depth_size = -1;
511    int color_size = -1;
512 
513    if (pfb->cbufs[0])
514       color_size = util_format_get_blocksizebits(format) == 32 ? 4 : 2;
515 
516    if (pfb->zsbuf)
517       depth_size = fd_pipe2depth(pfb->zsbuf->format) == 1 ? 4 : 2;
518 
519    for (int i = 0; i < fd_patch_num_elements(&batch->gmem_patches); i++) {
520       struct fd_cs_patch *patch = fd_patch_element(&batch->gmem_patches, i);
521       uint32_t color_base = 0, depth_base = gmem->zsbuf_base[0];
522       uint32_t size, lines;
523 
524       /* note: 1 "line" is 512 bytes in both color/depth areas (1K total) */
525       switch (patch->val) {
526       case GMEM_PATCH_FASTCLEAR_COLOR:
527          size = align(gmem->bin_w * gmem->bin_h * color_size, 0x8000);
528          lines = size / 1024;
529          depth_base = size / 2;
530          break;
531       case GMEM_PATCH_FASTCLEAR_DEPTH:
532          size = align(gmem->bin_w * gmem->bin_h * depth_size, 0x8000);
533          lines = size / 1024;
534          color_base = depth_base;
535          depth_base = depth_base + size / 2;
536          break;
537       case GMEM_PATCH_FASTCLEAR_COLOR_DEPTH:
538          lines =
539             align(gmem->bin_w * gmem->bin_h * color_size * 2, 0x8000) / 1024;
540          break;
541       case GMEM_PATCH_RESTORE_INFO:
542          patch->cs[0] = gmem->bin_w;
543          patch->cs[1] = A2XX_RB_COLOR_INFO_SWAP(fmt2swap(format)) |
544                         A2XX_RB_COLOR_INFO_FORMAT(fd2_pipe2color(format));
545          patch->cs[2] = A2XX_RB_DEPTH_INFO_DEPTH_BASE(gmem->zsbuf_base[0]);
546          if (pfb->zsbuf)
547             patch->cs[2] |= A2XX_RB_DEPTH_INFO_DEPTH_FORMAT(
548                fd_pipe2depth(pfb->zsbuf->format));
549          continue;
550       default:
551          continue;
552       }
553 
554       patch->cs[0] = A2XX_PA_SC_SCREEN_SCISSOR_BR_X(32) |
555                      A2XX_PA_SC_SCREEN_SCISSOR_BR_Y(lines);
556       patch->cs[4] = A2XX_RB_COLOR_INFO_BASE(color_base) |
557                      A2XX_RB_COLOR_INFO_FORMAT(COLORX_8_8_8_8);
558       patch->cs[5] = A2XX_RB_DEPTH_INFO_DEPTH_BASE(depth_base) |
559                      A2XX_RB_DEPTH_INFO_DEPTH_FORMAT(1);
560    }
561    util_dynarray_clear(&batch->gmem_patches);
562 
563    /* set to zero, for some reason hardware doesn't like certain values */
564    OUT_PKT3(ring, CP_SET_CONSTANT, 2);
565    OUT_RING(ring, CP_REG(REG_A2XX_VGT_CURRENT_BIN_ID_MIN));
566    OUT_RING(ring, 0);
567 
568    OUT_PKT3(ring, CP_SET_CONSTANT, 2);
569    OUT_RING(ring, CP_REG(REG_A2XX_VGT_CURRENT_BIN_ID_MAX));
570    OUT_RING(ring, 0);
571 
572    if (use_hw_binning(batch)) {
573       /* patch out unneeded memory exports by changing EXEC CF to EXEC_END
574        *
575        * in the shader compiler, we guarantee that the shader ends with
576        * a specific pattern of ALLOC/EXEC CF pairs for the hw binning exports
577        *
578        * the since patches point only to dwords and CFs are 1.5 dwords
579        * the patch is aligned and might point to a ALLOC CF
580        */
581       for (int i = 0; i < batch->shader_patches.size / sizeof(void *); i++) {
582          instr_cf_t *cf =
583             *util_dynarray_element(&batch->shader_patches, instr_cf_t *, i);
584          if (cf->opc == ALLOC)
585             cf++;
586          assert(cf->opc == EXEC);
587          assert(cf[ctx->screen->info->num_vsc_pipes * 2 - 2].opc == EXEC_END);
588          cf[2 * (gmem->num_vsc_pipes - 1)].opc = EXEC_END;
589       }
590 
591       patch_draws(batch, USE_VISIBILITY);
592 
593       /* initialize shader constants for the binning memexport */
594       OUT_PKT3(ring, CP_SET_CONSTANT, 1 + gmem->num_vsc_pipes * 4);
595       OUT_RING(ring, 0x0000000C);
596 
597       for (int i = 0; i < gmem->num_vsc_pipes; i++) {
598          /* allocate in 64k increments to avoid reallocs */
599          uint32_t bo_size = align(batch->num_vertices, 0x10000);
600          if (!ctx->vsc_pipe_bo[i] ||
601              fd_bo_size(ctx->vsc_pipe_bo[i]) < bo_size) {
602             if (ctx->vsc_pipe_bo[i])
603                fd_bo_del(ctx->vsc_pipe_bo[i]);
604             ctx->vsc_pipe_bo[i] =
605                fd_bo_new(ctx->dev, bo_size, 0, "vsc_pipe[%u]", i);
606             assert(ctx->vsc_pipe_bo[i]);
607          }
608 
609          /* memory export address (export32):
610           * .x: (base_address >> 2) | 0x40000000 (?)
611           * .y: index (float) - set by shader
612           * .z: 0x4B00D000 (?)
613           * .w: 0x4B000000 (?) | max_index (?)
614           */
615          OUT_RELOC(ring, ctx->vsc_pipe_bo[i], 0, 0x40000000, -2);
616          OUT_RING(ring, 0x00000000);
617          OUT_RING(ring, 0x4B00D000);
618          OUT_RING(ring, 0x4B000000 | bo_size);
619       }
620 
621       OUT_PKT3(ring, CP_SET_CONSTANT, 1 + gmem->num_vsc_pipes * 8);
622       OUT_RING(ring, 0x0000018C);
623 
624       for (int i = 0; i < gmem->num_vsc_pipes; i++) {
625          const struct fd_vsc_pipe *pipe = &gmem->vsc_pipe[i];
626          float off_x, off_y, mul_x, mul_y;
627 
628          /* const to tranform from [-1,1] to bin coordinates for this pipe
629           * for x/y, [0,256/2040] = 0, [256/2040,512/2040] = 1, etc
630           * 8 possible values on x/y axis,
631           * to clip at binning stage: only use center 6x6
632           * TODO: set the z parameters too so that hw binning
633           * can clip primitives in Z too
634           */
635 
636          mul_x = 1.0f / (float)(gmem->bin_w * 8);
637          mul_y = 1.0f / (float)(gmem->bin_h * 8);
638          off_x = -pipe->x * (1.0 / 8.0f) + 0.125f - mul_x * gmem->minx;
639          off_y = -pipe->y * (1.0 / 8.0f) + 0.125f - mul_y * gmem->miny;
640 
641          OUT_RING(ring, fui(off_x * (256.0f / 255.0f)));
642          OUT_RING(ring, fui(off_y * (256.0f / 255.0f)));
643          OUT_RING(ring, 0x3f000000);
644          OUT_RING(ring, fui(0.0f));
645 
646          OUT_RING(ring, fui(mul_x * (256.0f / 255.0f)));
647          OUT_RING(ring, fui(mul_y * (256.0f / 255.0f)));
648          OUT_RING(ring, fui(0.0f));
649          OUT_RING(ring, fui(0.0f));
650       }
651 
652       OUT_PKT3(ring, CP_SET_CONSTANT, 2);
653       OUT_RING(ring, CP_REG(REG_A2XX_VGT_VERTEX_REUSE_BLOCK_CNTL));
654       OUT_RING(ring, 0);
655 
656       fd2_emit_ib(ring, batch->binning);
657 
658       OUT_PKT3(ring, CP_SET_CONSTANT, 2);
659       OUT_RING(ring, CP_REG(REG_A2XX_VGT_VERTEX_REUSE_BLOCK_CNTL));
660       OUT_RING(ring, 0x00000002);
661    } else {
662       patch_draws(batch, IGNORE_VISIBILITY);
663    }
664 
665    util_dynarray_clear(&batch->draw_patches);
666    util_dynarray_clear(&batch->shader_patches);
667 }
668 
669 /* before mem2gmem */
670 static void
fd2_emit_tile_prep(struct fd_batch * batch,const struct fd_tile * tile)671 fd2_emit_tile_prep(struct fd_batch *batch, const struct fd_tile *tile)
672 {
673    struct fd_ringbuffer *ring = batch->gmem;
674    struct pipe_framebuffer_state *pfb = &batch->framebuffer;
675    enum pipe_format format = pipe_surface_format(pfb->cbufs[0]);
676 
677    OUT_PKT3(ring, CP_SET_CONSTANT, 2);
678    OUT_RING(ring, CP_REG(REG_A2XX_RB_COLOR_INFO));
679    OUT_RING(ring, A2XX_RB_COLOR_INFO_SWAP(1) | /* RB_COLOR_INFO */
680                      A2XX_RB_COLOR_INFO_FORMAT(fd2_pipe2color(format)));
681 
682    /* setup screen scissor for current tile (same for mem2gmem): */
683    OUT_PKT3(ring, CP_SET_CONSTANT, 3);
684    OUT_RING(ring, CP_REG(REG_A2XX_PA_SC_SCREEN_SCISSOR_TL));
685    OUT_RING(ring, A2XX_PA_SC_SCREEN_SCISSOR_TL_X(0) |
686                      A2XX_PA_SC_SCREEN_SCISSOR_TL_Y(0));
687    OUT_RING(ring, A2XX_PA_SC_SCREEN_SCISSOR_BR_X(tile->bin_w) |
688                      A2XX_PA_SC_SCREEN_SCISSOR_BR_Y(tile->bin_h));
689 }
690 
691 /* before IB to rendering cmds: */
692 static void
fd2_emit_tile_renderprep(struct fd_batch * batch,const struct fd_tile * tile)693 fd2_emit_tile_renderprep(struct fd_batch *batch,
694                          const struct fd_tile *tile) assert_dt
695 {
696    struct fd_context *ctx = batch->ctx;
697    struct fd2_context *fd2_ctx = fd2_context(ctx);
698    struct fd_ringbuffer *ring = batch->gmem;
699    struct pipe_framebuffer_state *pfb = &batch->framebuffer;
700    enum pipe_format format = pipe_surface_format(pfb->cbufs[0]);
701 
702    OUT_PKT3(ring, CP_SET_CONSTANT, 2);
703    OUT_RING(ring, CP_REG(REG_A2XX_RB_COLOR_INFO));
704    OUT_RING(ring, A2XX_RB_COLOR_INFO_SWAP(fmt2swap(format)) |
705                      A2XX_RB_COLOR_INFO_FORMAT(fd2_pipe2color(format)));
706 
707    /* setup window scissor and offset for current tile (different
708     * from mem2gmem):
709     */
710    OUT_PKT3(ring, CP_SET_CONSTANT, 2);
711    OUT_RING(ring, CP_REG(REG_A2XX_PA_SC_WINDOW_OFFSET));
712    OUT_RING(ring, A2XX_PA_SC_WINDOW_OFFSET_X(-tile->xoff) |
713                      A2XX_PA_SC_WINDOW_OFFSET_Y(-tile->yoff));
714 
715    /* write SCISSOR_BR to memory so fast clear path can restore from it */
716    OUT_PKT3(ring, CP_MEM_WRITE, 2);
717    OUT_RELOC(ring, fd_resource(fd2_ctx->solid_vertexbuf)->bo, 60, 0, 0);
718    OUT_RING(ring, A2XX_PA_SC_SCREEN_SCISSOR_BR_X(tile->bin_w) |
719                      A2XX_PA_SC_SCREEN_SCISSOR_BR_Y(tile->bin_h));
720 
721    /* set the copy offset for gmem2mem */
722    OUT_PKT3(ring, CP_SET_CONSTANT, 2);
723    OUT_RING(ring, CP_REG(REG_A2XX_RB_COPY_DEST_OFFSET));
724    OUT_RING(ring, A2XX_RB_COPY_DEST_OFFSET_X(tile->xoff) |
725                      A2XX_RB_COPY_DEST_OFFSET_Y(tile->yoff));
726 
727    /* tile offset for gl_FragCoord on a20x (C64 in fragment shader) */
728    if (is_a20x(ctx->screen)) {
729       OUT_PKT3(ring, CP_SET_CONSTANT, 5);
730       OUT_RING(ring, 0x00000580);
731       OUT_RING(ring, fui(tile->xoff));
732       OUT_RING(ring, fui(tile->yoff));
733       OUT_RING(ring, fui(0.0f));
734       OUT_RING(ring, fui(0.0f));
735    }
736 
737    if (use_hw_binning(batch)) {
738       struct fd_bo *pipe_bo = ctx->vsc_pipe_bo[tile->p];
739 
740       OUT_PKT3(ring, CP_SET_CONSTANT, 2);
741       OUT_RING(ring, CP_REG(REG_A2XX_VGT_CURRENT_BIN_ID_MIN));
742       OUT_RING(ring, tile->n);
743 
744       OUT_PKT3(ring, CP_SET_CONSTANT, 2);
745       OUT_RING(ring, CP_REG(REG_A2XX_VGT_CURRENT_BIN_ID_MAX));
746       OUT_RING(ring, tile->n);
747 
748       /* TODO only emit this when tile->p changes */
749       OUT_PKT3(ring, CP_SET_DRAW_INIT_FLAGS, 1);
750       OUT_RELOC(ring, pipe_bo, 0, 0, 0);
751    }
752 }
753 
754 void
fd2_gmem_init(struct pipe_context * pctx)755 fd2_gmem_init(struct pipe_context *pctx) disable_thread_safety_analysis
756 {
757    struct fd_context *ctx = fd_context(pctx);
758 
759    ctx->emit_sysmem_prep = fd2_emit_sysmem_prep;
760    ctx->emit_tile_init = fd2_emit_tile_init;
761    ctx->emit_tile_prep = fd2_emit_tile_prep;
762    ctx->emit_tile_mem2gmem = fd2_emit_tile_mem2gmem;
763    ctx->emit_tile_renderprep = fd2_emit_tile_renderprep;
764    ctx->emit_tile_gmem2mem = fd2_emit_tile_gmem2mem;
765 }
766