1 /*
2  * Copyright (C) 2017 Rob Clark <robclark@freedesktop.org>
3  * Copyright © 2018 Google, Inc.
4  *
5  * Permission is hereby granted, free of charge, to any person obtaining a
6  * copy of this software and associated documentation files (the "Software"),
7  * to deal in the Software without restriction, including without limitation
8  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
9  * and/or sell copies of the Software, and to permit persons to whom the
10  * Software is furnished to do so, subject to the following conditions:
11  *
12  * The above copyright notice and this permission notice (including the next
13  * paragraph) shall be included in all copies or substantial portions of the
14  * Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
19  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22  * SOFTWARE.
23  *
24  * Authors:
25  *    Rob Clark <robclark@freedesktop.org>
26  */
27 
28 #include "util/format_srgb.h"
29 #include "util/half_float.h"
30 #include "util/u_dump.h"
31 #include "util/u_log.h"
32 
33 #include "freedreno_blitter.h"
34 #include "freedreno_fence.h"
35 #include "freedreno_resource.h"
36 #include "freedreno_tracepoints.h"
37 
38 #include "fd6_blitter.h"
39 #include "fd6_emit.h"
40 #include "fd6_format.h"
41 #include "fd6_resource.h"
42 
43 static inline enum a6xx_2d_ifmt
fd6_ifmt(enum a6xx_format fmt)44 fd6_ifmt(enum a6xx_format fmt)
45 {
46    switch (fmt) {
47    case FMT6_A8_UNORM:
48    case FMT6_8_UNORM:
49    case FMT6_8_SNORM:
50    case FMT6_8_8_UNORM:
51    case FMT6_8_8_SNORM:
52    case FMT6_8_8_8_8_UNORM:
53    case FMT6_8_8_8_X8_UNORM:
54    case FMT6_8_8_8_8_SNORM:
55    case FMT6_4_4_4_4_UNORM:
56    case FMT6_5_5_5_1_UNORM:
57    case FMT6_5_6_5_UNORM:
58       return R2D_UNORM8;
59 
60    case FMT6_32_UINT:
61    case FMT6_32_SINT:
62    case FMT6_32_32_UINT:
63    case FMT6_32_32_SINT:
64    case FMT6_32_32_32_32_UINT:
65    case FMT6_32_32_32_32_SINT:
66       return R2D_INT32;
67 
68    case FMT6_16_UINT:
69    case FMT6_16_SINT:
70    case FMT6_16_16_UINT:
71    case FMT6_16_16_SINT:
72    case FMT6_16_16_16_16_UINT:
73    case FMT6_16_16_16_16_SINT:
74    case FMT6_10_10_10_2_UINT:
75       return R2D_INT16;
76 
77    case FMT6_8_UINT:
78    case FMT6_8_SINT:
79    case FMT6_8_8_UINT:
80    case FMT6_8_8_SINT:
81    case FMT6_8_8_8_8_UINT:
82    case FMT6_8_8_8_8_SINT:
83    case FMT6_Z24_UNORM_S8_UINT:
84    case FMT6_Z24_UNORM_S8_UINT_AS_R8G8B8A8:
85       return R2D_INT8;
86 
87    case FMT6_16_UNORM:
88    case FMT6_16_SNORM:
89    case FMT6_16_16_UNORM:
90    case FMT6_16_16_SNORM:
91    case FMT6_16_16_16_16_UNORM:
92    case FMT6_16_16_16_16_SNORM:
93    case FMT6_32_FLOAT:
94    case FMT6_32_32_FLOAT:
95    case FMT6_32_32_32_32_FLOAT:
96       return R2D_FLOAT32;
97 
98    case FMT6_16_FLOAT:
99    case FMT6_16_16_FLOAT:
100    case FMT6_16_16_16_16_FLOAT:
101    case FMT6_11_11_10_FLOAT:
102    case FMT6_10_10_10_2_UNORM_DEST:
103       return R2D_FLOAT16;
104 
105    default:
106       unreachable("bad format");
107       return 0;
108    }
109 }
110 
111 /* Make sure none of the requested dimensions extend beyond the size of the
112  * resource.  Not entirely sure why this happens, but sometimes it does, and
113  * w/ 2d blt doesn't have wrap modes like a sampler, so force those cases
114  * back to u_blitter
115  */
116 static bool
ok_dims(const struct pipe_resource * r,const struct pipe_box * b,int lvl)117 ok_dims(const struct pipe_resource *r, const struct pipe_box *b, int lvl)
118 {
119    int last_layer =
120       r->target == PIPE_TEXTURE_3D ? u_minify(r->depth0, lvl) : r->array_size;
121 
122    return (b->x >= 0) && (b->x + b->width <= u_minify(r->width0, lvl)) &&
123           (b->y >= 0) && (b->y + b->height <= u_minify(r->height0, lvl)) &&
124           (b->z >= 0) && (b->z + b->depth <= last_layer);
125 }
126 
127 static bool
ok_format(enum pipe_format pfmt)128 ok_format(enum pipe_format pfmt)
129 {
130    enum a6xx_format fmt = fd6_color_format(pfmt, TILE6_LINEAR);
131 
132    if (util_format_is_compressed(pfmt))
133       return true;
134 
135    switch (pfmt) {
136    case PIPE_FORMAT_Z24_UNORM_S8_UINT:
137    case PIPE_FORMAT_Z24X8_UNORM:
138    case PIPE_FORMAT_Z16_UNORM:
139    case PIPE_FORMAT_Z32_UNORM:
140    case PIPE_FORMAT_Z32_FLOAT:
141    case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT:
142    case PIPE_FORMAT_S8_UINT:
143       return true;
144    default:
145       break;
146    }
147 
148    if (fmt == FMT6_NONE)
149       return false;
150 
151    return true;
152 }
153 
/* Compile-time debug switches.  Set DEBUG_BLIT to 1 to dump the blits and
 * surface clears emitted by this file; set DEBUG_BLIT_FALLBACK to 1 to log
 * each rejected blit together with the condition that rejected it.
 */
#define DEBUG_BLIT          0
#define DEBUG_BLIT_FALLBACK 0

/* Return false from the enclosing function when cond is true.  The optional
 * debug dump expects a 'const struct pipe_blit_info *info' to be in scope
 * at the point of use.
 */
#define fail_if(cond)                                                          \
   do {                                                                        \
      if (!(cond))                                                             \
         break;                                                                \
      if (DEBUG_BLIT_FALLBACK) {                                               \
         fprintf(stderr, "falling back: %s for blit:\n", #cond);               \
         dump_blit_info(info);                                                 \
      }                                                                        \
      return false;                                                            \
   } while (0)
167 
/* Convenience wrapper: is UBWC enabled for the given mip level of a
 * pipe_resource?
 */
static bool
is_ubwc(struct pipe_resource *prsc, unsigned level)
{
   struct fd_resource *rsc = fd_resource(prsc);

   return fd_resource_ubwc_enabled(rsc, level);
}
173 
174 static void
dump_blit_info(const struct pipe_blit_info * info)175 dump_blit_info(const struct pipe_blit_info *info)
176 {
177    util_dump_blit_info(stderr, info);
178    fprintf(stderr, "\ndst resource: ");
179    util_dump_resource(stderr, info->dst.resource);
180    if (is_ubwc(info->dst.resource, info->dst.level))
181       fprintf(stderr, " (ubwc)");
182    fprintf(stderr, "\nsrc resource: ");
183    util_dump_resource(stderr, info->src.resource);
184    if (is_ubwc(info->src.resource, info->src.level))
185       fprintf(stderr, " (ubwc)");
186    fprintf(stderr, "\n");
187 }
188 
189 static bool
can_do_blit(const struct pipe_blit_info * info)190 can_do_blit(const struct pipe_blit_info *info)
191 {
192    /* I think we can do scaling, but not in z dimension since that would
193     * require blending..
194     */
195    fail_if(info->dst.box.depth != info->src.box.depth);
196 
197    /* Fail if unsupported format: */
198    fail_if(!ok_format(info->src.format));
199    fail_if(!ok_format(info->dst.format));
200 
201    debug_assert(!util_format_is_compressed(info->src.format));
202    debug_assert(!util_format_is_compressed(info->dst.format));
203 
204    fail_if(!ok_dims(info->src.resource, &info->src.box, info->src.level));
205 
206    fail_if(!ok_dims(info->dst.resource, &info->dst.box, info->dst.level));
207 
208    debug_assert(info->dst.box.width >= 0);
209    debug_assert(info->dst.box.height >= 0);
210    debug_assert(info->dst.box.depth >= 0);
211 
212    fail_if(info->dst.resource->nr_samples > 1);
213 
214    fail_if(info->window_rectangle_include);
215 
216    const struct util_format_description *src_desc =
217       util_format_description(info->src.format);
218    const struct util_format_description *dst_desc =
219       util_format_description(info->dst.format);
220    const int common_channels =
221       MIN2(src_desc->nr_channels, dst_desc->nr_channels);
222 
223    if (info->mask & PIPE_MASK_RGBA) {
224       for (int i = 0; i < common_channels; i++) {
225          fail_if(memcmp(&src_desc->channel[i], &dst_desc->channel[i],
226                         sizeof(src_desc->channel[0])));
227       }
228    }
229 
230    fail_if(info->alpha_blend);
231 
232    return true;
233 }
234 
235 static void
emit_setup(struct fd_batch * batch)236 emit_setup(struct fd_batch *batch)
237 {
238    struct fd_ringbuffer *ring = batch->draw;
239    struct fd_screen *screen = batch->ctx->screen;
240 
241    fd6_event_write(batch, ring, PC_CCU_FLUSH_COLOR_TS, true);
242    fd6_event_write(batch, ring, PC_CCU_FLUSH_DEPTH_TS, true);
243    fd6_event_write(batch, ring, PC_CCU_INVALIDATE_COLOR, false);
244    fd6_event_write(batch, ring, PC_CCU_INVALIDATE_DEPTH, false);
245 
246    /* normal BLIT_OP_SCALE operation needs bypass RB_CCU_CNTL */
247    OUT_WFI5(ring);
248    OUT_PKT4(ring, REG_A6XX_RB_CCU_CNTL, 1);
249    OUT_RING(ring, A6XX_RB_CCU_CNTL_COLOR_OFFSET(screen->ccu_offset_bypass));
250 }
251 
252 static void
emit_blit_setup(struct fd_ringbuffer * ring,enum pipe_format pfmt,bool scissor_enable,union pipe_color_union * color,uint32_t unknown_8c01)253 emit_blit_setup(struct fd_ringbuffer *ring, enum pipe_format pfmt,
254                 bool scissor_enable, union pipe_color_union *color,
255                 uint32_t unknown_8c01)
256 {
257    enum a6xx_format fmt = fd6_color_format(pfmt, TILE6_LINEAR);
258    bool is_srgb = util_format_is_srgb(pfmt);
259    enum a6xx_2d_ifmt ifmt = fd6_ifmt(fmt);
260 
261    if (is_srgb) {
262       assert(ifmt == R2D_UNORM8);
263       ifmt = R2D_UNORM8_SRGB;
264    }
265 
266    uint32_t blit_cntl = A6XX_RB_2D_BLIT_CNTL_MASK(0xf) |
267                         A6XX_RB_2D_BLIT_CNTL_COLOR_FORMAT(fmt) |
268                         A6XX_RB_2D_BLIT_CNTL_IFMT(ifmt) |
269                         COND(color, A6XX_RB_2D_BLIT_CNTL_SOLID_COLOR) |
270                         COND(scissor_enable, A6XX_RB_2D_BLIT_CNTL_SCISSOR);
271 
272    OUT_PKT4(ring, REG_A6XX_RB_2D_BLIT_CNTL, 1);
273    OUT_RING(ring, blit_cntl);
274 
275    OUT_PKT4(ring, REG_A6XX_GRAS_2D_BLIT_CNTL, 1);
276    OUT_RING(ring, blit_cntl);
277 
278    if (fmt == FMT6_10_10_10_2_UNORM_DEST)
279       fmt = FMT6_16_16_16_16_FLOAT;
280 
281    /* This register is probably badly named... it seems that it's
282     * controlling the internal/accumulator format or something like
283     * that. It's certainly not tied to only the src format.
284     */
285    OUT_PKT4(ring, REG_A6XX_SP_2D_DST_FORMAT, 1);
286    OUT_RING(
287       ring,
288       A6XX_SP_2D_DST_FORMAT_COLOR_FORMAT(fmt) |
289          COND(util_format_is_pure_sint(pfmt), A6XX_SP_2D_DST_FORMAT_SINT) |
290          COND(util_format_is_pure_uint(pfmt), A6XX_SP_2D_DST_FORMAT_UINT) |
291          COND(is_srgb, A6XX_SP_2D_DST_FORMAT_SRGB) |
292          A6XX_SP_2D_DST_FORMAT_MASK(0xf));
293 
294    OUT_PKT4(ring, REG_A6XX_RB_2D_UNKNOWN_8C01, 1);
295    OUT_RING(ring, unknown_8c01);
296 }
297 
298 /* buffers need to be handled specially since x/width can exceed the bounds
299  * supported by hw.. if necessary decompose into (potentially) two 2D blits
300  */
301 static void
emit_blit_buffer(struct fd_context * ctx,struct fd_ringbuffer * ring,const struct pipe_blit_info * info)302 emit_blit_buffer(struct fd_context *ctx, struct fd_ringbuffer *ring,
303                  const struct pipe_blit_info *info)
304 {
305    const struct pipe_box *sbox = &info->src.box;
306    const struct pipe_box *dbox = &info->dst.box;
307    struct fd_resource *src, *dst;
308    unsigned sshift, dshift;
309 
310    if (DEBUG_BLIT) {
311       fprintf(stderr, "buffer blit: ");
312       dump_blit_info(info);
313    }
314 
315    src = fd_resource(info->src.resource);
316    dst = fd_resource(info->dst.resource);
317 
318    debug_assert(src->layout.cpp == 1);
319    debug_assert(dst->layout.cpp == 1);
320    debug_assert(info->src.resource->format == info->dst.resource->format);
321    debug_assert((sbox->y == 0) && (sbox->height == 1));
322    debug_assert((dbox->y == 0) && (dbox->height == 1));
323    debug_assert((sbox->z == 0) && (sbox->depth == 1));
324    debug_assert((dbox->z == 0) && (dbox->depth == 1));
325    debug_assert(sbox->width == dbox->width);
326    debug_assert(info->src.level == 0);
327    debug_assert(info->dst.level == 0);
328 
329    /*
330     * Buffers can have dimensions bigger than max width, remap into
331     * multiple 1d blits to fit within max dimension
332     *
333     * Note that blob uses .ARRAY_PITCH=128 for blitting buffers, which
334     * seems to prevent overfetch related faults.  Not quite sure what
335     * the deal is there.
336     *
337     * Low 6 bits of SRC/DST addresses need to be zero (ie. address
338     * aligned to 64) so we need to shift src/dst x1/x2 to make up the
339     * difference.  On top of already splitting up the blit so width
340     * isn't > 16k.
341     *
342     * We perhaps could do a bit better, if src and dst are aligned but
343     * in the worst case this means we have to split the copy up into
344     * 16k (0x4000) minus 64 (0x40).
345     */
346 
347    sshift = sbox->x & 0x3f;
348    dshift = dbox->x & 0x3f;
349 
350    emit_blit_setup(ring, PIPE_FORMAT_R8_UNORM, false, NULL, 0);
351 
352    for (unsigned off = 0; off < sbox->width; off += (0x4000 - 0x40)) {
353       unsigned soff, doff, w, p;
354 
355       soff = (sbox->x + off) & ~0x3f;
356       doff = (dbox->x + off) & ~0x3f;
357 
358       w = MIN2(sbox->width - off, (0x4000 - 0x40));
359       p = align(w, 64);
360 
361       debug_assert((soff + w) <= fd_bo_size(src->bo));
362       debug_assert((doff + w) <= fd_bo_size(dst->bo));
363 
364       /*
365        * Emit source:
366        */
367       OUT_PKT4(ring, REG_A6XX_SP_PS_2D_SRC_INFO, 10);
368       OUT_RING(ring, A6XX_SP_PS_2D_SRC_INFO_COLOR_FORMAT(FMT6_8_UNORM) |
369                         A6XX_SP_PS_2D_SRC_INFO_TILE_MODE(TILE6_LINEAR) |
370                         A6XX_SP_PS_2D_SRC_INFO_COLOR_SWAP(WZYX) | 0x500000);
371       OUT_RING(ring,
372                A6XX_SP_PS_2D_SRC_SIZE_WIDTH(sshift + w) |
373                   A6XX_SP_PS_2D_SRC_SIZE_HEIGHT(1)); /* SP_PS_2D_SRC_SIZE */
374       OUT_RELOC(ring, src->bo, soff, 0, 0);          /* SP_PS_2D_SRC_LO/HI */
375       OUT_RING(ring, A6XX_SP_PS_2D_SRC_PITCH_PITCH(p));
376 
377       OUT_RING(ring, 0x00000000);
378       OUT_RING(ring, 0x00000000);
379       OUT_RING(ring, 0x00000000);
380       OUT_RING(ring, 0x00000000);
381       OUT_RING(ring, 0x00000000);
382 
383       /*
384        * Emit destination:
385        */
386       OUT_PKT4(ring, REG_A6XX_RB_2D_DST_INFO, 9);
387       OUT_RING(ring, A6XX_RB_2D_DST_INFO_COLOR_FORMAT(FMT6_8_UNORM) |
388                         A6XX_RB_2D_DST_INFO_TILE_MODE(TILE6_LINEAR) |
389                         A6XX_RB_2D_DST_INFO_COLOR_SWAP(WZYX));
390       OUT_RELOC(ring, dst->bo, doff, 0, 0); /* RB_2D_DST_LO/HI */
391       OUT_RING(ring, A6XX_RB_2D_DST_PITCH(p));
392       OUT_RING(ring, 0x00000000);
393       OUT_RING(ring, 0x00000000);
394       OUT_RING(ring, 0x00000000);
395       OUT_RING(ring, 0x00000000);
396       OUT_RING(ring, 0x00000000);
397 
398       /*
399        * Blit command:
400        */
401       OUT_PKT4(ring, REG_A6XX_GRAS_2D_SRC_TL_X, 4);
402       OUT_RING(ring, A6XX_GRAS_2D_SRC_TL_X(sshift));
403       OUT_RING(ring, A6XX_GRAS_2D_SRC_BR_X(sshift + w - 1));
404       OUT_RING(ring, A6XX_GRAS_2D_SRC_TL_Y(0));
405       OUT_RING(ring, A6XX_GRAS_2D_SRC_BR_Y(0));
406 
407       OUT_PKT4(ring, REG_A6XX_GRAS_2D_DST_TL, 2);
408       OUT_RING(ring, A6XX_GRAS_2D_DST_TL_X(dshift) | A6XX_GRAS_2D_DST_TL_Y(0));
409       OUT_RING(ring, A6XX_GRAS_2D_DST_BR_X(dshift + w - 1) |
410                         A6XX_GRAS_2D_DST_BR_Y(0));
411 
412       OUT_PKT7(ring, CP_EVENT_WRITE, 1);
413       OUT_RING(ring, 0x3f);
414       OUT_WFI5(ring);
415 
416       OUT_PKT4(ring, REG_A6XX_RB_UNKNOWN_8E04, 1);
417       OUT_RING(ring, ctx->screen->info->a6xx.magic.RB_UNKNOWN_8E04_blit);
418 
419       OUT_PKT7(ring, CP_BLIT, 1);
420       OUT_RING(ring, CP_BLIT_0_OP(BLIT_OP_SCALE));
421 
422       OUT_WFI5(ring);
423 
424       OUT_PKT4(ring, REG_A6XX_RB_UNKNOWN_8E04, 1);
425       OUT_RING(ring, 0); /* RB_UNKNOWN_8E04 */
426    }
427 }
428 
429 static void
fd6_clear_ubwc(struct fd_batch * batch,struct fd_resource * rsc)430 fd6_clear_ubwc(struct fd_batch *batch, struct fd_resource *rsc) assert_dt
431 {
432    struct fd_ringbuffer *ring = fd_batch_get_prologue(batch);
433    union pipe_color_union color = {};
434 
435    emit_blit_setup(ring, PIPE_FORMAT_R8_UNORM, false, &color, 0);
436 
437    OUT_PKT4(ring, REG_A6XX_SP_PS_2D_SRC_INFO, 13);
438    OUT_RING(ring, 0x00000000);
439    OUT_RING(ring, 0x00000000);
440    OUT_RING(ring, 0x00000000);
441    OUT_RING(ring, 0x00000000);
442    OUT_RING(ring, 0x00000000);
443    OUT_RING(ring, 0x00000000);
444    OUT_RING(ring, 0x00000000);
445    OUT_RING(ring, 0x00000000);
446    OUT_RING(ring, 0x00000000);
447    OUT_RING(ring, 0x00000000);
448    OUT_RING(ring, 0x00000000);
449    OUT_RING(ring, 0x00000000);
450    OUT_RING(ring, 0x00000000);
451 
452    OUT_PKT4(ring, REG_A6XX_RB_2D_SRC_SOLID_C0, 4);
453    OUT_RING(ring, 0x00000000);
454    OUT_RING(ring, 0x00000000);
455    OUT_RING(ring, 0x00000000);
456    OUT_RING(ring, 0x00000000);
457 
458    OUT_PKT4(ring, REG_A6XX_GRAS_2D_SRC_TL_X, 4);
459    OUT_RING(ring, A6XX_GRAS_2D_SRC_TL_X(0));
460    OUT_RING(ring, A6XX_GRAS_2D_SRC_BR_X(0));
461    OUT_RING(ring, A6XX_GRAS_2D_SRC_TL_Y(0));
462    OUT_RING(ring, A6XX_GRAS_2D_SRC_BR_Y(0));
463 
464    unsigned size = rsc->layout.slices[0].offset;
465    unsigned offset = 0;
466 
467    /* We could be more clever here and realize that we could use a
468     * larger width if the size is aligned to something more than a
469     * single page.. or even use a format larger than r8 in those
470     * cases. But for normal sized textures and even up to 16k x 16k
471     * at <= 4byte/pixel, we'll only go thru the loop once
472     */
473    const unsigned w = 0x1000;
474 
475    /* ubwc size should always be page aligned: */
476    assert((size % w) == 0);
477 
478    while (size > 0) {
479       const unsigned h = MIN2(0x4000, size / w);
480       /* width is already aligned to a suitable pitch: */
481       const unsigned p = w;
482 
483       /*
484        * Emit destination:
485        */
486       OUT_PKT4(ring, REG_A6XX_RB_2D_DST_INFO, 9);
487       OUT_RING(ring, A6XX_RB_2D_DST_INFO_COLOR_FORMAT(FMT6_8_UNORM) |
488                         A6XX_RB_2D_DST_INFO_TILE_MODE(TILE6_LINEAR) |
489                         A6XX_RB_2D_DST_INFO_COLOR_SWAP(WZYX));
490       OUT_RELOC(ring, rsc->bo, offset, 0, 0); /* RB_2D_DST_LO/HI */
491       OUT_RING(ring, A6XX_RB_2D_DST_PITCH(p));
492       OUT_RING(ring, 0x00000000);
493       OUT_RING(ring, 0x00000000);
494       OUT_RING(ring, 0x00000000);
495       OUT_RING(ring, 0x00000000);
496       OUT_RING(ring, 0x00000000);
497 
498       /*
499        * Blit command:
500        */
501 
502       OUT_PKT4(ring, REG_A6XX_GRAS_2D_DST_TL, 2);
503       OUT_RING(ring, A6XX_GRAS_2D_DST_TL_X(0) | A6XX_GRAS_2D_DST_TL_Y(0));
504       OUT_RING(ring,
505                A6XX_GRAS_2D_DST_BR_X(w - 1) | A6XX_GRAS_2D_DST_BR_Y(h - 1));
506 
507       OUT_PKT7(ring, CP_EVENT_WRITE, 1);
508       OUT_RING(ring, 0x3f);
509       OUT_WFI5(ring);
510 
511       OUT_PKT4(ring, REG_A6XX_RB_UNKNOWN_8E04, 1);
512       OUT_RING(ring, batch->ctx->screen->info->a6xx.magic.RB_UNKNOWN_8E04_blit);
513 
514       OUT_PKT7(ring, CP_BLIT, 1);
515       OUT_RING(ring, CP_BLIT_0_OP(BLIT_OP_SCALE));
516 
517       OUT_WFI5(ring);
518 
519       OUT_PKT4(ring, REG_A6XX_RB_UNKNOWN_8E04, 1);
520       OUT_RING(ring, 0); /* RB_UNKNOWN_8E04 */
521 
522       offset += w * h;
523       size -= w * h;
524    }
525 
526    fd6_event_write(batch, ring, PC_CCU_FLUSH_COLOR_TS, true);
527    fd6_event_write(batch, ring, PC_CCU_FLUSH_DEPTH_TS, true);
528    fd6_event_write(batch, ring, CACHE_FLUSH_TS, true);
529    fd_wfi(batch, ring);
530    fd6_cache_inv(batch, ring);
531 }
532 
533 static void
emit_blit_dst(struct fd_ringbuffer * ring,struct pipe_resource * prsc,enum pipe_format pfmt,unsigned level,unsigned layer)534 emit_blit_dst(struct fd_ringbuffer *ring, struct pipe_resource *prsc,
535               enum pipe_format pfmt, unsigned level, unsigned layer)
536 {
537    struct fd_resource *dst = fd_resource(prsc);
538    enum a6xx_format fmt = fd6_color_format(pfmt, dst->layout.tile_mode);
539    enum a6xx_tile_mode tile = fd_resource_tile_mode(prsc, level);
540    enum a3xx_color_swap swap = fd6_color_swap(pfmt, dst->layout.tile_mode);
541    uint32_t pitch = fd_resource_pitch(dst, level);
542    bool ubwc_enabled = fd_resource_ubwc_enabled(dst, level);
543    unsigned off = fd_resource_offset(dst, level, layer);
544 
545    if (fmt == FMT6_Z24_UNORM_S8_UINT)
546       fmt = FMT6_Z24_UNORM_S8_UINT_AS_R8G8B8A8;
547 
548    OUT_PKT4(ring, REG_A6XX_RB_2D_DST_INFO, 9);
549    OUT_RING(ring, A6XX_RB_2D_DST_INFO_COLOR_FORMAT(fmt) |
550                      A6XX_RB_2D_DST_INFO_TILE_MODE(tile) |
551                      A6XX_RB_2D_DST_INFO_COLOR_SWAP(swap) |
552                      COND(util_format_is_srgb(pfmt), A6XX_RB_2D_DST_INFO_SRGB) |
553                      COND(ubwc_enabled, A6XX_RB_2D_DST_INFO_FLAGS));
554    OUT_RELOC(ring, dst->bo, off, 0, 0); /* RB_2D_DST_LO/HI */
555    OUT_RING(ring, A6XX_RB_2D_DST_PITCH(pitch));
556    OUT_RING(ring, 0x00000000);
557    OUT_RING(ring, 0x00000000);
558    OUT_RING(ring, 0x00000000);
559    OUT_RING(ring, 0x00000000);
560    OUT_RING(ring, 0x00000000);
561 
562    if (ubwc_enabled) {
563       OUT_PKT4(ring, REG_A6XX_RB_2D_DST_FLAGS, 6);
564       fd6_emit_flag_reference(ring, dst, level, layer);
565       OUT_RING(ring, 0x00000000);
566       OUT_RING(ring, 0x00000000);
567       OUT_RING(ring, 0x00000000);
568    }
569 }
570 
571 static void
emit_blit_src(struct fd_ringbuffer * ring,const struct pipe_blit_info * info,unsigned layer,unsigned nr_samples,bool sample_0)572 emit_blit_src(struct fd_ringbuffer *ring, const struct pipe_blit_info *info,
573               unsigned layer, unsigned nr_samples, bool sample_0)
574 {
575    struct fd_resource *src = fd_resource(info->src.resource);
576    enum a6xx_format sfmt = fd6_texture_format(info->src.format, src->layout.tile_mode);
577    enum a6xx_tile_mode stile =
578       fd_resource_tile_mode(info->src.resource, info->src.level);
579    enum a3xx_color_swap sswap = fd6_texture_swap(info->src.format, src->layout.tile_mode);
580    uint32_t pitch = fd_resource_pitch(src, info->src.level);
581    bool subwc_enabled = fd_resource_ubwc_enabled(src, info->src.level);
582    unsigned soff = fd_resource_offset(src, info->src.level, layer);
583    uint32_t width = u_minify(src->b.b.width0, info->src.level) * nr_samples;
584    uint32_t height = u_minify(src->b.b.height0, info->src.level);
585    uint32_t filter = 0;
586 
587    if (info->filter == PIPE_TEX_FILTER_LINEAR)
588       filter = A6XX_SP_PS_2D_SRC_INFO_FILTER;
589 
590    enum a3xx_msaa_samples samples = fd_msaa_samples(src->b.b.nr_samples);
591 
592    if (info->src.format == PIPE_FORMAT_A8_UNORM)
593       sfmt = FMT6_A8_UNORM;
594 
595    OUT_PKT4(ring, REG_A6XX_SP_PS_2D_SRC_INFO, 10);
596    OUT_RING(ring, A6XX_SP_PS_2D_SRC_INFO_COLOR_FORMAT(sfmt) |
597                      A6XX_SP_PS_2D_SRC_INFO_TILE_MODE(stile) |
598                      A6XX_SP_PS_2D_SRC_INFO_COLOR_SWAP(sswap) |
599                      A6XX_SP_PS_2D_SRC_INFO_SAMPLES(samples) |
600                      COND(samples > MSAA_ONE && !sample_0,
601                           A6XX_SP_PS_2D_SRC_INFO_SAMPLES_AVERAGE) |
602                      COND(subwc_enabled, A6XX_SP_PS_2D_SRC_INFO_FLAGS) |
603                      COND(util_format_is_srgb(info->src.format),
604                           A6XX_SP_PS_2D_SRC_INFO_SRGB) |
605                      0x500000 | filter);
606    OUT_RING(ring,
607             A6XX_SP_PS_2D_SRC_SIZE_WIDTH(width) |
608                A6XX_SP_PS_2D_SRC_SIZE_HEIGHT(height)); /* SP_PS_2D_SRC_SIZE */
609    OUT_RELOC(ring, src->bo, soff, 0, 0);               /* SP_PS_2D_SRC_LO/HI */
610    OUT_RING(ring, A6XX_SP_PS_2D_SRC_PITCH_PITCH(pitch));
611 
612    OUT_RING(ring, 0x00000000);
613    OUT_RING(ring, 0x00000000);
614    OUT_RING(ring, 0x00000000);
615    OUT_RING(ring, 0x00000000);
616    OUT_RING(ring, 0x00000000);
617 
618    if (subwc_enabled) {
619       OUT_PKT4(ring, REG_A6XX_SP_PS_2D_SRC_FLAGS, 6);
620       fd6_emit_flag_reference(ring, src, info->src.level, layer);
621       OUT_RING(ring, 0x00000000);
622       OUT_RING(ring, 0x00000000);
623       OUT_RING(ring, 0x00000000);
624    }
625 }
626 
627 static void
emit_blit_texture(struct fd_context * ctx,struct fd_ringbuffer * ring,const struct pipe_blit_info * info,bool sample_0)628 emit_blit_texture(struct fd_context *ctx, struct fd_ringbuffer *ring,
629                   const struct pipe_blit_info *info, bool sample_0)
630 {
631    const struct pipe_box *sbox = &info->src.box;
632    const struct pipe_box *dbox = &info->dst.box;
633    struct fd_resource *dst;
634    int sx1, sy1, sx2, sy2;
635    int dx1, dy1, dx2, dy2;
636 
637    if (DEBUG_BLIT) {
638       fprintf(stderr, "texture blit: ");
639       dump_blit_info(info);
640    }
641 
642    dst = fd_resource(info->dst.resource);
643 
644    uint32_t nr_samples = fd_resource_nr_samples(&dst->b.b);
645 
646    sx1 = sbox->x * nr_samples;
647    sy1 = sbox->y;
648    sx2 = (sbox->x + sbox->width) * nr_samples - 1;
649    sy2 = sbox->y + sbox->height - 1;
650 
651    OUT_PKT4(ring, REG_A6XX_GRAS_2D_SRC_TL_X, 4);
652    OUT_RING(ring, A6XX_GRAS_2D_SRC_TL_X(sx1));
653    OUT_RING(ring, A6XX_GRAS_2D_SRC_BR_X(sx2));
654    OUT_RING(ring, A6XX_GRAS_2D_SRC_TL_Y(sy1));
655    OUT_RING(ring, A6XX_GRAS_2D_SRC_BR_Y(sy2));
656 
657    dx1 = dbox->x * nr_samples;
658    dy1 = dbox->y;
659    dx2 = (dbox->x + dbox->width) * nr_samples - 1;
660    dy2 = dbox->y + dbox->height - 1;
661 
662    OUT_PKT4(ring, REG_A6XX_GRAS_2D_DST_TL, 2);
663    OUT_RING(ring, A6XX_GRAS_2D_DST_TL_X(dx1) | A6XX_GRAS_2D_DST_TL_Y(dy1));
664    OUT_RING(ring, A6XX_GRAS_2D_DST_BR_X(dx2) | A6XX_GRAS_2D_DST_BR_Y(dy2));
665 
666    if (info->scissor_enable) {
667       OUT_PKT4(ring, REG_A6XX_GRAS_2D_RESOLVE_CNTL_1, 2);
668       OUT_RING(ring, A6XX_GRAS_2D_RESOLVE_CNTL_1_X(info->scissor.minx) |
669                         A6XX_GRAS_2D_RESOLVE_CNTL_1_Y(info->scissor.miny));
670       OUT_RING(ring, A6XX_GRAS_2D_RESOLVE_CNTL_1_X(info->scissor.maxx - 1) |
671                         A6XX_GRAS_2D_RESOLVE_CNTL_1_Y(info->scissor.maxy - 1));
672    }
673 
674    emit_blit_setup(ring, info->dst.format, info->scissor_enable, NULL, 0);
675 
676    for (unsigned i = 0; i < info->dst.box.depth; i++) {
677 
678       emit_blit_src(ring, info, sbox->z + i, nr_samples, sample_0);
679       emit_blit_dst(ring, info->dst.resource, info->dst.format, info->dst.level,
680                     dbox->z + i);
681 
682       /*
683        * Blit command:
684        */
685       OUT_PKT7(ring, CP_EVENT_WRITE, 1);
686       OUT_RING(ring, 0x3f);
687       OUT_WFI5(ring);
688 
689       OUT_PKT4(ring, REG_A6XX_RB_UNKNOWN_8E04, 1);
690       OUT_RING(ring, ctx->screen->info->a6xx.magic.RB_UNKNOWN_8E04_blit);
691 
692       OUT_PKT7(ring, CP_BLIT, 1);
693       OUT_RING(ring, CP_BLIT_0_OP(BLIT_OP_SCALE));
694 
695       OUT_WFI5(ring);
696 
697       OUT_PKT4(ring, REG_A6XX_RB_UNKNOWN_8E04, 1);
698       OUT_RING(ring, 0); /* RB_UNKNOWN_8E04 */
699    }
700 }
701 
702 static void
emit_clear_color(struct fd_ringbuffer * ring,enum pipe_format pfmt,union pipe_color_union * color)703 emit_clear_color(struct fd_ringbuffer *ring, enum pipe_format pfmt,
704                  union pipe_color_union *color)
705 {
706    switch (pfmt) {
707    case PIPE_FORMAT_Z24X8_UNORM:
708    case PIPE_FORMAT_Z24_UNORM_S8_UINT:
709    case PIPE_FORMAT_X24S8_UINT: {
710       uint32_t depth_unorm24 = color->f[0] * ((1u << 24) - 1);
711       uint8_t stencil = color->ui[1];
712       color->ui[0] = depth_unorm24 & 0xff;
713       color->ui[1] = (depth_unorm24 >> 8) & 0xff;
714       color->ui[2] = (depth_unorm24 >> 16) & 0xff;
715       color->ui[3] = stencil;
716       break;
717    }
718    default:
719       break;
720    }
721 
722    OUT_PKT4(ring, REG_A6XX_RB_2D_SRC_SOLID_C0, 4);
723    switch (fd6_ifmt(fd6_color_format(pfmt, TILE6_LINEAR))) {
724    case R2D_UNORM8:
725    case R2D_UNORM8_SRGB:
726       /* The r2d ifmt is badly named, it also covers the signed case: */
727       if (util_format_is_snorm(pfmt)) {
728          OUT_RING(ring, float_to_byte_tex(color->f[0]));
729          OUT_RING(ring, float_to_byte_tex(color->f[1]));
730          OUT_RING(ring, float_to_byte_tex(color->f[2]));
731          OUT_RING(ring, float_to_byte_tex(color->f[3]));
732       } else {
733          OUT_RING(ring, float_to_ubyte(color->f[0]));
734          OUT_RING(ring, float_to_ubyte(color->f[1]));
735          OUT_RING(ring, float_to_ubyte(color->f[2]));
736          OUT_RING(ring, float_to_ubyte(color->f[3]));
737       }
738       break;
739    case R2D_FLOAT16:
740       OUT_RING(ring, _mesa_float_to_half(color->f[0]));
741       OUT_RING(ring, _mesa_float_to_half(color->f[1]));
742       OUT_RING(ring, _mesa_float_to_half(color->f[2]));
743       OUT_RING(ring, _mesa_float_to_half(color->f[3]));
744       break;
745    case R2D_FLOAT32:
746    case R2D_INT32:
747    case R2D_INT16:
748    case R2D_INT8:
749    default:
750       OUT_RING(ring, color->ui[0]);
751       OUT_RING(ring, color->ui[1]);
752       OUT_RING(ring, color->ui[2]);
753       OUT_RING(ring, color->ui[3]);
754       break;
755    }
756 }
757 
758 /**
759  * Handle conversion of clear color
760  */
761 static union pipe_color_union
convert_color(enum pipe_format format,union pipe_color_union * pcolor)762 convert_color(enum pipe_format format, union pipe_color_union *pcolor)
763 {
764    union pipe_color_union color = *pcolor;
765 
766    /* For solid-fill blits, the hw isn't going to convert from
767     * linear to srgb for us:
768     */
769    if (util_format_is_srgb(format)) {
770       for (int i = 0; i < 3; i++)
771          color.f[i] = util_format_linear_to_srgb_float(color.f[i]);
772    }
773 
774    if (util_format_is_snorm(format)) {
775       for (int i = 0; i < 3; i++)
776          color.f[i] = CLAMP(color.f[i], -1.0f, 1.0f);
777    }
778 
779    /* Note that float_to_ubyte() already clamps, for the unorm case */
780 
781    return color;
782 }
783 
784 void
fd6_clear_surface(struct fd_context * ctx,struct fd_ringbuffer * ring,struct pipe_surface * psurf,uint32_t width,uint32_t height,union pipe_color_union * color,uint32_t unknown_8c01)785 fd6_clear_surface(struct fd_context *ctx, struct fd_ringbuffer *ring,
786                   struct pipe_surface *psurf, uint32_t width, uint32_t height,
787                   union pipe_color_union *color, uint32_t unknown_8c01)
788 {
789    if (DEBUG_BLIT) {
790       fprintf(stderr, "surface clear:\ndst resource: ");
791       util_dump_resource(stderr, psurf->texture);
792       fprintf(stderr, "\n");
793    }
794 
795    uint32_t nr_samples = fd_resource_nr_samples(psurf->texture);
796    OUT_PKT4(ring, REG_A6XX_GRAS_2D_DST_TL, 2);
797    OUT_RING(ring, A6XX_GRAS_2D_DST_TL_X(0) | A6XX_GRAS_2D_DST_TL_Y(0));
798    OUT_RING(ring, A6XX_GRAS_2D_DST_BR_X(width * nr_samples - 1) |
799                      A6XX_GRAS_2D_DST_BR_Y(height - 1));
800 
801    union pipe_color_union clear_color = convert_color(psurf->format, color);
802 
803    emit_clear_color(ring, psurf->format, &clear_color);
804    emit_blit_setup(ring, psurf->format, false, &clear_color, unknown_8c01);
805 
806    for (unsigned i = psurf->u.tex.first_layer; i <= psurf->u.tex.last_layer;
807         i++) {
808       emit_blit_dst(ring, psurf->texture, psurf->format, psurf->u.tex.level, i);
809 
810       /*
811        * Blit command:
812        */
813       OUT_PKT7(ring, CP_EVENT_WRITE, 1);
814       OUT_RING(ring, 0x3f);
815       OUT_WFI5(ring);
816 
817       OUT_PKT4(ring, REG_A6XX_RB_UNKNOWN_8E04, 1);
818       OUT_RING(ring, ctx->screen->info->a6xx.magic.RB_UNKNOWN_8E04_blit);
819 
820       OUT_PKT7(ring, CP_BLIT, 1);
821       OUT_RING(ring, CP_BLIT_0_OP(BLIT_OP_SCALE));
822 
823       OUT_WFI5(ring);
824 
825       OUT_PKT4(ring, REG_A6XX_RB_UNKNOWN_8E04, 1);
826       OUT_RING(ring, 0); /* RB_UNKNOWN_8E04 */
827    }
828 }
829 
830 void
fd6_resolve_tile(struct fd_batch * batch,struct fd_ringbuffer * ring,uint32_t base,struct pipe_surface * psurf,uint32_t unknown_8c01)831 fd6_resolve_tile(struct fd_batch *batch, struct fd_ringbuffer *ring,
832                  uint32_t base, struct pipe_surface *psurf, uint32_t unknown_8c01)
833 {
834    const struct fd_gmem_stateobj *gmem = batch->gmem_state;
835    uint64_t gmem_base = batch->ctx->screen->gmem_base + base;
836    uint32_t gmem_pitch = gmem->bin_w * batch->framebuffer.samples *
837                          util_format_get_blocksize(psurf->format);
838 
839    OUT_PKT4(ring, REG_A6XX_GRAS_2D_DST_TL, 2);
840    OUT_RING(ring, A6XX_GRAS_2D_DST_TL_X(0) | A6XX_GRAS_2D_DST_TL_Y(0));
841    OUT_RING(ring, A6XX_GRAS_2D_DST_BR_X(psurf->width - 1) |
842                      A6XX_GRAS_2D_DST_BR_Y(psurf->height - 1));
843 
844    OUT_PKT4(ring, REG_A6XX_GRAS_2D_SRC_TL_X, 4);
845    OUT_RING(ring, A6XX_GRAS_2D_SRC_TL_X(0));
846    OUT_RING(ring, A6XX_GRAS_2D_SRC_BR_X(psurf->width - 1));
847    OUT_RING(ring, A6XX_GRAS_2D_SRC_TL_Y(0));
848    OUT_RING(ring, A6XX_GRAS_2D_SRC_BR_Y(psurf->height - 1));
849 
850    /* Enable scissor bit, which will take into account the window scissor
851     * which is set per-tile
852     */
853    emit_blit_setup(ring, psurf->format, true, NULL, unknown_8c01);
854 
855    /* We shouldn't be using GMEM in the layered rendering case: */
856    assert(psurf->u.tex.first_layer == psurf->u.tex.last_layer);
857 
858    emit_blit_dst(ring, psurf->texture, psurf->format, psurf->u.tex.level,
859                  psurf->u.tex.first_layer);
860 
861    enum a6xx_format sfmt = fd6_color_format(psurf->format, TILE6_LINEAR);
862    enum a3xx_msaa_samples samples = fd_msaa_samples(batch->framebuffer.samples);
863 
864    OUT_PKT4(ring, REG_A6XX_SP_PS_2D_SRC_INFO, 10);
865    OUT_RING(ring,
866             A6XX_SP_PS_2D_SRC_INFO_COLOR_FORMAT(sfmt) |
867             A6XX_SP_PS_2D_SRC_INFO_TILE_MODE(TILE6_2) |
868             A6XX_SP_PS_2D_SRC_INFO_SAMPLES(samples) |
869             COND(samples > MSAA_ONE, A6XX_SP_PS_2D_SRC_INFO_SAMPLES_AVERAGE) |
870             COND(util_format_is_srgb(psurf->format), A6XX_SP_PS_2D_SRC_INFO_SRGB) |
871             A6XX_SP_PS_2D_SRC_INFO_UNK20 | A6XX_SP_PS_2D_SRC_INFO_UNK22);
872    OUT_RING(ring, A6XX_SP_PS_2D_SRC_SIZE_WIDTH(psurf->width) |
873                   A6XX_SP_PS_2D_SRC_SIZE_HEIGHT(psurf->height));
874    OUT_RING(ring, gmem_base);       /* SP_PS_2D_SRC_LO */
875    OUT_RING(ring, gmem_base >> 32); /* SP_PS_2D_SRC_HI */
876    OUT_RING(ring, A6XX_SP_PS_2D_SRC_PITCH_PITCH(gmem_pitch));
877    OUT_RING(ring, 0x00000000);
878    OUT_RING(ring, 0x00000000);
879    OUT_RING(ring, 0x00000000);
880    OUT_RING(ring, 0x00000000);
881    OUT_RING(ring, 0x00000000);
882 
883    /* sync GMEM writes with CACHE. */
884    fd6_cache_inv(batch, ring);
885 
886    /* Wait for CACHE_INVALIDATE to land */
887    fd_wfi(batch, ring);
888 
889    OUT_PKT7(ring, CP_BLIT, 1);
890    OUT_RING(ring, CP_BLIT_0_OP(BLIT_OP_SCALE));
891 
892    OUT_WFI5(ring);
893 
894    /* CP_BLIT writes to the CCU, unlike CP_EVENT_WRITE::BLIT which writes to
895     * sysmem, and we generally assume that GMEM renderpasses leave their
896     * results in sysmem, so we need to flush manually here.
897     */
898    fd6_event_write(batch, ring, PC_CCU_FLUSH_COLOR_TS, true);
899    fd_wfi(batch, ring);
900 }
901 
902 static bool
handle_rgba_blit(struct fd_context * ctx,const struct pipe_blit_info * info,bool sample_0)903 handle_rgba_blit(struct fd_context *ctx,
904                  const struct pipe_blit_info *info, bool sample_0) assert_dt
905 {
906    struct fd_batch *batch;
907 
908    debug_assert(!(info->mask & PIPE_MASK_ZS));
909 
910    if (!can_do_blit(info))
911       return false;
912 
913    struct fd_resource *src = fd_resource(info->src.resource);
914    struct fd_resource *dst = fd_resource(info->dst.resource);
915 
916    fd6_validate_format(ctx, src, info->src.format);
917    fd6_validate_format(ctx, dst, info->dst.format);
918 
919    batch = fd_bc_alloc_batch(ctx, true);
920 
921    fd_screen_lock(ctx->screen);
922 
923    fd_batch_resource_read(batch, src);
924    fd_batch_resource_write(batch, dst);
925 
926    fd_screen_unlock(ctx->screen);
927 
928    ASSERTED bool ret = fd_batch_lock_submit(batch);
929    assert(ret);
930 
931    /* Marking the batch as needing flush must come after the batch
932     * dependency tracking (resource_read()/resource_write()), as that
933     * can trigger a flush
934     */
935    fd_batch_needs_flush(batch);
936 
937    fd_batch_update_queries(batch);
938 
939    emit_setup(batch);
940 
941    DBG_BLIT(info, batch);
942 
943    trace_start_blit(&batch->trace, batch->draw, info->src.resource->target,
944                     info->dst.resource->target);
945 
946    if ((info->src.resource->target == PIPE_BUFFER) &&
947        (info->dst.resource->target == PIPE_BUFFER)) {
948       assert(src->layout.tile_mode == TILE6_LINEAR);
949       assert(dst->layout.tile_mode == TILE6_LINEAR);
950       emit_blit_buffer(ctx, batch->draw, info);
951    } else {
952       /* I don't *think* we need to handle blits between buffer <-> !buffer */
953       debug_assert(info->src.resource->target != PIPE_BUFFER);
954       debug_assert(info->dst.resource->target != PIPE_BUFFER);
955       emit_blit_texture(ctx, batch->draw, info, sample_0);
956    }
957 
958    trace_end_blit(&batch->trace, batch->draw);
959 
960    fd6_event_write(batch, batch->draw, PC_CCU_FLUSH_COLOR_TS, true);
961    fd6_event_write(batch, batch->draw, PC_CCU_FLUSH_DEPTH_TS, true);
962    fd6_event_write(batch, batch->draw, CACHE_FLUSH_TS, true);
963    fd_wfi(batch, batch->draw);
964    fd6_cache_inv(batch, batch->draw);
965 
966    fd_batch_unlock_submit(batch);
967 
968    fd_batch_flush(batch);
969    fd_batch_reference(&batch, NULL);
970 
971    /* Acc query state will have been dirtied by our fd_batch_update_queries, so
972     * the ctx->batch may need to turn its queries back on.
973     */
974    ctx->update_active_queries = true;
975 
976    return true;
977 }
978 
979 /**
980  * Re-written z/s blits can still fail for various reasons (for example MSAA).
981  * But we want to do the fallback blit with the re-written pipe_blit_info,
982  * in particular as u_blitter cannot blit stencil.  So handle the fallback
983  * ourself and never "fail".
984  */
985 static bool
do_rewritten_blit(struct fd_context * ctx,const struct pipe_blit_info * info,bool sample_0)986 do_rewritten_blit(struct fd_context *ctx,
987                   const struct pipe_blit_info *info, bool sample_0) assert_dt
988 {
989    bool success = handle_rgba_blit(ctx, info, sample_0);
990    if (!success) {
991       if (sample_0 && !util_format_is_pure_integer(info->src.format))
992          mesa_logw("sample averaging on fallback blit when we shouldn't.");
993       success = fd_blitter_blit(ctx, info);
994    }
995    debug_assert(success); /* fallback should never fail! */
996    return success;
997 }
998 
999 /**
1000  * Handle depth/stencil blits either via u_blitter and/or re-writing the
1001  * blit into an equivilant format that we can handle
1002  */
1003 static bool
handle_zs_blit(struct fd_context * ctx,const struct pipe_blit_info * info)1004 handle_zs_blit(struct fd_context *ctx,
1005                const struct pipe_blit_info *info) assert_dt
1006 {
1007    struct pipe_blit_info blit = *info;
1008 
1009    if (DEBUG_BLIT) {
1010       fprintf(stderr, "---- handle_zs_blit: ");
1011       dump_blit_info(info);
1012    }
1013 
1014    if (info->src.format != info->dst.format)
1015       return false;
1016 
1017    struct fd_resource *src = fd_resource(info->src.resource);
1018    struct fd_resource *dst = fd_resource(info->dst.resource);
1019 
1020    switch (info->dst.format) {
1021    case PIPE_FORMAT_S8_UINT:
1022       debug_assert(info->mask == PIPE_MASK_S);
1023       blit.mask = PIPE_MASK_R;
1024       blit.src.format = PIPE_FORMAT_R8_UINT;
1025       blit.dst.format = PIPE_FORMAT_R8_UINT;
1026       return do_rewritten_blit(ctx, &blit, true);
1027 
1028    case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT:
1029       if (info->mask & PIPE_MASK_Z) {
1030          blit.mask = PIPE_MASK_R;
1031          blit.src.format = PIPE_FORMAT_R32_FLOAT;
1032          blit.dst.format = PIPE_FORMAT_R32_FLOAT;
1033          do_rewritten_blit(ctx, &blit, true);
1034       }
1035 
1036       if (info->mask & PIPE_MASK_S) {
1037          blit.mask = PIPE_MASK_R;
1038          blit.src.format = PIPE_FORMAT_R8_UINT;
1039          blit.dst.format = PIPE_FORMAT_R8_UINT;
1040          blit.src.resource = &src->stencil->b.b;
1041          blit.dst.resource = &dst->stencil->b.b;
1042          do_rewritten_blit(ctx, &blit, true);
1043       }
1044 
1045       return true;
1046 
1047    case PIPE_FORMAT_Z16_UNORM:
1048       blit.mask = PIPE_MASK_R;
1049       blit.src.format = PIPE_FORMAT_R16_UNORM;
1050       blit.dst.format = PIPE_FORMAT_R16_UNORM;
1051       return do_rewritten_blit(ctx, &blit, true);
1052 
1053    case PIPE_FORMAT_Z32_UNORM:
1054    case PIPE_FORMAT_Z32_FLOAT:
1055       debug_assert(info->mask == PIPE_MASK_Z);
1056       blit.mask = PIPE_MASK_R;
1057       blit.src.format = PIPE_FORMAT_R32_UINT;
1058       blit.dst.format = PIPE_FORMAT_R32_UINT;
1059       return do_rewritten_blit(ctx, &blit, true);
1060 
1061    case PIPE_FORMAT_Z24X8_UNORM:
1062    case PIPE_FORMAT_Z24_UNORM_S8_UINT:
1063       blit.mask = 0;
1064       if (info->mask & PIPE_MASK_Z)
1065          blit.mask |= PIPE_MASK_R | PIPE_MASK_G | PIPE_MASK_B;
1066       if (info->mask & PIPE_MASK_S)
1067          blit.mask |= PIPE_MASK_A;
1068       blit.src.format = PIPE_FORMAT_Z24_UNORM_S8_UINT_AS_R8G8B8A8;
1069       blit.dst.format = PIPE_FORMAT_Z24_UNORM_S8_UINT_AS_R8G8B8A8;
1070       /* non-UBWC Z24_UNORM_S8_UINT_AS_R8G8B8A8 is broken on a630, fall back to
1071        * 8888_unorm.
1072        */
1073       if (!ctx->screen->info->a6xx.has_z24uint_s8uint) {
1074          if (!src->layout.ubwc && !dst->layout.ubwc) {
1075             blit.src.format = PIPE_FORMAT_RGBA8888_UINT;
1076             blit.dst.format = PIPE_FORMAT_RGBA8888_UINT;
1077          } else {
1078             if (!src->layout.ubwc)
1079                blit.src.format = PIPE_FORMAT_RGBA8888_UNORM;
1080             if (!dst->layout.ubwc)
1081                blit.dst.format = PIPE_FORMAT_RGBA8888_UNORM;
1082          }
1083       }
1084       if (info->src.resource->nr_samples > 1 && blit.src.format != PIPE_FORMAT_RGBA8888_UINT)
1085          mesa_logw("sample averaging on fallback z24s8 blit when we shouldn't.");
1086       return fd_blitter_blit(ctx, &blit);
1087 
1088    default:
1089       return false;
1090    }
1091 }
1092 
1093 static bool
handle_compressed_blit(struct fd_context * ctx,const struct pipe_blit_info * info)1094 handle_compressed_blit(struct fd_context *ctx,
1095                        const struct pipe_blit_info *info) assert_dt
1096 {
1097    struct pipe_blit_info blit = *info;
1098 
1099    if (DEBUG_BLIT) {
1100       fprintf(stderr, "---- handle_compressed_blit: ");
1101       dump_blit_info(info);
1102    }
1103 
1104    if (info->src.format != info->dst.format)
1105       return fd_blitter_blit(ctx, info);
1106 
1107    if (util_format_get_blocksize(info->src.format) == 8) {
1108       blit.src.format = blit.dst.format = PIPE_FORMAT_R16G16B16A16_UINT;
1109    } else {
1110       debug_assert(util_format_get_blocksize(info->src.format) == 16);
1111       blit.src.format = blit.dst.format = PIPE_FORMAT_R32G32B32A32_UINT;
1112    }
1113 
1114    int bw = util_format_get_blockwidth(info->src.format);
1115    int bh = util_format_get_blockheight(info->src.format);
1116 
1117    /* NOTE: x/y *must* be aligned to block boundary (ie. in
1118     * glCompressedTexSubImage2D()) but width/height may not
1119     * be:
1120     */
1121 
1122    debug_assert((blit.src.box.x % bw) == 0);
1123    debug_assert((blit.src.box.y % bh) == 0);
1124 
1125    blit.src.box.x /= bw;
1126    blit.src.box.y /= bh;
1127    blit.src.box.width = DIV_ROUND_UP(blit.src.box.width, bw);
1128    blit.src.box.height = DIV_ROUND_UP(blit.src.box.height, bh);
1129 
1130    debug_assert((blit.dst.box.x % bw) == 0);
1131    debug_assert((blit.dst.box.y % bh) == 0);
1132 
1133    blit.dst.box.x /= bw;
1134    blit.dst.box.y /= bh;
1135    blit.dst.box.width = DIV_ROUND_UP(blit.dst.box.width, bw);
1136    blit.dst.box.height = DIV_ROUND_UP(blit.dst.box.height, bh);
1137 
1138    return do_rewritten_blit(ctx, &blit, false);
1139 }
1140 
1141 /**
1142  * For SNORM formats, copy them as the equivalent UNORM format.  If we treat
1143  * them as snorm then the 0x80 (-1.0 snorm8) value will get clamped to 0x81
1144  * (also -1.0), when we're supposed to be memcpying the bits. See
1145  * https://gitlab.khronos.org/Tracker/vk-gl-cts/-/issues/2917 for discussion.
1146  */
1147 static bool
handle_snorm_copy_blit(struct fd_context * ctx,const struct pipe_blit_info * info)1148 handle_snorm_copy_blit(struct fd_context *ctx,
1149                        const struct pipe_blit_info *info)
1150    assert_dt
1151 {
1152    /* If we're interpolating the pixels, we can't just treat the values as unorm. */
1153    if (info->filter == PIPE_TEX_FILTER_LINEAR)
1154       return false;
1155 
1156    struct pipe_blit_info blit = *info;
1157 
1158    blit.src.format = blit.dst.format = util_format_snorm_to_unorm(info->src.format);
1159 
1160    return do_rewritten_blit(ctx, &blit, false);
1161 }
1162 
1163 static bool
fd6_blit(struct fd_context * ctx,const struct pipe_blit_info * info)1164 fd6_blit(struct fd_context *ctx, const struct pipe_blit_info *info) assert_dt
1165 {
1166    if (info->mask & PIPE_MASK_ZS)
1167       return handle_zs_blit(ctx, info);
1168 
1169    if (util_format_is_compressed(info->src.format) ||
1170        util_format_is_compressed(info->dst.format))
1171       return handle_compressed_blit(ctx, info);
1172 
1173    if ((info->src.format == info->dst.format) &&
1174        util_format_is_snorm(info->src.format))
1175       return handle_snorm_copy_blit(ctx, info);
1176 
1177    return handle_rgba_blit(ctx, info, false);
1178 }
1179 
1180 void
fd6_blitter_init(struct pipe_context * pctx)1181 fd6_blitter_init(struct pipe_context *pctx) disable_thread_safety_analysis
1182 {
1183    struct fd_context *ctx = fd_context(pctx);
1184 
1185    ctx->clear_ubwc = fd6_clear_ubwc;
1186    ctx->validate_format = fd6_validate_format;
1187 
1188    if (FD_DBG(NOBLIT))
1189       return;
1190 
1191    ctx->blit = fd6_blit;
1192 }
1193 
1194 unsigned
fd6_tile_mode(const struct pipe_resource * tmpl)1195 fd6_tile_mode(const struct pipe_resource *tmpl)
1196 {
1197    /* if the mipmap level 0 is still too small to be tiled, then don't
1198     * bother pretending:
1199     */
1200    if (fd_resource_level_linear(tmpl, 0))
1201       return TILE6_LINEAR;
1202 
1203    /* basically just has to be a format we can blit, so uploads/downloads
1204     * via linear staging buffer works:
1205     */
1206    if (ok_format(tmpl->format))
1207       return TILE6_3;
1208 
1209    return TILE6_LINEAR;
1210 }
1211