1 /*
2 * Copyright (C) 2017 Rob Clark <robclark@freedesktop.org>
3 * Copyright © 2018 Google, Inc.
4 *
5 * Permission is hereby granted, free of charge, to any person obtaining a
6 * copy of this software and associated documentation files (the "Software"),
7 * to deal in the Software without restriction, including without limitation
8 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
9 * and/or sell copies of the Software, and to permit persons to whom the
10 * Software is furnished to do so, subject to the following conditions:
11 *
12 * The above copyright notice and this permission notice (including the next
13 * paragraph) shall be included in all copies or substantial portions of the
14 * Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22 * SOFTWARE.
23 *
24 * Authors:
25 * Rob Clark <robclark@freedesktop.org>
26 */
27
28 #include "util/format_srgb.h"
29 #include "util/half_float.h"
30 #include "util/u_dump.h"
31 #include "util/u_log.h"
32
33 #include "freedreno_blitter.h"
34 #include "freedreno_fence.h"
35 #include "freedreno_resource.h"
36 #include "freedreno_tracepoints.h"
37
38 #include "fd6_blitter.h"
39 #include "fd6_emit.h"
40 #include "fd6_format.h"
41 #include "fd6_resource.h"
42
43 static inline enum a6xx_2d_ifmt
fd6_ifmt(enum a6xx_format fmt)44 fd6_ifmt(enum a6xx_format fmt)
45 {
46 switch (fmt) {
47 case FMT6_A8_UNORM:
48 case FMT6_8_UNORM:
49 case FMT6_8_SNORM:
50 case FMT6_8_8_UNORM:
51 case FMT6_8_8_SNORM:
52 case FMT6_8_8_8_8_UNORM:
53 case FMT6_8_8_8_X8_UNORM:
54 case FMT6_8_8_8_8_SNORM:
55 case FMT6_4_4_4_4_UNORM:
56 case FMT6_5_5_5_1_UNORM:
57 case FMT6_5_6_5_UNORM:
58 return R2D_UNORM8;
59
60 case FMT6_32_UINT:
61 case FMT6_32_SINT:
62 case FMT6_32_32_UINT:
63 case FMT6_32_32_SINT:
64 case FMT6_32_32_32_32_UINT:
65 case FMT6_32_32_32_32_SINT:
66 return R2D_INT32;
67
68 case FMT6_16_UINT:
69 case FMT6_16_SINT:
70 case FMT6_16_16_UINT:
71 case FMT6_16_16_SINT:
72 case FMT6_16_16_16_16_UINT:
73 case FMT6_16_16_16_16_SINT:
74 case FMT6_10_10_10_2_UINT:
75 return R2D_INT16;
76
77 case FMT6_8_UINT:
78 case FMT6_8_SINT:
79 case FMT6_8_8_UINT:
80 case FMT6_8_8_SINT:
81 case FMT6_8_8_8_8_UINT:
82 case FMT6_8_8_8_8_SINT:
83 case FMT6_Z24_UNORM_S8_UINT:
84 case FMT6_Z24_UNORM_S8_UINT_AS_R8G8B8A8:
85 return R2D_INT8;
86
87 case FMT6_16_UNORM:
88 case FMT6_16_SNORM:
89 case FMT6_16_16_UNORM:
90 case FMT6_16_16_SNORM:
91 case FMT6_16_16_16_16_UNORM:
92 case FMT6_16_16_16_16_SNORM:
93 case FMT6_32_FLOAT:
94 case FMT6_32_32_FLOAT:
95 case FMT6_32_32_32_32_FLOAT:
96 return R2D_FLOAT32;
97
98 case FMT6_16_FLOAT:
99 case FMT6_16_16_FLOAT:
100 case FMT6_16_16_16_16_FLOAT:
101 case FMT6_11_11_10_FLOAT:
102 case FMT6_10_10_10_2_UNORM_DEST:
103 return R2D_FLOAT16;
104
105 default:
106 unreachable("bad format");
107 return 0;
108 }
109 }
110
111 /* Make sure none of the requested dimensions extend beyond the size of the
112 * resource. Not entirely sure why this happens, but sometimes it does, and
113 * w/ 2d blt doesn't have wrap modes like a sampler, so force those cases
114 * back to u_blitter
115 */
116 static bool
ok_dims(const struct pipe_resource * r,const struct pipe_box * b,int lvl)117 ok_dims(const struct pipe_resource *r, const struct pipe_box *b, int lvl)
118 {
119 int last_layer =
120 r->target == PIPE_TEXTURE_3D ? u_minify(r->depth0, lvl) : r->array_size;
121
122 return (b->x >= 0) && (b->x + b->width <= u_minify(r->width0, lvl)) &&
123 (b->y >= 0) && (b->y + b->height <= u_minify(r->height0, lvl)) &&
124 (b->z >= 0) && (b->z + b->depth <= last_layer);
125 }
126
127 static bool
ok_format(enum pipe_format pfmt)128 ok_format(enum pipe_format pfmt)
129 {
130 enum a6xx_format fmt = fd6_color_format(pfmt, TILE6_LINEAR);
131
132 if (util_format_is_compressed(pfmt))
133 return true;
134
135 switch (pfmt) {
136 case PIPE_FORMAT_Z24_UNORM_S8_UINT:
137 case PIPE_FORMAT_Z24X8_UNORM:
138 case PIPE_FORMAT_Z16_UNORM:
139 case PIPE_FORMAT_Z32_UNORM:
140 case PIPE_FORMAT_Z32_FLOAT:
141 case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT:
142 case PIPE_FORMAT_S8_UINT:
143 return true;
144 default:
145 break;
146 }
147
148 if (fmt == FMT6_NONE)
149 return false;
150
151 return true;
152 }
153
/* Compile-time switches for stderr debug output; both are off by default.
 * DEBUG_BLIT gates the per-blit dumps, DEBUG_BLIT_FALLBACK gates the
 * message printed by fail_if() below.
 */
#define DEBUG_BLIT          0
#define DEBUG_BLIT_FALLBACK 0

/* Return 'false' from the enclosing function when 'cond' is true.  When
 * DEBUG_BLIT_FALLBACK is enabled the failed condition and the blit are
 * printed first, so a variable named 'info' must be in scope wherever this
 * is expanded.
 */
#define fail_if(cond)                                                          \
   do {                                                                        \
      if (!(cond))                                                             \
         break;                                                                \
      if (DEBUG_BLIT_FALLBACK) {                                               \
         fprintf(stderr, "falling back: %s for blit:\n", #cond);               \
         dump_blit_info(info);                                                 \
      }                                                                        \
      return false;                                                            \
   } while (0)
167
/* Convenience wrapper: is UBWC enabled for this level of a pipe_resource? */
static bool
is_ubwc(struct pipe_resource *prsc, unsigned level)
{
   struct fd_resource *rsc = fd_resource(prsc);

   return fd_resource_ubwc_enabled(rsc, level);
}
173
174 static void
dump_blit_info(const struct pipe_blit_info * info)175 dump_blit_info(const struct pipe_blit_info *info)
176 {
177 util_dump_blit_info(stderr, info);
178 fprintf(stderr, "\ndst resource: ");
179 util_dump_resource(stderr, info->dst.resource);
180 if (is_ubwc(info->dst.resource, info->dst.level))
181 fprintf(stderr, " (ubwc)");
182 fprintf(stderr, "\nsrc resource: ");
183 util_dump_resource(stderr, info->src.resource);
184 if (is_ubwc(info->src.resource, info->src.level))
185 fprintf(stderr, " (ubwc)");
186 fprintf(stderr, "\n");
187 }
188
189 static bool
can_do_blit(const struct pipe_blit_info * info)190 can_do_blit(const struct pipe_blit_info *info)
191 {
192 /* I think we can do scaling, but not in z dimension since that would
193 * require blending..
194 */
195 fail_if(info->dst.box.depth != info->src.box.depth);
196
197 /* Fail if unsupported format: */
198 fail_if(!ok_format(info->src.format));
199 fail_if(!ok_format(info->dst.format));
200
201 debug_assert(!util_format_is_compressed(info->src.format));
202 debug_assert(!util_format_is_compressed(info->dst.format));
203
204 fail_if(!ok_dims(info->src.resource, &info->src.box, info->src.level));
205
206 fail_if(!ok_dims(info->dst.resource, &info->dst.box, info->dst.level));
207
208 debug_assert(info->dst.box.width >= 0);
209 debug_assert(info->dst.box.height >= 0);
210 debug_assert(info->dst.box.depth >= 0);
211
212 fail_if(info->dst.resource->nr_samples > 1);
213
214 fail_if(info->window_rectangle_include);
215
216 const struct util_format_description *src_desc =
217 util_format_description(info->src.format);
218 const struct util_format_description *dst_desc =
219 util_format_description(info->dst.format);
220 const int common_channels =
221 MIN2(src_desc->nr_channels, dst_desc->nr_channels);
222
223 if (info->mask & PIPE_MASK_RGBA) {
224 for (int i = 0; i < common_channels; i++) {
225 fail_if(memcmp(&src_desc->channel[i], &dst_desc->channel[i],
226 sizeof(src_desc->channel[0])));
227 }
228 }
229
230 fail_if(info->alpha_blend);
231
232 return true;
233 }
234
235 static void
emit_setup(struct fd_batch * batch)236 emit_setup(struct fd_batch *batch)
237 {
238 struct fd_ringbuffer *ring = batch->draw;
239 struct fd_screen *screen = batch->ctx->screen;
240
241 fd6_event_write(batch, ring, PC_CCU_FLUSH_COLOR_TS, true);
242 fd6_event_write(batch, ring, PC_CCU_FLUSH_DEPTH_TS, true);
243 fd6_event_write(batch, ring, PC_CCU_INVALIDATE_COLOR, false);
244 fd6_event_write(batch, ring, PC_CCU_INVALIDATE_DEPTH, false);
245
246 /* normal BLIT_OP_SCALE operation needs bypass RB_CCU_CNTL */
247 OUT_WFI5(ring);
248 OUT_PKT4(ring, REG_A6XX_RB_CCU_CNTL, 1);
249 OUT_RING(ring, A6XX_RB_CCU_CNTL_COLOR_OFFSET(screen->ccu_offset_bypass));
250 }
251
252 static void
emit_blit_setup(struct fd_ringbuffer * ring,enum pipe_format pfmt,bool scissor_enable,union pipe_color_union * color,uint32_t unknown_8c01)253 emit_blit_setup(struct fd_ringbuffer *ring, enum pipe_format pfmt,
254 bool scissor_enable, union pipe_color_union *color,
255 uint32_t unknown_8c01)
256 {
257 enum a6xx_format fmt = fd6_color_format(pfmt, TILE6_LINEAR);
258 bool is_srgb = util_format_is_srgb(pfmt);
259 enum a6xx_2d_ifmt ifmt = fd6_ifmt(fmt);
260
261 if (is_srgb) {
262 assert(ifmt == R2D_UNORM8);
263 ifmt = R2D_UNORM8_SRGB;
264 }
265
266 uint32_t blit_cntl = A6XX_RB_2D_BLIT_CNTL_MASK(0xf) |
267 A6XX_RB_2D_BLIT_CNTL_COLOR_FORMAT(fmt) |
268 A6XX_RB_2D_BLIT_CNTL_IFMT(ifmt) |
269 COND(color, A6XX_RB_2D_BLIT_CNTL_SOLID_COLOR) |
270 COND(scissor_enable, A6XX_RB_2D_BLIT_CNTL_SCISSOR);
271
272 OUT_PKT4(ring, REG_A6XX_RB_2D_BLIT_CNTL, 1);
273 OUT_RING(ring, blit_cntl);
274
275 OUT_PKT4(ring, REG_A6XX_GRAS_2D_BLIT_CNTL, 1);
276 OUT_RING(ring, blit_cntl);
277
278 if (fmt == FMT6_10_10_10_2_UNORM_DEST)
279 fmt = FMT6_16_16_16_16_FLOAT;
280
281 /* This register is probably badly named... it seems that it's
282 * controlling the internal/accumulator format or something like
283 * that. It's certainly not tied to only the src format.
284 */
285 OUT_PKT4(ring, REG_A6XX_SP_2D_DST_FORMAT, 1);
286 OUT_RING(
287 ring,
288 A6XX_SP_2D_DST_FORMAT_COLOR_FORMAT(fmt) |
289 COND(util_format_is_pure_sint(pfmt), A6XX_SP_2D_DST_FORMAT_SINT) |
290 COND(util_format_is_pure_uint(pfmt), A6XX_SP_2D_DST_FORMAT_UINT) |
291 COND(is_srgb, A6XX_SP_2D_DST_FORMAT_SRGB) |
292 A6XX_SP_2D_DST_FORMAT_MASK(0xf));
293
294 OUT_PKT4(ring, REG_A6XX_RB_2D_UNKNOWN_8C01, 1);
295 OUT_RING(ring, unknown_8c01);
296 }
297
298 /* buffers need to be handled specially since x/width can exceed the bounds
299 * supported by hw.. if necessary decompose into (potentially) two 2D blits
300 */
301 static void
emit_blit_buffer(struct fd_context * ctx,struct fd_ringbuffer * ring,const struct pipe_blit_info * info)302 emit_blit_buffer(struct fd_context *ctx, struct fd_ringbuffer *ring,
303 const struct pipe_blit_info *info)
304 {
305 const struct pipe_box *sbox = &info->src.box;
306 const struct pipe_box *dbox = &info->dst.box;
307 struct fd_resource *src, *dst;
308 unsigned sshift, dshift;
309
310 if (DEBUG_BLIT) {
311 fprintf(stderr, "buffer blit: ");
312 dump_blit_info(info);
313 }
314
315 src = fd_resource(info->src.resource);
316 dst = fd_resource(info->dst.resource);
317
318 debug_assert(src->layout.cpp == 1);
319 debug_assert(dst->layout.cpp == 1);
320 debug_assert(info->src.resource->format == info->dst.resource->format);
321 debug_assert((sbox->y == 0) && (sbox->height == 1));
322 debug_assert((dbox->y == 0) && (dbox->height == 1));
323 debug_assert((sbox->z == 0) && (sbox->depth == 1));
324 debug_assert((dbox->z == 0) && (dbox->depth == 1));
325 debug_assert(sbox->width == dbox->width);
326 debug_assert(info->src.level == 0);
327 debug_assert(info->dst.level == 0);
328
329 /*
330 * Buffers can have dimensions bigger than max width, remap into
331 * multiple 1d blits to fit within max dimension
332 *
333 * Note that blob uses .ARRAY_PITCH=128 for blitting buffers, which
334 * seems to prevent overfetch related faults. Not quite sure what
335 * the deal is there.
336 *
337 * Low 6 bits of SRC/DST addresses need to be zero (ie. address
338 * aligned to 64) so we need to shift src/dst x1/x2 to make up the
339 * difference. On top of already splitting up the blit so width
340 * isn't > 16k.
341 *
342 * We perhaps could do a bit better, if src and dst are aligned but
343 * in the worst case this means we have to split the copy up into
344 * 16k (0x4000) minus 64 (0x40).
345 */
346
347 sshift = sbox->x & 0x3f;
348 dshift = dbox->x & 0x3f;
349
350 emit_blit_setup(ring, PIPE_FORMAT_R8_UNORM, false, NULL, 0);
351
352 for (unsigned off = 0; off < sbox->width; off += (0x4000 - 0x40)) {
353 unsigned soff, doff, w, p;
354
355 soff = (sbox->x + off) & ~0x3f;
356 doff = (dbox->x + off) & ~0x3f;
357
358 w = MIN2(sbox->width - off, (0x4000 - 0x40));
359 p = align(w, 64);
360
361 debug_assert((soff + w) <= fd_bo_size(src->bo));
362 debug_assert((doff + w) <= fd_bo_size(dst->bo));
363
364 /*
365 * Emit source:
366 */
367 OUT_PKT4(ring, REG_A6XX_SP_PS_2D_SRC_INFO, 10);
368 OUT_RING(ring, A6XX_SP_PS_2D_SRC_INFO_COLOR_FORMAT(FMT6_8_UNORM) |
369 A6XX_SP_PS_2D_SRC_INFO_TILE_MODE(TILE6_LINEAR) |
370 A6XX_SP_PS_2D_SRC_INFO_COLOR_SWAP(WZYX) | 0x500000);
371 OUT_RING(ring,
372 A6XX_SP_PS_2D_SRC_SIZE_WIDTH(sshift + w) |
373 A6XX_SP_PS_2D_SRC_SIZE_HEIGHT(1)); /* SP_PS_2D_SRC_SIZE */
374 OUT_RELOC(ring, src->bo, soff, 0, 0); /* SP_PS_2D_SRC_LO/HI */
375 OUT_RING(ring, A6XX_SP_PS_2D_SRC_PITCH_PITCH(p));
376
377 OUT_RING(ring, 0x00000000);
378 OUT_RING(ring, 0x00000000);
379 OUT_RING(ring, 0x00000000);
380 OUT_RING(ring, 0x00000000);
381 OUT_RING(ring, 0x00000000);
382
383 /*
384 * Emit destination:
385 */
386 OUT_PKT4(ring, REG_A6XX_RB_2D_DST_INFO, 9);
387 OUT_RING(ring, A6XX_RB_2D_DST_INFO_COLOR_FORMAT(FMT6_8_UNORM) |
388 A6XX_RB_2D_DST_INFO_TILE_MODE(TILE6_LINEAR) |
389 A6XX_RB_2D_DST_INFO_COLOR_SWAP(WZYX));
390 OUT_RELOC(ring, dst->bo, doff, 0, 0); /* RB_2D_DST_LO/HI */
391 OUT_RING(ring, A6XX_RB_2D_DST_PITCH(p));
392 OUT_RING(ring, 0x00000000);
393 OUT_RING(ring, 0x00000000);
394 OUT_RING(ring, 0x00000000);
395 OUT_RING(ring, 0x00000000);
396 OUT_RING(ring, 0x00000000);
397
398 /*
399 * Blit command:
400 */
401 OUT_PKT4(ring, REG_A6XX_GRAS_2D_SRC_TL_X, 4);
402 OUT_RING(ring, A6XX_GRAS_2D_SRC_TL_X(sshift));
403 OUT_RING(ring, A6XX_GRAS_2D_SRC_BR_X(sshift + w - 1));
404 OUT_RING(ring, A6XX_GRAS_2D_SRC_TL_Y(0));
405 OUT_RING(ring, A6XX_GRAS_2D_SRC_BR_Y(0));
406
407 OUT_PKT4(ring, REG_A6XX_GRAS_2D_DST_TL, 2);
408 OUT_RING(ring, A6XX_GRAS_2D_DST_TL_X(dshift) | A6XX_GRAS_2D_DST_TL_Y(0));
409 OUT_RING(ring, A6XX_GRAS_2D_DST_BR_X(dshift + w - 1) |
410 A6XX_GRAS_2D_DST_BR_Y(0));
411
412 OUT_PKT7(ring, CP_EVENT_WRITE, 1);
413 OUT_RING(ring, 0x3f);
414 OUT_WFI5(ring);
415
416 OUT_PKT4(ring, REG_A6XX_RB_UNKNOWN_8E04, 1);
417 OUT_RING(ring, ctx->screen->info->a6xx.magic.RB_UNKNOWN_8E04_blit);
418
419 OUT_PKT7(ring, CP_BLIT, 1);
420 OUT_RING(ring, CP_BLIT_0_OP(BLIT_OP_SCALE));
421
422 OUT_WFI5(ring);
423
424 OUT_PKT4(ring, REG_A6XX_RB_UNKNOWN_8E04, 1);
425 OUT_RING(ring, 0); /* RB_UNKNOWN_8E04 */
426 }
427 }
428
429 static void
fd6_clear_ubwc(struct fd_batch * batch,struct fd_resource * rsc)430 fd6_clear_ubwc(struct fd_batch *batch, struct fd_resource *rsc) assert_dt
431 {
432 struct fd_ringbuffer *ring = fd_batch_get_prologue(batch);
433 union pipe_color_union color = {};
434
435 emit_blit_setup(ring, PIPE_FORMAT_R8_UNORM, false, &color, 0);
436
437 OUT_PKT4(ring, REG_A6XX_SP_PS_2D_SRC_INFO, 13);
438 OUT_RING(ring, 0x00000000);
439 OUT_RING(ring, 0x00000000);
440 OUT_RING(ring, 0x00000000);
441 OUT_RING(ring, 0x00000000);
442 OUT_RING(ring, 0x00000000);
443 OUT_RING(ring, 0x00000000);
444 OUT_RING(ring, 0x00000000);
445 OUT_RING(ring, 0x00000000);
446 OUT_RING(ring, 0x00000000);
447 OUT_RING(ring, 0x00000000);
448 OUT_RING(ring, 0x00000000);
449 OUT_RING(ring, 0x00000000);
450 OUT_RING(ring, 0x00000000);
451
452 OUT_PKT4(ring, REG_A6XX_RB_2D_SRC_SOLID_C0, 4);
453 OUT_RING(ring, 0x00000000);
454 OUT_RING(ring, 0x00000000);
455 OUT_RING(ring, 0x00000000);
456 OUT_RING(ring, 0x00000000);
457
458 OUT_PKT4(ring, REG_A6XX_GRAS_2D_SRC_TL_X, 4);
459 OUT_RING(ring, A6XX_GRAS_2D_SRC_TL_X(0));
460 OUT_RING(ring, A6XX_GRAS_2D_SRC_BR_X(0));
461 OUT_RING(ring, A6XX_GRAS_2D_SRC_TL_Y(0));
462 OUT_RING(ring, A6XX_GRAS_2D_SRC_BR_Y(0));
463
464 unsigned size = rsc->layout.slices[0].offset;
465 unsigned offset = 0;
466
467 /* We could be more clever here and realize that we could use a
468 * larger width if the size is aligned to something more than a
469 * single page.. or even use a format larger than r8 in those
470 * cases. But for normal sized textures and even up to 16k x 16k
471 * at <= 4byte/pixel, we'll only go thru the loop once
472 */
473 const unsigned w = 0x1000;
474
475 /* ubwc size should always be page aligned: */
476 assert((size % w) == 0);
477
478 while (size > 0) {
479 const unsigned h = MIN2(0x4000, size / w);
480 /* width is already aligned to a suitable pitch: */
481 const unsigned p = w;
482
483 /*
484 * Emit destination:
485 */
486 OUT_PKT4(ring, REG_A6XX_RB_2D_DST_INFO, 9);
487 OUT_RING(ring, A6XX_RB_2D_DST_INFO_COLOR_FORMAT(FMT6_8_UNORM) |
488 A6XX_RB_2D_DST_INFO_TILE_MODE(TILE6_LINEAR) |
489 A6XX_RB_2D_DST_INFO_COLOR_SWAP(WZYX));
490 OUT_RELOC(ring, rsc->bo, offset, 0, 0); /* RB_2D_DST_LO/HI */
491 OUT_RING(ring, A6XX_RB_2D_DST_PITCH(p));
492 OUT_RING(ring, 0x00000000);
493 OUT_RING(ring, 0x00000000);
494 OUT_RING(ring, 0x00000000);
495 OUT_RING(ring, 0x00000000);
496 OUT_RING(ring, 0x00000000);
497
498 /*
499 * Blit command:
500 */
501
502 OUT_PKT4(ring, REG_A6XX_GRAS_2D_DST_TL, 2);
503 OUT_RING(ring, A6XX_GRAS_2D_DST_TL_X(0) | A6XX_GRAS_2D_DST_TL_Y(0));
504 OUT_RING(ring,
505 A6XX_GRAS_2D_DST_BR_X(w - 1) | A6XX_GRAS_2D_DST_BR_Y(h - 1));
506
507 OUT_PKT7(ring, CP_EVENT_WRITE, 1);
508 OUT_RING(ring, 0x3f);
509 OUT_WFI5(ring);
510
511 OUT_PKT4(ring, REG_A6XX_RB_UNKNOWN_8E04, 1);
512 OUT_RING(ring, batch->ctx->screen->info->a6xx.magic.RB_UNKNOWN_8E04_blit);
513
514 OUT_PKT7(ring, CP_BLIT, 1);
515 OUT_RING(ring, CP_BLIT_0_OP(BLIT_OP_SCALE));
516
517 OUT_WFI5(ring);
518
519 OUT_PKT4(ring, REG_A6XX_RB_UNKNOWN_8E04, 1);
520 OUT_RING(ring, 0); /* RB_UNKNOWN_8E04 */
521
522 offset += w * h;
523 size -= w * h;
524 }
525
526 fd6_event_write(batch, ring, PC_CCU_FLUSH_COLOR_TS, true);
527 fd6_event_write(batch, ring, PC_CCU_FLUSH_DEPTH_TS, true);
528 fd6_event_write(batch, ring, CACHE_FLUSH_TS, true);
529 fd_wfi(batch, ring);
530 fd6_cache_inv(batch, ring);
531 }
532
533 static void
emit_blit_dst(struct fd_ringbuffer * ring,struct pipe_resource * prsc,enum pipe_format pfmt,unsigned level,unsigned layer)534 emit_blit_dst(struct fd_ringbuffer *ring, struct pipe_resource *prsc,
535 enum pipe_format pfmt, unsigned level, unsigned layer)
536 {
537 struct fd_resource *dst = fd_resource(prsc);
538 enum a6xx_format fmt = fd6_color_format(pfmt, dst->layout.tile_mode);
539 enum a6xx_tile_mode tile = fd_resource_tile_mode(prsc, level);
540 enum a3xx_color_swap swap = fd6_color_swap(pfmt, dst->layout.tile_mode);
541 uint32_t pitch = fd_resource_pitch(dst, level);
542 bool ubwc_enabled = fd_resource_ubwc_enabled(dst, level);
543 unsigned off = fd_resource_offset(dst, level, layer);
544
545 if (fmt == FMT6_Z24_UNORM_S8_UINT)
546 fmt = FMT6_Z24_UNORM_S8_UINT_AS_R8G8B8A8;
547
548 OUT_PKT4(ring, REG_A6XX_RB_2D_DST_INFO, 9);
549 OUT_RING(ring, A6XX_RB_2D_DST_INFO_COLOR_FORMAT(fmt) |
550 A6XX_RB_2D_DST_INFO_TILE_MODE(tile) |
551 A6XX_RB_2D_DST_INFO_COLOR_SWAP(swap) |
552 COND(util_format_is_srgb(pfmt), A6XX_RB_2D_DST_INFO_SRGB) |
553 COND(ubwc_enabled, A6XX_RB_2D_DST_INFO_FLAGS));
554 OUT_RELOC(ring, dst->bo, off, 0, 0); /* RB_2D_DST_LO/HI */
555 OUT_RING(ring, A6XX_RB_2D_DST_PITCH(pitch));
556 OUT_RING(ring, 0x00000000);
557 OUT_RING(ring, 0x00000000);
558 OUT_RING(ring, 0x00000000);
559 OUT_RING(ring, 0x00000000);
560 OUT_RING(ring, 0x00000000);
561
562 if (ubwc_enabled) {
563 OUT_PKT4(ring, REG_A6XX_RB_2D_DST_FLAGS, 6);
564 fd6_emit_flag_reference(ring, dst, level, layer);
565 OUT_RING(ring, 0x00000000);
566 OUT_RING(ring, 0x00000000);
567 OUT_RING(ring, 0x00000000);
568 }
569 }
570
571 static void
emit_blit_src(struct fd_ringbuffer * ring,const struct pipe_blit_info * info,unsigned layer,unsigned nr_samples,bool sample_0)572 emit_blit_src(struct fd_ringbuffer *ring, const struct pipe_blit_info *info,
573 unsigned layer, unsigned nr_samples, bool sample_0)
574 {
575 struct fd_resource *src = fd_resource(info->src.resource);
576 enum a6xx_format sfmt = fd6_texture_format(info->src.format, src->layout.tile_mode);
577 enum a6xx_tile_mode stile =
578 fd_resource_tile_mode(info->src.resource, info->src.level);
579 enum a3xx_color_swap sswap = fd6_texture_swap(info->src.format, src->layout.tile_mode);
580 uint32_t pitch = fd_resource_pitch(src, info->src.level);
581 bool subwc_enabled = fd_resource_ubwc_enabled(src, info->src.level);
582 unsigned soff = fd_resource_offset(src, info->src.level, layer);
583 uint32_t width = u_minify(src->b.b.width0, info->src.level) * nr_samples;
584 uint32_t height = u_minify(src->b.b.height0, info->src.level);
585 uint32_t filter = 0;
586
587 if (info->filter == PIPE_TEX_FILTER_LINEAR)
588 filter = A6XX_SP_PS_2D_SRC_INFO_FILTER;
589
590 enum a3xx_msaa_samples samples = fd_msaa_samples(src->b.b.nr_samples);
591
592 if (info->src.format == PIPE_FORMAT_A8_UNORM)
593 sfmt = FMT6_A8_UNORM;
594
595 OUT_PKT4(ring, REG_A6XX_SP_PS_2D_SRC_INFO, 10);
596 OUT_RING(ring, A6XX_SP_PS_2D_SRC_INFO_COLOR_FORMAT(sfmt) |
597 A6XX_SP_PS_2D_SRC_INFO_TILE_MODE(stile) |
598 A6XX_SP_PS_2D_SRC_INFO_COLOR_SWAP(sswap) |
599 A6XX_SP_PS_2D_SRC_INFO_SAMPLES(samples) |
600 COND(samples > MSAA_ONE && !sample_0,
601 A6XX_SP_PS_2D_SRC_INFO_SAMPLES_AVERAGE) |
602 COND(subwc_enabled, A6XX_SP_PS_2D_SRC_INFO_FLAGS) |
603 COND(util_format_is_srgb(info->src.format),
604 A6XX_SP_PS_2D_SRC_INFO_SRGB) |
605 0x500000 | filter);
606 OUT_RING(ring,
607 A6XX_SP_PS_2D_SRC_SIZE_WIDTH(width) |
608 A6XX_SP_PS_2D_SRC_SIZE_HEIGHT(height)); /* SP_PS_2D_SRC_SIZE */
609 OUT_RELOC(ring, src->bo, soff, 0, 0); /* SP_PS_2D_SRC_LO/HI */
610 OUT_RING(ring, A6XX_SP_PS_2D_SRC_PITCH_PITCH(pitch));
611
612 OUT_RING(ring, 0x00000000);
613 OUT_RING(ring, 0x00000000);
614 OUT_RING(ring, 0x00000000);
615 OUT_RING(ring, 0x00000000);
616 OUT_RING(ring, 0x00000000);
617
618 if (subwc_enabled) {
619 OUT_PKT4(ring, REG_A6XX_SP_PS_2D_SRC_FLAGS, 6);
620 fd6_emit_flag_reference(ring, src, info->src.level, layer);
621 OUT_RING(ring, 0x00000000);
622 OUT_RING(ring, 0x00000000);
623 OUT_RING(ring, 0x00000000);
624 }
625 }
626
627 static void
emit_blit_texture(struct fd_context * ctx,struct fd_ringbuffer * ring,const struct pipe_blit_info * info,bool sample_0)628 emit_blit_texture(struct fd_context *ctx, struct fd_ringbuffer *ring,
629 const struct pipe_blit_info *info, bool sample_0)
630 {
631 const struct pipe_box *sbox = &info->src.box;
632 const struct pipe_box *dbox = &info->dst.box;
633 struct fd_resource *dst;
634 int sx1, sy1, sx2, sy2;
635 int dx1, dy1, dx2, dy2;
636
637 if (DEBUG_BLIT) {
638 fprintf(stderr, "texture blit: ");
639 dump_blit_info(info);
640 }
641
642 dst = fd_resource(info->dst.resource);
643
644 uint32_t nr_samples = fd_resource_nr_samples(&dst->b.b);
645
646 sx1 = sbox->x * nr_samples;
647 sy1 = sbox->y;
648 sx2 = (sbox->x + sbox->width) * nr_samples - 1;
649 sy2 = sbox->y + sbox->height - 1;
650
651 OUT_PKT4(ring, REG_A6XX_GRAS_2D_SRC_TL_X, 4);
652 OUT_RING(ring, A6XX_GRAS_2D_SRC_TL_X(sx1));
653 OUT_RING(ring, A6XX_GRAS_2D_SRC_BR_X(sx2));
654 OUT_RING(ring, A6XX_GRAS_2D_SRC_TL_Y(sy1));
655 OUT_RING(ring, A6XX_GRAS_2D_SRC_BR_Y(sy2));
656
657 dx1 = dbox->x * nr_samples;
658 dy1 = dbox->y;
659 dx2 = (dbox->x + dbox->width) * nr_samples - 1;
660 dy2 = dbox->y + dbox->height - 1;
661
662 OUT_PKT4(ring, REG_A6XX_GRAS_2D_DST_TL, 2);
663 OUT_RING(ring, A6XX_GRAS_2D_DST_TL_X(dx1) | A6XX_GRAS_2D_DST_TL_Y(dy1));
664 OUT_RING(ring, A6XX_GRAS_2D_DST_BR_X(dx2) | A6XX_GRAS_2D_DST_BR_Y(dy2));
665
666 if (info->scissor_enable) {
667 OUT_PKT4(ring, REG_A6XX_GRAS_2D_RESOLVE_CNTL_1, 2);
668 OUT_RING(ring, A6XX_GRAS_2D_RESOLVE_CNTL_1_X(info->scissor.minx) |
669 A6XX_GRAS_2D_RESOLVE_CNTL_1_Y(info->scissor.miny));
670 OUT_RING(ring, A6XX_GRAS_2D_RESOLVE_CNTL_1_X(info->scissor.maxx - 1) |
671 A6XX_GRAS_2D_RESOLVE_CNTL_1_Y(info->scissor.maxy - 1));
672 }
673
674 emit_blit_setup(ring, info->dst.format, info->scissor_enable, NULL, 0);
675
676 for (unsigned i = 0; i < info->dst.box.depth; i++) {
677
678 emit_blit_src(ring, info, sbox->z + i, nr_samples, sample_0);
679 emit_blit_dst(ring, info->dst.resource, info->dst.format, info->dst.level,
680 dbox->z + i);
681
682 /*
683 * Blit command:
684 */
685 OUT_PKT7(ring, CP_EVENT_WRITE, 1);
686 OUT_RING(ring, 0x3f);
687 OUT_WFI5(ring);
688
689 OUT_PKT4(ring, REG_A6XX_RB_UNKNOWN_8E04, 1);
690 OUT_RING(ring, ctx->screen->info->a6xx.magic.RB_UNKNOWN_8E04_blit);
691
692 OUT_PKT7(ring, CP_BLIT, 1);
693 OUT_RING(ring, CP_BLIT_0_OP(BLIT_OP_SCALE));
694
695 OUT_WFI5(ring);
696
697 OUT_PKT4(ring, REG_A6XX_RB_UNKNOWN_8E04, 1);
698 OUT_RING(ring, 0); /* RB_UNKNOWN_8E04 */
699 }
700 }
701
702 static void
emit_clear_color(struct fd_ringbuffer * ring,enum pipe_format pfmt,union pipe_color_union * color)703 emit_clear_color(struct fd_ringbuffer *ring, enum pipe_format pfmt,
704 union pipe_color_union *color)
705 {
706 switch (pfmt) {
707 case PIPE_FORMAT_Z24X8_UNORM:
708 case PIPE_FORMAT_Z24_UNORM_S8_UINT:
709 case PIPE_FORMAT_X24S8_UINT: {
710 uint32_t depth_unorm24 = color->f[0] * ((1u << 24) - 1);
711 uint8_t stencil = color->ui[1];
712 color->ui[0] = depth_unorm24 & 0xff;
713 color->ui[1] = (depth_unorm24 >> 8) & 0xff;
714 color->ui[2] = (depth_unorm24 >> 16) & 0xff;
715 color->ui[3] = stencil;
716 break;
717 }
718 default:
719 break;
720 }
721
722 OUT_PKT4(ring, REG_A6XX_RB_2D_SRC_SOLID_C0, 4);
723 switch (fd6_ifmt(fd6_color_format(pfmt, TILE6_LINEAR))) {
724 case R2D_UNORM8:
725 case R2D_UNORM8_SRGB:
726 /* The r2d ifmt is badly named, it also covers the signed case: */
727 if (util_format_is_snorm(pfmt)) {
728 OUT_RING(ring, float_to_byte_tex(color->f[0]));
729 OUT_RING(ring, float_to_byte_tex(color->f[1]));
730 OUT_RING(ring, float_to_byte_tex(color->f[2]));
731 OUT_RING(ring, float_to_byte_tex(color->f[3]));
732 } else {
733 OUT_RING(ring, float_to_ubyte(color->f[0]));
734 OUT_RING(ring, float_to_ubyte(color->f[1]));
735 OUT_RING(ring, float_to_ubyte(color->f[2]));
736 OUT_RING(ring, float_to_ubyte(color->f[3]));
737 }
738 break;
739 case R2D_FLOAT16:
740 OUT_RING(ring, _mesa_float_to_half(color->f[0]));
741 OUT_RING(ring, _mesa_float_to_half(color->f[1]));
742 OUT_RING(ring, _mesa_float_to_half(color->f[2]));
743 OUT_RING(ring, _mesa_float_to_half(color->f[3]));
744 break;
745 case R2D_FLOAT32:
746 case R2D_INT32:
747 case R2D_INT16:
748 case R2D_INT8:
749 default:
750 OUT_RING(ring, color->ui[0]);
751 OUT_RING(ring, color->ui[1]);
752 OUT_RING(ring, color->ui[2]);
753 OUT_RING(ring, color->ui[3]);
754 break;
755 }
756 }
757
758 /**
759 * Handle conversion of clear color
760 */
761 static union pipe_color_union
convert_color(enum pipe_format format,union pipe_color_union * pcolor)762 convert_color(enum pipe_format format, union pipe_color_union *pcolor)
763 {
764 union pipe_color_union color = *pcolor;
765
766 /* For solid-fill blits, the hw isn't going to convert from
767 * linear to srgb for us:
768 */
769 if (util_format_is_srgb(format)) {
770 for (int i = 0; i < 3; i++)
771 color.f[i] = util_format_linear_to_srgb_float(color.f[i]);
772 }
773
774 if (util_format_is_snorm(format)) {
775 for (int i = 0; i < 3; i++)
776 color.f[i] = CLAMP(color.f[i], -1.0f, 1.0f);
777 }
778
779 /* Note that float_to_ubyte() already clamps, for the unorm case */
780
781 return color;
782 }
783
784 void
fd6_clear_surface(struct fd_context * ctx,struct fd_ringbuffer * ring,struct pipe_surface * psurf,uint32_t width,uint32_t height,union pipe_color_union * color,uint32_t unknown_8c01)785 fd6_clear_surface(struct fd_context *ctx, struct fd_ringbuffer *ring,
786 struct pipe_surface *psurf, uint32_t width, uint32_t height,
787 union pipe_color_union *color, uint32_t unknown_8c01)
788 {
789 if (DEBUG_BLIT) {
790 fprintf(stderr, "surface clear:\ndst resource: ");
791 util_dump_resource(stderr, psurf->texture);
792 fprintf(stderr, "\n");
793 }
794
795 uint32_t nr_samples = fd_resource_nr_samples(psurf->texture);
796 OUT_PKT4(ring, REG_A6XX_GRAS_2D_DST_TL, 2);
797 OUT_RING(ring, A6XX_GRAS_2D_DST_TL_X(0) | A6XX_GRAS_2D_DST_TL_Y(0));
798 OUT_RING(ring, A6XX_GRAS_2D_DST_BR_X(width * nr_samples - 1) |
799 A6XX_GRAS_2D_DST_BR_Y(height - 1));
800
801 union pipe_color_union clear_color = convert_color(psurf->format, color);
802
803 emit_clear_color(ring, psurf->format, &clear_color);
804 emit_blit_setup(ring, psurf->format, false, &clear_color, unknown_8c01);
805
806 for (unsigned i = psurf->u.tex.first_layer; i <= psurf->u.tex.last_layer;
807 i++) {
808 emit_blit_dst(ring, psurf->texture, psurf->format, psurf->u.tex.level, i);
809
810 /*
811 * Blit command:
812 */
813 OUT_PKT7(ring, CP_EVENT_WRITE, 1);
814 OUT_RING(ring, 0x3f);
815 OUT_WFI5(ring);
816
817 OUT_PKT4(ring, REG_A6XX_RB_UNKNOWN_8E04, 1);
818 OUT_RING(ring, ctx->screen->info->a6xx.magic.RB_UNKNOWN_8E04_blit);
819
820 OUT_PKT7(ring, CP_BLIT, 1);
821 OUT_RING(ring, CP_BLIT_0_OP(BLIT_OP_SCALE));
822
823 OUT_WFI5(ring);
824
825 OUT_PKT4(ring, REG_A6XX_RB_UNKNOWN_8E04, 1);
826 OUT_RING(ring, 0); /* RB_UNKNOWN_8E04 */
827 }
828 }
829
830 void
fd6_resolve_tile(struct fd_batch * batch,struct fd_ringbuffer * ring,uint32_t base,struct pipe_surface * psurf,uint32_t unknown_8c01)831 fd6_resolve_tile(struct fd_batch *batch, struct fd_ringbuffer *ring,
832 uint32_t base, struct pipe_surface *psurf, uint32_t unknown_8c01)
833 {
834 const struct fd_gmem_stateobj *gmem = batch->gmem_state;
835 uint64_t gmem_base = batch->ctx->screen->gmem_base + base;
836 uint32_t gmem_pitch = gmem->bin_w * batch->framebuffer.samples *
837 util_format_get_blocksize(psurf->format);
838
839 OUT_PKT4(ring, REG_A6XX_GRAS_2D_DST_TL, 2);
840 OUT_RING(ring, A6XX_GRAS_2D_DST_TL_X(0) | A6XX_GRAS_2D_DST_TL_Y(0));
841 OUT_RING(ring, A6XX_GRAS_2D_DST_BR_X(psurf->width - 1) |
842 A6XX_GRAS_2D_DST_BR_Y(psurf->height - 1));
843
844 OUT_PKT4(ring, REG_A6XX_GRAS_2D_SRC_TL_X, 4);
845 OUT_RING(ring, A6XX_GRAS_2D_SRC_TL_X(0));
846 OUT_RING(ring, A6XX_GRAS_2D_SRC_BR_X(psurf->width - 1));
847 OUT_RING(ring, A6XX_GRAS_2D_SRC_TL_Y(0));
848 OUT_RING(ring, A6XX_GRAS_2D_SRC_BR_Y(psurf->height - 1));
849
850 /* Enable scissor bit, which will take into account the window scissor
851 * which is set per-tile
852 */
853 emit_blit_setup(ring, psurf->format, true, NULL, unknown_8c01);
854
855 /* We shouldn't be using GMEM in the layered rendering case: */
856 assert(psurf->u.tex.first_layer == psurf->u.tex.last_layer);
857
858 emit_blit_dst(ring, psurf->texture, psurf->format, psurf->u.tex.level,
859 psurf->u.tex.first_layer);
860
861 enum a6xx_format sfmt = fd6_color_format(psurf->format, TILE6_LINEAR);
862 enum a3xx_msaa_samples samples = fd_msaa_samples(batch->framebuffer.samples);
863
864 OUT_PKT4(ring, REG_A6XX_SP_PS_2D_SRC_INFO, 10);
865 OUT_RING(ring,
866 A6XX_SP_PS_2D_SRC_INFO_COLOR_FORMAT(sfmt) |
867 A6XX_SP_PS_2D_SRC_INFO_TILE_MODE(TILE6_2) |
868 A6XX_SP_PS_2D_SRC_INFO_SAMPLES(samples) |
869 COND(samples > MSAA_ONE, A6XX_SP_PS_2D_SRC_INFO_SAMPLES_AVERAGE) |
870 COND(util_format_is_srgb(psurf->format), A6XX_SP_PS_2D_SRC_INFO_SRGB) |
871 A6XX_SP_PS_2D_SRC_INFO_UNK20 | A6XX_SP_PS_2D_SRC_INFO_UNK22);
872 OUT_RING(ring, A6XX_SP_PS_2D_SRC_SIZE_WIDTH(psurf->width) |
873 A6XX_SP_PS_2D_SRC_SIZE_HEIGHT(psurf->height));
874 OUT_RING(ring, gmem_base); /* SP_PS_2D_SRC_LO */
875 OUT_RING(ring, gmem_base >> 32); /* SP_PS_2D_SRC_HI */
876 OUT_RING(ring, A6XX_SP_PS_2D_SRC_PITCH_PITCH(gmem_pitch));
877 OUT_RING(ring, 0x00000000);
878 OUT_RING(ring, 0x00000000);
879 OUT_RING(ring, 0x00000000);
880 OUT_RING(ring, 0x00000000);
881 OUT_RING(ring, 0x00000000);
882
883 /* sync GMEM writes with CACHE. */
884 fd6_cache_inv(batch, ring);
885
886 /* Wait for CACHE_INVALIDATE to land */
887 fd_wfi(batch, ring);
888
889 OUT_PKT7(ring, CP_BLIT, 1);
890 OUT_RING(ring, CP_BLIT_0_OP(BLIT_OP_SCALE));
891
892 OUT_WFI5(ring);
893
894 /* CP_BLIT writes to the CCU, unlike CP_EVENT_WRITE::BLIT which writes to
895 * sysmem, and we generally assume that GMEM renderpasses leave their
896 * results in sysmem, so we need to flush manually here.
897 */
898 fd6_event_write(batch, ring, PC_CCU_FLUSH_COLOR_TS, true);
899 fd_wfi(batch, ring);
900 }
901
/**
 * Try to perform a color blit in a batch of its own.
 *
 * The caller must not request depth/stencil (PIPE_MASK_ZS) bits in
 * info->mask; this is only checked with debug_assert().
 *
 * @param ctx       context the blit is issued on
 * @param info      description of the blit; not modified here
 * @param sample_0  passed through unchanged to emit_blit_texture(); not used
 *                  on the buffer-to-buffer path.  NOTE(review): presumably
 *                  selects reading sample 0 rather than averaging samples of
 *                  an MSAA source -- confirm against emit_blit_texture().
 *
 * @return false if can_do_blit() rejects the blit (nothing has been
 *         allocated or emitted at that point), true once the blit has been
 *         emitted and its batch flushed.
 */
static bool
handle_rgba_blit(struct fd_context *ctx,
                 const struct pipe_blit_info *info, bool sample_0) assert_dt
{
   struct fd_batch *batch;

   debug_assert(!(info->mask & PIPE_MASK_ZS));

   if (!can_do_blit(info))
      return false;

   struct fd_resource *src = fd_resource(info->src.resource);
   struct fd_resource *dst = fd_resource(info->dst.resource);

   fd6_validate_format(ctx, src, info->src.format);
   fd6_validate_format(ctx, dst, info->dst.format);

   batch = fd_bc_alloc_batch(ctx, true);

   /* Resource read/write tracking is done with the screen lock held. */
   fd_screen_lock(ctx->screen);

   fd_batch_resource_read(batch, src);
   fd_batch_resource_write(batch, dst);

   fd_screen_unlock(ctx->screen);

   /* ret is only consumed by the assert() below. */
   ASSERTED bool ret = fd_batch_lock_submit(batch);
   assert(ret);

   /* Marking the batch as needing flush must come after the batch
    * dependency tracking (resource_read()/resource_write()), as that
    * can trigger a flush
    */
   fd_batch_needs_flush(batch);

   fd_batch_update_queries(batch);

   emit_setup(batch);

   DBG_BLIT(info, batch);

   trace_start_blit(&batch->trace, batch->draw, info->src.resource->target,
                    info->dst.resource->target);

   if ((info->src.resource->target == PIPE_BUFFER) &&
       (info->dst.resource->target == PIPE_BUFFER)) {
      /* Buffer-to-buffer: both resources are expected to be linear. */
      assert(src->layout.tile_mode == TILE6_LINEAR);
      assert(dst->layout.tile_mode == TILE6_LINEAR);
      emit_blit_buffer(ctx, batch->draw, info);
   } else {
      /* I don't *think* we need to handle blits between buffer <-> !buffer */
      debug_assert(info->src.resource->target != PIPE_BUFFER);
      debug_assert(info->dst.resource->target != PIPE_BUFFER);
      emit_blit_texture(ctx, batch->draw, info, sample_0);
   }

   trace_end_blit(&batch->trace, batch->draw);

   /* After the blit: emit the PC_CCU_FLUSH_COLOR_TS, PC_CCU_FLUSH_DEPTH_TS
    * and CACHE_FLUSH_TS events, then a wait-for-idle followed by a cache
    * invalidate, all into the same draw ring as the blit itself.
    */
   fd6_event_write(batch, batch->draw, PC_CCU_FLUSH_COLOR_TS, true);
   fd6_event_write(batch, batch->draw, PC_CCU_FLUSH_DEPTH_TS, true);
   fd6_event_write(batch, batch->draw, CACHE_FLUSH_TS, true);
   fd_wfi(batch, batch->draw);
   fd6_cache_inv(batch, batch->draw);

   fd_batch_unlock_submit(batch);

   /* The batch is flushed right away and our local pointer to it cleared. */
   fd_batch_flush(batch);
   fd_batch_reference(&batch, NULL);

   /* Acc query state will have been dirtied by our fd_batch_update_queries, so
    * the ctx->batch may need to turn its queries back on.
    */
   ctx->update_active_queries = true;

   return true;
}
978
979 /**
980 * Re-written z/s blits can still fail for various reasons (for example MSAA).
981 * But we want to do the fallback blit with the re-written pipe_blit_info,
982 * in particular as u_blitter cannot blit stencil. So handle the fallback
983 * ourself and never "fail".
984 */
985 static bool
do_rewritten_blit(struct fd_context * ctx,const struct pipe_blit_info * info,bool sample_0)986 do_rewritten_blit(struct fd_context *ctx,
987 const struct pipe_blit_info *info, bool sample_0) assert_dt
988 {
989 bool success = handle_rgba_blit(ctx, info, sample_0);
990 if (!success) {
991 if (sample_0 && !util_format_is_pure_integer(info->src.format))
992 mesa_logw("sample averaging on fallback blit when we shouldn't.");
993 success = fd_blitter_blit(ctx, info);
994 }
995 debug_assert(success); /* fallback should never fail! */
996 return success;
997 }
998
999 /**
1000 * Handle depth/stencil blits either via u_blitter and/or re-writing the
1001 * blit into an equivilant format that we can handle
1002 */
1003 static bool
handle_zs_blit(struct fd_context * ctx,const struct pipe_blit_info * info)1004 handle_zs_blit(struct fd_context *ctx,
1005 const struct pipe_blit_info *info) assert_dt
1006 {
1007 struct pipe_blit_info blit = *info;
1008
1009 if (DEBUG_BLIT) {
1010 fprintf(stderr, "---- handle_zs_blit: ");
1011 dump_blit_info(info);
1012 }
1013
1014 if (info->src.format != info->dst.format)
1015 return false;
1016
1017 struct fd_resource *src = fd_resource(info->src.resource);
1018 struct fd_resource *dst = fd_resource(info->dst.resource);
1019
1020 switch (info->dst.format) {
1021 case PIPE_FORMAT_S8_UINT:
1022 debug_assert(info->mask == PIPE_MASK_S);
1023 blit.mask = PIPE_MASK_R;
1024 blit.src.format = PIPE_FORMAT_R8_UINT;
1025 blit.dst.format = PIPE_FORMAT_R8_UINT;
1026 return do_rewritten_blit(ctx, &blit, true);
1027
1028 case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT:
1029 if (info->mask & PIPE_MASK_Z) {
1030 blit.mask = PIPE_MASK_R;
1031 blit.src.format = PIPE_FORMAT_R32_FLOAT;
1032 blit.dst.format = PIPE_FORMAT_R32_FLOAT;
1033 do_rewritten_blit(ctx, &blit, true);
1034 }
1035
1036 if (info->mask & PIPE_MASK_S) {
1037 blit.mask = PIPE_MASK_R;
1038 blit.src.format = PIPE_FORMAT_R8_UINT;
1039 blit.dst.format = PIPE_FORMAT_R8_UINT;
1040 blit.src.resource = &src->stencil->b.b;
1041 blit.dst.resource = &dst->stencil->b.b;
1042 do_rewritten_blit(ctx, &blit, true);
1043 }
1044
1045 return true;
1046
1047 case PIPE_FORMAT_Z16_UNORM:
1048 blit.mask = PIPE_MASK_R;
1049 blit.src.format = PIPE_FORMAT_R16_UNORM;
1050 blit.dst.format = PIPE_FORMAT_R16_UNORM;
1051 return do_rewritten_blit(ctx, &blit, true);
1052
1053 case PIPE_FORMAT_Z32_UNORM:
1054 case PIPE_FORMAT_Z32_FLOAT:
1055 debug_assert(info->mask == PIPE_MASK_Z);
1056 blit.mask = PIPE_MASK_R;
1057 blit.src.format = PIPE_FORMAT_R32_UINT;
1058 blit.dst.format = PIPE_FORMAT_R32_UINT;
1059 return do_rewritten_blit(ctx, &blit, true);
1060
1061 case PIPE_FORMAT_Z24X8_UNORM:
1062 case PIPE_FORMAT_Z24_UNORM_S8_UINT:
1063 blit.mask = 0;
1064 if (info->mask & PIPE_MASK_Z)
1065 blit.mask |= PIPE_MASK_R | PIPE_MASK_G | PIPE_MASK_B;
1066 if (info->mask & PIPE_MASK_S)
1067 blit.mask |= PIPE_MASK_A;
1068 blit.src.format = PIPE_FORMAT_Z24_UNORM_S8_UINT_AS_R8G8B8A8;
1069 blit.dst.format = PIPE_FORMAT_Z24_UNORM_S8_UINT_AS_R8G8B8A8;
1070 /* non-UBWC Z24_UNORM_S8_UINT_AS_R8G8B8A8 is broken on a630, fall back to
1071 * 8888_unorm.
1072 */
1073 if (!ctx->screen->info->a6xx.has_z24uint_s8uint) {
1074 if (!src->layout.ubwc && !dst->layout.ubwc) {
1075 blit.src.format = PIPE_FORMAT_RGBA8888_UINT;
1076 blit.dst.format = PIPE_FORMAT_RGBA8888_UINT;
1077 } else {
1078 if (!src->layout.ubwc)
1079 blit.src.format = PIPE_FORMAT_RGBA8888_UNORM;
1080 if (!dst->layout.ubwc)
1081 blit.dst.format = PIPE_FORMAT_RGBA8888_UNORM;
1082 }
1083 }
1084 if (info->src.resource->nr_samples > 1 && blit.src.format != PIPE_FORMAT_RGBA8888_UINT)
1085 mesa_logw("sample averaging on fallback z24s8 blit when we shouldn't.");
1086 return fd_blitter_blit(ctx, &blit);
1087
1088 default:
1089 return false;
1090 }
1091 }
1092
1093 static bool
handle_compressed_blit(struct fd_context * ctx,const struct pipe_blit_info * info)1094 handle_compressed_blit(struct fd_context *ctx,
1095 const struct pipe_blit_info *info) assert_dt
1096 {
1097 struct pipe_blit_info blit = *info;
1098
1099 if (DEBUG_BLIT) {
1100 fprintf(stderr, "---- handle_compressed_blit: ");
1101 dump_blit_info(info);
1102 }
1103
1104 if (info->src.format != info->dst.format)
1105 return fd_blitter_blit(ctx, info);
1106
1107 if (util_format_get_blocksize(info->src.format) == 8) {
1108 blit.src.format = blit.dst.format = PIPE_FORMAT_R16G16B16A16_UINT;
1109 } else {
1110 debug_assert(util_format_get_blocksize(info->src.format) == 16);
1111 blit.src.format = blit.dst.format = PIPE_FORMAT_R32G32B32A32_UINT;
1112 }
1113
1114 int bw = util_format_get_blockwidth(info->src.format);
1115 int bh = util_format_get_blockheight(info->src.format);
1116
1117 /* NOTE: x/y *must* be aligned to block boundary (ie. in
1118 * glCompressedTexSubImage2D()) but width/height may not
1119 * be:
1120 */
1121
1122 debug_assert((blit.src.box.x % bw) == 0);
1123 debug_assert((blit.src.box.y % bh) == 0);
1124
1125 blit.src.box.x /= bw;
1126 blit.src.box.y /= bh;
1127 blit.src.box.width = DIV_ROUND_UP(blit.src.box.width, bw);
1128 blit.src.box.height = DIV_ROUND_UP(blit.src.box.height, bh);
1129
1130 debug_assert((blit.dst.box.x % bw) == 0);
1131 debug_assert((blit.dst.box.y % bh) == 0);
1132
1133 blit.dst.box.x /= bw;
1134 blit.dst.box.y /= bh;
1135 blit.dst.box.width = DIV_ROUND_UP(blit.dst.box.width, bw);
1136 blit.dst.box.height = DIV_ROUND_UP(blit.dst.box.height, bh);
1137
1138 return do_rewritten_blit(ctx, &blit, false);
1139 }
1140
1141 /**
1142 * For SNORM formats, copy them as the equivalent UNORM format. If we treat
1143 * them as snorm then the 0x80 (-1.0 snorm8) value will get clamped to 0x81
1144 * (also -1.0), when we're supposed to be memcpying the bits. See
1145 * https://gitlab.khronos.org/Tracker/vk-gl-cts/-/issues/2917 for discussion.
1146 */
1147 static bool
handle_snorm_copy_blit(struct fd_context * ctx,const struct pipe_blit_info * info)1148 handle_snorm_copy_blit(struct fd_context *ctx,
1149 const struct pipe_blit_info *info)
1150 assert_dt
1151 {
1152 /* If we're interpolating the pixels, we can't just treat the values as unorm. */
1153 if (info->filter == PIPE_TEX_FILTER_LINEAR)
1154 return false;
1155
1156 struct pipe_blit_info blit = *info;
1157
1158 blit.src.format = blit.dst.format = util_format_snorm_to_unorm(info->src.format);
1159
1160 return do_rewritten_blit(ctx, &blit, false);
1161 }
1162
1163 static bool
fd6_blit(struct fd_context * ctx,const struct pipe_blit_info * info)1164 fd6_blit(struct fd_context *ctx, const struct pipe_blit_info *info) assert_dt
1165 {
1166 if (info->mask & PIPE_MASK_ZS)
1167 return handle_zs_blit(ctx, info);
1168
1169 if (util_format_is_compressed(info->src.format) ||
1170 util_format_is_compressed(info->dst.format))
1171 return handle_compressed_blit(ctx, info);
1172
1173 if ((info->src.format == info->dst.format) &&
1174 util_format_is_snorm(info->src.format))
1175 return handle_snorm_copy_blit(ctx, info);
1176
1177 return handle_rgba_blit(ctx, info, false);
1178 }
1179
1180 void
fd6_blitter_init(struct pipe_context * pctx)1181 fd6_blitter_init(struct pipe_context *pctx) disable_thread_safety_analysis
1182 {
1183 struct fd_context *ctx = fd_context(pctx);
1184
1185 ctx->clear_ubwc = fd6_clear_ubwc;
1186 ctx->validate_format = fd6_validate_format;
1187
1188 if (FD_DBG(NOBLIT))
1189 return;
1190
1191 ctx->blit = fd6_blit;
1192 }
1193
1194 unsigned
fd6_tile_mode(const struct pipe_resource * tmpl)1195 fd6_tile_mode(const struct pipe_resource *tmpl)
1196 {
1197 /* if the mipmap level 0 is still too small to be tiled, then don't
1198 * bother pretending:
1199 */
1200 if (fd_resource_level_linear(tmpl, 0))
1201 return TILE6_LINEAR;
1202
1203 /* basically just has to be a format we can blit, so uploads/downloads
1204 * via linear staging buffer works:
1205 */
1206 if (ok_format(tmpl->format))
1207 return TILE6_3;
1208
1209 return TILE6_LINEAR;
1210 }
1211