1 /*
2  * Copyright (C) 2020-2021 Collabora, Ltd.
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21  * SOFTWARE.
22  *
23  * Authors:
24  *   Alyssa Rosenzweig <alyssa.rosenzweig@collabora.com>
25  *   Boris Brezillon <boris.brezillon@collabora.com>
26  */
27 
28 #include <math.h>
29 #include <stdio.h>
30 #include "pan_blend.h"
31 #include "pan_blitter.h"
32 #include "pan_cs.h"
33 #include "pan_encoder.h"
34 #include "pan_pool.h"
35 #include "pan_shader.h"
36 #include "pan_scoreboard.h"
37 #include "pan_texture.h"
38 #include "panfrost-quirks.h"
39 #include "compiler/nir/nir_builder.h"
40 #include "util/u_math.h"
41 
#if PAN_ARCH >= 6
/* On Midgard, the native blit infrastructure (via MFBD preloads) is broken or
 * missing in many cases. We instead use software paths as fallbacks to
 * implement blits, which are done as TILER jobs. No vertex shader is
 * necessary since we can supply screen-space coordinates directly.
 *
 * This is primarily designed as a fallback for preloads but could be extended
 * for other clears/blits if needed in the future. */

/* Map the NIR ALU type of a blit output onto the register file format that is
 * programmed into the fixed-function blend conversion. Only the 32-bit float,
 * signed and unsigned integer types are accepted; any other value is treated
 * as a programming error. */

static enum mali_register_file_format
blit_type_to_reg_fmt(nir_alu_type in)
{
        if (in == nir_type_float32)
                return MALI_REGISTER_FILE_FORMAT_F32;

        if (in == nir_type_int32)
                return MALI_REGISTER_FILE_FORMAT_I32;

        if (in == nir_type_uint32)
                return MALI_REGISTER_FILE_FORMAT_U32;

        unreachable("Invalid blit type");
}
#endif
66 
67 struct pan_blit_surface {
68         gl_frag_result loc : 4;
69         nir_alu_type type : 8;
70         enum mali_texture_dimension dim : 2;
71         bool array : 1;
72         unsigned src_samples: 5;
73         unsigned dst_samples: 5;
74 };
75 
76 struct pan_blit_shader_key {
77         struct pan_blit_surface surfaces[8];
78 };
79 
80 struct pan_blit_shader_data {
81         struct pan_blit_shader_key key;
82         mali_ptr address;
83         unsigned blend_ret_offsets[8];
84         nir_alu_type blend_types[8];
85 };
86 
87 struct pan_blit_blend_shader_key {
88         enum pipe_format format;
89         nir_alu_type type;
90         unsigned rt : 3;
91         unsigned nr_samples : 5;
92         unsigned pad : 24;
93 };
94 
95 struct pan_blit_blend_shader_data {
96         struct pan_blit_blend_shader_key key;
97         mali_ptr address;
98 };
99 
100 struct pan_blit_rsd_key {
101         struct {
102                 enum pipe_format format;
103                 nir_alu_type type : 8;
104                 unsigned src_samples : 5;
105                 unsigned dst_samples : 5;
106                 enum mali_texture_dimension dim : 2;
107                 bool array : 1;
108         } rts[8], z, s;
109 };
110 
111 struct pan_blit_rsd_data {
112         struct pan_blit_rsd_key key;
113         mali_ptr address;
114 };
115 
#if PAN_ARCH >= 5
/* Pack a single BLEND descriptor into `out`.
 *
 * dev:          device, used to look up the blend memory format (v6+ only)
 * rt:           index of the render target the descriptor belongs to
 * iview:        destination view, or NULL to emit a disabled descriptor
 * blit_shader:  blit shader producing the colour for this render target
 * blend_shader: address of a blend shader, or 0 to use the fixed-function
 *               blend equation
 * out:          where the packed descriptor is written
 */
static void
pan_blitter_emit_blend(const struct panfrost_device *dev,
                       unsigned rt,
                       const struct pan_image_view *iview,
                       const struct pan_blit_shader_data *blit_shader,
                       mali_ptr blend_shader,
                       void *out)
{
        pan_pack(out, BLEND, cfg) {
                /* Nothing bound to this slot: emit a disabled descriptor.
                 * `continue` skips the rest of the pan_pack() body. */
                if (iview == NULL) {
                        cfg.enable = false;
#if PAN_ARCH >= 6
                        cfg.internal.mode = MALI_BLEND_MODE_OFF;
#endif
                        continue;
                }

                cfg.srgb = util_format_is_srgb(iview->format);
                cfg.round_to_fb_precision = true;

                if (blend_shader != 0) {
                        /* Blend shader path */
#if PAN_ARCH >= 6
                        cfg.internal.mode = MALI_BLEND_MODE_SHADER;
                        cfg.internal.shader.pc = blend_shader;

                        /* A zero offset means there is no return address to
                         * program for this render target */
                        unsigned ret_offset = blit_shader->blend_ret_offsets[rt];

                        if (ret_offset != 0) {
                                cfg.internal.shader.return_value =
                                        blit_shader->address + ret_offset;
                        }
#else
                        cfg.blend_shader = true;
                        cfg.shader_pc = blend_shader;
#endif
                        continue;
                }

                /* Fixed-function path: SRC for operands A and B, ZERO for
                 * operand C, with all four channels enabled */
#if PAN_ARCH >= 6
                cfg.internal.mode = MALI_BLEND_MODE_OPAQUE;
#endif

                cfg.equation.rgb.a = MALI_BLEND_OPERAND_A_SRC;
                cfg.equation.rgb.b = MALI_BLEND_OPERAND_B_SRC;
                cfg.equation.rgb.c = MALI_BLEND_OPERAND_C_ZERO;
                cfg.equation.alpha.a = MALI_BLEND_OPERAND_A_SRC;
                cfg.equation.alpha.b = MALI_BLEND_OPERAND_B_SRC;
                cfg.equation.alpha.c = MALI_BLEND_OPERAND_C_ZERO;
                cfg.equation.color_mask = 0xf;

#if PAN_ARCH >= 6
                /* Describe the conversion between the shader's register
                 * format and the render target's memory format */
                cfg.internal.fixed_function.num_comps = 4;
                cfg.internal.fixed_function.conversion.memory_format =
                        panfrost_format_to_bifrost_blend(dev, iview->format, false);
                cfg.internal.fixed_function.conversion.register_format =
                        blit_type_to_reg_fmt(blit_shader->key.surfaces[rt].type);
                cfg.internal.fixed_function.rt = rt;
#endif
        }
}
#endif
179 
180 static void
pan_blitter_emit_rsd(const struct panfrost_device * dev,const struct pan_blit_shader_data * blit_shader,unsigned rt_count,const struct pan_image_view ** rts,mali_ptr * blend_shaders,const struct pan_image_view * z,const struct pan_image_view * s,void * out)181 pan_blitter_emit_rsd(const struct panfrost_device *dev,
182                      const struct pan_blit_shader_data *blit_shader,
183                      unsigned rt_count,
184                      const struct pan_image_view **rts,
185                      mali_ptr *blend_shaders,
186                      const struct pan_image_view *z,
187                      const struct pan_image_view *s,
188                      void *out)
189 {
190         unsigned tex_count = 0;
191         bool zs = (z || s);
192         bool ms = false;
193 
194         for (unsigned i = 0; i < rt_count; i++) {
195                 if (rts[i]) {
196                         tex_count++;
197                         if (rts[i]->nr_samples > 1)
198                                 ms = true;
199                 }
200         }
201 
202         if (z) {
203                 if (z->image->layout.nr_samples > 1)
204                         ms = true;
205                 tex_count++;
206         }
207 
208         if (s) {
209                 if (s->image->layout.nr_samples > 1)
210                         ms = true;
211                 tex_count++;
212         }
213 
214         pan_pack(out, RENDERER_STATE, cfg) {
215                 assert(blit_shader->address);
216                 cfg.shader.shader = blit_shader->address;
217                 cfg.shader.varying_count = 1;
218                 cfg.shader.texture_count = tex_count;
219                 cfg.shader.sampler_count = 1;
220 
221                 cfg.properties.stencil_from_shader = s != NULL;
222                 cfg.properties.depth_source =
223                         z ?
224                         MALI_DEPTH_SOURCE_SHADER :
225                         MALI_DEPTH_SOURCE_FIXED_FUNCTION;
226 
227                 cfg.multisample_misc.sample_mask = 0xFFFF;
228                 cfg.multisample_misc.multisample_enable = ms;
229                 cfg.multisample_misc.evaluate_per_sample = ms;
230                 cfg.multisample_misc.depth_write_mask = z != NULL;
231                 cfg.multisample_misc.depth_function = MALI_FUNC_ALWAYS;
232 
233                 cfg.stencil_mask_misc.stencil_enable = s != NULL;
234                 cfg.stencil_mask_misc.stencil_mask_front = 0xFF;
235                 cfg.stencil_mask_misc.stencil_mask_back = 0xFF;
236                 cfg.stencil_front.compare_function = MALI_FUNC_ALWAYS;
237                 cfg.stencil_front.stencil_fail = MALI_STENCIL_OP_REPLACE;
238                 cfg.stencil_front.depth_fail = MALI_STENCIL_OP_REPLACE;
239                 cfg.stencil_front.depth_pass = MALI_STENCIL_OP_REPLACE;
240                 cfg.stencil_front.mask = 0xFF;
241                 cfg.stencil_back = cfg.stencil_front;
242 
243 #if PAN_ARCH >= 6
244                 if (zs) {
245                         cfg.properties.zs_update_operation =
246                                 MALI_PIXEL_KILL_FORCE_LATE;
247                         cfg.properties.pixel_kill_operation =
248                                 MALI_PIXEL_KILL_FORCE_LATE;
249                 } else {
250                         cfg.properties.zs_update_operation =
251                                 MALI_PIXEL_KILL_STRONG_EARLY;
252                         cfg.properties.pixel_kill_operation =
253                                 MALI_PIXEL_KILL_FORCE_EARLY;
254                 }
255 
256                 /* We can only allow blit shader fragments to kill if they write all
257                  * colour outputs. This is true for our colour (non-Z/S) blit shaders,
258                  * but obviously not true for Z/S shaders. However, blit shaders
259                  * otherwise lack side effects, so other fragments may kill them.
260                  * However, while shaders writing Z/S can normally be killed, on v6
261                  * for frame shaders it can cause GPU timeouts, so only allow colour
262                  * blit shaders to be killed. */
263 
264                 cfg.properties.allow_forward_pixel_to_kill = !zs;
265                 cfg.properties.allow_forward_pixel_to_be_killed = (dev->arch >= 7) || !zs;
266 
267                 cfg.preload.fragment.coverage = true;
268                 cfg.preload.fragment.sample_mask_id = ms;
269 #else
270                 mali_ptr blend_shader = blend_shaders ?
271                         panfrost_last_nonnull(blend_shaders, rt_count) : 0;
272 
273                 cfg.properties.work_register_count = 4;
274                 cfg.properties.force_early_z = !zs;
275                 cfg.stencil_mask_misc.alpha_test_compare_function = MALI_FUNC_ALWAYS;
276 
277                 /* Set even on v5 for erratum workaround */
278 #if PAN_ARCH == 5
279                 cfg.legacy_blend_shader = blend_shader;
280 #else
281                 cfg.blend_shader = blend_shader;
282                 cfg.stencil_mask_misc.write_enable = true;
283                 cfg.stencil_mask_misc.dither_disable = true;
284                 cfg.multisample_misc.blend_shader = !!blend_shader;
285                 cfg.blend_shader = blend_shader;
286                 if (!cfg.multisample_misc.blend_shader) {
287                         cfg.blend_equation.rgb.a = MALI_BLEND_OPERAND_A_SRC;
288                         cfg.blend_equation.rgb.b = MALI_BLEND_OPERAND_B_SRC;
289                         cfg.blend_equation.rgb.c = MALI_BLEND_OPERAND_C_ZERO;
290                         cfg.blend_equation.alpha.a = MALI_BLEND_OPERAND_A_SRC;
291                         cfg.blend_equation.alpha.b = MALI_BLEND_OPERAND_B_SRC;
292                         cfg.blend_equation.alpha.c = MALI_BLEND_OPERAND_C_ZERO;
293                         cfg.blend_constant = 0;
294 
295                         if (rts && rts[0]) {
296                                 cfg.stencil_mask_misc.srgb =
297                                         util_format_is_srgb(rts[0]->format);
298                                 cfg.blend_equation.color_mask = 0xf;
299                         }
300                }
301 #endif
302 #endif
303         }
304 
305 #if PAN_ARCH >= 5
306         for (unsigned i = 0; i < MAX2(rt_count, 1); ++i) {
307                 void *dest = out + pan_size(RENDERER_STATE) + pan_size(BLEND) * i;
308                 const struct pan_image_view *rt_view = rts ? rts[i] : NULL;
309                 mali_ptr blend_shader = blend_shaders ? blend_shaders[i] : 0;
310 
311                 pan_blitter_emit_blend(dev, i, rt_view, blit_shader,
312                                        blend_shader, dest);
313         }
314 #endif
315 }
316 
317 static void
pan_blitter_get_blend_shaders(struct panfrost_device * dev,unsigned rt_count,const struct pan_image_view ** rts,const struct pan_blit_shader_data * blit_shader,mali_ptr * blend_shaders)318 pan_blitter_get_blend_shaders(struct panfrost_device *dev,
319                               unsigned rt_count,
320                               const struct pan_image_view **rts,
321                               const struct pan_blit_shader_data *blit_shader,
322                               mali_ptr *blend_shaders)
323 {
324         if (!rt_count)
325                 return;
326 
327         struct pan_blend_state blend_state = {
328                 .rt_count = rt_count,
329         };
330 
331         for (unsigned i = 0; i < rt_count; i++) {
332                 if (!rts[i] || panfrost_blendable_formats_v7[rts[i]->format].internal)
333                         continue;
334 
335                 struct pan_blit_blend_shader_key key = {
336                         .format = rts[i]->format,
337                         .rt = i,
338                         .nr_samples = rts[i]->image->layout.nr_samples,
339                         .type = blit_shader->blend_types[i],
340                 };
341 
342                 pthread_mutex_lock(&dev->blitter.shaders.lock);
343                 struct hash_entry *he =
344                         _mesa_hash_table_search(dev->blitter.shaders.blend, &key);
345                 struct pan_blit_blend_shader_data *blend_shader = he ? he->data : NULL;
346                 if (blend_shader) {
347                          blend_shaders[i] = blend_shader->address;
348                          pthread_mutex_unlock(&dev->blitter.shaders.lock);
349                          continue;
350                 }
351 
352                 blend_shader = rzalloc(dev->blitter.shaders.blend,
353                                        struct pan_blit_blend_shader_data);
354                 blend_shader->key = key;
355 
356                 blend_state.rts[i] = (struct pan_blend_rt_state) {
357                         .format = rts[i]->format,
358                         .nr_samples = rts[i]->image->layout.nr_samples,
359                         .equation = {
360                                 .blend_enable = true,
361                                 .rgb_src_factor = BLEND_FACTOR_ZERO,
362                                 .rgb_invert_src_factor = true,
363                                 .rgb_dst_factor = BLEND_FACTOR_ZERO,
364                                 .rgb_func = BLEND_FUNC_ADD,
365                                 .alpha_src_factor = BLEND_FACTOR_ZERO,
366                                 .alpha_invert_src_factor = true,
367                                 .alpha_dst_factor = BLEND_FACTOR_ZERO,
368                                 .alpha_func = BLEND_FUNC_ADD,
369                                 .color_mask = 0xf,
370                         },
371                 };
372 
373                 pthread_mutex_lock(&dev->blend_shaders.lock);
374                 struct pan_blend_shader_variant *b =
375                         GENX(pan_blend_get_shader_locked)(dev, &blend_state,
376                                                           blit_shader->blend_types[i],
377                                                           nir_type_float32, /* unused */
378                                                           i);
379 
380                 ASSERTED unsigned full_threads =
381                         (dev->arch >= 7) ? 32 : ((dev->arch == 6) ? 64 : 4);
382                 assert(b->work_reg_count <= full_threads);
383                 struct panfrost_ptr bin =
384                         pan_pool_alloc_aligned(dev->blitter.shaders.pool,
385                                                b->binary.size,
386                                                PAN_ARCH >= 6 ? 128 : 64);
387                 memcpy(bin.cpu, b->binary.data, b->binary.size);
388 
389                 blend_shader->address = bin.gpu | b->first_tag;
390                 pthread_mutex_unlock(&dev->blend_shaders.lock);
391                 _mesa_hash_table_insert(dev->blitter.shaders.blend,
392                                         &blend_shader->key, blend_shader);
393                 pthread_mutex_unlock(&dev->blitter.shaders.lock);
394                 blend_shaders[i] = blend_shader->address;
395         }
396 }
397 
398 static const struct pan_blit_shader_data *
pan_blitter_get_blit_shader(struct panfrost_device * dev,const struct pan_blit_shader_key * key)399 pan_blitter_get_blit_shader(struct panfrost_device *dev,
400                             const struct pan_blit_shader_key *key)
401 {
402         pthread_mutex_lock(&dev->blitter.shaders.lock);
403         struct hash_entry *he = _mesa_hash_table_search(dev->blitter.shaders.blit, key);
404         struct pan_blit_shader_data *shader = he ? he->data : NULL;
405 
406         if (shader)
407                 goto out;
408 
409         unsigned coord_comps = 0;
410         unsigned sig_offset = 0;
411         char sig[256];
412         bool first = true;
413         for (unsigned i = 0; i < ARRAY_SIZE(key->surfaces); i++) {
414                 const char *type_str, *dim_str;
415                 if (key->surfaces[i].type == nir_type_invalid)
416                         continue;
417 
418                 switch (key->surfaces[i].type) {
419                 case nir_type_float32: type_str = "float"; break;
420                 case nir_type_uint32: type_str = "uint"; break;
421                 case nir_type_int32: type_str = "int"; break;
422                 default: unreachable("Invalid type\n");
423                 }
424 
425                 switch (key->surfaces[i].dim) {
426                 case MALI_TEXTURE_DIMENSION_CUBE: dim_str = "cube"; break;
427                 case MALI_TEXTURE_DIMENSION_1D: dim_str = "1D"; break;
428                 case MALI_TEXTURE_DIMENSION_2D: dim_str = "2D"; break;
429                 case MALI_TEXTURE_DIMENSION_3D: dim_str = "3D"; break;
430                 default: unreachable("Invalid dim\n");
431                 }
432 
433                 coord_comps = MAX2(coord_comps,
434                                    (key->surfaces[i].dim ? : 3) +
435                                    (key->surfaces[i].array ? 1 : 0));
436                 first = false;
437 
438                 if (sig_offset >= sizeof(sig))
439                         continue;
440 
441                 sig_offset += snprintf(sig + sig_offset, sizeof(sig) - sig_offset,
442                                        "%s[%s;%s;%s%s;src_samples=%d,dst_samples=%d]",
443                                        first ? "" : ",",
444                                        gl_frag_result_name(key->surfaces[i].loc),
445                                        type_str, dim_str,
446                                        key->surfaces[i].array ? "[]" : "",
447                                        key->surfaces[i].src_samples,
448                                        key->surfaces[i].dst_samples);
449         }
450 
451         nir_builder b =
452                 nir_builder_init_simple_shader(MESA_SHADER_FRAGMENT,
453                                                GENX(pan_shader_get_compiler_options)(),
454                                                "pan_blit(%s)", sig);
455         b.shader->info.internal = true;
456 
457         nir_variable *coord_var =
458                 nir_variable_create(b.shader, nir_var_shader_in,
459                                     glsl_vector_type(GLSL_TYPE_FLOAT, coord_comps),
460                                     "coord");
461         coord_var->data.location = VARYING_SLOT_TEX0;
462 
463         nir_ssa_def *coord = nir_load_var(&b, coord_var);
464 
465         unsigned active_count = 0;
466         for (unsigned i = 0; i < ARRAY_SIZE(key->surfaces); i++) {
467                 if (key->surfaces[i].type == nir_type_invalid)
468                         continue;
469 
470                 /* Resolve operations only work for N -> 1 samples. */
471                 assert(key->surfaces[i].dst_samples == 1 ||
472                        key->surfaces[i].src_samples == key->surfaces[i].dst_samples);
473 
474                 static const char *out_names[] = {
475                         "out0", "out1", "out2", "out3", "out4", "out5", "out6", "out7",
476                 };
477 
478                 unsigned ncomps = key->surfaces[i].loc >= FRAG_RESULT_DATA0 ? 4 : 1;
479                 nir_variable *out =
480                         nir_variable_create(b.shader, nir_var_shader_out,
481                                             glsl_vector_type(GLSL_TYPE_FLOAT, ncomps),
482                                             out_names[active_count]);
483                 out->data.location = key->surfaces[i].loc;
484                 out->data.driver_location = active_count;
485 
486                 bool resolve = key->surfaces[i].src_samples > key->surfaces[i].dst_samples;
487                 bool ms = key->surfaces[i].src_samples > 1;
488                 enum glsl_sampler_dim sampler_dim;
489 
490                 switch (key->surfaces[i].dim) {
491                 case MALI_TEXTURE_DIMENSION_1D:
492                         sampler_dim = GLSL_SAMPLER_DIM_1D;
493                         break;
494                 case MALI_TEXTURE_DIMENSION_2D:
495                         sampler_dim = ms ?
496                                       GLSL_SAMPLER_DIM_MS :
497                                       GLSL_SAMPLER_DIM_2D;
498                         break;
499                 case MALI_TEXTURE_DIMENSION_3D:
500                         sampler_dim = GLSL_SAMPLER_DIM_3D;
501                         break;
502                 case MALI_TEXTURE_DIMENSION_CUBE:
503                         sampler_dim = GLSL_SAMPLER_DIM_CUBE;
504                         break;
505                 }
506 
507                 nir_ssa_def *res = NULL;
508 
509                 if (resolve) {
510                         /* When resolving a float type, we need to calculate
511                          * the average of all samples. For integer resolve, GL
512                          * and Vulkan say that one sample should be chosen
513                          * without telling which. Let's just pick the first one
514                          * in that case.
515                          */
516                         nir_alu_type base_type =
517                                 nir_alu_type_get_base_type(key->surfaces[i].type);
518                         unsigned nsamples = base_type == nir_type_float ?
519                                             key->surfaces[i].src_samples : 1;
520 
521                         for (unsigned s = 0; s < nsamples; s++) {
522                                 nir_tex_instr *tex = nir_tex_instr_create(b.shader, 3);
523 
524                                 tex->op = nir_texop_txf_ms;
525                                 tex->dest_type = key->surfaces[i].type;
526                                 tex->texture_index = active_count;
527                                 tex->is_array = key->surfaces[i].array;
528                                 tex->sampler_dim = sampler_dim;
529 
530                                 tex->src[0].src_type = nir_tex_src_coord;
531                                 tex->src[0].src = nir_src_for_ssa(nir_f2i32(&b, coord));
532                                 tex->coord_components = coord_comps;
533 
534                                 tex->src[1].src_type = nir_tex_src_ms_index;
535                                 tex->src[1].src = nir_src_for_ssa(nir_imm_int(&b, s));
536 
537                                 tex->src[2].src_type = nir_tex_src_lod;
538                                 tex->src[2].src = nir_src_for_ssa(nir_imm_int(&b, 0));
539                                 nir_ssa_dest_init(&tex->instr, &tex->dest, 4, 32, NULL);
540                                 nir_builder_instr_insert(&b, &tex->instr);
541 
542                                 res = res ? nir_fadd(&b, res, &tex->dest.ssa) : &tex->dest.ssa;
543 			}
544 
545                         if (base_type == nir_type_float) {
546                                 unsigned type_sz =
547                                         nir_alu_type_get_type_size(key->surfaces[i].type);
548                                 res = nir_fmul(&b, res,
549                                                nir_imm_floatN_t(&b, 1.0f / nsamples, type_sz));
550                         }
551                 } else {
552                         nir_tex_instr *tex =
553                                 nir_tex_instr_create(b.shader, ms ? 3 : 1);
554 
555                         tex->dest_type = key->surfaces[i].type;
556                         tex->texture_index = active_count;
557                         tex->is_array = key->surfaces[i].array;
558                         tex->sampler_dim = sampler_dim;
559 
560                         if (ms) {
561                                 tex->op = nir_texop_txf_ms;
562 
563                                 tex->src[0].src_type = nir_tex_src_coord;
564                                 tex->src[0].src = nir_src_for_ssa(nir_f2i32(&b, coord));
565                                 tex->coord_components = coord_comps;
566 
567                                 tex->src[1].src_type = nir_tex_src_ms_index;
568                                 tex->src[1].src = nir_src_for_ssa(nir_load_sample_id(&b));
569 
570                                 tex->src[2].src_type = nir_tex_src_lod;
571                                 tex->src[2].src = nir_src_for_ssa(nir_imm_int(&b, 0));
572                         } else {
573                                 tex->op = nir_texop_tex;
574 
575                                 tex->src[0].src_type = nir_tex_src_coord;
576                                 tex->src[0].src = nir_src_for_ssa(coord);
577                                 tex->coord_components = coord_comps;
578                         }
579 
580                         nir_ssa_dest_init(&tex->instr, &tex->dest, 4, 32, NULL);
581                         nir_builder_instr_insert(&b, &tex->instr);
582                         res = &tex->dest.ssa;
583                 }
584 
585                 assert(res);
586 
587                 if (key->surfaces[i].loc >= FRAG_RESULT_DATA0) {
588                         nir_store_var(&b, out, res, 0xFF);
589                 } else {
590                         unsigned c = key->surfaces[i].loc == FRAG_RESULT_STENCIL ? 1 : 0;
591                         nir_store_var(&b, out, nir_channel(&b, res, c), 0xFF);
592                 }
593                 active_count++;
594         }
595 
596         struct panfrost_compile_inputs inputs = {
597                 .gpu_id = dev->gpu_id,
598                 .is_blit = true,
599         };
600         struct util_dynarray binary;
601         struct pan_shader_info info;
602 
603         util_dynarray_init(&binary, NULL);
604 
605         GENX(pan_shader_compile)(b.shader, &inputs, &binary, &info);
606 
607         shader = rzalloc(dev->blitter.shaders.blit,
608                          struct pan_blit_shader_data);
609         shader->key = *key;
610         shader->address =
611                 pan_pool_upload_aligned(dev->blitter.shaders.pool,
612                                         binary.data, binary.size,
613                                         PAN_ARCH >= 6 ? 128 : 64);
614 
615         util_dynarray_fini(&binary);
616         ralloc_free(b.shader);
617 
618 #if PAN_ARCH <= 5
619         shader->address |= info.midgard.first_tag;
620 #else
621         for (unsigned i = 0; i < ARRAY_SIZE(shader->blend_ret_offsets); i++) {
622                 shader->blend_ret_offsets[i] = info.bifrost.blend[i].return_offset;
623                 shader->blend_types[i] = info.bifrost.blend[i].type;
624         }
625 #endif
626 
627         _mesa_hash_table_insert(dev->blitter.shaders.blit, &shader->key, shader);
628 
629 out:
630         pthread_mutex_unlock(&dev->blitter.shaders.lock);
631         return shader;
632 }
633 
634 static mali_ptr
pan_blitter_get_rsd(struct panfrost_device * dev,unsigned rt_count,const struct pan_image_view ** src_rts,const struct pan_image_view ** dst_rts,const struct pan_image_view * src_z,const struct pan_image_view * dst_z,const struct pan_image_view * src_s,const struct pan_image_view * dst_s)635 pan_blitter_get_rsd(struct panfrost_device *dev,
636                     unsigned rt_count,
637                     const struct pan_image_view **src_rts,
638                     const struct pan_image_view **dst_rts,
639                     const struct pan_image_view *src_z,
640                     const struct pan_image_view *dst_z,
641                     const struct pan_image_view *src_s,
642                     const struct pan_image_view *dst_s)
643 {
644         struct pan_blit_rsd_key rsd_key = { 0 };
645 
646         assert(!rt_count || (!src_z && !src_s));
647 
648         struct pan_blit_shader_key blit_key = { 0 };
649 
650         if (src_z) {
651                 assert(dst_z);
652                 rsd_key.z.format = dst_z->format;
653                 blit_key.surfaces[0].loc = FRAG_RESULT_DEPTH;
654                 rsd_key.z.type = blit_key.surfaces[0].type = nir_type_float32;
655                 rsd_key.z.src_samples = blit_key.surfaces[0].src_samples = src_z->image->layout.nr_samples;
656                 rsd_key.z.dst_samples = blit_key.surfaces[0].dst_samples = dst_z->image->layout.nr_samples;
657                 rsd_key.z.dim = blit_key.surfaces[0].dim = src_z->dim;
658                 rsd_key.z.array = blit_key.surfaces[0].array = src_z->first_layer != src_z->last_layer;
659         }
660 
661         if (src_s) {
662                 assert(dst_s);
663                 rsd_key.s.format = dst_s->format;
664                 blit_key.surfaces[1].loc = FRAG_RESULT_STENCIL;
665                 rsd_key.s.type = blit_key.surfaces[1].type = nir_type_uint32;
666                 rsd_key.s.src_samples = blit_key.surfaces[1].src_samples = src_s->image->layout.nr_samples;
667                 rsd_key.s.dst_samples = blit_key.surfaces[1].dst_samples = dst_s->image->layout.nr_samples;
668                 rsd_key.s.dim = blit_key.surfaces[1].dim = src_s->dim;
669                 rsd_key.s.array = blit_key.surfaces[1].array = src_s->first_layer != src_s->last_layer;
670         }
671 
672         for (unsigned i = 0; i < rt_count; i++) {
673                 if (!src_rts[i])
674                         continue;
675 
676                 assert(dst_rts[i]);
677                 rsd_key.rts[i].format = dst_rts[i]->format;
678                 blit_key.surfaces[i].loc = FRAG_RESULT_DATA0 + i;
679                 rsd_key.rts[i].type = blit_key.surfaces[i].type =
680                         util_format_is_pure_uint(src_rts[i]->format) ? nir_type_uint32 :
681                         util_format_is_pure_sint(src_rts[i]->format) ? nir_type_int32 :
682                         nir_type_float32;
683                 rsd_key.rts[i].src_samples = blit_key.surfaces[i].src_samples = src_rts[i]->image->layout.nr_samples;
684                 rsd_key.rts[i].dst_samples = blit_key.surfaces[i].dst_samples = dst_rts[i]->image->layout.nr_samples;
685                 rsd_key.rts[i].dim = blit_key.surfaces[i].dim = src_rts[i]->dim;
686                 rsd_key.rts[i].array = blit_key.surfaces[i].array = src_rts[i]->first_layer != src_rts[i]->last_layer;
687         }
688 
689         pthread_mutex_lock(&dev->blitter.rsds.lock);
690         struct hash_entry *he =
691                 _mesa_hash_table_search(dev->blitter.rsds.rsds, &rsd_key);
692         struct pan_blit_rsd_data *rsd = he ? he->data : NULL;
693         if (rsd)
694                 goto out;
695 
696         rsd = rzalloc(dev->blitter.rsds.rsds, struct pan_blit_rsd_data);
697         rsd->key = rsd_key;
698 
699         unsigned bd_count = PAN_ARCH >= 5 ? MAX2(rt_count, 1) : 0;
700         struct panfrost_ptr rsd_ptr =
701                 pan_pool_alloc_desc_aggregate(dev->blitter.rsds.pool,
702                                               PAN_DESC(RENDERER_STATE),
703                                               PAN_DESC_ARRAY(bd_count, BLEND));
704 
705         mali_ptr blend_shaders[8] = { 0 };
706 
707         const struct pan_blit_shader_data *blit_shader =
708                 pan_blitter_get_blit_shader(dev, &blit_key);
709 
710         pan_blitter_get_blend_shaders(dev, rt_count, dst_rts,
711                                       blit_shader, blend_shaders);
712 
713         pan_blitter_emit_rsd(dev, blit_shader,
714                              MAX2(rt_count, 1), dst_rts, blend_shaders,
715                              dst_z, dst_s, rsd_ptr.cpu);
716         rsd->address = rsd_ptr.gpu;
717         _mesa_hash_table_insert(dev->blitter.rsds.rsds, &rsd->key, rsd);
718 
719 out:
720         pthread_mutex_unlock(&dev->blitter.rsds.lock);
721         return rsd->address;
722 }
723 
724 static mali_ptr
pan_preload_get_rsd(struct panfrost_device * dev,const struct pan_fb_info * fb,bool zs)725 pan_preload_get_rsd(struct panfrost_device *dev,
726                     const struct pan_fb_info *fb,
727                     bool zs)
728 {
729         const struct pan_image_view *rts[8] = { NULL };
730         const struct pan_image_view *z = NULL, *s = NULL;
731         struct pan_image_view patched_s_view;
732         unsigned rt_count = 0;
733 
734         if (zs) {
735                 if (fb->zs.preload.z)
736                         z = fb->zs.view.zs;
737 
738                 if (fb->zs.preload.s) {
739                         const struct pan_image_view *view = fb->zs.view.s ? : fb->zs.view.zs;
740                         enum pipe_format fmt = util_format_get_depth_only(view->format);
741 
742                         switch (view->format) {
743                         case PIPE_FORMAT_Z24_UNORM_S8_UINT: fmt = PIPE_FORMAT_X24S8_UINT; break;
744                         case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT: fmt = PIPE_FORMAT_X32_S8X24_UINT; break;
745                         default: fmt = view->format; break;
746                         }
747 
748                         if (fmt != view->format) {
749                                 patched_s_view = *view;
750                                 patched_s_view.format = fmt;
751                                 s = &patched_s_view;
752                         } else {
753                                 s = view;
754                         }
755                 }
756         } else {
757                 for (unsigned i = 0; i < fb->rt_count; i++) {
758                         if (fb->rts[i].preload)
759                                 rts[i] = fb->rts[i].view;
760                 }
761 
762                 rt_count = fb->rt_count;
763         }
764 
765         return pan_blitter_get_rsd(dev, rt_count, rts, rts, z, z, s, s);
766 }
767 
768 static mali_ptr
pan_blit_get_rsd(struct panfrost_device * dev,const struct pan_image_view * src_views,const struct pan_image_view * dst_view)769 pan_blit_get_rsd(struct panfrost_device *dev,
770                  const struct pan_image_view *src_views,
771                  const struct pan_image_view *dst_view)
772 {
773         const struct util_format_description *desc =
774                 util_format_description(src_views[0].format);
775         const struct pan_image_view *src_rt = NULL, *dst_rt = NULL;
776         const struct pan_image_view *src_z = NULL, *dst_z = NULL;
777         const struct pan_image_view *src_s = NULL, *dst_s = NULL;
778 
779         if (util_format_has_depth(desc)) {
780                 src_z = &src_views[0];
781                 dst_z = dst_view;
782         }
783 
784         if (src_views[1].format) {
785                 src_s = &src_views[1];
786                 dst_s = dst_view;
787         } else if (util_format_has_stencil(desc)) {
788                 src_s = &src_views[0];
789                 dst_s = dst_view;
790         }
791 
792         if (!src_z && !src_s) {
793                 src_rt = &src_views[0];
794                 dst_rt = dst_view;
795         }
796 
797         return pan_blitter_get_rsd(dev, src_rt ? 1 : 0, &src_rt, &dst_rt,
798                                    src_z, dst_z, src_s, dst_s);
799 }
800 
801 static bool
pan_preload_needed(const struct pan_fb_info * fb,bool zs)802 pan_preload_needed(const struct pan_fb_info *fb, bool zs)
803 {
804         if (zs) {
805                 if (fb->zs.preload.z || fb->zs.preload.s)
806                         return true;
807         } else {
808                 for (unsigned i = 0; i < fb->rt_count; i++) {
809                         if (fb->rts[i].preload)
810                                 return true;
811                 }
812         }
813 
814         return false;
815 }
816 
817 static void
pan_blitter_emit_varying(struct pan_pool * pool,mali_ptr coordinates,struct MALI_DRAW * draw)818 pan_blitter_emit_varying(struct pan_pool *pool,
819                          mali_ptr coordinates,
820                          struct MALI_DRAW *draw)
821 {
822         /* Bifrost needs an empty desc to mark end of prefetching */
823         bool padding_buffer = PAN_ARCH >= 6;
824 
825         struct panfrost_ptr varying =
826                 pan_pool_alloc_desc(pool, ATTRIBUTE);
827         struct panfrost_ptr varying_buffer =
828                 pan_pool_alloc_desc_array(pool, (padding_buffer ? 2 : 1),
829                                           ATTRIBUTE_BUFFER);
830 
831         pan_pack(varying_buffer.cpu, ATTRIBUTE_BUFFER, cfg) {
832                 cfg.pointer = coordinates;
833                 cfg.stride = 4 * sizeof(float);
834                 cfg.size = cfg.stride * 4;
835         }
836 
837         if (padding_buffer) {
838                 pan_pack(varying_buffer.cpu + pan_size(ATTRIBUTE_BUFFER),
839                          ATTRIBUTE_BUFFER, cfg);
840         }
841 
842         pan_pack(varying.cpu, ATTRIBUTE, cfg) {
843                 cfg.buffer_index = 0;
844                 cfg.offset_enable = PAN_ARCH <= 5;
845                 cfg.format = pool->dev->formats[PIPE_FORMAT_R32G32B32_FLOAT].hw;
846         }
847 
848         draw->varyings = varying.gpu;
849         draw->varying_buffers = varying_buffer.gpu;
850 }
851 
852 static mali_ptr
pan_blitter_emit_sampler(struct pan_pool * pool,bool nearest_filter)853 pan_blitter_emit_sampler(struct pan_pool *pool,
854                          bool nearest_filter)
855 {
856         struct panfrost_ptr sampler =
857                  pan_pool_alloc_desc(pool, SAMPLER);
858 
859         pan_pack(sampler.cpu, SAMPLER, cfg) {
860                 cfg.seamless_cube_map = false;
861                 cfg.normalized_coordinates = false;
862                 cfg.minify_nearest = nearest_filter;
863                 cfg.magnify_nearest = nearest_filter;
864         }
865 
866         return sampler.gpu;
867 }
868 
869 static mali_ptr
pan_blitter_emit_textures(struct pan_pool * pool,unsigned tex_count,const struct pan_image_view ** views)870 pan_blitter_emit_textures(struct pan_pool *pool,
871                           unsigned tex_count,
872                           const struct pan_image_view **views)
873 {
874 #if PAN_ARCH >= 6
875         struct panfrost_ptr textures =
876                 pan_pool_alloc_desc_array(pool, tex_count, TEXTURE);
877 
878         for (unsigned i = 0; i < tex_count; i++) {
879                 void *texture = textures.cpu + (pan_size(TEXTURE) * i);
880                 size_t payload_size =
881                         GENX(panfrost_estimate_texture_payload_size)(views[i]);
882                 struct panfrost_ptr surfaces =
883                         pan_pool_alloc_aligned(pool, payload_size,
884                                                pan_alignment(SURFACE_WITH_STRIDE));
885 
886                 GENX(panfrost_new_texture)(pool->dev, views[i], texture, &surfaces);
887         }
888 
889         return textures.gpu;
890 #else
891         mali_ptr textures[8] = { 0 };
892 
893         for (unsigned i = 0; i < tex_count; i++) {
894                 size_t sz = pan_size(TEXTURE) +
895                             GENX(panfrost_estimate_texture_payload_size)(views[i]);
896                 struct panfrost_ptr texture =
897                         pan_pool_alloc_aligned(pool, sz, pan_alignment(TEXTURE));
898                 struct panfrost_ptr surfaces = {
899                         .cpu = texture.cpu + pan_size(TEXTURE),
900                         .gpu = texture.gpu + pan_size(TEXTURE),
901                 };
902 
903                 GENX(panfrost_new_texture)(pool->dev, views[i], texture.cpu, &surfaces);
904                 textures[i] = texture.gpu;
905         }
906 
907         return pan_pool_upload_aligned(pool, textures,
908                                        tex_count * sizeof(mali_ptr),
909                                        sizeof(mali_ptr));
910 #endif
911 }
912 
913 static void
pan_preload_emit_textures(struct pan_pool * pool,const struct pan_fb_info * fb,bool zs,struct MALI_DRAW * draw)914 pan_preload_emit_textures(struct pan_pool *pool,
915                           const struct pan_fb_info *fb, bool zs,
916                           struct MALI_DRAW *draw)
917 {
918         const struct pan_image_view *views[8];
919         struct pan_image_view patched_s_view;
920         unsigned tex_count = 0;
921 
922         if (zs) {
923                 if (fb->zs.preload.z)
924                         views[tex_count++] = fb->zs.view.zs;
925 
926                 if (fb->zs.preload.s) {
927                         const struct pan_image_view *view = fb->zs.view.s ? : fb->zs.view.zs;
928                         enum pipe_format fmt = util_format_get_depth_only(view->format);
929 
930                         switch (view->format) {
931                         case PIPE_FORMAT_Z24_UNORM_S8_UINT: fmt = PIPE_FORMAT_X24S8_UINT; break;
932                         case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT: fmt = PIPE_FORMAT_X32_S8X24_UINT; break;
933                         default: fmt = view->format; break;
934                         }
935 
936                         if (fmt != view->format) {
937                                 patched_s_view = *view;
938                                 patched_s_view.format = fmt;
939                                 view = &patched_s_view;
940                         }
941                         views[tex_count++] = view;
942                 }
943         } else {
944                 for (unsigned i = 0; i < fb->rt_count; i++) {
945                         if (fb->rts[i].preload)
946                                 views[tex_count++] = fb->rts[i].view;
947                 }
948 
949         }
950 
951         draw->textures = pan_blitter_emit_textures(pool, tex_count, views);
952 }
953 
954 static mali_ptr
pan_blitter_emit_viewport(struct pan_pool * pool,uint16_t minx,uint16_t miny,uint16_t maxx,uint16_t maxy)955 pan_blitter_emit_viewport(struct pan_pool *pool,
956                           uint16_t minx, uint16_t miny,
957                           uint16_t maxx, uint16_t maxy)
958 {
959         struct panfrost_ptr vp = pan_pool_alloc_desc(pool, VIEWPORT);
960 
961         pan_pack(vp.cpu, VIEWPORT, cfg) {
962                 cfg.scissor_minimum_x = minx;
963                 cfg.scissor_minimum_y = miny;
964                 cfg.scissor_maximum_x = maxx;
965                 cfg.scissor_maximum_y = maxy;
966         }
967 
968         return vp.gpu;
969 }
970 
971 static void
pan_preload_emit_dcd(struct pan_pool * pool,struct pan_fb_info * fb,bool zs,mali_ptr coordinates,mali_ptr tsd,mali_ptr rsd,void * out,bool always_write)972 pan_preload_emit_dcd(struct pan_pool *pool,
973                      struct pan_fb_info *fb, bool zs,
974                      mali_ptr coordinates,
975                      mali_ptr tsd, mali_ptr rsd,
976                      void *out, bool always_write)
977 {
978         pan_pack(out, DRAW, cfg) {
979                 cfg.four_components_per_vertex = true;
980                 cfg.draw_descriptor_is_64b = true;
981                 cfg.thread_storage = tsd;
982                 cfg.state = rsd;
983 
984                 cfg.position = coordinates;
985                 pan_blitter_emit_varying(pool, coordinates, &cfg);
986                 uint16_t minx = 0, miny = 0, maxx, maxy;
987 
988 #if PAN_ARCH == 4
989                 maxx = fb->width - 1;
990                 maxy = fb->height - 1;
991 #else
992                 /* Align on 32x32 tiles */
993                 minx = fb->extent.minx & ~31;
994                 miny = fb->extent.miny & ~31;
995                 maxx = MIN2(ALIGN_POT(fb->extent.maxx + 1, 32), fb->width) - 1;
996                 maxy = MIN2(ALIGN_POT(fb->extent.maxy + 1, 32), fb->height) - 1;
997 #endif
998 
999                 cfg.viewport =
1000                         pan_blitter_emit_viewport(pool, minx, miny, maxx, maxy);
1001 
1002                 pan_preload_emit_textures(pool, fb, zs, &cfg);
1003 
1004                 cfg.samplers = pan_blitter_emit_sampler(pool, true);
1005 
1006 #if PAN_ARCH >= 6
1007                 /* Tiles updated by blit shaders are still considered
1008                  * clean (separate for colour and Z/S), allowing us to
1009                  * suppress unnecessary writeback */
1010                 cfg.clean_fragment_write = !always_write;
1011 #endif
1012         }
1013 }
1014 
1015 static void
pan_blit_emit_dcd(struct pan_pool * pool,mali_ptr src_coords,mali_ptr dst_coords,mali_ptr textures,mali_ptr samplers,mali_ptr vpd,mali_ptr tsd,mali_ptr rsd,void * out)1016 pan_blit_emit_dcd(struct pan_pool *pool,
1017                   mali_ptr src_coords, mali_ptr dst_coords,
1018                   mali_ptr textures, mali_ptr samplers,
1019                   mali_ptr vpd, mali_ptr tsd, mali_ptr rsd,
1020                   void *out)
1021 {
1022         pan_pack(out, DRAW, cfg) {
1023                 cfg.four_components_per_vertex = true;
1024                 cfg.draw_descriptor_is_64b = true;
1025                 cfg.thread_storage = tsd;
1026                 cfg.state = rsd;
1027 
1028                 cfg.position = dst_coords;
1029                 pan_blitter_emit_varying(pool, src_coords, &cfg);
1030                 cfg.viewport = vpd;
1031                 cfg.textures = textures;
1032                 cfg.samplers = samplers;
1033         }
1034 }
1035 
1036 static struct panfrost_ptr
pan_blit_emit_tiler_job(struct pan_pool * desc_pool,struct pan_scoreboard * scoreboard,mali_ptr src_coords,mali_ptr dst_coords,mali_ptr textures,mali_ptr samplers,mali_ptr vpd,mali_ptr rsd,mali_ptr tsd,mali_ptr tiler)1037 pan_blit_emit_tiler_job(struct pan_pool *desc_pool,
1038                         struct pan_scoreboard *scoreboard,
1039                         mali_ptr src_coords, mali_ptr dst_coords,
1040                         mali_ptr textures, mali_ptr samplers,
1041                         mali_ptr vpd, mali_ptr rsd, mali_ptr tsd,
1042                         mali_ptr tiler)
1043 {
1044         struct panfrost_ptr job =
1045                 pan_pool_alloc_desc(desc_pool, TILER_JOB);
1046 
1047         pan_blit_emit_dcd(desc_pool,
1048                           src_coords, dst_coords, textures, samplers,
1049                           vpd, tsd, rsd,
1050                           pan_section_ptr(job.cpu, TILER_JOB, DRAW));
1051 
1052         pan_section_pack(job.cpu, TILER_JOB, PRIMITIVE, cfg) {
1053                 cfg.draw_mode = MALI_DRAW_MODE_TRIANGLE_STRIP;
1054                 cfg.index_count = 4;
1055                 cfg.job_task_split = 6;
1056         }
1057 
1058         pan_section_pack(job.cpu, TILER_JOB, PRIMITIVE_SIZE, cfg) {
1059                 cfg.constant = 1.0f;
1060         }
1061 
1062         void *invoc = pan_section_ptr(job.cpu,
1063                                       TILER_JOB,
1064                                       INVOCATION);
1065         panfrost_pack_work_groups_compute(invoc, 1, 4,
1066                                           1, 1, 1, 1, true, false);
1067 
1068 #if PAN_ARCH >= 6
1069         pan_section_pack(job.cpu, TILER_JOB, PADDING, cfg);
1070         pan_section_pack(job.cpu, TILER_JOB, TILER, cfg) {
1071                 cfg.address = tiler;
1072         }
1073 #endif
1074 
1075         panfrost_add_job(desc_pool, scoreboard, MALI_JOB_TYPE_TILER,
1076                          false, false, 0, 0, &job, false);
1077         return job;
1078 }
1079 
1080 #if PAN_ARCH >= 6
1081 static void
pan_preload_fb_alloc_pre_post_dcds(struct pan_pool * desc_pool,struct pan_fb_info * fb)1082 pan_preload_fb_alloc_pre_post_dcds(struct pan_pool *desc_pool,
1083                                    struct pan_fb_info *fb)
1084 {
1085         if (fb->bifrost.pre_post.dcds.gpu)
1086                 return;
1087 
1088         fb->bifrost.pre_post.dcds =
1089                 pan_pool_alloc_desc_array(desc_pool, 3, DRAW);
1090 }
1091 
1092 static void
pan_preload_emit_pre_frame_dcd(struct pan_pool * desc_pool,struct pan_fb_info * fb,bool zs,mali_ptr coords,mali_ptr rsd,mali_ptr tsd)1093 pan_preload_emit_pre_frame_dcd(struct pan_pool *desc_pool,
1094                                struct pan_fb_info *fb, bool zs,
1095                                mali_ptr coords, mali_ptr rsd,
1096                                mali_ptr tsd)
1097 {
1098         unsigned dcd_idx = zs ? 0 : 1;
1099         pan_preload_fb_alloc_pre_post_dcds(desc_pool, fb);
1100         assert(fb->bifrost.pre_post.dcds.cpu);
1101         void *dcd = fb->bifrost.pre_post.dcds.cpu +
1102                     (dcd_idx * pan_size(DRAW));
1103 
1104         int crc_rt = GENX(pan_select_crc_rt)(fb);
1105 
1106         bool always_write = false;
1107 
1108         /* If CRC data is currently invalid and this batch will make it valid,
1109          * write even clean tiles to make sure CRC data is updated. */
1110         if (crc_rt >= 0) {
1111                 bool *valid = fb->rts[crc_rt].crc_valid;
1112                 bool full = !fb->extent.minx && !fb->extent.miny &&
1113                         fb->extent.maxx == (fb->width - 1) &&
1114                         fb->extent.maxy == (fb->height - 1);
1115 
1116                 if (full && !(*valid))
1117                         always_write = true;
1118         }
1119 
1120         pan_preload_emit_dcd(desc_pool, fb, zs, coords, tsd, rsd, dcd, always_write);
1121         if (zs) {
1122                 enum pipe_format fmt = fb->zs.view.zs ?
1123                                        fb->zs.view.zs->image->layout.format :
1124                                        fb->zs.view.s->image->layout.format;
1125                 bool always = false;
1126 
1127                 /* If we're dealing with a combined ZS resource and only one
1128                  * component is cleared, we need to reload the whole surface
1129                  * because the zs_clean_pixel_write_enable flag is set in that
1130                  * case.
1131                  */
1132                 if (util_format_is_depth_and_stencil(fmt) &&
1133                     fb->zs.clear.z != fb->zs.clear.s)
1134                         always = true;
1135 
1136                 /* We could use INTERSECT on Bifrost v7 too, but
1137                  * EARLY_ZS_ALWAYS has the advantage of reloading the ZS tile
1138                  * buffer one or more tiles ahead, making ZS data immediately
1139                  * available for any ZS tests taking place in other shaders.
1140                  * Thing's haven't been benchmarked to determine what's
1141                  * preferable (saving bandwidth vs having ZS preloaded
1142                  * earlier), so let's leave it like that for now.
1143                  */
1144                 fb->bifrost.pre_post.modes[dcd_idx] =
1145                         desc_pool->dev->arch > 6 ?
1146                         MALI_PRE_POST_FRAME_SHADER_MODE_EARLY_ZS_ALWAYS :
1147                         always ? MALI_PRE_POST_FRAME_SHADER_MODE_ALWAYS :
1148                         MALI_PRE_POST_FRAME_SHADER_MODE_INTERSECT;
1149         } else {
1150                 fb->bifrost.pre_post.modes[dcd_idx] =
1151                         always_write ? MALI_PRE_POST_FRAME_SHADER_MODE_ALWAYS :
1152                         MALI_PRE_POST_FRAME_SHADER_MODE_INTERSECT;
1153         }
1154 }
1155 #else
1156 static struct panfrost_ptr
pan_preload_emit_tiler_job(struct pan_pool * desc_pool,struct pan_scoreboard * scoreboard,struct pan_fb_info * fb,bool zs,mali_ptr coords,mali_ptr rsd,mali_ptr tsd)1157 pan_preload_emit_tiler_job(struct pan_pool *desc_pool,
1158                            struct pan_scoreboard *scoreboard,
1159                            struct pan_fb_info *fb, bool zs,
1160                            mali_ptr coords, mali_ptr rsd, mali_ptr tsd)
1161 {
1162         struct panfrost_ptr job =
1163                 pan_pool_alloc_desc(desc_pool, TILER_JOB);
1164 
1165         pan_preload_emit_dcd(desc_pool, fb, zs, coords, tsd, rsd,
1166                              pan_section_ptr(job.cpu, TILER_JOB, DRAW),
1167                              false);
1168 
1169         pan_section_pack(job.cpu, TILER_JOB, PRIMITIVE, cfg) {
1170                 cfg.draw_mode = MALI_DRAW_MODE_TRIANGLE_STRIP;
1171                 cfg.index_count = 4;
1172                 cfg.job_task_split = 6;
1173         }
1174 
1175         pan_section_pack(job.cpu, TILER_JOB, PRIMITIVE_SIZE, cfg) {
1176                 cfg.constant = 1.0f;
1177         }
1178 
1179         void *invoc = pan_section_ptr(job.cpu,
1180                                       TILER_JOB,
1181                                       INVOCATION);
1182         panfrost_pack_work_groups_compute(invoc, 1, 4,
1183                                           1, 1, 1, 1, true, false);
1184 
1185         panfrost_add_job(desc_pool, scoreboard, MALI_JOB_TYPE_TILER,
1186                          false, false, 0, 0, &job, true);
1187         return job;
1188 }
1189 #endif
1190 
1191 static struct panfrost_ptr
pan_preload_fb_part(struct pan_pool * pool,struct pan_scoreboard * scoreboard,struct pan_fb_info * fb,bool zs,mali_ptr coords,mali_ptr tsd,mali_ptr tiler)1192 pan_preload_fb_part(struct pan_pool *pool,
1193                     struct pan_scoreboard *scoreboard,
1194                     struct pan_fb_info *fb, bool zs,
1195                     mali_ptr coords, mali_ptr tsd, mali_ptr tiler)
1196 {
1197         struct panfrost_device *dev = pool->dev;
1198         mali_ptr rsd = pan_preload_get_rsd(dev, fb, zs);
1199         struct panfrost_ptr job = { 0 };
1200 
1201 #if PAN_ARCH >= 6
1202         pan_preload_emit_pre_frame_dcd(pool, fb, zs,
1203                                        coords, rsd, tsd);
1204 #else
1205         job = pan_preload_emit_tiler_job(pool, scoreboard,
1206                                          fb, zs, coords, rsd, tsd);
1207 #endif
1208         return job;
1209 }
1210 
1211 unsigned
GENX(pan_preload_fb)1212 GENX(pan_preload_fb)(struct pan_pool *pool,
1213                      struct pan_scoreboard *scoreboard,
1214                      struct pan_fb_info *fb,
1215                      mali_ptr tsd, mali_ptr tiler,
1216                      struct panfrost_ptr *jobs)
1217 {
1218         bool preload_zs = pan_preload_needed(fb, true);
1219         bool preload_rts = pan_preload_needed(fb, false);
1220         mali_ptr coords;
1221 
1222         if (!preload_zs && !preload_rts)
1223                 return 0;
1224 
1225         float rect[] = {
1226                 0.0, 0.0, 0.0, 1.0,
1227                 fb->width, 0.0, 0.0, 1.0,
1228                 0.0, fb->height, 0.0, 1.0,
1229                 fb->width, fb->height, 0.0, 1.0,
1230         };
1231 
1232         coords = pan_pool_upload_aligned(pool, rect,
1233                                          sizeof(rect), 64);
1234 
1235         unsigned njobs = 0;
1236         if (preload_zs) {
1237                 struct panfrost_ptr job =
1238                         pan_preload_fb_part(pool, scoreboard, fb, true,
1239                                             coords, tsd, tiler);
1240                 if (jobs && job.cpu)
1241                         jobs[njobs++] = job;
1242         }
1243 
1244         if (preload_rts) {
1245                 struct panfrost_ptr job =
1246                         pan_preload_fb_part(pool, scoreboard, fb, false,
1247                                             coords, tsd, tiler);
1248                 if (jobs && job.cpu)
1249                         jobs[njobs++] = job;
1250         }
1251 
1252         return njobs;
1253 }
1254 
1255 void
GENX(pan_blit_ctx_init)1256 GENX(pan_blit_ctx_init)(struct panfrost_device *dev,
1257                         const struct pan_blit_info *info,
1258                         struct pan_pool *blit_pool,
1259                         struct pan_blit_context *ctx)
1260 {
1261         memset(ctx, 0, sizeof(*ctx));
1262 
1263         struct pan_image_view sviews[2] = {
1264                 {
1265                         .format = info->src.planes[0].format,
1266                         .image = info->src.planes[0].image,
1267                         .dim = info->src.planes[0].image->layout.dim == MALI_TEXTURE_DIMENSION_CUBE ?
1268                                MALI_TEXTURE_DIMENSION_2D : info->src.planes[0].image->layout.dim,
1269                         .first_level = info->src.level,
1270                         .last_level = info->src.level,
1271                         .first_layer = info->src.start.layer,
1272                         .last_layer = info->src.end.layer,
1273                         .swizzle = {
1274                                 PIPE_SWIZZLE_X, PIPE_SWIZZLE_Y,
1275                                 PIPE_SWIZZLE_Z, PIPE_SWIZZLE_W,
1276                         },
1277                 },
1278         };
1279 
1280         struct pan_image_view dview = {
1281                 .format = info->dst.planes[0].format,
1282                 .image = info->dst.planes[0].image,
1283                 .dim = info->dst.planes[0].image->layout.dim == MALI_TEXTURE_DIMENSION_1D ?
1284                        MALI_TEXTURE_DIMENSION_1D : MALI_TEXTURE_DIMENSION_2D,
1285                 .first_level = info->dst.level,
1286                 .last_level = info->dst.level,
1287                 .first_layer = info->dst.start.layer,
1288                 .last_layer = info->dst.start.layer,
1289                 .swizzle = {
1290                         PIPE_SWIZZLE_X, PIPE_SWIZZLE_Y,
1291                         PIPE_SWIZZLE_Z, PIPE_SWIZZLE_W,
1292                 },
1293         };
1294 
1295         ctx->src.start.x = info->src.start.x;
1296         ctx->src.start.y = info->src.start.y;
1297         ctx->src.end.x = info->src.end.x;
1298         ctx->src.end.y = info->src.end.y;
1299         ctx->src.dim = sviews[0].dim;
1300 
1301         if (info->dst.planes[0].image->layout.dim == MALI_TEXTURE_DIMENSION_3D) {
1302                 unsigned max_z = u_minify(info->dst.planes[0].image->layout.depth, info->dst.level) - 1;
1303 
1304                 ctx->z_scale = (float)(info->src.end.z - info->src.start.z) /
1305                                (info->dst.end.z - info->dst.start.z);
1306                 assert(info->dst.start.z != info->dst.end.z);
1307                 if (info->dst.start.z > info->dst.end.z) {
1308                         ctx->dst.cur_layer = info->dst.start.z - 1;
1309                         ctx->dst.last_layer = info->dst.end.z;
1310                 } else {
1311                         ctx->dst.cur_layer = info->dst.start.z;
1312                         ctx->dst.last_layer = info->dst.end.z - 1;
1313                 }
1314                 ctx->dst.cur_layer = MIN2(MAX2(ctx->dst.cur_layer, 0), max_z);
1315                 ctx->dst.last_layer = MIN2(MAX2(ctx->dst.last_layer, 0), max_z);
1316                 ctx->dst.layer_offset = ctx->dst.cur_layer;
1317         } else {
1318                 unsigned max_layer = info->dst.planes[0].image->layout.array_size - 1;
1319                 ctx->dst.layer_offset = info->dst.start.layer;
1320                 ctx->dst.cur_layer = info->dst.start.layer;
1321                 ctx->dst.last_layer = MIN2(info->dst.end.layer, max_layer);
1322                 ctx->z_scale = 1;
1323         }
1324 
1325         if (sviews[0].dim == MALI_TEXTURE_DIMENSION_3D) {
1326                 if (info->src.start.z < info->src.end.z)
1327                         ctx->src.z_offset = info->src.start.z + fabs(ctx->z_scale * 0.5f);
1328                 else
1329                         ctx->src.z_offset = info->src.start.z - fabs(ctx->z_scale * 0.5f);
1330         } else {
1331                 ctx->src.layer_offset = info->src.start.layer;
1332         }
1333 
1334         /* Split depth and stencil */
1335         if (util_format_is_depth_and_stencil(sviews[0].format)) {
1336                 sviews[1] = sviews[0];
1337                 sviews[0].format = util_format_get_depth_only(sviews[0].format);
1338                 sviews[1].format = util_format_stencil_only(sviews[1].format);
1339         } else if (info->src.planes[1].format) {
1340                 sviews[1] = sviews[0];
1341                 sviews[1].format = info->src.planes[1].format;
1342                 sviews[1].image = info->src.planes[1].image;
1343         }
1344 
1345         ctx->rsd = pan_blit_get_rsd(dev, sviews, &dview);
1346 
1347         ASSERTED unsigned nlayers = info->src.end.layer - info->src.start.layer + 1;
1348 
1349         assert(nlayers == (info->dst.end.layer - info->dst.start.layer + 1));
1350 
1351         unsigned dst_w = u_minify(info->dst.planes[0].image->layout.width, info->dst.level);
1352         unsigned dst_h = u_minify(info->dst.planes[0].image->layout.height, info->dst.level);
1353         unsigned maxx = MIN2(MAX2(info->dst.start.x, info->dst.end.x), dst_w - 1);
1354         unsigned maxy = MIN2(MAX2(info->dst.start.y, info->dst.end.y), dst_h - 1);
1355         unsigned minx = MAX2(MIN3(info->dst.start.x, info->dst.end.x, maxx), 0);
1356         unsigned miny = MAX2(MIN3(info->dst.start.y, info->dst.end.y, maxy), 0);
1357 
1358         if (info->scissor.enable) {
1359                 minx = MAX2(minx, info->scissor.minx);
1360                 miny = MAX2(miny, info->scissor.miny);
1361                 maxx = MIN2(maxx, info->scissor.maxx);
1362                 maxy = MIN2(maxy, info->scissor.maxy);
1363         }
1364 
1365         const struct pan_image_view *sview_ptrs[] = { &sviews[0], &sviews[1] };
1366         unsigned nviews = sviews[1].format ? 2 : 1;
1367 
1368         ctx->textures = pan_blitter_emit_textures(blit_pool, nviews, sview_ptrs);
1369         ctx->samplers = pan_blitter_emit_sampler(blit_pool, info->nearest);
1370 
1371         ctx->vpd = pan_blitter_emit_viewport(blit_pool,
1372                                              minx, miny, maxx, maxy);
1373 
1374         float dst_rect[] = {
1375                 info->dst.start.x, info->dst.start.y, 0.0, 1.0,
1376                 info->dst.end.x, info->dst.start.y, 0.0, 1.0,
1377                 info->dst.start.x, info->dst.end.y, 0.0, 1.0,
1378                 info->dst.end.x, info->dst.end.y, 0.0, 1.0,
1379         };
1380 
1381         ctx->position =
1382                 pan_pool_upload_aligned(blit_pool, dst_rect,
1383                                         sizeof(dst_rect), 64);
1384 }
1385 
1386 struct panfrost_ptr
GENX(pan_blit)1387 GENX(pan_blit)(struct pan_blit_context *ctx,
1388                struct pan_pool *pool,
1389                struct pan_scoreboard *scoreboard,
1390                mali_ptr tsd, mali_ptr tiler)
1391 {
1392         if (ctx->dst.cur_layer < 0 ||
1393             (ctx->dst.last_layer >= ctx->dst.layer_offset &&
1394              ctx->dst.cur_layer > ctx->dst.last_layer) ||
1395             (ctx->dst.last_layer < ctx->dst.layer_offset &&
1396              ctx->dst.cur_layer < ctx->dst.last_layer))
1397                 return (struct panfrost_ptr){ 0 };
1398 
1399         int32_t layer = ctx->dst.cur_layer - ctx->dst.layer_offset;
1400         float src_z;
1401         if (ctx->src.dim == MALI_TEXTURE_DIMENSION_3D)
1402                 src_z = (ctx->z_scale * layer) + ctx->src.z_offset;
1403         else
1404                 src_z = ctx->src.layer_offset + layer;
1405 
1406         float src_rect[] = {
1407                 ctx->src.start.x, ctx->src.start.y, src_z, 1.0,
1408                 ctx->src.end.x, ctx->src.start.y, src_z, 1.0,
1409                 ctx->src.start.x, ctx->src.end.y, src_z, 1.0,
1410                 ctx->src.end.x, ctx->src.end.y, src_z, 1.0,
1411         };
1412 
1413         mali_ptr src_coords =
1414                 pan_pool_upload_aligned(pool, src_rect,
1415                                         sizeof(src_rect), 64);
1416 
1417         return pan_blit_emit_tiler_job(pool, scoreboard,
1418                                        src_coords, ctx->position,
1419                                        ctx->textures, ctx->samplers,
1420                                        ctx->vpd, ctx->rsd, tsd, tiler);
1421 }
1422 
pan_blit_shader_key_hash(const void * key)1423 static uint32_t pan_blit_shader_key_hash(const void *key)
1424 {
1425         return _mesa_hash_data(key, sizeof(struct pan_blit_shader_key));
1426 }
1427 
pan_blit_shader_key_equal(const void * a,const void * b)1428 static bool pan_blit_shader_key_equal(const void *a, const void *b)
1429 {
1430         return !memcmp(a, b, sizeof(struct pan_blit_shader_key));
1431 }
1432 
pan_blit_blend_shader_key_hash(const void * key)1433 static uint32_t pan_blit_blend_shader_key_hash(const void *key)
1434 {
1435         return _mesa_hash_data(key, sizeof(struct pan_blit_blend_shader_key));
1436 }
1437 
pan_blit_blend_shader_key_equal(const void * a,const void * b)1438 static bool pan_blit_blend_shader_key_equal(const void *a, const void *b)
1439 {
1440         return !memcmp(a, b, sizeof(struct pan_blit_blend_shader_key));
1441 }
1442 
pan_blit_rsd_key_hash(const void * key)1443 static uint32_t pan_blit_rsd_key_hash(const void *key)
1444 {
1445         return _mesa_hash_data(key, sizeof(struct pan_blit_rsd_key));
1446 }
1447 
pan_blit_rsd_key_equal(const void * a,const void * b)1448 static bool pan_blit_rsd_key_equal(const void *a, const void *b)
1449 {
1450         return !memcmp(a, b, sizeof(struct pan_blit_rsd_key));
1451 }
1452 
1453 static void
pan_blitter_prefill_blit_shader_cache(struct panfrost_device * dev)1454 pan_blitter_prefill_blit_shader_cache(struct panfrost_device *dev)
1455 {
1456         static const struct pan_blit_shader_key prefill[] = {
1457                 {
1458                         .surfaces[0] = {
1459                                 .loc = FRAG_RESULT_DEPTH,
1460                                 .type = nir_type_float32,
1461                                 .dim = MALI_TEXTURE_DIMENSION_2D,
1462                                 .src_samples = 1,
1463                                 .dst_samples = 1,
1464                         },
1465                 },
1466                 {
1467                         .surfaces[1] = {
1468                                 .loc = FRAG_RESULT_STENCIL,
1469                                 .type = nir_type_uint32,
1470                                 .dim = MALI_TEXTURE_DIMENSION_2D,
1471                                 .src_samples = 1,
1472                                 .dst_samples = 1,
1473                         },
1474                 },
1475                 {
1476                         .surfaces[0] = {
1477                                 .loc = FRAG_RESULT_DATA0,
1478                                 .type = nir_type_float32,
1479                                 .dim = MALI_TEXTURE_DIMENSION_2D,
1480                                 .src_samples = 1,
1481                                 .dst_samples = 1,
1482                         },
1483                 },
1484         };
1485 
1486         for (unsigned i = 0; i < ARRAY_SIZE(prefill); i++)
1487                 pan_blitter_get_blit_shader(dev, &prefill[i]);
1488 }
1489 
1490 void
GENX(pan_blitter_init)1491 GENX(pan_blitter_init)(struct panfrost_device *dev,
1492                        struct pan_pool *bin_pool,
1493                        struct pan_pool *desc_pool)
1494 {
1495         dev->blitter.shaders.blit =
1496                 _mesa_hash_table_create(NULL, pan_blit_shader_key_hash,
1497                                         pan_blit_shader_key_equal);
1498         dev->blitter.shaders.blend =
1499                 _mesa_hash_table_create(NULL, pan_blit_blend_shader_key_hash,
1500                                         pan_blit_blend_shader_key_equal);
1501         dev->blitter.shaders.pool = bin_pool;
1502         pthread_mutex_init(&dev->blitter.shaders.lock, NULL);
1503         pan_blitter_prefill_blit_shader_cache(dev);
1504 
1505         dev->blitter.rsds.pool = desc_pool;
1506         dev->blitter.rsds.rsds =
1507                 _mesa_hash_table_create(NULL, pan_blit_rsd_key_hash,
1508                                         pan_blit_rsd_key_equal);
1509         pthread_mutex_init(&dev->blitter.rsds.lock, NULL);
1510 }
1511 
1512 void
GENX(pan_blitter_cleanup)1513 GENX(pan_blitter_cleanup)(struct panfrost_device *dev)
1514 {
1515         _mesa_hash_table_destroy(dev->blitter.shaders.blit, NULL);
1516         _mesa_hash_table_destroy(dev->blitter.shaders.blend, NULL);
1517         pthread_mutex_destroy(&dev->blitter.shaders.lock);
1518         _mesa_hash_table_destroy(dev->blitter.rsds.rsds, NULL);
1519         pthread_mutex_destroy(&dev->blitter.rsds.lock);
1520 }
1521