/*
 * Copyright © 2021 Collabora Ltd.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 */

#include "gen_macros.h"

#include "nir/nir_builder.h"
#include "pan_encoder.h"
#include "pan_shader.h"

#include "panvk_private.h"

static mali_ptr
panvk_meta_copy_img_emit_texture(struct panfrost_device *pdev,
                                 struct pan_pool *desc_pool,
                                 const struct pan_image_view *view)
{
#if PAN_ARCH >= 6
   struct panfrost_ptr texture =
      pan_pool_alloc_desc(desc_pool, TEXTURE);
   size_t payload_size =
      GENX(panfrost_estimate_texture_payload_size)(view);
   struct panfrost_ptr surfaces =
      pan_pool_alloc_aligned(desc_pool, payload_size,
                             pan_alignment(SURFACE_WITH_STRIDE));

   GENX(panfrost_new_texture)(pdev, view, texture.cpu, &surfaces);

   return texture.gpu;
#else
   size_t sz = pan_size(TEXTURE) +
               GENX(panfrost_estimate_texture_payload_size)(view);
   struct panfrost_ptr texture =
      pan_pool_alloc_aligned(desc_pool, sz, pan_alignment(TEXTURE));
   struct panfrost_ptr surfaces = {
      .cpu = texture.cpu + pan_size(TEXTURE),
      .gpu = texture.gpu + pan_size(TEXTURE),
   };

   GENX(panfrost_new_texture)(pdev, view, texture.cpu, &surfaces);
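
   /* Pre-v6 texture descriptors are referenced through a table of
    * pointers; upload the descriptor's GPU address and return a pointer
    * to that pointer. */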
   return pan_pool_upload_aligned(desc_pool, &texture.gpu,
                                  sizeof(mali_ptr),
                                  sizeof(mali_ptr));
#endif
}

static mali_ptr
panvk_meta_copy_img_emit_sampler(struct panfrost_device *pdev,
                                 struct pan_pool *desc_pool)
{
   struct panfrost_ptr sampler =
      pan_pool_alloc_desc(desc_pool, SAMPLER);

   pan_pack(sampler.cpu, SAMPLER, cfg) {
#if PAN_ARCH >= 6
      cfg.seamless_cube_map = false;
#endif
      cfg.normalized_coordinates = false;
      cfg.minify_nearest = true;
      cfg.magnify_nearest = true;
   }

   return sampler.gpu;
}

static void
panvk_meta_copy_emit_varying(struct pan_pool *pool,
                             mali_ptr coordinates,
                             mali_ptr *varying_bufs,
                             mali_ptr *varyings)
{
   /* Bifrost needs an empty desc to mark end of prefetching */
   bool padding_buffer = PAN_ARCH >= 6;

   struct panfrost_ptr varying =
      pan_pool_alloc_desc(pool, ATTRIBUTE);
   struct panfrost_ptr varying_buffer =
      pan_pool_alloc_desc_array(pool, (padding_buffer ? 2 : 1),
                                ATTRIBUTE_BUFFER);
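
   /* The coordinate buffer holds one vec4 per corner of the quad being
    * drawn, hence the 16-byte stride and 4-entry size below. */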
   pan_pack(varying_buffer.cpu, ATTRIBUTE_BUFFER, cfg) {
      cfg.pointer = coordinates;
      cfg.stride = 4 * sizeof(uint32_t);
      cfg.size = cfg.stride * 4;
   }

   if (padding_buffer) {
      pan_pack(varying_buffer.cpu + pan_size(ATTRIBUTE_BUFFER),
               ATTRIBUTE_BUFFER, cfg);
   }

   pan_pack(varying.cpu, ATTRIBUTE, cfg) {
      cfg.buffer_index = 0;
      cfg.offset_enable = PAN_ARCH <= 5;
      cfg.format = pool->dev->formats[PIPE_FORMAT_R32G32B32_FLOAT].hw;
   }

   *varyings = varying.gpu;
   *varying_bufs = varying_buffer.gpu;
}

static void
panvk_meta_copy_emit_dcd(struct pan_pool *pool,
                         mali_ptr src_coords, mali_ptr dst_coords,
                         mali_ptr texture, mali_ptr sampler,
                         mali_ptr vpd, mali_ptr tsd, mali_ptr rsd,
                         mali_ptr ubos, mali_ptr push_constants,
                         void *out)
{
   pan_pack(out, DRAW, cfg) {
      cfg.four_components_per_vertex = true;
      cfg.draw_descriptor_is_64b = true;
      cfg.thread_storage = tsd;
      cfg.state = rsd;
      cfg.uniform_buffers = ubos;
      cfg.push_uniforms = push_constants;
      cfg.position = dst_coords;
      if (src_coords) {
         panvk_meta_copy_emit_varying(pool, src_coords,
                                      &cfg.varying_buffers,
                                      &cfg.varyings);
      }
      cfg.viewport = vpd;
      cfg.textures = texture;
      cfg.samplers = sampler;
   }
}

static struct panfrost_ptr
panvk_meta_copy_emit_tiler_job(struct pan_pool *desc_pool,
                               struct pan_scoreboard *scoreboard,
                               mali_ptr src_coords, mali_ptr dst_coords,
                               mali_ptr texture, mali_ptr sampler,
                               mali_ptr ubo, mali_ptr push_constants,
                               mali_ptr vpd, mali_ptr rsd,
                               mali_ptr tsd, mali_ptr tiler)
{
   struct panfrost_ptr job =
      pan_pool_alloc_desc(desc_pool, TILER_JOB);

   panvk_meta_copy_emit_dcd(desc_pool, src_coords, dst_coords,
                            texture, sampler, vpd, tsd, rsd, ubo,
                            push_constants,
                            pan_section_ptr(job.cpu, TILER_JOB, DRAW));

   pan_section_pack(job.cpu, TILER_JOB, PRIMITIVE, cfg) {
      cfg.draw_mode = MALI_DRAW_MODE_TRIANGLE_STRIP;
      cfg.index_count = 4;
      cfg.job_task_split = 6;
   }

   pan_section_pack(job.cpu, TILER_JOB, PRIMITIVE_SIZE, cfg) {
      cfg.constant = 1.0f;
   }

   void *invoc = pan_section_ptr(job.cpu,
                                 TILER_JOB,
                                 INVOCATION);
   panfrost_pack_work_groups_compute(invoc, 1, 4,
                                     1, 1, 1, 1, true, false);

#if PAN_ARCH >= 6
   pan_section_pack(job.cpu, TILER_JOB, PADDING, cfg);
   pan_section_pack(job.cpu, TILER_JOB, TILER, cfg) {
      cfg.address = tiler;
   }
#endif

   panfrost_add_job(desc_pool, scoreboard, MALI_JOB_TYPE_TILER,
                    false, false, 0, 0, &job, false);
   return job;
}

static struct panfrost_ptr
panvk_meta_copy_emit_compute_job(struct pan_pool *desc_pool,
                                 struct pan_scoreboard *scoreboard,
                                 const struct pan_compute_dim *num_wg,
                                 const struct pan_compute_dim *wg_sz,
                                 mali_ptr texture, mali_ptr sampler,
                                 mali_ptr ubo, mali_ptr push_constants,
                                 mali_ptr rsd, mali_ptr tsd)
{
   struct panfrost_ptr job =
      pan_pool_alloc_desc(desc_pool, COMPUTE_JOB);

   void *invoc = pan_section_ptr(job.cpu,
                                 COMPUTE_JOB,
                                 INVOCATION);
   panfrost_pack_work_groups_compute(invoc, num_wg->x, num_wg->y, num_wg->z,
                                     wg_sz->x, wg_sz->y, wg_sz->z,
                                     false, false);

   pan_section_pack(job.cpu, COMPUTE_JOB, PARAMETERS, cfg) {
      cfg.job_task_split = 8;
   }

   panvk_meta_copy_emit_dcd(desc_pool, 0, 0, texture, sampler,
                            0, tsd, rsd, ubo, push_constants,
                            pan_section_ptr(job.cpu, COMPUTE_JOB, DRAW));

   panfrost_add_job(desc_pool, scoreboard, MALI_JOB_TYPE_COMPUTE,
                    false, false, 0, 0, &job, false);
   return job;
}


#if PAN_ARCH >= 6
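/* Raw (non-blendable) copies go through a UINT render-target format whose
 * texel size matches the source. The '<< 12' is assumed to place the format
 * enum above the swizzle field of the packed format word. */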
static uint32_t
panvk_meta_copy_img_bifrost_raw_format(unsigned texelsize)
{
   switch (texelsize) {
   case 6: return MALI_RGB16UI << 12;
   case 8: return MALI_RG32UI << 12;
   case 12: return MALI_RGB32UI << 12;
   case 16: return MALI_RGBA32UI << 12;
   default: unreachable("Invalid texel size\n");
   }
}
#endif

static mali_ptr
panvk_meta_copy_to_img_emit_rsd(struct panfrost_device *pdev,
                                struct pan_pool *desc_pool,
                                mali_ptr shader,
                                const struct pan_shader_info *shader_info,
                                enum pipe_format fmt, unsigned wrmask,
                                bool from_img)
{
   struct panfrost_ptr rsd_ptr =
      pan_pool_alloc_desc_aggregate(desc_pool,
                                    PAN_DESC(RENDERER_STATE),
                                    PAN_DESC_ARRAY(1, BLEND));

   bool raw = util_format_get_blocksize(fmt) > 4;
   unsigned fullmask = (1 << util_format_get_nr_components(fmt)) - 1;
   bool partialwrite = fullmask != wrmask && !raw;
   bool readstb = fullmask != wrmask && raw;
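
   /* Partial writes are handled differently depending on the format: for
    * blendable formats the fixed-function blend unit applies the write
    * mask (partialwrite), while raw formats require the shader to read
    * back the tilebuffer and merge the missing components itself
    * (readstb). */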

   pan_pack(rsd_ptr.cpu, RENDERER_STATE, cfg) {
      pan_shader_prepare_rsd(shader_info, shader, &cfg);
      if (from_img) {
         cfg.shader.varying_count = 1;
         cfg.shader.texture_count = 1;
         cfg.shader.sampler_count = 1;
      }
      cfg.properties.depth_source = MALI_DEPTH_SOURCE_FIXED_FUNCTION;
      cfg.multisample_misc.sample_mask = UINT16_MAX;
      cfg.multisample_misc.depth_function = MALI_FUNC_ALWAYS;
      cfg.stencil_mask_misc.stencil_mask_front = 0xFF;
      cfg.stencil_mask_misc.stencil_mask_back = 0xFF;
      cfg.stencil_front.compare_function = MALI_FUNC_ALWAYS;
      cfg.stencil_front.stencil_fail = MALI_STENCIL_OP_REPLACE;
      cfg.stencil_front.depth_fail = MALI_STENCIL_OP_REPLACE;
      cfg.stencil_front.depth_pass = MALI_STENCIL_OP_REPLACE;
      cfg.stencil_front.mask = 0xFF;
      cfg.stencil_back = cfg.stencil_front;

#if PAN_ARCH >= 6
      cfg.properties.allow_forward_pixel_to_be_killed = true;
      cfg.properties.allow_forward_pixel_to_kill =
         !partialwrite && !readstb;
      cfg.properties.zs_update_operation =
         MALI_PIXEL_KILL_STRONG_EARLY;
      cfg.properties.pixel_kill_operation =
         MALI_PIXEL_KILL_FORCE_EARLY;
#else
      cfg.properties.shader_reads_tilebuffer = readstb;
      cfg.properties.work_register_count = shader_info->work_reg_count;
      cfg.properties.force_early_z = true;
      cfg.stencil_mask_misc.alpha_test_compare_function = MALI_FUNC_ALWAYS;
#endif
   }

   pan_pack(rsd_ptr.cpu + pan_size(RENDERER_STATE), BLEND, cfg) {
      cfg.round_to_fb_precision = true;
      cfg.load_destination = partialwrite;
      cfg.equation.rgb.a = MALI_BLEND_OPERAND_A_SRC;
      cfg.equation.rgb.b = MALI_BLEND_OPERAND_B_SRC;
      cfg.equation.rgb.c = MALI_BLEND_OPERAND_C_ZERO;
      cfg.equation.alpha.a = MALI_BLEND_OPERAND_A_SRC;
      cfg.equation.alpha.b = MALI_BLEND_OPERAND_B_SRC;
      cfg.equation.alpha.c = MALI_BLEND_OPERAND_C_ZERO;
#if PAN_ARCH >= 6
      cfg.internal.mode =
         partialwrite ?
         MALI_BLEND_MODE_FIXED_FUNCTION :
         MALI_BLEND_MODE_OPAQUE;
      cfg.equation.color_mask = partialwrite ? wrmask : 0xf;
      cfg.internal.fixed_function.num_comps = 4;
      if (!raw) {
         cfg.internal.fixed_function.conversion.memory_format =
            panfrost_format_to_bifrost_blend(pdev, fmt, false);
         cfg.internal.fixed_function.conversion.register_format =
            MALI_REGISTER_FILE_FORMAT_F32;
      } else {
         unsigned imgtexelsz = util_format_get_blocksize(fmt);

         cfg.internal.fixed_function.conversion.memory_format =
            panvk_meta_copy_img_bifrost_raw_format(imgtexelsz);
         cfg.internal.fixed_function.conversion.register_format =
            (imgtexelsz & 2) ?
            MALI_REGISTER_FILE_FORMAT_U16 :
            MALI_REGISTER_FILE_FORMAT_U32;
      }
#else
      cfg.equation.color_mask = wrmask;
#endif
   }

   return rsd_ptr.gpu;
}

static mali_ptr
panvk_meta_copy_emit_ubo(struct panfrost_device *pdev,
                         struct pan_pool *pool,
                         void *data, unsigned size)
{
   struct panfrost_ptr ubo = pan_pool_alloc_desc(pool, UNIFORM_BUFFER);

   pan_pack(ubo.cpu, UNIFORM_BUFFER, cfg) {
      cfg.entries = DIV_ROUND_UP(size, 16);
      cfg.pointer = pan_pool_upload_aligned(pool, data, size, 16);
   }

   return ubo.gpu;
}

static mali_ptr
panvk_meta_copy_emit_push_constants(struct panfrost_device *pdev,
                                    const struct panfrost_ubo_push *pushmap,
                                    struct pan_pool *pool,
                                    const void *data, unsigned size)
{
   assert(pushmap->count <= (size / 4));

   const uint32_t *in = data;
   uint32_t pushvals[PAN_MAX_PUSH];

   for (unsigned i = 0; i < pushmap->count; i++) {
      assert(i < ARRAY_SIZE(pushvals));
      assert(pushmap->words[i].ubo == 0);
      assert(pushmap->words[i].offset < size);
      pushvals[i] = in[pushmap->words[i].offset / 4];
   }
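
   /* The compiler's push map lists the 32-bit UBO words that were promoted
    * to push constants/FAUs; they are gathered above in push-slot order
    * before being uploaded. */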
   return pan_pool_upload_aligned(pool, pushvals, size, 16);
}

static mali_ptr
panvk_meta_copy_to_buf_emit_rsd(struct panfrost_device *pdev,
                                struct pan_pool *desc_pool,
                                mali_ptr shader,
                                const struct pan_shader_info *shader_info,
                                bool from_img)
{
   struct panfrost_ptr rsd_ptr =
      pan_pool_alloc_desc_aggregate(desc_pool,
                                    PAN_DESC(RENDERER_STATE));

   pan_pack(rsd_ptr.cpu, RENDERER_STATE, cfg) {
      pan_shader_prepare_rsd(shader_info, shader, &cfg);
      if (from_img) {
         cfg.shader.texture_count = 1;
         cfg.shader.sampler_count = 1;
      }
   }

   return rsd_ptr.gpu;
}

static mali_ptr
panvk_meta_copy_img2img_shader(struct panfrost_device *pdev,
                               struct pan_pool *bin_pool,
                               enum pipe_format srcfmt,
                               enum pipe_format dstfmt, unsigned dstmask,
                               unsigned texdim, bool texisarray, bool is_ms,
                               struct pan_shader_info *shader_info)
{
   nir_builder b =
      nir_builder_init_simple_shader(MESA_SHADER_FRAGMENT,
                                     GENX(pan_shader_get_compiler_options)(),
                                     "panvk_meta_copy_img2img(srcfmt=%s,dstfmt=%s,%dD%s%s)",
                                     util_format_name(srcfmt), util_format_name(dstfmt),
                                     texdim, texisarray ? "[]" : "", is_ms ? ",ms" : "");

   b.shader->info.internal = true;

   nir_variable *coord_var =
      nir_variable_create(b.shader, nir_var_shader_in,
                          glsl_vector_type(GLSL_TYPE_FLOAT, texdim + texisarray),
                          "coord");
   coord_var->data.location = VARYING_SLOT_TEX0;
   nir_ssa_def *coord = nir_f2u32(&b, nir_load_var(&b, coord_var));

   nir_tex_instr *tex = nir_tex_instr_create(b.shader, is_ms ? 2 : 1);
   tex->op = is_ms ? nir_texop_txf_ms : nir_texop_txf;
   tex->texture_index = 0;
   tex->is_array = texisarray;
   tex->dest_type = util_format_is_unorm(srcfmt) ?
                    nir_type_float32 : nir_type_uint32;

   switch (texdim) {
   case 1: tex->sampler_dim = GLSL_SAMPLER_DIM_1D; break;
   case 2: tex->sampler_dim = GLSL_SAMPLER_DIM_2D; break;
   case 3: tex->sampler_dim = GLSL_SAMPLER_DIM_3D; break;
   default: unreachable("Invalid texture dimension");
   }

   tex->src[0].src_type = nir_tex_src_coord;
   tex->src[0].src = nir_src_for_ssa(coord);
   tex->coord_components = texdim + texisarray;

   if (is_ms) {
      tex->src[1].src_type = nir_tex_src_ms_index;
      tex->src[1].src = nir_src_for_ssa(nir_load_sample_id(&b));
   }

   nir_ssa_dest_init(&tex->instr, &tex->dest, 4,
                     nir_alu_type_get_type_size(tex->dest_type), NULL);
   nir_builder_instr_insert(&b, &tex->instr);

   nir_ssa_def *texel = &tex->dest.ssa;

   unsigned dstcompsz =
      util_format_get_component_bits(dstfmt, UTIL_FORMAT_COLORSPACE_RGB, 0);
   unsigned ndstcomps = util_format_get_nr_components(dstfmt);
   const struct glsl_type *outtype = NULL;
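
   /* RGB565 cannot alias RG8 directly, so copies between the two need an
    * explicit repack. Worked example for RGB565 -> RG8: with (r, g, b)
    * unpacked to 5/6/5-bit integers, the low byte is r | (g << 5)
    * truncated to 8 bits and the high byte is (g >> 3) | (b << 3), i.e.
    * the raw 16-bit pattern split in two. */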
   if (srcfmt == PIPE_FORMAT_R5G6B5_UNORM && dstfmt == PIPE_FORMAT_R8G8_UNORM) {
      nir_ssa_def *rgb =
         nir_f2u32(&b, nir_fmul(&b, texel,
                                nir_vec3(&b,
                                         nir_imm_float(&b, 31),
                                         nir_imm_float(&b, 63),
                                         nir_imm_float(&b, 31))));
      nir_ssa_def *rg =
         nir_vec2(&b,
                  nir_ior(&b, nir_channel(&b, rgb, 0),
                          nir_ishl(&b, nir_channel(&b, rgb, 1),
                                   nir_imm_int(&b, 5))),
                  nir_ior(&b,
                          nir_ushr_imm(&b, nir_channel(&b, rgb, 1), 3),
                          nir_ishl(&b, nir_channel(&b, rgb, 2),
                                   nir_imm_int(&b, 3))));
      rg = nir_iand_imm(&b, rg, 255);
      texel = nir_fmul_imm(&b, nir_u2f32(&b, rg), 1.0 / 255);
      outtype = glsl_vector_type(GLSL_TYPE_FLOAT, 2);
   } else if (srcfmt == PIPE_FORMAT_R8G8_UNORM && dstfmt == PIPE_FORMAT_R5G6B5_UNORM) {
      nir_ssa_def *rg = nir_f2u32(&b, nir_fmul_imm(&b, texel, 255));
      nir_ssa_def *rgb =
         nir_vec3(&b,
                  nir_channel(&b, rg, 0),
                  nir_ior(&b,
                          nir_ushr_imm(&b, nir_channel(&b, rg, 0), 5),
                          nir_ishl(&b, nir_channel(&b, rg, 1),
                                   nir_imm_int(&b, 3))),
                  nir_ushr_imm(&b, nir_channel(&b, rg, 1), 3));
      rgb = nir_iand(&b, rgb,
                     nir_vec3(&b,
                              nir_imm_int(&b, 31),
                              nir_imm_int(&b, 63),
                              nir_imm_int(&b, 31)));
      texel = nir_fmul(&b, nir_u2f32(&b, rgb),
                       nir_vec3(&b,
                                nir_imm_float(&b, 1.0 / 31),
                                nir_imm_float(&b, 1.0 / 63),
                                nir_imm_float(&b, 1.0 / 31)));
      outtype = glsl_vector_type(GLSL_TYPE_FLOAT, 3);
   } else {
      assert(srcfmt == dstfmt);
      enum glsl_base_type basetype;
      if (util_format_is_unorm(dstfmt)) {
         basetype = GLSL_TYPE_FLOAT;
      } else if (dstcompsz == 16) {
         basetype = GLSL_TYPE_UINT16;
      } else {
         assert(dstcompsz == 32);
         basetype = GLSL_TYPE_UINT;
      }

      if (dstcompsz == 16)
         texel = nir_u2u16(&b, texel);

      texel = nir_channels(&b, texel, (1 << ndstcomps) - 1);
      outtype = glsl_vector_type(basetype, ndstcomps);
   }

   nir_variable *out =
      nir_variable_create(b.shader, nir_var_shader_out, outtype, "out");
   out->data.location = FRAG_RESULT_DATA0;

   unsigned fullmask = (1 << ndstcomps) - 1;
   if (dstcompsz > 8 && dstmask != fullmask) {
      nir_ssa_def *oldtexel = nir_load_var(&b, out);
      nir_ssa_def *dstcomps[4];

      for (unsigned i = 0; i < ndstcomps; i++) {
         if (dstmask & BITFIELD_BIT(i))
            dstcomps[i] = nir_channel(&b, texel, i);
         else
            dstcomps[i] = nir_channel(&b, oldtexel, i);
      }

      texel = nir_vec(&b, dstcomps, ndstcomps);
   }

   nir_store_var(&b, out, texel, 0xff);

   struct panfrost_compile_inputs inputs = {
      .gpu_id = pdev->gpu_id,
      .is_blit = true,
   };
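
   /* With static_rt_conv set, the render-target conversion below is baked
    * into the compiled shader instead of being fetched from the blend
    * descriptor at draw time. */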
#if PAN_ARCH >= 6
   pan_pack(&inputs.bifrost.rt_conv[0], INTERNAL_CONVERSION, cfg) {
      /* dstcompsz is in bits here (unlike the byte-sized component counts
       * used by the buf2img/img2buf paths), hence the comparison with 16. */
      cfg.memory_format = (dstcompsz == 16 ? MALI_RG16UI : MALI_RG32UI) << 12;
      cfg.register_format = dstcompsz == 16 ?
                            MALI_REGISTER_FILE_FORMAT_U16 :
                            MALI_REGISTER_FILE_FORMAT_U32;
   }
   inputs.bifrost.static_rt_conv = true;
#endif

   struct util_dynarray binary;

   util_dynarray_init(&binary, NULL);
   GENX(pan_shader_compile)(b.shader, &inputs, &binary, shader_info);

   shader_info->fs.sample_shading = is_ms;

   mali_ptr shader =
      pan_pool_upload_aligned(bin_pool, binary.data, binary.size,
                              PAN_ARCH >= 6 ? 128 : 64);

   util_dynarray_fini(&binary);
   ralloc_free(b.shader);

   return shader;
}

static enum pipe_format
panvk_meta_copy_img_format(enum pipe_format fmt)
{
   /* We can't use a non-compressed format when handling a tiled/AFBC
    * compressed format because the tile sizes differ (4x4 blocks for
    * compressed formats and 16x16 texels for non-compressed ones).
    */
   assert(!util_format_is_compressed(fmt));

   /* Pick blendable formats when we can, otherwise pick the UINT variant
    * matching the texel size.
    */
   switch (util_format_get_blocksize(fmt)) {
   case 16: return PIPE_FORMAT_R32G32B32A32_UINT;
   case 12: return PIPE_FORMAT_R32G32B32_UINT;
   case 8: return PIPE_FORMAT_R32G32_UINT;
   case 6: return PIPE_FORMAT_R16G16B16_UINT;
   case 4: return PIPE_FORMAT_R8G8B8A8_UNORM;
   case 2: return (fmt == PIPE_FORMAT_R5G6B5_UNORM ||
                   fmt == PIPE_FORMAT_B5G6R5_UNORM) ?
                  PIPE_FORMAT_R5G6B5_UNORM : PIPE_FORMAT_R8G8_UNORM;
   case 1: return PIPE_FORMAT_R8_UNORM;
   default: unreachable("Unsupported format\n");
   }
}

struct panvk_meta_copy_img2img_format_info {
   enum pipe_format srcfmt;
   enum pipe_format dstfmt;
   unsigned dstmask;
};

static const struct panvk_meta_copy_img2img_format_info panvk_meta_copy_img2img_fmts[] = {
   { PIPE_FORMAT_R8_UNORM, PIPE_FORMAT_R8_UNORM, 0x1},
   { PIPE_FORMAT_R5G6B5_UNORM, PIPE_FORMAT_R5G6B5_UNORM, 0x7},
   { PIPE_FORMAT_R5G6B5_UNORM, PIPE_FORMAT_R8G8_UNORM, 0x3},
   { PIPE_FORMAT_R8G8_UNORM, PIPE_FORMAT_R5G6B5_UNORM, 0x7},
   { PIPE_FORMAT_R8G8_UNORM, PIPE_FORMAT_R8G8_UNORM, 0x3},
   /* Z24S8(depth) */
   { PIPE_FORMAT_R8G8B8A8_UNORM, PIPE_FORMAT_R8G8B8A8_UNORM, 0x7 },
   /* Z24S8(stencil) */
   { PIPE_FORMAT_R8G8B8A8_UNORM, PIPE_FORMAT_R8G8B8A8_UNORM, 0x8 },
   { PIPE_FORMAT_R8G8B8A8_UNORM, PIPE_FORMAT_R8G8B8A8_UNORM, 0xf },
   { PIPE_FORMAT_R16G16B16_UINT, PIPE_FORMAT_R16G16B16_UINT, 0x7 },
   { PIPE_FORMAT_R32G32_UINT, PIPE_FORMAT_R32G32_UINT, 0x3 },
   /* Z32S8X24(depth) */
   { PIPE_FORMAT_R32G32_UINT, PIPE_FORMAT_R32G32_UINT, 0x1 },
   /* Z32S8X24(stencil) */
   { PIPE_FORMAT_R32G32_UINT, PIPE_FORMAT_R32G32_UINT, 0x2 },
   { PIPE_FORMAT_R32G32B32_UINT, PIPE_FORMAT_R32G32B32_UINT, 0x7 },
   { PIPE_FORMAT_R32G32B32A32_UINT, PIPE_FORMAT_R32G32B32A32_UINT, 0xf },
};
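
/* Depth/stencil copies reuse the color formats above: Z24S8 aliases RGBA8,
 * so the depth aspect maps to the RGB components (mask 0x7) and stencil to
 * alpha (mask 0x8); Z32_S8X24 aliases RG32 with depth in .x (mask 0x1) and
 * stencil in .y (mask 0x2). */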

static unsigned
panvk_meta_copy_img2img_format_idx(struct panvk_meta_copy_img2img_format_info key)
{
   STATIC_ASSERT(ARRAY_SIZE(panvk_meta_copy_img2img_fmts) == PANVK_META_COPY_IMG2IMG_NUM_FORMATS);

   for (unsigned i = 0; i < ARRAY_SIZE(panvk_meta_copy_img2img_fmts); i++) {
      if (!memcmp(&key, &panvk_meta_copy_img2img_fmts[i], sizeof(key)))
         return i;
   }

   unreachable("Invalid image format\n");
}

static unsigned
panvk_meta_copy_img_mask(enum pipe_format imgfmt, VkImageAspectFlags aspectMask)
{
   if (aspectMask != VK_IMAGE_ASPECT_DEPTH_BIT &&
       aspectMask != VK_IMAGE_ASPECT_STENCIL_BIT) {
      enum pipe_format outfmt = panvk_meta_copy_img_format(imgfmt);

      return (1 << util_format_get_nr_components(outfmt)) - 1;
   }

   switch (imgfmt) {
   case PIPE_FORMAT_S8_UINT:
      return 1;
   case PIPE_FORMAT_Z16_UNORM:
      return 3;
   case PIPE_FORMAT_Z16_UNORM_S8_UINT:
      return aspectMask == VK_IMAGE_ASPECT_DEPTH_BIT ? 3 : 8;
   case PIPE_FORMAT_Z24_UNORM_S8_UINT:
      return aspectMask == VK_IMAGE_ASPECT_DEPTH_BIT ? 7 : 8;
   case PIPE_FORMAT_Z24X8_UNORM:
      assert(aspectMask == VK_IMAGE_ASPECT_DEPTH_BIT);
      return 7;
   case PIPE_FORMAT_Z32_FLOAT:
      return 0xf;
   case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT:
      return aspectMask == VK_IMAGE_ASPECT_DEPTH_BIT ? 1 : 2;
   default:
      unreachable("Invalid depth format\n");
   }
}

static void
panvk_meta_copy_img2img(struct panvk_cmd_buffer *cmdbuf,
                        const struct panvk_image *src,
                        const struct panvk_image *dst,
                        const VkImageCopy *region)
{
   struct panfrost_device *pdev = &cmdbuf->device->physical_device->pdev;
   struct pan_fb_info *fbinfo = &cmdbuf->state.fb.info;
   struct panvk_meta_copy_img2img_format_info key = {
      .srcfmt = panvk_meta_copy_img_format(src->pimage.layout.format),
      .dstfmt = panvk_meta_copy_img_format(dst->pimage.layout.format),
      .dstmask = panvk_meta_copy_img_mask(dst->pimage.layout.format,
                                          region->dstSubresource.aspectMask),
   };

   assert(src->pimage.layout.nr_samples == dst->pimage.layout.nr_samples);

   unsigned texdimidx =
      panvk_meta_copy_tex_type(src->pimage.layout.dim,
                               src->pimage.layout.array_size > 1);
   unsigned fmtidx =
      panvk_meta_copy_img2img_format_idx(key);
   unsigned ms = dst->pimage.layout.nr_samples > 1 ? 1 : 0;

   mali_ptr rsd =
      cmdbuf->device->physical_device->meta.copy.img2img[ms][texdimidx][fmtidx].rsd;

   struct pan_image_view srcview = {
      .format = key.srcfmt,
      .dim = src->pimage.layout.dim == MALI_TEXTURE_DIMENSION_CUBE ?
             MALI_TEXTURE_DIMENSION_2D : src->pimage.layout.dim,
      .image = &src->pimage,
      .nr_samples = src->pimage.layout.nr_samples,
      .first_level = region->srcSubresource.mipLevel,
      .last_level = region->srcSubresource.mipLevel,
      .first_layer = region->srcSubresource.baseArrayLayer,
      .last_layer = region->srcSubresource.baseArrayLayer + region->srcSubresource.layerCount - 1,
      .swizzle = { PIPE_SWIZZLE_X, PIPE_SWIZZLE_Y, PIPE_SWIZZLE_Z, PIPE_SWIZZLE_W },
   };

   struct pan_image_view dstview = {
      .format = key.dstfmt,
      .dim = MALI_TEXTURE_DIMENSION_2D,
      .image = &dst->pimage,
      .nr_samples = dst->pimage.layout.nr_samples,
      .first_level = region->dstSubresource.mipLevel,
      .last_level = region->dstSubresource.mipLevel,
      .swizzle = { PIPE_SWIZZLE_X, PIPE_SWIZZLE_Y, PIPE_SWIZZLE_Z, PIPE_SWIZZLE_W },
   };

   unsigned minx = MAX2(region->dstOffset.x, 0);
   unsigned miny = MAX2(region->dstOffset.y, 0);
   unsigned maxx = MAX2(region->dstOffset.x + region->extent.width - 1, 0);
   unsigned maxy = MAX2(region->dstOffset.y + region->extent.height - 1, 0);

   mali_ptr vpd =
      panvk_per_arch(meta_emit_viewport)(&cmdbuf->desc_pool.base,
                                         minx, miny, maxx, maxy);

   float dst_rect[] = {
      minx, miny, 0.0, 1.0,
      maxx + 1, miny, 0.0, 1.0,
      minx, maxy + 1, 0.0, 1.0,
      maxx + 1, maxy + 1, 0.0, 1.0,
   };

   mali_ptr dst_coords =
      pan_pool_upload_aligned(&cmdbuf->desc_pool.base, dst_rect,
                              sizeof(dst_rect), 64);

   /* TODO: don't force preloads of dst resources if unneeded */

   unsigned width = u_minify(dst->pimage.layout.width, region->dstSubresource.mipLevel);
   unsigned height = u_minify(dst->pimage.layout.height, region->dstSubresource.mipLevel);
   cmdbuf->state.fb.crc_valid[0] = false;
   *fbinfo = (struct pan_fb_info){
      .width = width,
      .height = height,
      .extent.minx = minx & ~31,
      .extent.miny = miny & ~31,
      .extent.maxx = MIN2(ALIGN_POT(maxx + 1, 32), width) - 1,
      .extent.maxy = MIN2(ALIGN_POT(maxy + 1, 32), height) - 1,
      .nr_samples = dst->pimage.layout.nr_samples,
      .rt_count = 1,
      .rts[0].view = &dstview,
      .rts[0].preload = true,
      .rts[0].crc_valid = &cmdbuf->state.fb.crc_valid[0],
   };
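
   /* The damage extent is aligned on 32x32 tile boundaries: only the tiles
    * overlapping the destination rectangle get processed (and preloaded). */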

   mali_ptr texture =
      panvk_meta_copy_img_emit_texture(pdev, &cmdbuf->desc_pool.base, &srcview);
   mali_ptr sampler =
      panvk_meta_copy_img_emit_sampler(pdev, &cmdbuf->desc_pool.base);

   panvk_per_arch(cmd_close_batch)(cmdbuf);

   minx = MAX2(region->srcOffset.x, 0);
   miny = MAX2(region->srcOffset.y, 0);
   maxx = MAX2(region->srcOffset.x + region->extent.width - 1, 0);
   maxy = MAX2(region->srcOffset.y + region->extent.height - 1, 0);
   assert(region->dstOffset.z >= 0);

   unsigned first_src_layer = MAX2(0, region->srcOffset.z);
   unsigned first_dst_layer = MAX2(region->dstSubresource.baseArrayLayer, region->dstOffset.z);
   unsigned nlayers = MAX2(region->dstSubresource.layerCount, region->extent.depth);
   for (unsigned l = 0; l < nlayers; l++) {
      unsigned src_l = l + first_src_layer;
      float src_rect[] = {
         minx, miny, src_l, 1.0,
         maxx + 1, miny, src_l, 1.0,
         minx, maxy + 1, src_l, 1.0,
         maxx + 1, maxy + 1, src_l, 1.0,
      };

      mali_ptr src_coords =
         pan_pool_upload_aligned(&cmdbuf->desc_pool.base, src_rect,
                                 sizeof(src_rect), 64);

      struct panvk_batch *batch = panvk_cmd_open_batch(cmdbuf);

      dstview.first_layer = dstview.last_layer = l + first_dst_layer;
      batch->blit.src = src->pimage.data.bo;
      batch->blit.dst = dst->pimage.data.bo;
      panvk_per_arch(cmd_alloc_tls_desc)(cmdbuf, true);
      panvk_per_arch(cmd_alloc_fb_desc)(cmdbuf);
      panvk_per_arch(cmd_prepare_tiler_context)(cmdbuf);

      mali_ptr tsd, tiler;

#if PAN_ARCH >= 6
      tsd = batch->tls.gpu;
      tiler = batch->tiler.descs.gpu;
#else
      tsd = batch->fb.desc.gpu;
      tiler = 0;
#endif

      struct panfrost_ptr job;

      job = panvk_meta_copy_emit_tiler_job(&cmdbuf->desc_pool.base,
                                           &batch->scoreboard,
                                           src_coords, dst_coords,
                                           texture, sampler, 0, 0,
                                           vpd, rsd, tsd, tiler);

      util_dynarray_append(&batch->jobs, void *, job.cpu);
      panvk_per_arch(cmd_close_batch)(cmdbuf);
   }
}

static void
panvk_meta_copy_img2img_init(struct panvk_physical_device *dev, bool is_ms)
{
   STATIC_ASSERT(ARRAY_SIZE(panvk_meta_copy_img2img_fmts) == PANVK_META_COPY_IMG2IMG_NUM_FORMATS);

   for (unsigned i = 0; i < ARRAY_SIZE(panvk_meta_copy_img2img_fmts); i++) {
      for (unsigned texdim = 1; texdim <= 3; texdim++) {
         unsigned texdimidx = panvk_meta_copy_tex_type(texdim, false);
         assert(texdimidx < ARRAY_SIZE(dev->meta.copy.img2img[0]));

         /* No MSAA on 3D textures */
         if (texdim == 3 && is_ms) continue;

         struct pan_shader_info shader_info;
         mali_ptr shader =
            panvk_meta_copy_img2img_shader(&dev->pdev, &dev->meta.bin_pool.base,
                                           panvk_meta_copy_img2img_fmts[i].srcfmt,
                                           panvk_meta_copy_img2img_fmts[i].dstfmt,
                                           panvk_meta_copy_img2img_fmts[i].dstmask,
                                           texdim, false, is_ms, &shader_info);
         dev->meta.copy.img2img[is_ms][texdimidx][i].rsd =
            panvk_meta_copy_to_img_emit_rsd(&dev->pdev, &dev->meta.desc_pool.base,
                                            shader, &shader_info,
                                            panvk_meta_copy_img2img_fmts[i].dstfmt,
                                            panvk_meta_copy_img2img_fmts[i].dstmask,
                                            true);
         if (texdim == 3)
            continue;

         memset(&shader_info, 0, sizeof(shader_info));
         texdimidx = panvk_meta_copy_tex_type(texdim, true);
         assert(texdimidx < ARRAY_SIZE(dev->meta.copy.img2img[0]));
         shader =
            panvk_meta_copy_img2img_shader(&dev->pdev, &dev->meta.bin_pool.base,
                                           panvk_meta_copy_img2img_fmts[i].srcfmt,
                                           panvk_meta_copy_img2img_fmts[i].dstfmt,
                                           panvk_meta_copy_img2img_fmts[i].dstmask,
                                           texdim, true, is_ms, &shader_info);
         dev->meta.copy.img2img[is_ms][texdimidx][i].rsd =
            panvk_meta_copy_to_img_emit_rsd(&dev->pdev, &dev->meta.desc_pool.base,
                                            shader, &shader_info,
                                            panvk_meta_copy_img2img_fmts[i].dstfmt,
                                            panvk_meta_copy_img2img_fmts[i].dstmask,
                                            true);
      }
   }
}

void
panvk_per_arch(CmdCopyImage)(VkCommandBuffer commandBuffer,
                             VkImage srcImage,
                             VkImageLayout srcImageLayout,
                             VkImage destImage,
                             VkImageLayout destImageLayout,
                             uint32_t regionCount,
                             const VkImageCopy *pRegions)
{
   VK_FROM_HANDLE(panvk_cmd_buffer, cmdbuf, commandBuffer);
   VK_FROM_HANDLE(panvk_image, dst, destImage);
   VK_FROM_HANDLE(panvk_image, src, srcImage);

   for (unsigned i = 0; i < regionCount; i++) {
      panvk_meta_copy_img2img(cmdbuf, src, dst, &pRegions[i]);
   }
}

static unsigned
panvk_meta_copy_buf_texelsize(enum pipe_format imgfmt, unsigned mask)
{
   unsigned imgtexelsz = util_format_get_blocksize(imgfmt);
   unsigned nbufcomps = util_bitcount(mask);

   if (nbufcomps == util_format_get_nr_components(imgfmt))
      return imgtexelsz;

   /* Special case for Z24 buffers which are not tightly packed */
   if (mask == 7 && imgtexelsz == 4)
      return 4;

   /* Special case for S8 extraction from Z32_S8X24 */
   if (mask == 2 && imgtexelsz == 8)
      return 1;

   unsigned compsz =
      util_format_get_component_bits(imgfmt, UTIL_FORMAT_COLORSPACE_RGB, 0);

   assert(!(compsz % 8));

   return nbufcomps * compsz / 8;
}

static enum pipe_format
panvk_meta_copy_buf2img_format(enum pipe_format imgfmt)
{
   /* Pick blendable formats when we can, and the UINT variant matching
    * the texel size otherwise.
    */
   switch (util_format_get_blocksize(imgfmt)) {
   case 1: return PIPE_FORMAT_R8_UNORM;
   /* AFBC stores things differently for RGB565,
    * we can't simply map to R8G8 in that case */
   case 2: return (imgfmt == PIPE_FORMAT_R5G6B5_UNORM ||
                   imgfmt == PIPE_FORMAT_B5G6R5_UNORM) ?
                  PIPE_FORMAT_R5G6B5_UNORM : PIPE_FORMAT_R8G8_UNORM;
   case 4: return PIPE_FORMAT_R8G8B8A8_UNORM;
   case 6: return PIPE_FORMAT_R16G16B16_UINT;
   case 8: return PIPE_FORMAT_R32G32_UINT;
   case 12: return PIPE_FORMAT_R32G32B32_UINT;
   case 16: return PIPE_FORMAT_R32G32B32A32_UINT;
   default: unreachable("Invalid format\n");
   }
}

struct panvk_meta_copy_format_info {
   enum pipe_format imgfmt;
   unsigned mask;
};

static const struct panvk_meta_copy_format_info panvk_meta_copy_buf2img_fmts[] = {
   { PIPE_FORMAT_R8_UNORM, 0x1 },
   { PIPE_FORMAT_R8G8_UNORM, 0x3 },
   { PIPE_FORMAT_R5G6B5_UNORM, 0x7 },
   { PIPE_FORMAT_R8G8B8A8_UNORM, 0xf },
   { PIPE_FORMAT_R16G16B16_UINT, 0x7 },
   { PIPE_FORMAT_R32G32_UINT, 0x3 },
   { PIPE_FORMAT_R32G32B32_UINT, 0x7 },
   { PIPE_FORMAT_R32G32B32A32_UINT, 0xf },
   /* S8 -> Z24S8 */
   { PIPE_FORMAT_R8G8B8A8_UNORM, 0x8 },
   /* S8 -> Z32_S8X24 */
   { PIPE_FORMAT_R32G32_UINT, 0x2 },
   /* Z24X8 -> Z24S8 */
   { PIPE_FORMAT_R8G8B8A8_UNORM, 0x7 },
   /* Z32 -> Z32_S8X24 */
   { PIPE_FORMAT_R32G32_UINT, 0x1 },
};

struct panvk_meta_copy_buf2img_info {
   struct {
      mali_ptr ptr;
      struct {
         unsigned line;
         unsigned surf;
      } stride;
   } buf;
};

#define panvk_meta_copy_buf2img_get_info_field(b, field) \
        nir_load_ubo((b), 1, \
                     sizeof(((struct panvk_meta_copy_buf2img_info *)0)->field) * 8, \
                     nir_imm_int(b, 0), \
                     nir_imm_int(b, offsetof(struct panvk_meta_copy_buf2img_info, field)), \
                     .align_mul = 4, \
                     .align_offset = 0, \
                     .range_base = 0, \
                     .range = ~0)
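
/* The macro above loads one field of panvk_meta_copy_buf2img_info from
 * UBO 0 at the field's struct offset; the backend compiler is then expected
 * to promote those UBO words to push constants (see the ubo_count assert
 * after compilation). */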

static mali_ptr
panvk_meta_copy_buf2img_shader(struct panfrost_device *pdev,
                               struct pan_pool *bin_pool,
                               struct panvk_meta_copy_format_info key,
                               struct pan_shader_info *shader_info)
{
   nir_builder b =
      nir_builder_init_simple_shader(MESA_SHADER_FRAGMENT,
                                     GENX(pan_shader_get_compiler_options)(),
                                     "panvk_meta_copy_buf2img(imgfmt=%s,mask=%x)",
                                     util_format_name(key.imgfmt),
                                     key.mask);

   b.shader->info.internal = true;
   b.shader->info.num_ubos = 1;

   nir_variable *coord_var =
      nir_variable_create(b.shader, nir_var_shader_in,
                          glsl_vector_type(GLSL_TYPE_FLOAT, 3),
                          "coord");
   coord_var->data.location = VARYING_SLOT_TEX0;
   nir_ssa_def *coord = nir_load_var(&b, coord_var);

   coord = nir_f2u32(&b, coord);

   nir_ssa_def *bufptr =
      panvk_meta_copy_buf2img_get_info_field(&b, buf.ptr);
   nir_ssa_def *buflinestride =
      panvk_meta_copy_buf2img_get_info_field(&b, buf.stride.line);
   nir_ssa_def *bufsurfstride =
      panvk_meta_copy_buf2img_get_info_field(&b, buf.stride.surf);

   unsigned imgtexelsz = util_format_get_blocksize(key.imgfmt);
   unsigned buftexelsz = panvk_meta_copy_buf_texelsize(key.imgfmt, key.mask);
   unsigned writemask = key.mask;

   nir_ssa_def *offset =
      nir_imul(&b, nir_channel(&b, coord, 0), nir_imm_int(&b, buftexelsz));
   offset = nir_iadd(&b, offset,
                     nir_imul(&b, nir_channel(&b, coord, 1), buflinestride));
   offset = nir_iadd(&b, offset,
                     nir_imul(&b, nir_channel(&b, coord, 2), bufsurfstride));
   bufptr = nir_iadd(&b, bufptr, nir_u2u64(&b, offset));

   unsigned imgcompsz =
      (imgtexelsz <= 4 && key.imgfmt != PIPE_FORMAT_R5G6B5_UNORM) ?
      1 : MIN2(1 << (ffs(imgtexelsz) - 1), 4);
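
   /* Component size as seen by the render target: blendable formats
    * (<= 32-bit texels, except RGB565) go through the blend unit with one
    * byte per component (e.g. RGBA8 -> imgcompsz = 1), while wider raw
    * formats use 16- or 32-bit UINT components (e.g. RG32 -> imgcompsz = 4). */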

   unsigned nimgcomps = imgtexelsz / imgcompsz;
   unsigned bufcompsz = MIN2(buftexelsz, imgcompsz);
   unsigned nbufcomps = buftexelsz / bufcompsz;

   assert(bufcompsz == 1 || bufcompsz == 2 || bufcompsz == 4);
   assert(nbufcomps <= 4 && nimgcomps <= 4);

   nir_ssa_def *texel =
      nir_load_global(&b, bufptr, bufcompsz, nbufcomps, bufcompsz * 8);

   enum glsl_base_type basetype;
   if (key.imgfmt == PIPE_FORMAT_R5G6B5_UNORM) {
      texel = nir_vec3(&b,
                       nir_iand_imm(&b, texel, BITFIELD_MASK(5)),
                       nir_iand_imm(&b, nir_ushr_imm(&b, texel, 5), BITFIELD_MASK(6)),
                       nir_iand_imm(&b, nir_ushr_imm(&b, texel, 11), BITFIELD_MASK(5)));
      texel = nir_fmul(&b,
                       nir_u2f32(&b, texel),
                       nir_vec3(&b,
                                nir_imm_float(&b, 1.0f / 31),
                                nir_imm_float(&b, 1.0f / 63),
                                nir_imm_float(&b, 1.0f / 31)));
      nimgcomps = 3;
      basetype = GLSL_TYPE_FLOAT;
   } else if (imgcompsz == 1) {
      assert(bufcompsz == 1);
      /* Blendable formats are unorm and the fixed-function blend unit
       * takes float values.
       */
      texel = nir_fmul(&b, nir_u2f32(&b, texel),
                       nir_imm_float(&b, 1.0f / 255));
      basetype = GLSL_TYPE_FLOAT;
   } else {
      texel = nir_u2uN(&b, texel, imgcompsz * 8);
      basetype = imgcompsz == 2 ? GLSL_TYPE_UINT16 : GLSL_TYPE_UINT;
   }

   /* We always pass the texel using 32-bit regs for now */
   nir_variable *out =
      nir_variable_create(b.shader, nir_var_shader_out,
                          glsl_vector_type(basetype, nimgcomps),
                          "out");
   out->data.location = FRAG_RESULT_DATA0;

   uint16_t fullmask = (1 << nimgcomps) - 1;

   assert(fullmask >= writemask);

   if (fullmask != writemask) {
      unsigned first_written_comp = ffs(writemask) - 1;
      nir_ssa_def *oldtexel = NULL;
      if (imgcompsz > 1)
         oldtexel = nir_load_var(&b, out);

      nir_ssa_def *texel_comps[4];
      for (unsigned i = 0; i < nimgcomps; i++) {
         if (writemask & BITFIELD_BIT(i))
            texel_comps[i] = nir_channel(&b, texel, i - first_written_comp);
         else if (imgcompsz > 1)
            texel_comps[i] = nir_channel(&b, oldtexel, i);
         else
            texel_comps[i] = nir_imm_intN_t(&b, 0, texel->bit_size);
      }

      texel = nir_vec(&b, texel_comps, nimgcomps);
   }

   nir_store_var(&b, out, texel, 0xff);

   struct panfrost_compile_inputs inputs = {
      .gpu_id = pdev->gpu_id,
      .is_blit = true,
   };

#if PAN_ARCH >= 6
   pan_pack(&inputs.bifrost.rt_conv[0], INTERNAL_CONVERSION, cfg) {
      cfg.memory_format = (imgcompsz == 2 ? MALI_RG16UI : MALI_RG32UI) << 12;
      cfg.register_format = imgcompsz == 2 ?
                            MALI_REGISTER_FILE_FORMAT_U16 :
                            MALI_REGISTER_FILE_FORMAT_U32;
   }
   inputs.bifrost.static_rt_conv = true;
#endif

   struct util_dynarray binary;

   util_dynarray_init(&binary, NULL);
   GENX(pan_shader_compile)(b.shader, &inputs, &binary, shader_info);

   /* Make sure UBO words have been upgraded to push constants */
   assert(shader_info->ubo_count == 1);

   mali_ptr shader =
      pan_pool_upload_aligned(bin_pool, binary.data, binary.size,
                              PAN_ARCH >= 6 ? 128 : 64);

   util_dynarray_fini(&binary);
   ralloc_free(b.shader);

   return shader;
}

static unsigned
panvk_meta_copy_buf2img_format_idx(struct panvk_meta_copy_format_info key)
{
   for (unsigned i = 0; i < ARRAY_SIZE(panvk_meta_copy_buf2img_fmts); i++) {
      if (!memcmp(&key, &panvk_meta_copy_buf2img_fmts[i], sizeof(key)))
         return i;
   }

   unreachable("Invalid image format\n");
}

static void
panvk_meta_copy_buf2img(struct panvk_cmd_buffer *cmdbuf,
                        const struct panvk_buffer *buf,
                        const struct panvk_image *img,
                        const VkBufferImageCopy *region)
{
   struct panfrost_device *pdev = &cmdbuf->device->physical_device->pdev;
   struct pan_fb_info *fbinfo = &cmdbuf->state.fb.info;
   unsigned minx = MAX2(region->imageOffset.x, 0);
   unsigned miny = MAX2(region->imageOffset.y, 0);
   unsigned maxx = MAX2(region->imageOffset.x + region->imageExtent.width - 1, 0);
   unsigned maxy = MAX2(region->imageOffset.y + region->imageExtent.height - 1, 0);

   mali_ptr vpd =
      panvk_per_arch(meta_emit_viewport)(&cmdbuf->desc_pool.base,
                                         minx, miny, maxx, maxy);

   float dst_rect[] = {
      minx, miny, 0.0, 1.0,
      maxx + 1, miny, 0.0, 1.0,
      minx, maxy + 1, 0.0, 1.0,
      maxx + 1, maxy + 1, 0.0, 1.0,
   };
   mali_ptr dst_coords =
      pan_pool_upload_aligned(&cmdbuf->desc_pool.base, dst_rect,
                              sizeof(dst_rect), 64);

   struct panvk_meta_copy_format_info key = {
      .imgfmt = panvk_meta_copy_buf2img_format(img->pimage.layout.format),
      .mask = panvk_meta_copy_img_mask(img->pimage.layout.format,
                                       region->imageSubresource.aspectMask),
   };

   unsigned fmtidx = panvk_meta_copy_buf2img_format_idx(key);

   mali_ptr rsd =
      cmdbuf->device->physical_device->meta.copy.buf2img[fmtidx].rsd;
   const struct panfrost_ubo_push *pushmap =
      &cmdbuf->device->physical_device->meta.copy.buf2img[fmtidx].pushmap;

   unsigned buftexelsz = panvk_meta_copy_buf_texelsize(key.imgfmt, key.mask);
   struct panvk_meta_copy_buf2img_info info = {
      .buf.ptr = buf->bo->ptr.gpu + buf->bo_offset + region->bufferOffset,
      .buf.stride.line = (region->bufferRowLength ? : region->imageExtent.width) * buftexelsz,
   };

   info.buf.stride.surf =
      (region->bufferImageHeight ? : region->imageExtent.height) * info.buf.stride.line;
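
   /* Per the Vulkan spec, a bufferRowLength/bufferImageHeight of zero means
    * tightly packed, hence the GNU "?:" fallbacks to the image extent. */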

   mali_ptr pushconsts =
      panvk_meta_copy_emit_push_constants(pdev, pushmap, &cmdbuf->desc_pool.base,
                                          &info, sizeof(info));
   mali_ptr ubo =
      panvk_meta_copy_emit_ubo(pdev, &cmdbuf->desc_pool.base, &info, sizeof(info));

   struct pan_image_view view = {
      .format = key.imgfmt,
      .dim = MALI_TEXTURE_DIMENSION_2D,
      .image = &img->pimage,
      .nr_samples = img->pimage.layout.nr_samples,
      .first_level = region->imageSubresource.mipLevel,
      .last_level = region->imageSubresource.mipLevel,
      .swizzle = { PIPE_SWIZZLE_X, PIPE_SWIZZLE_Y, PIPE_SWIZZLE_Z, PIPE_SWIZZLE_W },
   };

   /* TODO: don't force preloads of dst resources if unneeded */
   cmdbuf->state.fb.crc_valid[0] = false;
   *fbinfo = (struct pan_fb_info){
      .width = u_minify(img->pimage.layout.width, region->imageSubresource.mipLevel),
      .height = u_minify(img->pimage.layout.height, region->imageSubresource.mipLevel),
      .extent.minx = minx,
      .extent.maxx = maxx,
      .extent.miny = miny,
      .extent.maxy = maxy,
      .nr_samples = 1,
      .rt_count = 1,
      .rts[0].view = &view,
      .rts[0].preload = true,
      .rts[0].crc_valid = &cmdbuf->state.fb.crc_valid[0],
   };

   panvk_per_arch(cmd_close_batch)(cmdbuf);

   assert(region->imageSubresource.layerCount == 1 ||
          region->imageExtent.depth == 1);
   assert(region->imageOffset.z >= 0);
   unsigned first_layer = MAX2(region->imageSubresource.baseArrayLayer, region->imageOffset.z);
   unsigned nlayers = MAX2(region->imageSubresource.layerCount, region->imageExtent.depth);
   for (unsigned l = 0; l < nlayers; l++) {
      float src_rect[] = {
         0, 0, l, 1.0,
         region->imageExtent.width, 0, l, 1.0,
         0, region->imageExtent.height, l, 1.0,
         region->imageExtent.width, region->imageExtent.height, l, 1.0,
      };

      mali_ptr src_coords =
         pan_pool_upload_aligned(&cmdbuf->desc_pool.base, src_rect,
                                 sizeof(src_rect), 64);

      struct panvk_batch *batch = panvk_cmd_open_batch(cmdbuf);

      view.first_layer = view.last_layer = l + first_layer;
      batch->blit.src = buf->bo;
      batch->blit.dst = img->pimage.data.bo;
      panvk_per_arch(cmd_alloc_tls_desc)(cmdbuf, true);
      panvk_per_arch(cmd_alloc_fb_desc)(cmdbuf);
      panvk_per_arch(cmd_prepare_tiler_context)(cmdbuf);

      mali_ptr tsd, tiler;

#if PAN_ARCH >= 6
      tsd = batch->tls.gpu;
      tiler = batch->tiler.descs.gpu;
#else
      tsd = batch->fb.desc.gpu;
      tiler = 0;
#endif

      struct panfrost_ptr job;

      job = panvk_meta_copy_emit_tiler_job(&cmdbuf->desc_pool.base,
                                           &batch->scoreboard,
                                           src_coords, dst_coords,
                                           0, 0, ubo, pushconsts,
                                           vpd, rsd, tsd, tiler);

      util_dynarray_append(&batch->jobs, void *, job.cpu);
      panvk_per_arch(cmd_close_batch)(cmdbuf);
   }
}

static void
panvk_meta_copy_buf2img_init(struct panvk_physical_device *dev)
{
   STATIC_ASSERT(ARRAY_SIZE(panvk_meta_copy_buf2img_fmts) == PANVK_META_COPY_BUF2IMG_NUM_FORMATS);

   for (unsigned i = 0; i < ARRAY_SIZE(panvk_meta_copy_buf2img_fmts); i++) {
      struct pan_shader_info shader_info;
      mali_ptr shader =
         panvk_meta_copy_buf2img_shader(&dev->pdev, &dev->meta.bin_pool.base,
                                        panvk_meta_copy_buf2img_fmts[i],
                                        &shader_info);
      dev->meta.copy.buf2img[i].pushmap = shader_info.push;
      dev->meta.copy.buf2img[i].rsd =
         panvk_meta_copy_to_img_emit_rsd(&dev->pdev, &dev->meta.desc_pool.base,
                                         shader, &shader_info,
                                         panvk_meta_copy_buf2img_fmts[i].imgfmt,
                                         panvk_meta_copy_buf2img_fmts[i].mask,
                                         false);
   }
}

void
panvk_per_arch(CmdCopyBufferToImage)(VkCommandBuffer commandBuffer,
                                     VkBuffer srcBuffer,
                                     VkImage destImage,
                                     VkImageLayout destImageLayout,
                                     uint32_t regionCount,
                                     const VkBufferImageCopy *pRegions)
{
   VK_FROM_HANDLE(panvk_cmd_buffer, cmdbuf, commandBuffer);
   VK_FROM_HANDLE(panvk_buffer, buf, srcBuffer);
   VK_FROM_HANDLE(panvk_image, img, destImage);

   for (unsigned i = 0; i < regionCount; i++) {
      panvk_meta_copy_buf2img(cmdbuf, buf, img, &pRegions[i]);
   }
}

static const struct panvk_meta_copy_format_info panvk_meta_copy_img2buf_fmts[] = {
   { PIPE_FORMAT_R8_UINT, 0x1 },
   { PIPE_FORMAT_R8G8_UINT, 0x3 },
   { PIPE_FORMAT_R5G6B5_UNORM, 0x7 },
   { PIPE_FORMAT_R8G8B8A8_UINT, 0xf },
   { PIPE_FORMAT_R16G16B16_UINT, 0x7 },
   { PIPE_FORMAT_R32G32_UINT, 0x3 },
   { PIPE_FORMAT_R32G32B32_UINT, 0x7 },
   { PIPE_FORMAT_R32G32B32A32_UINT, 0xf },
   /* S8 -> Z24S8 */
   { PIPE_FORMAT_R8G8B8A8_UINT, 0x8 },
   /* S8 -> Z32_S8X24 */
   { PIPE_FORMAT_R32G32_UINT, 0x2 },
   /* Z24X8 -> Z24S8 */
   { PIPE_FORMAT_R8G8B8A8_UINT, 0x7 },
   /* Z32 -> Z32_S8X24 */
   { PIPE_FORMAT_R32G32_UINT, 0x1 },
};

static enum pipe_format
panvk_meta_copy_img2buf_format(enum pipe_format imgfmt)
{
   /* Pick the UINT variant matching the texel size so raw values are
    * copied back to the buffer untouched.
    */
   switch (util_format_get_blocksize(imgfmt)) {
   case 1: return PIPE_FORMAT_R8_UINT;
   /* AFBC stores things differently for RGB565,
    * we can't simply map to R8G8 in that case */
   case 2: return (imgfmt == PIPE_FORMAT_R5G6B5_UNORM ||
                   imgfmt == PIPE_FORMAT_B5G6R5_UNORM) ?
                  PIPE_FORMAT_R5G6B5_UNORM : PIPE_FORMAT_R8G8_UINT;
   case 4: return PIPE_FORMAT_R8G8B8A8_UINT;
   case 6: return PIPE_FORMAT_R16G16B16_UINT;
   case 8: return PIPE_FORMAT_R32G32_UINT;
   case 12: return PIPE_FORMAT_R32G32B32_UINT;
   case 16: return PIPE_FORMAT_R32G32B32A32_UINT;
   default: unreachable("Invalid format\n");
   }
}

struct panvk_meta_copy_img2buf_info {
   struct {
      mali_ptr ptr;
      struct {
         unsigned line;
         unsigned surf;
      } stride;
   } buf;
   struct {
      struct {
         unsigned x, y, z;
      } offset;
      struct {
         unsigned minx, miny, maxx, maxy;
      } extent;
   } img;
};

#define panvk_meta_copy_img2buf_get_info_field(b, field) \
        nir_load_ubo((b), 1, \
                     sizeof(((struct panvk_meta_copy_img2buf_info *)0)->field) * 8, \
                     nir_imm_int(b, 0), \
                     nir_imm_int(b, offsetof(struct panvk_meta_copy_img2buf_info, field)), \
                     .align_mul = 4, \
                     .align_offset = 0, \
                     .range_base = 0, \
                     .range = ~0)

static mali_ptr
panvk_meta_copy_img2buf_shader(struct panfrost_device *pdev,
                               struct pan_pool *bin_pool,
                               struct panvk_meta_copy_format_info key,
                               unsigned texdim, unsigned texisarray,
                               struct pan_shader_info *shader_info)
{
   unsigned imgtexelsz = util_format_get_blocksize(key.imgfmt);
   unsigned buftexelsz = panvk_meta_copy_buf_texelsize(key.imgfmt, key.mask);

   /* FIXME: Won't work on compute queues, but we can't do that with
    * a compute shader if the destination is an AFBC surface.
    */
   nir_builder b =
      nir_builder_init_simple_shader(MESA_SHADER_COMPUTE,
                                     GENX(pan_shader_get_compiler_options)(),
                                     "panvk_meta_copy_img2buf(dim=%dD%s,imgfmt=%s,mask=%x)",
                                     texdim, texisarray ? "[]" : "",
                                     util_format_name(key.imgfmt),
                                     key.mask);

   b.shader->info.internal = true;
   b.shader->info.num_ubos = 1;

   nir_ssa_def *coord = nir_load_global_invocation_id(&b, 32);
   nir_ssa_def *bufptr =
      panvk_meta_copy_img2buf_get_info_field(&b, buf.ptr);
   nir_ssa_def *buflinestride =
      panvk_meta_copy_img2buf_get_info_field(&b, buf.stride.line);
   nir_ssa_def *bufsurfstride =
      panvk_meta_copy_img2buf_get_info_field(&b, buf.stride.surf);

   nir_ssa_def *imgminx =
      panvk_meta_copy_img2buf_get_info_field(&b, img.extent.minx);
   nir_ssa_def *imgminy =
      panvk_meta_copy_img2buf_get_info_field(&b, img.extent.miny);
   nir_ssa_def *imgmaxx =
      panvk_meta_copy_img2buf_get_info_field(&b, img.extent.maxx);
   nir_ssa_def *imgmaxy =
      panvk_meta_copy_img2buf_get_info_field(&b, img.extent.maxy);

   nir_ssa_def *imgcoords, *inbounds;

   switch (texdim + texisarray) {
   case 1:
      imgcoords =
         nir_iadd(&b,
                  nir_channel(&b, coord, 0),
                  panvk_meta_copy_img2buf_get_info_field(&b, img.offset.x));
      inbounds =
         nir_iand(&b,
                  nir_uge(&b, imgmaxx, nir_channel(&b, imgcoords, 0)),
                  nir_uge(&b, nir_channel(&b, imgcoords, 0), imgminx));
      break;
   case 2:
      imgcoords =
         nir_vec2(&b,
                  nir_iadd(&b,
                           nir_channel(&b, coord, 0),
                           panvk_meta_copy_img2buf_get_info_field(&b, img.offset.x)),
                  nir_iadd(&b,
                           nir_channel(&b, coord, 1),
                           panvk_meta_copy_img2buf_get_info_field(&b, img.offset.y)));
      inbounds =
         nir_iand(&b,
                  nir_iand(&b,
                           nir_uge(&b, imgmaxx, nir_channel(&b, imgcoords, 0)),
                           nir_uge(&b, imgmaxy, nir_channel(&b, imgcoords, 1))),
                  nir_iand(&b,
                           nir_uge(&b, nir_channel(&b, imgcoords, 0), imgminx),
                           nir_uge(&b, nir_channel(&b, imgcoords, 1), imgminy)));
      break;
   case 3:
      imgcoords =
         nir_vec3(&b,
                  nir_iadd(&b,
                           nir_channel(&b, coord, 0),
                           panvk_meta_copy_img2buf_get_info_field(&b, img.offset.x)),
                  nir_iadd(&b,
                           nir_channel(&b, coord, 1),
                           panvk_meta_copy_img2buf_get_info_field(&b, img.offset.y)),
                  nir_iadd(&b,
                           nir_channel(&b, coord, 2),
                           panvk_meta_copy_img2buf_get_info_field(&b, img.offset.z)));
      inbounds =
         nir_iand(&b,
                  nir_iand(&b,
                           nir_uge(&b, imgmaxx, nir_channel(&b, imgcoords, 0)),
                           nir_uge(&b, imgmaxy, nir_channel(&b, imgcoords, 1))),
                  nir_iand(&b,
                           nir_uge(&b, nir_channel(&b, imgcoords, 0), imgminx),
                           nir_uge(&b, nir_channel(&b, imgcoords, 1), imgminy)));
      break;
   default:
      unreachable("Invalid texture dimension\n");
   }

   nir_push_if(&b, inbounds);

   /* FIXME: doesn't work for tiled+compressed formats since blocks are 4x4
    * blocks instead of 16x16 texels in that case, and there's nothing we can
    * do to force the tile size to 4x4 in the render path.
    * This being said, compressed textures are not compatible with AFBC, so we
    * could use a compute shader arranging the blocks properly.
    */
   nir_ssa_def *offset =
      nir_imul(&b, nir_channel(&b, coord, 0), nir_imm_int(&b, buftexelsz));
   offset = nir_iadd(&b, offset,
                     nir_imul(&b, nir_channel(&b, coord, 1), buflinestride));
   offset = nir_iadd(&b, offset,
                     nir_imul(&b, nir_channel(&b, coord, 2), bufsurfstride));
   bufptr = nir_iadd(&b, bufptr, nir_u2u64(&b, offset));

   unsigned imgcompsz = imgtexelsz <= 4 ?
                        1 : MIN2(1 << (ffs(imgtexelsz) - 1), 4);
   unsigned nimgcomps = imgtexelsz / imgcompsz;
   assert(nimgcomps <= 4);

   nir_tex_instr *tex = nir_tex_instr_create(b.shader, 1);
   tex->op = nir_texop_txf;
   tex->texture_index = 0;
   tex->is_array = texisarray;
   tex->dest_type = util_format_is_unorm(key.imgfmt) ?
                    nir_type_float32 : nir_type_uint32;

   switch (texdim) {
   case 1: tex->sampler_dim = GLSL_SAMPLER_DIM_1D; break;
   case 2: tex->sampler_dim = GLSL_SAMPLER_DIM_2D; break;
   case 3: tex->sampler_dim = GLSL_SAMPLER_DIM_3D; break;
   default: unreachable("Invalid texture dimension");
   }

   tex->src[0].src_type = nir_tex_src_coord;
   tex->src[0].src = nir_src_for_ssa(imgcoords);
   tex->coord_components = texdim + texisarray;
   nir_ssa_dest_init(&tex->instr, &tex->dest, 4,
                     nir_alu_type_get_type_size(tex->dest_type), NULL);
   nir_builder_instr_insert(&b, &tex->instr);

   nir_ssa_def *texel = &tex->dest.ssa;

   unsigned fullmask = (1 << util_format_get_nr_components(key.imgfmt)) - 1;
   unsigned nbufcomps = util_bitcount(fullmask);
   if (key.mask != fullmask) {
      nir_ssa_def *bufcomps[4];
      nbufcomps = 0;
      for (unsigned i = 0; i < nimgcomps; i++) {
         if (key.mask & BITFIELD_BIT(i))
            bufcomps[nbufcomps++] = nir_channel(&b, texel, i);
      }

      texel = nir_vec(&b, bufcomps, nbufcomps);
   }

   unsigned bufcompsz = buftexelsz / nbufcomps;

   if (key.imgfmt == PIPE_FORMAT_R5G6B5_UNORM) {
      texel = nir_fmul(&b, texel,
                       nir_vec3(&b,
                                nir_imm_float(&b, 31),
                                nir_imm_float(&b, 63),
                                nir_imm_float(&b, 31)));
      texel = nir_f2u16(&b, texel);
      texel = nir_ior(&b, nir_channel(&b, texel, 0),
                      nir_ior(&b,
                              nir_ishl(&b, nir_channel(&b, texel, 1), nir_imm_int(&b, 5)),
                              nir_ishl(&b, nir_channel(&b, texel, 2), nir_imm_int(&b, 11))));
      imgcompsz = 2;
      bufcompsz = 2;
      nbufcomps = 1;
      nimgcomps = 1;
   } else if (imgcompsz == 1) {
      nir_ssa_def *packed = nir_channel(&b, texel, 0);
      for (unsigned i = 1; i < nbufcomps; i++) {
         packed = nir_ior(&b, packed,
                          nir_ishl(&b, nir_iand_imm(&b, nir_channel(&b, texel, i), 0xff),
                                   nir_imm_int(&b, i * 8)));
      }
      texel = packed;

      bufcompsz = nbufcomps == 3 ? 4 : nbufcomps;
      nbufcomps = 1;
   }
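
   /* Byte-sized components are packed into a single word before the store:
    * e.g. an RGBA8 texel (r, g, b, a) is written as the 32-bit value
    * r | g << 8 | b << 16 | a << 24. */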

   assert(bufcompsz == 1 || bufcompsz == 2 || bufcompsz == 4);
   assert(nbufcomps <= 4 && nimgcomps <= 4);
   texel = nir_u2uN(&b, texel, bufcompsz * 8);

   nir_store_global(&b, bufptr, bufcompsz, texel, (1 << nbufcomps) - 1);
   nir_pop_if(&b, NULL);

   struct panfrost_compile_inputs inputs = {
      .gpu_id = pdev->gpu_id,
      .is_blit = true,
   };

   struct util_dynarray binary;

   util_dynarray_init(&binary, NULL);
   GENX(pan_shader_compile)(b.shader, &inputs, &binary, shader_info);

   /* Make sure UBO words have been upgraded to push constants and everything
    * is at the right place.
    */
   assert(shader_info->ubo_count == 1);
   assert(shader_info->push.count <= (sizeof(struct panvk_meta_copy_img2buf_info) / 4));

   mali_ptr shader =
      pan_pool_upload_aligned(bin_pool, binary.data, binary.size,
                              PAN_ARCH >= 6 ? 128 : 64);

   util_dynarray_fini(&binary);
   ralloc_free(b.shader);

   return shader;
}
1580
1581 static unsigned
panvk_meta_copy_img2buf_format_idx(struct panvk_meta_copy_format_info key)1582 panvk_meta_copy_img2buf_format_idx(struct panvk_meta_copy_format_info key)
1583 {
1584 for (unsigned i = 0; i < ARRAY_SIZE(panvk_meta_copy_img2buf_fmts); i++) {
1585 if (!memcmp(&key, &panvk_meta_copy_img2buf_fmts[i], sizeof(key)))
1586 return i;
1587 }
1588
1589 unreachable("Invalid texel size\n");
1590 }
1591
1592 static void
panvk_meta_copy_img2buf(struct panvk_cmd_buffer * cmdbuf,const struct panvk_buffer * buf,const struct panvk_image * img,const VkBufferImageCopy * region)1593 panvk_meta_copy_img2buf(struct panvk_cmd_buffer *cmdbuf,
1594 const struct panvk_buffer *buf,
1595 const struct panvk_image *img,
1596 const VkBufferImageCopy *region)
1597 {
1598 struct panfrost_device *pdev = &cmdbuf->device->physical_device->pdev;
1599 struct panvk_meta_copy_format_info key = {
1600 .imgfmt = panvk_meta_copy_img2buf_format(img->pimage.layout.format),
1601 .mask = panvk_meta_copy_img_mask(img->pimage.layout.format,
1602 region->imageSubresource.aspectMask),
1603 };
1604 unsigned buftexelsz = panvk_meta_copy_buf_texelsize(key.imgfmt, key.mask);
1605 unsigned texdimidx =
1606 panvk_meta_copy_tex_type(img->pimage.layout.dim,
1607 img->pimage.layout.array_size > 1);
1608 unsigned fmtidx = panvk_meta_copy_img2buf_format_idx(key);
1609
1610 mali_ptr rsd =
1611 cmdbuf->device->physical_device->meta.copy.img2buf[texdimidx][fmtidx].rsd;
1612 const struct panfrost_ubo_push *pushmap =
1613 &cmdbuf->device->physical_device->meta.copy.img2buf[texdimidx][fmtidx].pushmap;
1614
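   /* The copy is dispatched in 16x16 tiles: the image offset is rounded
    * down to the tile grid here, and the store in the shader is guarded
    * by a check against img.extent, so texels outside the copy region
    * are skipped.
    */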
   struct panvk_meta_copy_img2buf_info info = {
      .buf.ptr = buf->bo->ptr.gpu + buf->bo_offset + region->bufferOffset,
      .buf.stride.line = (region->bufferRowLength ? : region->imageExtent.width) * buftexelsz,
      .img.offset.x = MAX2(region->imageOffset.x & ~15, 0),
      .img.extent.minx = MAX2(region->imageOffset.x, 0),
      .img.extent.maxx = MAX2(region->imageOffset.x + region->imageExtent.width - 1, 0),
   };

   if (img->pimage.layout.dim == MALI_TEXTURE_DIMENSION_1D) {
      info.img.extent.maxy = region->imageSubresource.layerCount - 1;
   } else {
      info.img.offset.y = MAX2(region->imageOffset.y & ~15, 0);
      info.img.offset.z = MAX2(region->imageOffset.z, 0);
      info.img.extent.miny = MAX2(region->imageOffset.y, 0);
      info.img.extent.maxy = MAX2(region->imageOffset.y + region->imageExtent.height - 1, 0);
   }

   info.buf.stride.surf = (region->bufferImageHeight ? : region->imageExtent.height) *
                          info.buf.stride.line;

   mali_ptr pushconsts =
      panvk_meta_copy_emit_push_constants(pdev, pushmap, &cmdbuf->desc_pool.base,
                                          &info, sizeof(info));
   mali_ptr ubo =
      panvk_meta_copy_emit_ubo(pdev, &cmdbuf->desc_pool.base, &info, sizeof(info));

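   /* The texel fetch doesn't care about cube-map semantics: sample cube
    * images as 2D arrays, with one layer per face.
    */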
   struct pan_image_view view = {
      .format = key.imgfmt,
      .dim = img->pimage.layout.dim == MALI_TEXTURE_DIMENSION_CUBE ?
             MALI_TEXTURE_DIMENSION_2D : img->pimage.layout.dim,
      .image = &img->pimage,
      .nr_samples = img->pimage.layout.nr_samples,
      .first_level = region->imageSubresource.mipLevel,
      .last_level = region->imageSubresource.mipLevel,
      .first_layer = region->imageSubresource.baseArrayLayer,
      .last_layer = region->imageSubresource.baseArrayLayer + region->imageSubresource.layerCount - 1,
      .swizzle = { PIPE_SWIZZLE_X, PIPE_SWIZZLE_Y, PIPE_SWIZZLE_Z, PIPE_SWIZZLE_W },
   };

   mali_ptr texture =
      panvk_meta_copy_img_emit_texture(pdev, &cmdbuf->desc_pool.base, &view);
   mali_ptr sampler =
      panvk_meta_copy_img_emit_sampler(pdev, &cmdbuf->desc_pool.base);

   panvk_per_arch(cmd_close_batch)(cmdbuf);

   struct panvk_batch *batch = panvk_cmd_open_batch(cmdbuf);

   struct pan_tls_info tlsinfo = { 0 };

   batch->blit.src = img->pimage.data.bo;
   batch->blit.dst = buf->bo;
   batch->tls =
      pan_pool_alloc_desc(&cmdbuf->desc_pool.base, LOCAL_STORAGE);
   GENX(pan_emit_tls)(&tlsinfo, batch->tls.cpu);

   mali_ptr tsd = batch->tls.gpu;

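   /* One workgroup per 16x16 tile (16x1 for 1D images), with the Z
    * dimension covering the layers/depth of the copied region.
    */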
   struct pan_compute_dim wg_sz = {
      16,
      img->pimage.layout.dim == MALI_TEXTURE_DIMENSION_1D ? 1 : 16,
      1,
   };

   struct pan_compute_dim num_wg = {
      (ALIGN_POT(info.img.extent.maxx + 1, 16) - info.img.offset.x) / 16,
      img->pimage.layout.dim == MALI_TEXTURE_DIMENSION_1D ?
         region->imageSubresource.layerCount :
         (ALIGN_POT(info.img.extent.maxy + 1, 16) - info.img.offset.y) / 16,
      img->pimage.layout.dim != MALI_TEXTURE_DIMENSION_1D ?
         MAX2(region->imageSubresource.layerCount, region->imageExtent.depth) : 1,
   };

   struct panfrost_ptr job =
      panvk_meta_copy_emit_compute_job(&cmdbuf->desc_pool.base,
                                       &batch->scoreboard, &num_wg, &wg_sz,
                                       texture, sampler,
                                       ubo, pushconsts,
                                       rsd, tsd);

   util_dynarray_append(&batch->jobs, void *, job.cpu);

   panvk_per_arch(cmd_close_batch)(cmdbuf);
}

static void
panvk_meta_copy_img2buf_init(struct panvk_physical_device *dev)
{
   STATIC_ASSERT(ARRAY_SIZE(panvk_meta_copy_img2buf_fmts) == PANVK_META_COPY_IMG2BUF_NUM_FORMATS);

   for (unsigned i = 0; i < ARRAY_SIZE(panvk_meta_copy_img2buf_fmts); i++) {
      for (unsigned texdim = 1; texdim <= 3; texdim++) {
         unsigned texdimidx = panvk_meta_copy_tex_type(texdim, false);
         assert(texdimidx < ARRAY_SIZE(dev->meta.copy.img2buf));

         struct pan_shader_info shader_info;
         mali_ptr shader =
            panvk_meta_copy_img2buf_shader(&dev->pdev, &dev->meta.bin_pool.base,
                                           panvk_meta_copy_img2buf_fmts[i],
                                           texdim, false, &shader_info);
         dev->meta.copy.img2buf[texdimidx][i].pushmap = shader_info.push;
         dev->meta.copy.img2buf[texdimidx][i].rsd =
            panvk_meta_copy_to_buf_emit_rsd(&dev->pdev,
                                            &dev->meta.desc_pool.base,
                                            shader, &shader_info, true);

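         /* No array variant for 3D textures. */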
         if (texdim == 3)
            continue;

         memset(&shader_info, 0, sizeof(shader_info));
         texdimidx = panvk_meta_copy_tex_type(texdim, true);
         assert(texdimidx < ARRAY_SIZE(dev->meta.copy.img2buf));
         shader =
            panvk_meta_copy_img2buf_shader(&dev->pdev, &dev->meta.bin_pool.base,
                                           panvk_meta_copy_img2buf_fmts[i],
                                           texdim, true, &shader_info);
         dev->meta.copy.img2buf[texdimidx][i].pushmap = shader_info.push;
         dev->meta.copy.img2buf[texdimidx][i].rsd =
            panvk_meta_copy_to_buf_emit_rsd(&dev->pdev,
                                            &dev->meta.desc_pool.base,
                                            shader, &shader_info, true);
      }
   }
}

void
panvk_per_arch(CmdCopyImageToBuffer)(VkCommandBuffer commandBuffer,
                                     VkImage srcImage,
                                     VkImageLayout srcImageLayout,
                                     VkBuffer destBuffer,
                                     uint32_t regionCount,
                                     const VkBufferImageCopy *pRegions)
{
   VK_FROM_HANDLE(panvk_cmd_buffer, cmdbuf, commandBuffer);
   VK_FROM_HANDLE(panvk_buffer, buf, destBuffer);
   VK_FROM_HANDLE(panvk_image, img, srcImage);

   for (unsigned i = 0; i < regionCount; i++) {
      panvk_meta_copy_img2buf(cmdbuf, buf, img, &pRegions[i]);
   }
}

struct panvk_meta_copy_buf2buf_info {
   mali_ptr src;
   mali_ptr dst;
};

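/* Loads a field of panvk_meta_copy_buf2buf_info from UBO 0. The compiler
 * is expected to promote these UBO words to push constants, which the
 * asserts after compilation verify.
 */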
#define panvk_meta_copy_buf2buf_get_info_field(b, field) \
        nir_load_ubo((b), 1, \
                     sizeof(((struct panvk_meta_copy_buf2buf_info *)0)->field) * 8, \
                     nir_imm_int(b, 0), \
                     nir_imm_int(b, offsetof(struct panvk_meta_copy_buf2buf_info, field)), \
                     .align_mul = 4, \
                     .align_offset = 0, \
                     .range_base = 0, \
                     .range = ~0)

static mali_ptr
panvk_meta_copy_buf2buf_shader(struct panfrost_device *pdev,
                               struct pan_pool *bin_pool,
                               unsigned blksz,
                               struct pan_shader_info *shader_info)
{
   /* FIXME: Won't work on compute queues, but we can't do that with
    * a compute shader if the destination is an AFBC surface.
    */
   nir_builder b =
      nir_builder_init_simple_shader(MESA_SHADER_COMPUTE,
                                     GENX(pan_shader_get_compiler_options)(),
                                     "panvk_meta_copy_buf2buf(blksz=%d)",
                                     blksz);

   b.shader->info.internal = true;
   b.shader->info.num_ubos = 1;

   nir_ssa_def *coord = nir_load_global_invocation_id(&b, 32);

   nir_ssa_def *offset =
      nir_u2u64(&b, nir_imul(&b, nir_channel(&b, coord, 0), nir_imm_int(&b, blksz)));
   nir_ssa_def *srcptr =
      nir_iadd(&b, panvk_meta_copy_buf2buf_get_info_field(&b, src), offset);
   nir_ssa_def *dstptr =
      nir_iadd(&b, panvk_meta_copy_buf2buf_get_info_field(&b, dst), offset);

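   /* Each invocation copies one blksz-byte block, loaded and stored as up
    * to four components of at most 32 bits each.
    */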
   unsigned compsz = blksz < 4 ? blksz : 4;
   unsigned ncomps = blksz / compsz;
   nir_store_global(&b, dstptr, blksz,
                    nir_load_global(&b, srcptr, blksz, ncomps, compsz * 8),
                    (1 << ncomps) - 1);

   struct panfrost_compile_inputs inputs = {
      .gpu_id = pdev->gpu_id,
      .is_blit = true,
   };

   struct util_dynarray binary;

   util_dynarray_init(&binary, NULL);
   GENX(pan_shader_compile)(b.shader, &inputs, &binary, shader_info);

   /* Make sure UBO words have been upgraded to push constants and everything
    * is at the right place.
    */
   assert(shader_info->ubo_count == 1);
   assert(shader_info->push.count == (sizeof(struct panvk_meta_copy_buf2buf_info) / 4));

   mali_ptr shader =
      pan_pool_upload_aligned(bin_pool, binary.data, binary.size,
                              PAN_ARCH >= 6 ? 128 : 64);

   util_dynarray_fini(&binary);
   ralloc_free(b.shader);

   return shader;
}

static void
panvk_meta_copy_buf2buf_init(struct panvk_physical_device *dev)
{
   for (unsigned i = 0; i < ARRAY_SIZE(dev->meta.copy.buf2buf); i++) {
      struct pan_shader_info shader_info;
      mali_ptr shader =
         panvk_meta_copy_buf2buf_shader(&dev->pdev, &dev->meta.bin_pool.base,
                                        1 << i, &shader_info);
      dev->meta.copy.buf2buf[i].pushmap = shader_info.push;
      dev->meta.copy.buf2buf[i].rsd =
         panvk_meta_copy_to_buf_emit_rsd(&dev->pdev, &dev->meta.desc_pool.base,
                                         shader, &shader_info, false);
   }
}

static void
panvk_meta_copy_buf2buf(struct panvk_cmd_buffer *cmdbuf,
                        const struct panvk_buffer *src,
                        const struct panvk_buffer *dst,
                        const VkBufferCopy *region)
{
   struct panfrost_device *pdev = &cmdbuf->device->physical_device->pdev;

   struct panvk_meta_copy_buf2buf_info info = {
      .src = src->bo->ptr.gpu + src->bo_offset + region->srcOffset,
      .dst = dst->bo->ptr.gpu + dst->bo_offset + region->dstOffset,
   };

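   /* Use the largest power-of-two block size (at most 16 bytes) that
    * divides the source address, the destination address and the copy
    * size, so each invocation moves as much data as alignment allows.
    */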
   unsigned alignment = ffs((info.src | info.dst | region->size) & 15);
   unsigned log2blksz = alignment ? alignment - 1 : 4;

   assert(log2blksz < ARRAY_SIZE(cmdbuf->device->physical_device->meta.copy.buf2buf));
   mali_ptr rsd =
      cmdbuf->device->physical_device->meta.copy.buf2buf[log2blksz].rsd;
   const struct panfrost_ubo_push *pushmap =
      &cmdbuf->device->physical_device->meta.copy.buf2buf[log2blksz].pushmap;

   mali_ptr pushconsts =
      panvk_meta_copy_emit_push_constants(pdev, pushmap, &cmdbuf->desc_pool.base,
                                          &info, sizeof(info));
   mali_ptr ubo =
      panvk_meta_copy_emit_ubo(pdev, &cmdbuf->desc_pool.base, &info, sizeof(info));

   panvk_per_arch(cmd_close_batch)(cmdbuf);

   struct panvk_batch *batch = panvk_cmd_open_batch(cmdbuf);

   panvk_per_arch(cmd_alloc_tls_desc)(cmdbuf, false);

   mali_ptr tsd = batch->tls.gpu;

   unsigned nblocks = region->size >> log2blksz;
   struct pan_compute_dim num_wg = { nblocks, 1, 1 };
   struct pan_compute_dim wg_sz = { 1, 1, 1 };
   struct panfrost_ptr job =
      panvk_meta_copy_emit_compute_job(&cmdbuf->desc_pool.base,
                                       &batch->scoreboard,
                                       &num_wg, &wg_sz,
                                       0, 0, ubo, pushconsts, rsd, tsd);

   util_dynarray_append(&batch->jobs, void *, job.cpu);

   batch->blit.src = src->bo;
   batch->blit.dst = dst->bo;
   panvk_per_arch(cmd_close_batch)(cmdbuf);
}

void
panvk_per_arch(CmdCopyBuffer)(VkCommandBuffer commandBuffer,
                              VkBuffer srcBuffer,
                              VkBuffer destBuffer,
                              uint32_t regionCount,
                              const VkBufferCopy *pRegions)
{
   VK_FROM_HANDLE(panvk_cmd_buffer, cmdbuf, commandBuffer);
   VK_FROM_HANDLE(panvk_buffer, src, srcBuffer);
   VK_FROM_HANDLE(panvk_buffer, dst, destBuffer);

   for (unsigned i = 0; i < regionCount; i++) {
      panvk_meta_copy_buf2buf(cmdbuf, src, dst, &pRegions[i]);
   }
}

struct panvk_meta_fill_buf_info {
   mali_ptr start;
   uint32_t val;
};

#define panvk_meta_fill_buf_get_info_field(b, field) \
        nir_load_ubo((b), 1, \
                     sizeof(((struct panvk_meta_fill_buf_info *)0)->field) * 8, \
                     nir_imm_int(b, 0), \
                     nir_imm_int(b, offsetof(struct panvk_meta_fill_buf_info, field)), \
                     .align_mul = 4, \
                     .align_offset = 0, \
                     .range_base = 0, \
                     .range = ~0)

static mali_ptr
panvk_meta_fill_buf_shader(struct panfrost_device *pdev,
                           struct pan_pool *bin_pool,
                           struct pan_shader_info *shader_info)
{
   /* FIXME: Won't work on compute queues, but we can't do that with
    * a compute shader if the destination is an AFBC surface.
    */
   nir_builder b =
      nir_builder_init_simple_shader(MESA_SHADER_COMPUTE,
                                     GENX(pan_shader_get_compiler_options)(),
                                     "panvk_meta_fill_buf()");

   b.shader->info.internal = true;
   b.shader->info.num_ubos = 1;

   nir_ssa_def *coord = nir_load_global_invocation_id(&b, 32);

   nir_ssa_def *offset =
      nir_u2u64(&b, nir_imul(&b, nir_channel(&b, coord, 0), nir_imm_int(&b, sizeof(uint32_t))));
   nir_ssa_def *ptr =
      nir_iadd(&b, panvk_meta_fill_buf_get_info_field(&b, start), offset);
   nir_ssa_def *val = panvk_meta_fill_buf_get_info_field(&b, val);

   nir_store_global(&b, ptr, sizeof(uint32_t), val, 1);

   struct panfrost_compile_inputs inputs = {
      .gpu_id = pdev->gpu_id,
      .is_blit = true,
   };

   struct util_dynarray binary;

   util_dynarray_init(&binary, NULL);
   GENX(pan_shader_compile)(b.shader, &inputs, &binary, shader_info);

   /* Make sure UBO words have been upgraded to push constants and everything
    * is at the right place.
    */
   assert(shader_info->ubo_count == 1);
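   /* Three push-constant words: the 64-bit start pointer plus the 32-bit
    * fill value.
    */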
   assert(shader_info->push.count == 3);

   mali_ptr shader =
      pan_pool_upload_aligned(bin_pool, binary.data, binary.size,
                              PAN_ARCH >= 6 ? 128 : 64);

   util_dynarray_fini(&binary);
   ralloc_free(b.shader);

   return shader;
}

static mali_ptr
panvk_meta_fill_buf_emit_rsd(struct panfrost_device *pdev,
                             struct pan_pool *bin_pool,
                             struct pan_pool *desc_pool,
                             struct panfrost_ubo_push *pushmap)
{
   struct pan_shader_info shader_info;

   mali_ptr shader =
      panvk_meta_fill_buf_shader(pdev, bin_pool, &shader_info);

   struct panfrost_ptr rsd_ptr =
      pan_pool_alloc_desc_aggregate(desc_pool,
                                    PAN_DESC(RENDERER_STATE));

   pan_pack(rsd_ptr.cpu, RENDERER_STATE, cfg) {
      pan_shader_prepare_rsd(&shader_info, shader, &cfg);
   }

   *pushmap = shader_info.push;
   return rsd_ptr.gpu;
}

static void
panvk_meta_fill_buf_init(struct panvk_physical_device *dev)
{
   dev->meta.copy.fillbuf.rsd =
      panvk_meta_fill_buf_emit_rsd(&dev->pdev, &dev->meta.bin_pool.base,
                                   &dev->meta.desc_pool.base,
                                   &dev->meta.copy.fillbuf.pushmap);
}

static void
panvk_meta_fill_buf(struct panvk_cmd_buffer *cmdbuf,
                    const struct panvk_buffer *dst,
                    VkDeviceSize size, VkDeviceSize offset,
                    uint32_t val)
{
   struct panfrost_device *pdev = &cmdbuf->device->physical_device->pdev;

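   /* vkCmdFillBuffer requires offset and size to be multiples of 4; with
    * VK_WHOLE_SIZE the fill extends to the end of the buffer, rounded
    * down to a multiple of 4.
    */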
   if (size == VK_WHOLE_SIZE)
      size = (dst->size - offset) & ~3ULL;

   struct panvk_meta_fill_buf_info info = {
      .start = dst->bo->ptr.gpu + dst->bo_offset + offset,
      .val = val,
   };

   assert(!(offset & 3) && !(size & 3));

   unsigned nwords = size / sizeof(uint32_t);
   mali_ptr rsd =
      cmdbuf->device->physical_device->meta.copy.fillbuf.rsd;
   const struct panfrost_ubo_push *pushmap =
      &cmdbuf->device->physical_device->meta.copy.fillbuf.pushmap;

   mali_ptr pushconsts =
      panvk_meta_copy_emit_push_constants(pdev, pushmap, &cmdbuf->desc_pool.base,
                                          &info, sizeof(info));
   mali_ptr ubo =
      panvk_meta_copy_emit_ubo(pdev, &cmdbuf->desc_pool.base, &info, sizeof(info));

   panvk_per_arch(cmd_close_batch)(cmdbuf);

   struct panvk_batch *batch = panvk_cmd_open_batch(cmdbuf);

   panvk_per_arch(cmd_alloc_tls_desc)(cmdbuf, false);

   mali_ptr tsd = batch->tls.gpu;

   struct pan_compute_dim num_wg = { nwords, 1, 1 };
   struct pan_compute_dim wg_sz = { 1, 1, 1 };
   struct panfrost_ptr job =
      panvk_meta_copy_emit_compute_job(&cmdbuf->desc_pool.base,
                                       &batch->scoreboard,
                                       &num_wg, &wg_sz,
                                       0, 0, ubo, pushconsts, rsd, tsd);

   util_dynarray_append(&batch->jobs, void *, job.cpu);

   batch->blit.dst = dst->bo;
   panvk_per_arch(cmd_close_batch)(cmdbuf);
}

void
panvk_per_arch(CmdFillBuffer)(VkCommandBuffer commandBuffer,
                              VkBuffer dstBuffer,
                              VkDeviceSize dstOffset,
                              VkDeviceSize fillSize,
                              uint32_t data)
{
   VK_FROM_HANDLE(panvk_cmd_buffer, cmdbuf, commandBuffer);
   VK_FROM_HANDLE(panvk_buffer, dst, dstBuffer);

   panvk_meta_fill_buf(cmdbuf, dst, fillSize, dstOffset, data);
}

static void
panvk_meta_update_buf(struct panvk_cmd_buffer *cmdbuf,
                      const struct panvk_buffer *dst, VkDeviceSize offset,
                      VkDeviceSize size, const void *data)
{
   struct panfrost_device *pdev = &cmdbuf->device->physical_device->pdev;

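   /* Upload the source data to a staging allocation in the descriptor
    * pool, then reuse the 4-byte buf2buf pipeline to copy it into the
    * destination (vkCmdUpdateBuffer guarantees dataSize is a multiple
    * of 4).
    */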
   struct panvk_meta_copy_buf2buf_info info = {
      .src = pan_pool_upload_aligned(&cmdbuf->desc_pool.base, data, size, 4),
      .dst = dst->bo->ptr.gpu + dst->bo_offset + offset,
   };

   unsigned log2blksz = ffs(sizeof(uint32_t)) - 1;

   mali_ptr rsd =
      cmdbuf->device->physical_device->meta.copy.buf2buf[log2blksz].rsd;
   const struct panfrost_ubo_push *pushmap =
      &cmdbuf->device->physical_device->meta.copy.buf2buf[log2blksz].pushmap;

   mali_ptr pushconsts =
      panvk_meta_copy_emit_push_constants(pdev, pushmap, &cmdbuf->desc_pool.base,
                                          &info, sizeof(info));
   mali_ptr ubo =
      panvk_meta_copy_emit_ubo(pdev, &cmdbuf->desc_pool.base, &info, sizeof(info));

   panvk_per_arch(cmd_close_batch)(cmdbuf);

   struct panvk_batch *batch = panvk_cmd_open_batch(cmdbuf);

   panvk_per_arch(cmd_alloc_tls_desc)(cmdbuf, false);

   mali_ptr tsd = batch->tls.gpu;

   unsigned nblocks = size >> log2blksz;
   struct pan_compute_dim num_wg = { nblocks, 1, 1 };
   struct pan_compute_dim wg_sz = { 1, 1, 1 };
   struct panfrost_ptr job =
      panvk_meta_copy_emit_compute_job(&cmdbuf->desc_pool.base,
                                       &batch->scoreboard,
                                       &num_wg, &wg_sz,
                                       0, 0, ubo, pushconsts, rsd, tsd);

   util_dynarray_append(&batch->jobs, void *, job.cpu);

   batch->blit.dst = dst->bo;
   panvk_per_arch(cmd_close_batch)(cmdbuf);
}

void
panvk_per_arch(CmdUpdateBuffer)(VkCommandBuffer commandBuffer,
                                VkBuffer dstBuffer,
                                VkDeviceSize dstOffset,
                                VkDeviceSize dataSize,
                                const void *pData)
{
   VK_FROM_HANDLE(panvk_cmd_buffer, cmdbuf, commandBuffer);
   VK_FROM_HANDLE(panvk_buffer, dst, dstBuffer);

   panvk_meta_update_buf(cmdbuf, dst, dstOffset, dataSize, pData);
}

void
panvk_per_arch(meta_copy_init)(struct panvk_physical_device *dev)
{
   panvk_meta_copy_img2img_init(dev, false);
   panvk_meta_copy_img2img_init(dev, true);
   panvk_meta_copy_buf2img_init(dev);
   panvk_meta_copy_img2buf_init(dev);
   panvk_meta_copy_buf2buf_init(dev);
   panvk_meta_fill_buf_init(dev);
}