1 /*
2 * Copyright © 2021 Collabora Ltd.
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
21 * DEALINGS IN THE SOFTWARE.
22 */
23
24 #include "gen_macros.h"
25
26 #include "nir/nir_builder.h"
27 #include "pan_encoder.h"
28 #include "pan_shader.h"
29
30 #include "panvk_private.h"
31
static mali_ptr
panvk_meta_copy_img_emit_texture(struct panfrost_device *pdev,
                                 struct pan_pool *desc_pool,
                                 const struct pan_image_view *view)
{
   /* Emit a texture descriptor (plus its surface payload) for the copy
    * source view, and return the address to plug into DRAW::textures. */
#if PAN_ARCH >= 6
   /* v6+ (Bifrost): the TEXTURE descriptor and its surface payload live
    * in separate allocations, and DRAW::textures points directly at the
    * descriptor. */
   struct panfrost_ptr texture =
      pan_pool_alloc_desc(desc_pool, TEXTURE);
   size_t payload_size =
      GENX(panfrost_estimate_texture_payload_size)(view);
   struct panfrost_ptr surfaces =
      pan_pool_alloc_aligned(desc_pool, payload_size,
                             pan_alignment(SURFACE_WITH_STRIDE));

   GENX(panfrost_new_texture)(pdev, view, texture.cpu, &surfaces);

   return texture.gpu;
#else
   /* v5 and earlier: descriptor and payload are contiguous, and the
    * texture binding is a table of pointers to descriptors, so upload a
    * single mali_ptr referencing the descriptor and return that. */
   size_t sz = pan_size(TEXTURE) +
               GENX(panfrost_estimate_texture_payload_size)(view);
   struct panfrost_ptr texture =
      pan_pool_alloc_aligned(desc_pool, sz, pan_alignment(TEXTURE));
   struct panfrost_ptr surfaces = {
      .cpu = texture.cpu + pan_size(TEXTURE),
      .gpu = texture.gpu + pan_size(TEXTURE),
   };

   GENX(panfrost_new_texture)(pdev, view, texture.cpu, &surfaces);

   return pan_pool_upload_aligned(desc_pool, &texture.gpu,
                                  sizeof(mali_ptr),
                                  sizeof(mali_ptr));
#endif
}
66
67 static mali_ptr
panvk_meta_copy_img_emit_sampler(struct panfrost_device * pdev,struct pan_pool * desc_pool)68 panvk_meta_copy_img_emit_sampler(struct panfrost_device *pdev,
69 struct pan_pool *desc_pool)
70 {
71 struct panfrost_ptr sampler =
72 pan_pool_alloc_desc(desc_pool, SAMPLER);
73
74 pan_pack(sampler.cpu, SAMPLER, cfg) {
75 #if PAN_ARCH >= 6
76 cfg.seamless_cube_map = false;
77 #endif
78 cfg.normalized_coordinates = false;
79 cfg.minify_nearest = true;
80 cfg.magnify_nearest = true;
81 }
82
83 return sampler.gpu;
84 }
85
static void
panvk_meta_copy_emit_varying(struct pan_pool *pool,
                             mali_ptr coordinates,
                             mali_ptr *varying_bufs,
                             mali_ptr *varyings)
{
   /* Emit the single varying (source texture coordinates) consumed by the
    * copy fragment shaders, plus its backing attribute-buffer descriptor.
    * `coordinates` points at 4 vertices of 4 uint32-sized components each
    * (see the stride/size computation below). */
   /* Bifrost needs an empty desc to mark end of prefetching */
   bool padding_buffer = PAN_ARCH >= 6;

   struct panfrost_ptr varying =
      pan_pool_alloc_desc(pool, ATTRIBUTE);
   struct panfrost_ptr varying_buffer =
      pan_pool_alloc_desc_array(pool, (padding_buffer ? 2 : 1),
                                ATTRIBUTE_BUFFER);

   pan_pack(varying_buffer.cpu, ATTRIBUTE_BUFFER, cfg) {
      cfg.pointer = coordinates;
      /* 4 components per vertex, 4 vertices (one quad). */
      cfg.stride = 4 * sizeof(uint32_t);
      cfg.size = cfg.stride * 4;
   }

   if (padding_buffer) {
      /* Zero-filled terminator descriptor (see note above). */
      pan_pack(varying_buffer.cpu + pan_size(ATTRIBUTE_BUFFER),
               ATTRIBUTE_BUFFER, cfg);
   }

   pan_pack(varying.cpu, ATTRIBUTE, cfg) {
      cfg.buffer_index = 0;
      /* Offset-based addressing only applies on v5 and earlier. */
      cfg.offset_enable = PAN_ARCH <= 5;
      /* Only xyz are read (3-component format over a 16-byte stride);
       * the fourth column is ignored. */
      cfg.format = pool->dev->formats[PIPE_FORMAT_R32G32B32_FLOAT].hw;
   }

   *varyings = varying.gpu;
   *varying_bufs = varying_buffer.gpu;
}
121
static void
panvk_meta_copy_emit_dcd(struct pan_pool *pool,
                         mali_ptr src_coords, mali_ptr dst_coords,
                         mali_ptr texture, mali_ptr sampler,
                         mali_ptr vpd, mali_ptr tsd, mali_ptr rsd,
                         mali_ptr ubos, mali_ptr push_constants,
                         void *out)
{
   /* Fill the DRAW section shared by the tiler and compute copy jobs.
    * Any mali_ptr argument may be 0 when the corresponding resource is
    * unused; a zero src_coords additionally skips varying emission
    * (compute copies consume no varyings). */
   pan_pack(out, DRAW, cfg) {
      cfg.thread_storage = tsd;
      cfg.state = rsd;
      cfg.uniform_buffers = ubos;
      cfg.push_uniforms = push_constants;
      /* Positions come from the uploaded destination quad. */
      cfg.position = dst_coords;
      if (src_coords) {
         panvk_meta_copy_emit_varying(pool, src_coords,
                                      &cfg.varying_buffers,
                                      &cfg.varyings);
      }
      cfg.viewport = vpd;
      cfg.textures = texture;
      cfg.samplers = sampler;
   }
}
146
static struct panfrost_ptr
panvk_meta_copy_emit_tiler_job(struct pan_pool *desc_pool,
                               struct pan_scoreboard *scoreboard,
                               mali_ptr src_coords, mali_ptr dst_coords,
                               mali_ptr texture, mali_ptr sampler,
                               mali_ptr ubo, mali_ptr push_constants,
                               mali_ptr vpd, mali_ptr rsd,
                               mali_ptr tsd, mali_ptr tiler)
{
   /* Emit a tiler job drawing one 4-vertex triangle strip (a quad) with
    * the given resources, queue it on the batch scoreboard, and return
    * the job descriptor so the caller can track it. */
   struct panfrost_ptr job =
      pan_pool_alloc_desc(desc_pool, TILER_JOB);

   panvk_meta_copy_emit_dcd(desc_pool, src_coords, dst_coords,
                            texture, sampler, vpd, tsd, rsd, ubo, push_constants,
                            pan_section_ptr(job.cpu, TILER_JOB, DRAW));

   pan_section_pack(job.cpu, TILER_JOB, PRIMITIVE, cfg) {
      cfg.draw_mode = MALI_DRAW_MODE_TRIANGLE_STRIP;
      cfg.index_count = 4;
      cfg.job_task_split = 6;
   }

   /* Constant primitive size; not meaningful for filled triangles but
    * the section still has to hold a valid value. */
   pan_section_pack(job.cpu, TILER_JOB, PRIMITIVE_SIZE, cfg) {
      cfg.constant = 1.0f;
   }

   /* One job of 4 vertex invocations (one per quad corner). */
   void *invoc = pan_section_ptr(job.cpu,
                                 TILER_JOB,
                                 INVOCATION);
   panfrost_pack_work_groups_compute(invoc, 1, 4,
                                     1, 1, 1, 1, true, false);

#if PAN_ARCH >= 6
   pan_section_pack(job.cpu, TILER_JOB, PADDING, cfg);
   /* v6+ tiler jobs reference the per-batch tiler context. */
   pan_section_pack(job.cpu, TILER_JOB, TILER, cfg) {
      cfg.address = tiler;
   }
#endif

   panfrost_add_job(desc_pool, scoreboard, MALI_JOB_TYPE_TILER,
                    false, false, 0, 0, &job, false);
   return job;
}
190
static struct panfrost_ptr
panvk_meta_copy_emit_compute_job(struct pan_pool *desc_pool,
                                 struct pan_scoreboard *scoreboard,
                                 const struct pan_compute_dim *num_wg,
                                 const struct pan_compute_dim *wg_sz,
                                 mali_ptr texture, mali_ptr sampler,
                                 mali_ptr ubo, mali_ptr push_constants,
                                 mali_ptr rsd, mali_ptr tsd)
{
   /* Emit a compute job with the given grid (num_wg) and workgroup
    * (wg_sz) dimensions, queue it on the batch scoreboard, and return
    * the job descriptor. */
   struct panfrost_ptr job =
      pan_pool_alloc_desc(desc_pool, COMPUTE_JOB);

   void *invoc = pan_section_ptr(job.cpu,
                                 COMPUTE_JOB,
                                 INVOCATION);
   panfrost_pack_work_groups_compute(invoc, num_wg->x, num_wg->y, num_wg->z,
                                     wg_sz->x, wg_sz->y, wg_sz->z,
                                     false, false);

   pan_section_pack(job.cpu, COMPUTE_JOB, PARAMETERS, cfg) {
      cfg.job_task_split = 8;
   }

   /* No coordinates or viewport: src_coords/dst_coords/vpd are 0, so
    * the DCD carries only state, textures and uniforms. */
   panvk_meta_copy_emit_dcd(desc_pool, 0, 0, texture, sampler,
                            0, tsd, rsd, ubo, push_constants,
                            pan_section_ptr(job.cpu, COMPUTE_JOB, DRAW));

   panfrost_add_job(desc_pool, scoreboard, MALI_JOB_TYPE_COMPUTE,
                    false, false, 0, 0, &job, false);
   return job;
}
222
223
224 #if PAN_ARCH >= 6
225 static uint32_t
panvk_meta_copy_img_bifrost_raw_format(unsigned texelsize)226 panvk_meta_copy_img_bifrost_raw_format(unsigned texelsize)
227 {
228 switch (texelsize) {
229 case 6: return MALI_RGB16UI << 12;
230 case 8: return MALI_RG32UI << 12;
231 case 12: return MALI_RGB32UI << 12;
232 case 16: return MALI_RGBA32UI << 12;
233 default: unreachable("Invalid texel size\n");
234 }
235 }
236 #endif
237
static mali_ptr
panvk_meta_copy_to_img_emit_rsd(struct panfrost_device *pdev,
                                struct pan_pool *desc_pool,
                                mali_ptr shader,
                                const struct pan_shader_info *shader_info,
                                enum pipe_format fmt, unsigned wrmask,
                                bool from_img)
{
   /* Emit the renderer state (plus one blend descriptor) for a copy that
    * renders into an image.  `wrmask` selects which components of `fmt`
    * are written; `from_img` means the fragment shader samples a source
    * texture and so needs the varying/texture/sampler slots. */
   struct panfrost_ptr rsd_ptr =
      pan_pool_alloc_desc_aggregate(desc_pool,
                                    PAN_DESC(RENDERER_STATE),
                                    PAN_DESC_ARRAY(1, BLEND));

   /* Texels wider than 32 bits are handled as raw integer data. */
   bool raw = util_format_get_blocksize(fmt) > 4;
   unsigned fullmask = (1 << util_format_get_nr_components(fmt)) - 1;
   /* Masked writes: blendable formats read-modify-write through the
    * blender (partialwrite); raw formats need the shader to read the
    * tilebuffer instead (readstb). */
   bool partialwrite = fullmask != wrmask && !raw;
   bool readstb = fullmask != wrmask && raw;

   pan_pack(rsd_ptr.cpu, RENDERER_STATE, cfg) {
      pan_shader_prepare_rsd(shader_info, shader, &cfg);
      if (from_img) {
         cfg.shader.varying_count = 1;
         cfg.shader.texture_count = 1;
         cfg.shader.sampler_count = 1;
      }
      cfg.properties.depth_source = MALI_DEPTH_SOURCE_FIXED_FUNCTION;
      cfg.multisample_misc.sample_mask = UINT16_MAX;
      /* Depth/stencil always pass; stencil REPLACE so a stencil aspect
       * copy overwrites the destination unconditionally. */
      cfg.multisample_misc.depth_function = MALI_FUNC_ALWAYS;
      cfg.stencil_mask_misc.stencil_mask_front = 0xFF;
      cfg.stencil_mask_misc.stencil_mask_back = 0xFF;
      cfg.stencil_front.compare_function = MALI_FUNC_ALWAYS;
      cfg.stencil_front.stencil_fail = MALI_STENCIL_OP_REPLACE;
      cfg.stencil_front.depth_fail = MALI_STENCIL_OP_REPLACE;
      cfg.stencil_front.depth_pass = MALI_STENCIL_OP_REPLACE;
      cfg.stencil_front.mask = 0xFF;
      cfg.stencil_back = cfg.stencil_front;

#if PAN_ARCH >= 6
      /* Forward pixel kill is only safe when every covered pixel is
       * fully overwritten without reading the tilebuffer. */
      cfg.properties.allow_forward_pixel_to_be_killed = true;
      cfg.properties.allow_forward_pixel_to_kill =
         !partialwrite && !readstb;
      cfg.properties.zs_update_operation =
         MALI_PIXEL_KILL_STRONG_EARLY;
      cfg.properties.pixel_kill_operation =
         MALI_PIXEL_KILL_FORCE_EARLY;
#else
      cfg.properties.shader_reads_tilebuffer = readstb;
      cfg.properties.work_register_count = shader_info->work_reg_count;
      cfg.properties.force_early_z = true;
      cfg.stencil_mask_misc.alpha_test_compare_function = MALI_FUNC_ALWAYS;
#endif
   }

   pan_pack(rsd_ptr.cpu + pan_size(RENDERER_STATE), BLEND, cfg) {
      cfg.round_to_fb_precision = true;
      /* Destination only needs loading for masked blendable writes. */
      cfg.load_destination = partialwrite;
      /* Straight source copy: RGB = src * 1 + dst * 0, same for alpha. */
      cfg.equation.rgb.a = MALI_BLEND_OPERAND_A_SRC;
      cfg.equation.rgb.b = MALI_BLEND_OPERAND_B_SRC;
      cfg.equation.rgb.c = MALI_BLEND_OPERAND_C_ZERO;
      cfg.equation.alpha.a = MALI_BLEND_OPERAND_A_SRC;
      cfg.equation.alpha.b = MALI_BLEND_OPERAND_B_SRC;
      cfg.equation.alpha.c = MALI_BLEND_OPERAND_C_ZERO;
#if PAN_ARCH >= 6
      /* Fixed-function blending is only needed when masking; otherwise
       * the cheaper opaque mode applies. */
      cfg.internal.mode =
         partialwrite ?
         MALI_BLEND_MODE_FIXED_FUNCTION :
         MALI_BLEND_MODE_OPAQUE;
      cfg.equation.color_mask = partialwrite ? wrmask : 0xf;
      cfg.internal.fixed_function.num_comps = 4;
      if (!raw) {
         cfg.internal.fixed_function.conversion.memory_format =
            panfrost_format_to_bifrost_blend(pdev, fmt, false);
         cfg.internal.fixed_function.conversion.register_format =
            MALI_REGISTER_FILE_FORMAT_F32;
      } else {
         unsigned imgtexelsz = util_format_get_blocksize(fmt);

         cfg.internal.fixed_function.conversion.memory_format =
            panvk_meta_copy_img_bifrost_raw_format(imgtexelsz);
         /* Texel sizes with bit 1 set (i.e. 6 bytes: 16-bit components)
          * come out of the shader in u16 registers; the rest in u32. */
         cfg.internal.fixed_function.conversion.register_format =
            (imgtexelsz & 2) ?
            MALI_REGISTER_FILE_FORMAT_U16 :
            MALI_REGISTER_FILE_FORMAT_U32;
      }
#else
      cfg.equation.color_mask = wrmask;
#endif
   }

   return rsd_ptr.gpu;
}
329
330 static mali_ptr
panvk_meta_copy_emit_ubo(struct panfrost_device * pdev,struct pan_pool * pool,void * data,unsigned size)331 panvk_meta_copy_emit_ubo(struct panfrost_device *pdev,
332 struct pan_pool *pool,
333 void *data, unsigned size)
334 {
335 struct panfrost_ptr ubo = pan_pool_alloc_desc(pool, UNIFORM_BUFFER);
336
337 pan_pack(ubo.cpu, UNIFORM_BUFFER, cfg) {
338 cfg.entries = DIV_ROUND_UP(size, 16);
339 cfg.pointer = pan_pool_upload_aligned(pool, data, size, 16);
340 }
341
342 return ubo.gpu;
343 }
344
345 static mali_ptr
panvk_meta_copy_emit_push_constants(struct panfrost_device * pdev,const struct panfrost_ubo_push * pushmap,struct pan_pool * pool,const void * data,unsigned size)346 panvk_meta_copy_emit_push_constants(struct panfrost_device *pdev,
347 const struct panfrost_ubo_push *pushmap,
348 struct pan_pool *pool,
349 const void *data, unsigned size)
350 {
351 assert(pushmap->count <= (size / 4));
352
353 const uint32_t *in = data;
354 uint32_t pushvals[PAN_MAX_PUSH];
355
356 for (unsigned i = 0; i < pushmap->count; i++) {
357 assert(i < ARRAY_SIZE(pushvals));
358 assert(pushmap->words[i].ubo == 0);
359 assert(pushmap->words[i].offset < size);
360 pushvals[i] = in[pushmap->words[i].offset / 4];
361 }
362
363 return pan_pool_upload_aligned(pool, pushvals, size, 16);
364 }
365
366 static mali_ptr
panvk_meta_copy_to_buf_emit_rsd(struct panfrost_device * pdev,struct pan_pool * desc_pool,mali_ptr shader,const struct pan_shader_info * shader_info,bool from_img)367 panvk_meta_copy_to_buf_emit_rsd(struct panfrost_device *pdev,
368 struct pan_pool *desc_pool,
369 mali_ptr shader,
370 const struct pan_shader_info *shader_info,
371 bool from_img)
372 {
373 struct panfrost_ptr rsd_ptr =
374 pan_pool_alloc_desc_aggregate(desc_pool,
375 PAN_DESC(RENDERER_STATE));
376
377 pan_pack(rsd_ptr.cpu, RENDERER_STATE, cfg) {
378 pan_shader_prepare_rsd(shader_info, shader, &cfg);
379 if (from_img) {
380 cfg.shader.texture_count = 1;
381 cfg.shader.sampler_count = 1;
382 }
383 }
384
385 return rsd_ptr.gpu;
386 }
387
static mali_ptr
panvk_meta_copy_img2img_shader(struct panfrost_device *pdev,
                               struct pan_pool *bin_pool,
                               enum pipe_format srcfmt,
                               enum pipe_format dstfmt, unsigned dstmask,
                               unsigned texdim, bool texisarray, bool is_ms,
                               struct pan_shader_info *shader_info)
{
   /* Build, compile and upload the fragment shader for an img -> img
    * copy: fetch a texel from the source view (txf / txf_ms) at the
    * interpolated integer coordinates and write it to the color output,
    * repacking between the RGB565 and RG8 wire representations when
    * needed, and preserving unwritten destination components for masked
    * writes.  Returns the GPU address of the uploaded binary. */
   nir_builder b =
      nir_builder_init_simple_shader(MESA_SHADER_FRAGMENT,
                                     GENX(pan_shader_get_compiler_options)(),
                                     "panvk_meta_copy_img2img(srcfmt=%s,dstfmt=%s,%dD%s%s)",
                                     util_format_name(srcfmt), util_format_name(dstfmt),
                                     texdim, texisarray ? "[]" : "", is_ms ? ",ms" : "");

   /* Source coordinates arrive as a float varying; texel fetch wants
    * integers, hence the f2u32. */
   nir_variable *coord_var =
      nir_variable_create(b.shader, nir_var_shader_in,
                          glsl_vector_type(GLSL_TYPE_FLOAT, texdim + texisarray),
                          "coord");
   coord_var->data.location = VARYING_SLOT_TEX0;
   nir_ssa_def *coord = nir_f2u32(&b, nir_load_var(&b, coord_var));

   nir_tex_instr *tex = nir_tex_instr_create(b.shader, is_ms ? 2 : 1);
   tex->op = is_ms ? nir_texop_txf_ms : nir_texop_txf;
   tex->texture_index = 0;
   tex->is_array = texisarray;
   /* unorm sources are fetched as floats, everything else as raw uints */
   tex->dest_type = util_format_is_unorm(srcfmt) ?
                    nir_type_float32 : nir_type_uint32;

   switch (texdim) {
   case 1: tex->sampler_dim = GLSL_SAMPLER_DIM_1D; break;
   case 2: tex->sampler_dim = GLSL_SAMPLER_DIM_2D; break;
   case 3: tex->sampler_dim = GLSL_SAMPLER_DIM_3D; break;
   default: unreachable("Invalid texture dimension");
   }

   tex->src[0].src_type = nir_tex_src_coord;
   tex->src[0].src = nir_src_for_ssa(coord);
   tex->coord_components = texdim + texisarray;

   if (is_ms) {
      /* Sample-rate shading: fetch the sample matching the current
       * invocation's sample id (sample_shading is set below). */
      tex->src[1].src_type = nir_tex_src_ms_index;
      tex->src[1].src = nir_src_for_ssa(nir_load_sample_id(&b));
   }

   nir_ssa_dest_init(&tex->instr, &tex->dest, 4,
                     nir_alu_type_get_type_size(tex->dest_type), NULL);
   nir_builder_instr_insert(&b, &tex->instr);

   nir_ssa_def *texel = &tex->dest.ssa;

   /* Size (in bits) of the destination's first RGB component. */
   unsigned dstcompsz =
      util_format_get_component_bits(dstfmt, UTIL_FORMAT_COLORSPACE_RGB, 0);
   unsigned ndstcomps = util_format_get_nr_components(dstfmt);
   const struct glsl_type *outtype = NULL;

   if (srcfmt == PIPE_FORMAT_R5G6B5_UNORM && dstfmt == PIPE_FORMAT_R8G8_UNORM) {
      /* RGB565 -> RG8: quantize the unorm RGB to 5/6/5 bits, then
       * bit-pack the 16-bit value into two 8-bit unorm components. */
      nir_ssa_def *rgb =
         nir_f2u32(&b, nir_fmul(&b, texel,
                                nir_vec3(&b,
                                         nir_imm_float(&b, 31),
                                         nir_imm_float(&b, 63),
                                         nir_imm_float(&b, 31))));
      nir_ssa_def *rg =
         nir_vec2(&b,
                  nir_ior(&b, nir_channel(&b, rgb, 0),
                          nir_ishl(&b, nir_channel(&b, rgb, 1),
                                   nir_imm_int(&b, 5))),
                  nir_ior(&b,
                          nir_ushr_imm(&b, nir_channel(&b, rgb, 1), 3),
                          nir_ishl(&b, nir_channel(&b, rgb, 2),
                                   nir_imm_int(&b, 3))));
      rg = nir_iand_imm(&b, rg, 255);
      texel = nir_fmul_imm(&b, nir_u2f32(&b, rg), 1.0 / 255);
      outtype = glsl_vector_type(GLSL_TYPE_FLOAT, 2);
   } else if (srcfmt == PIPE_FORMAT_R8G8_UNORM && dstfmt == PIPE_FORMAT_R5G6B5_UNORM) {
      /* RG8 -> RGB565: inverse of the packing above. */
      nir_ssa_def *rg = nir_f2u32(&b, nir_fmul_imm(&b, texel, 255));
      nir_ssa_def *rgb =
         nir_vec3(&b,
                  nir_channel(&b, rg, 0),
                  nir_ior(&b,
                          nir_ushr_imm(&b, nir_channel(&b, rg, 0), 5),
                          nir_ishl(&b, nir_channel(&b, rg, 1),
                                   nir_imm_int(&b, 3))),
                  nir_ushr_imm(&b, nir_channel(&b, rg, 1), 3));
      rgb = nir_iand(&b, rgb,
                     nir_vec3(&b,
                              nir_imm_int(&b, 31),
                              nir_imm_int(&b, 63),
                              nir_imm_int(&b, 31)));
      texel = nir_fmul(&b, nir_u2f32(&b, rgb),
                       nir_vec3(&b,
                                nir_imm_float(&b, 1.0 / 31),
                                nir_imm_float(&b, 1.0 / 63),
                                nir_imm_float(&b, 1.0 / 31)));
      outtype = glsl_vector_type(GLSL_TYPE_FLOAT, 3);
   } else {
      /* Same wire format on both sides: pass the texel through, trimmed
       * to the destination component count. */
      assert(srcfmt == dstfmt);
      enum glsl_base_type basetype;
      if (util_format_is_unorm(dstfmt)) {
         basetype = GLSL_TYPE_FLOAT;
      } else if (dstcompsz == 16) {
         basetype = GLSL_TYPE_UINT16;
      } else {
         assert(dstcompsz == 32);
         basetype = GLSL_TYPE_UINT;
      }

      if (dstcompsz == 16)
         texel = nir_u2u16(&b, texel);

      texel = nir_channels(&b, texel, (1 << ndstcomps) - 1);
      outtype = glsl_vector_type(basetype, ndstcomps);
   }

   nir_variable *out =
      nir_variable_create(b.shader, nir_var_shader_out, outtype, "out");
   out->data.location = FRAG_RESULT_DATA0;

   unsigned fullmask = (1 << ndstcomps) - 1;
   if (dstcompsz > 8 && dstmask != fullmask) {
      /* Partial write of wide components (e.g. one aspect of a packed
       * depth/stencil image): read the current render-target value and
       * merge in only the masked components. */
      nir_ssa_def *oldtexel = nir_load_var(&b, out);
      nir_ssa_def *dstcomps[4];

      for (unsigned i = 0; i < ndstcomps; i++) {
         if (dstmask & BITFIELD_BIT(i))
            dstcomps[i] = nir_channel(&b, texel, i);
         else
            dstcomps[i] = nir_channel(&b, oldtexel, i);
      }

      texel = nir_vec(&b, dstcomps, ndstcomps);
   }

   nir_store_var(&b, out, texel, 0xff);

   struct panfrost_compile_inputs inputs = {
      .gpu_id = pdev->gpu_id,
      .is_blit = true,
   };

#if PAN_ARCH >= 6
   /* Bake the render-target conversion into the shader.
    * NOTE(review): dstcompsz holds a size in *bits* (it is compared
    * against 16/32 above), so `dstcompsz == 2` looks like it can never
    * be true and the U16/RG16UI path appears dead — verify whether this
    * was meant to be `dstcompsz == 16` (bytes vs. bits mixup). */
   pan_pack(&inputs.bifrost.rt_conv[0], INTERNAL_CONVERSION, cfg) {
      cfg.memory_format = (dstcompsz == 2 ? MALI_RG16UI : MALI_RG32UI) << 12;
      cfg.register_format = dstcompsz == 2 ?
                            MALI_REGISTER_FILE_FORMAT_U16 :
                            MALI_REGISTER_FILE_FORMAT_U32;
   }
   inputs.bifrost.static_rt_conv = true;
#endif

   struct util_dynarray binary;

   util_dynarray_init(&binary, NULL);
   GENX(pan_shader_compile)(b.shader, &inputs, &binary, shader_info);

   shader_info->fs.sample_shading = is_ms;

   mali_ptr shader =
      pan_pool_upload_aligned(bin_pool, binary.data, binary.size,
                              PAN_ARCH >= 6 ? 128 : 64);

   util_dynarray_fini(&binary);
   ralloc_free(b.shader);

   return shader;
}
555
556 static enum pipe_format
panvk_meta_copy_img_format(enum pipe_format fmt)557 panvk_meta_copy_img_format(enum pipe_format fmt)
558 {
559 /* We can't use a non-compressed format when handling a tiled/AFBC
560 * compressed format because the tile size differ (4x4 blocks for
561 * compressed formats and 16x16 texels for non-compressed ones).
562 */
563 assert(!util_format_is_compressed(fmt));
564
565 /* Pick blendable formats when we can, otherwise pick the UINT variant
566 * matching the texel size.
567 */
568 switch (util_format_get_blocksize(fmt)) {
569 case 16: return PIPE_FORMAT_R32G32B32A32_UINT;
570 case 12: return PIPE_FORMAT_R32G32B32_UINT;
571 case 8: return PIPE_FORMAT_R32G32_UINT;
572 case 6: return PIPE_FORMAT_R16G16B16_UINT;
573 case 4: return PIPE_FORMAT_R8G8B8A8_UNORM;
574 case 2: return (fmt == PIPE_FORMAT_R5G6B5_UNORM ||
575 fmt == PIPE_FORMAT_B5G6R5_UNORM) ?
576 PIPE_FORMAT_R5G6B5_UNORM : PIPE_FORMAT_R8G8_UNORM;
577 case 1: return PIPE_FORMAT_R8_UNORM;
578 default: unreachable("Unsupported format\n");
579 }
580 }
581
/* One supported (canonical src format, canonical dst format, dst write
 * mask) combination for img -> img copies. */
struct panvk_meta_copy_img2img_format_info {
   enum pipe_format srcfmt;
   enum pipe_format dstfmt;
   unsigned dstmask;
};

/* Every format/mask combination a copy can map to after canonicalization
 * by panvk_meta_copy_img_format() and panvk_meta_copy_img_mask().
 * Indexed via panvk_meta_copy_img2img_format_idx(); the entry count must
 * match PANVK_META_COPY_IMG2IMG_NUM_FORMATS. */
static const struct panvk_meta_copy_img2img_format_info panvk_meta_copy_img2img_fmts[] = {
   { PIPE_FORMAT_R8_UNORM, PIPE_FORMAT_R8_UNORM, 0x1},
   { PIPE_FORMAT_R5G6B5_UNORM, PIPE_FORMAT_R5G6B5_UNORM, 0x7},
   { PIPE_FORMAT_R5G6B5_UNORM, PIPE_FORMAT_R8G8_UNORM, 0x3},
   { PIPE_FORMAT_R8G8_UNORM, PIPE_FORMAT_R5G6B5_UNORM, 0x7},
   { PIPE_FORMAT_R8G8_UNORM, PIPE_FORMAT_R8G8_UNORM, 0x3},
   /* Z24S8(depth) */
   { PIPE_FORMAT_R8G8B8A8_UNORM, PIPE_FORMAT_R8G8B8A8_UNORM, 0x7 },
   /* Z24S8(stencil) */
   { PIPE_FORMAT_R8G8B8A8_UNORM, PIPE_FORMAT_R8G8B8A8_UNORM, 0x8 },
   { PIPE_FORMAT_R8G8B8A8_UNORM, PIPE_FORMAT_R8G8B8A8_UNORM, 0xf },
   { PIPE_FORMAT_R16G16B16_UINT, PIPE_FORMAT_R16G16B16_UINT, 0x7 },
   { PIPE_FORMAT_R32G32_UINT, PIPE_FORMAT_R32G32_UINT, 0x3 },
   /* Z32S8X24(depth) */
   { PIPE_FORMAT_R32G32_UINT, PIPE_FORMAT_R32G32_UINT, 0x1 },
   /* Z32S8X24(stencil) */
   { PIPE_FORMAT_R32G32_UINT, PIPE_FORMAT_R32G32_UINT, 0x2 },
   { PIPE_FORMAT_R32G32B32_UINT, PIPE_FORMAT_R32G32B32_UINT, 0x7 },
   { PIPE_FORMAT_R32G32B32A32_UINT, PIPE_FORMAT_R32G32B32A32_UINT, 0xf },
};
608
609 static unsigned
panvk_meta_copy_img2img_format_idx(struct panvk_meta_copy_img2img_format_info key)610 panvk_meta_copy_img2img_format_idx(struct panvk_meta_copy_img2img_format_info key)
611 {
612 STATIC_ASSERT(ARRAY_SIZE(panvk_meta_copy_img2img_fmts) == PANVK_META_COPY_IMG2IMG_NUM_FORMATS);
613
614 for (unsigned i = 0; i < ARRAY_SIZE(panvk_meta_copy_img2img_fmts); i++) {
615 if (!memcmp(&key, &panvk_meta_copy_img2img_fmts[i], sizeof(key)))
616 return i;
617 }
618
619 unreachable("Invalid image format\n");
620 }
621
622 static unsigned
panvk_meta_copy_img_mask(enum pipe_format imgfmt,VkImageAspectFlags aspectMask)623 panvk_meta_copy_img_mask(enum pipe_format imgfmt, VkImageAspectFlags aspectMask)
624 {
625 if (aspectMask != VK_IMAGE_ASPECT_DEPTH_BIT &&
626 aspectMask != VK_IMAGE_ASPECT_STENCIL_BIT) {
627 enum pipe_format outfmt = panvk_meta_copy_img_format(imgfmt);
628
629 return (1 << util_format_get_nr_components(outfmt)) - 1;
630 }
631
632 switch (imgfmt) {
633 case PIPE_FORMAT_S8_UINT:
634 return 1;
635 case PIPE_FORMAT_Z16_UNORM:
636 return 3;
637 case PIPE_FORMAT_Z16_UNORM_S8_UINT:
638 return aspectMask == VK_IMAGE_ASPECT_DEPTH_BIT ? 3 : 8;
639 case PIPE_FORMAT_Z24_UNORM_S8_UINT:
640 return aspectMask == VK_IMAGE_ASPECT_DEPTH_BIT ? 7 : 8;
641 case PIPE_FORMAT_Z24X8_UNORM:
642 assert(aspectMask == VK_IMAGE_ASPECT_DEPTH_BIT);
643 return 7;
644 case PIPE_FORMAT_Z32_FLOAT:
645 return 0xf;
646 case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT:
647 return aspectMask == VK_IMAGE_ASPECT_DEPTH_BIT ? 1 : 2;
648 default:
649 unreachable("Invalid depth format\n");
650 }
651 }
652
static void
panvk_meta_copy_img2img(struct panvk_cmd_buffer *cmdbuf,
                        const struct panvk_image *src,
                        const struct panvk_image *dst,
                        const VkImageCopy2 *region)
{
   /* Execute one VkImageCopy2 region by drawing a textured quad into the
    * destination, one render batch per destination layer: the source is
    * bound as a texture, the destination layer as the sole render target. */
   struct panfrost_device *pdev = &cmdbuf->device->physical_device->pdev;
   struct pan_fb_info *fbinfo = &cmdbuf->state.fb.info;
   /* Canonicalize both formats and derive the destination write mask
    * (depth/stencil aspects of packed ZS formats map to partial masks). */
   struct panvk_meta_copy_img2img_format_info key = {
      .srcfmt = panvk_meta_copy_img_format(src->pimage.layout.format),
      .dstfmt = panvk_meta_copy_img_format(dst->pimage.layout.format),
      .dstmask = panvk_meta_copy_img_mask(dst->pimage.layout.format,
                                          region->dstSubresource.aspectMask),
   };

   assert(src->pimage.layout.nr_samples == dst->pimage.layout.nr_samples);

   unsigned texdimidx =
      panvk_meta_copy_tex_type(src->pimage.layout.dim,
                               src->pimage.layout.array_size > 1);
   unsigned fmtidx =
      panvk_meta_copy_img2img_format_idx(key);
   unsigned ms = dst->pimage.layout.nr_samples > 1 ? 1 : 0;

   /* Renderer state pre-baked at device init for this
    * (msaa, texture-type, format-pair) combination. */
   mali_ptr rsd =
      cmdbuf->device->physical_device->meta.copy.img2img[ms][texdimidx][fmtidx].rsd;

   /* Cube sources are sampled as 2D arrays; the face is selected through
    * the layer coordinate of the source rect below. */
   struct pan_image_view srcview = {
      .format = key.srcfmt,
      .dim = src->pimage.layout.dim == MALI_TEXTURE_DIMENSION_CUBE ?
             MALI_TEXTURE_DIMENSION_2D : src->pimage.layout.dim,
      .image = &src->pimage,
      .nr_samples = src->pimage.layout.nr_samples,
      .first_level = region->srcSubresource.mipLevel,
      .last_level = region->srcSubresource.mipLevel,
      .first_layer = region->srcSubresource.baseArrayLayer,
      .last_layer = region->srcSubresource.baseArrayLayer + region->srcSubresource.layerCount - 1,
      .swizzle = { PIPE_SWIZZLE_X, PIPE_SWIZZLE_Y, PIPE_SWIZZLE_Z, PIPE_SWIZZLE_W },
   };

   /* The destination is rendered one 2D layer at a time;
    * first_layer/last_layer are patched in the loop below. */
   struct pan_image_view dstview = {
      .format = key.dstfmt,
      .dim = MALI_TEXTURE_DIMENSION_2D,
      .image = &dst->pimage,
      .nr_samples = dst->pimage.layout.nr_samples,
      .first_level = region->dstSubresource.mipLevel,
      .last_level = region->dstSubresource.mipLevel,
      .swizzle = { PIPE_SWIZZLE_X, PIPE_SWIZZLE_Y, PIPE_SWIZZLE_Z, PIPE_SWIZZLE_W },
   };

   unsigned minx = MAX2(region->dstOffset.x, 0);
   unsigned miny = MAX2(region->dstOffset.y, 0);
   unsigned maxx = MAX2(region->dstOffset.x + region->extent.width - 1, 0);
   unsigned maxy = MAX2(region->dstOffset.y + region->extent.height - 1, 0);

   mali_ptr vpd =
      panvk_per_arch(meta_emit_viewport)(&cmdbuf->desc_pool.base,
                                         minx, miny, maxx, maxy);

   /* Destination quad: 4 vertices in triangle-strip order, layer-less
    * (the framebuffer selects the destination layer). */
   float dst_rect[] = {
      minx, miny, 0.0, 1.0,
      maxx + 1, miny, 0.0, 1.0,
      minx, maxy + 1, 0.0, 1.0,
      maxx + 1, maxy + 1, 0.0, 1.0,
   };

   mali_ptr dst_coords =
      pan_pool_upload_aligned(&cmdbuf->desc_pool.base, dst_rect,
                              sizeof(dst_rect), 64);

   /* TODO: don't force preloads of dst resources if unneeded */

   unsigned width = u_minify(dst->pimage.layout.width, region->dstSubresource.mipLevel);
   unsigned height = u_minify(dst->pimage.layout.height, region->dstSubresource.mipLevel);
   cmdbuf->state.fb.crc_valid[0] = false;
   /* Render area rounded out to 32-pixel granularity and clamped to the
    * destination mip extent. */
   *fbinfo = (struct pan_fb_info){
      .width = width,
      .height = height,
      .extent.minx = minx & ~31,
      .extent.miny = miny & ~31,
      .extent.maxx = MIN2(ALIGN_POT(maxx + 1, 32), width) - 1,
      .extent.maxy = MIN2(ALIGN_POT(maxy + 1, 32), height) - 1,
      .nr_samples = dst->pimage.layout.nr_samples,
      .rt_count = 1,
      .rts[0].view = &dstview,
      .rts[0].preload = true,
      .rts[0].crc_valid = &cmdbuf->state.fb.crc_valid[0],
   };

   mali_ptr texture =
      panvk_meta_copy_img_emit_texture(pdev, &cmdbuf->desc_pool.base, &srcview);
   mali_ptr sampler =
      panvk_meta_copy_img_emit_sampler(pdev, &cmdbuf->desc_pool.base);

   /* The copy uses its own framebuffer setup, so any batch in progress
    * must be closed first. */
   panvk_per_arch(cmd_close_batch)(cmdbuf);

   /* Source rectangle; the z component of the per-layer rect below
    * selects the source layer. */
   minx = MAX2(region->srcOffset.x, 0);
   miny = MAX2(region->srcOffset.y, 0);
   maxx = MAX2(region->srcOffset.x + region->extent.width - 1, 0);
   maxy = MAX2(region->srcOffset.y + region->extent.height - 1, 0);
   assert(region->dstOffset.z >= 0);

   /* 3D copies carry layers in offset.z/extent.depth, array copies in
    * baseArrayLayer/layerCount — the MAX2()s cover both encodings. */
   unsigned first_src_layer = MAX2(0, region->srcOffset.z);
   unsigned first_dst_layer = MAX2(region->dstSubresource.baseArrayLayer, region->dstOffset.z);
   unsigned nlayers = MAX2(region->dstSubresource.layerCount, region->extent.depth);
   for (unsigned l = 0; l < nlayers; l++) {
      unsigned src_l = l + first_src_layer;
      float src_rect[] = {
         minx, miny, src_l, 1.0,
         maxx + 1, miny, src_l, 1.0,
         minx, maxy + 1, src_l, 1.0,
         maxx + 1, maxy + 1, src_l, 1.0,
      };

      mali_ptr src_coords =
         pan_pool_upload_aligned(&cmdbuf->desc_pool.base, src_rect,
                                 sizeof(src_rect), 64);

      struct panvk_batch *batch = panvk_cmd_open_batch(cmdbuf);

      /* Retarget the framebuffer at the current destination layer. */
      dstview.first_layer = dstview.last_layer = l + first_dst_layer;
      batch->blit.src = src->pimage.data.bo;
      batch->blit.dst = dst->pimage.data.bo;
      panvk_per_arch(cmd_alloc_tls_desc)(cmdbuf, true);
      panvk_per_arch(cmd_alloc_fb_desc)(cmdbuf);
      panvk_per_arch(cmd_prepare_tiler_context)(cmdbuf);

      mali_ptr tsd, tiler;

#if PAN_ARCH >= 6
      tsd = batch->tls.gpu;
      tiler = batch->tiler.descs.gpu;
#else
      tsd = batch->fb.desc.gpu;
      tiler = 0;
#endif

      struct panfrost_ptr job;

      job = panvk_meta_copy_emit_tiler_job(&cmdbuf->desc_pool.base,
                                           &batch->scoreboard,
                                           src_coords, dst_coords,
                                           texture, sampler, 0, 0,
                                           vpd, rsd, tsd, tiler);

      util_dynarray_append(&batch->jobs, void *, job.cpu);
      panvk_per_arch(cmd_close_batch)(cmdbuf);
   }
}
802
static void
panvk_meta_copy_img2img_init(struct panvk_physical_device *dev, bool is_ms)
{
   /* Pre-compile the img -> img copy shaders and renderer states for
    * every supported format pair and texture dimensionality, in both
    * non-array and array flavours.  Called once per sample mode
    * (is_ms selects the multisampled variants). */
   STATIC_ASSERT(ARRAY_SIZE(panvk_meta_copy_img2img_fmts) == PANVK_META_COPY_IMG2IMG_NUM_FORMATS);

   for (unsigned i = 0; i < ARRAY_SIZE(panvk_meta_copy_img2img_fmts); i++) {
      for (unsigned texdim = 1; texdim <= 3; texdim++) {
         unsigned texdimidx = panvk_meta_copy_tex_type(texdim, false);
         assert(texdimidx < ARRAY_SIZE(dev->meta.copy.img2img[0]));

         /* No MSAA on 3D textures */
         if (texdim == 3 && is_ms) continue;

         /* Non-array variant. */
         struct pan_shader_info shader_info;
         mali_ptr shader =
            panvk_meta_copy_img2img_shader(&dev->pdev, &dev->meta.bin_pool.base,
                                           panvk_meta_copy_img2img_fmts[i].srcfmt,
                                           panvk_meta_copy_img2img_fmts[i].dstfmt,
                                           panvk_meta_copy_img2img_fmts[i].dstmask,
                                           texdim, false, is_ms, &shader_info);
         dev->meta.copy.img2img[is_ms][texdimidx][i].rsd =
            panvk_meta_copy_to_img_emit_rsd(&dev->pdev, &dev->meta.desc_pool.base,
                                            shader, &shader_info,
                                            panvk_meta_copy_img2img_fmts[i].dstfmt,
                                            panvk_meta_copy_img2img_fmts[i].dstmask,
                                            true);
         /* No array flavour of 3D textures. */
         if (texdim == 3)
            continue;

         /* Array variant (same format/dim, texisarray = true). */
         memset(&shader_info, 0, sizeof(shader_info));
         texdimidx = panvk_meta_copy_tex_type(texdim, true);
         assert(texdimidx < ARRAY_SIZE(dev->meta.copy.img2img[0]));
         shader =
            panvk_meta_copy_img2img_shader(&dev->pdev, &dev->meta.bin_pool.base,
                                           panvk_meta_copy_img2img_fmts[i].srcfmt,
                                           panvk_meta_copy_img2img_fmts[i].dstfmt,
                                           panvk_meta_copy_img2img_fmts[i].dstmask,
                                           texdim, true, is_ms, &shader_info);
         dev->meta.copy.img2img[is_ms][texdimidx][i].rsd =
            panvk_meta_copy_to_img_emit_rsd(&dev->pdev, &dev->meta.desc_pool.base,
                                            shader, &shader_info,
                                            panvk_meta_copy_img2img_fmts[i].dstfmt,
                                            panvk_meta_copy_img2img_fmts[i].dstmask,
                                            true);
      }
   }
}
850
851 void
panvk_per_arch(CmdCopyImage2)852 panvk_per_arch(CmdCopyImage2)(VkCommandBuffer commandBuffer,
853 const VkCopyImageInfo2 *pCopyImageInfo)
854 {
855 VK_FROM_HANDLE(panvk_cmd_buffer, cmdbuf, commandBuffer);
856 VK_FROM_HANDLE(panvk_image, dst, pCopyImageInfo->dstImage);
857 VK_FROM_HANDLE(panvk_image, src, pCopyImageInfo->srcImage);
858
859 for (unsigned i = 0; i < pCopyImageInfo->regionCount; i++) {
860 panvk_meta_copy_img2img(cmdbuf, src, dst, &pCopyImageInfo->pRegions[i]);
861 }
862 }
863
864 static unsigned
panvk_meta_copy_buf_texelsize(enum pipe_format imgfmt,unsigned mask)865 panvk_meta_copy_buf_texelsize(enum pipe_format imgfmt, unsigned mask)
866 {
867 unsigned imgtexelsz = util_format_get_blocksize(imgfmt);
868 unsigned nbufcomps = util_bitcount(mask);
869
870 if (nbufcomps == util_format_get_nr_components(imgfmt))
871 return imgtexelsz;
872
873 /* Special case for Z24 buffers which are not tightly packed */
874 if (mask == 7 && imgtexelsz == 4)
875 return 4;
876
877 /* Special case for S8 extraction from Z32_S8X24 */
878 if (mask == 2 && imgtexelsz == 8)
879 return 1;
880
881 unsigned compsz =
882 util_format_get_component_bits(imgfmt, UTIL_FORMAT_COLORSPACE_RGB, 0);
883
884 assert(!(compsz % 8));
885
886 return nbufcomps * compsz / 8;
887 }
888
889 static enum pipe_format
panvk_meta_copy_buf2img_format(enum pipe_format imgfmt)890 panvk_meta_copy_buf2img_format(enum pipe_format imgfmt)
891 {
892 /* Pick blendable formats when we can, and the FLOAT variant matching the
893 * texelsize otherwise.
894 */
895 switch (util_format_get_blocksize(imgfmt)) {
896 case 1: return PIPE_FORMAT_R8_UNORM;
897 /* AFBC stores things differently for RGB565,
898 * we can't simply map to R8G8 in that case */
899 case 2: return (imgfmt == PIPE_FORMAT_R5G6B5_UNORM ||
900 imgfmt == PIPE_FORMAT_B5G6R5_UNORM) ?
901 PIPE_FORMAT_R5G6B5_UNORM : PIPE_FORMAT_R8G8_UNORM;
902 case 4: return PIPE_FORMAT_R8G8B8A8_UNORM;
903 case 6: return PIPE_FORMAT_R16G16B16_UINT;
904 case 8: return PIPE_FORMAT_R32G32_UINT;
905 case 12: return PIPE_FORMAT_R32G32B32_UINT;
906 case 16: return PIPE_FORMAT_R32G32B32A32_UINT;
907 default: unreachable("Invalid format\n");
908 }
909 }
910
/* Key identifying a meta-copy shader variant: the canonical format used
 * for the copy plus the mask of image components actually transferred.
 */
struct panvk_meta_copy_format_info {
   enum pipe_format imgfmt; /* canonical copy format */
   unsigned mask;           /* bitmask of copied components */
};
915
/* All (format, mask) keys supported by the buffer -> image copy path.
 * Entries must match what panvk_meta_copy_buf2img_format() +
 * panvk_meta_copy_img_mask() produce; the table position is the index
 * returned by panvk_meta_copy_buf2img_format_idx().
 */
static const struct panvk_meta_copy_format_info panvk_meta_copy_buf2img_fmts[] = {
   { PIPE_FORMAT_R8_UNORM, 0x1 },
   { PIPE_FORMAT_R8G8_UNORM, 0x3 },
   { PIPE_FORMAT_R5G6B5_UNORM, 0x7 },
   { PIPE_FORMAT_R8G8B8A8_UNORM, 0xf },
   { PIPE_FORMAT_R16G16B16_UINT, 0x7 },
   { PIPE_FORMAT_R32G32_UINT, 0x3 },
   { PIPE_FORMAT_R32G32B32_UINT, 0x7 },
   { PIPE_FORMAT_R32G32B32A32_UINT, 0xf },
   /* S8 -> Z24S8 */
   { PIPE_FORMAT_R8G8B8A8_UNORM, 0x8 },
   /* S8 -> Z32_S8X24 */
   { PIPE_FORMAT_R32G32_UINT, 0x2 },
   /* Z24X8 -> Z24S8 */
   { PIPE_FORMAT_R8G8B8A8_UNORM, 0x7 },
   /* Z32 -> Z32_S8X24 */
   { PIPE_FORMAT_R32G32_UINT, 0x1 },
};
934
/* Push-constant/UBO payload consumed by the buf2img fragment shader. */
struct panvk_meta_copy_buf2img_info {
   struct {
      mali_ptr ptr;    /* GPU address of the source buffer */
      struct {
         unsigned line; /* bytes per row in the buffer */
         unsigned surf; /* bytes per 2D slice in the buffer */
      } stride;
   } buf;
};
944
/* Emits a nir_load_ubo reading one field of
 * struct panvk_meta_copy_buf2img_info from UBO 0, with the load width and
 * offset derived from the struct layout.
 */
#define panvk_meta_copy_buf2img_get_info_field(b, field) \
        nir_load_ubo((b), 1, \
                     sizeof(((struct panvk_meta_copy_buf2img_info *)0)->field) * 8, \
                     nir_imm_int(b, 0), \
                     nir_imm_int(b, offsetof(struct panvk_meta_copy_buf2img_info, field)), \
                     .align_mul = 4, \
                     .align_offset = 0, \
                     .range_base = 0, \
                     .range = ~0)
954
/*
 * Builds the fragment shader used to copy a linear buffer into an image.
 * The source buffer is described by UBO 0 (struct
 * panvk_meta_copy_buf2img_info) and the destination image is bound as RT0;
 * each fragment loads one buffer texel at a position derived from the
 * interpolated TEX0 varying and writes it to the render target.
 * Returns the GPU address of the uploaded shader binary; compiler metadata
 * is returned through @shader_info.
 */
static mali_ptr
panvk_meta_copy_buf2img_shader(struct panfrost_device *pdev,
                               struct pan_pool *bin_pool,
                               struct panvk_meta_copy_format_info key,
                               struct pan_shader_info *shader_info)
{
   nir_builder b =
      nir_builder_init_simple_shader(MESA_SHADER_FRAGMENT,
                                     GENX(pan_shader_get_compiler_options)(),
                                     "panvk_meta_copy_buf2img(imgfmt=%s,mask=%x)",
                                     util_format_name(key.imgfmt),
                                     key.mask);

   b.shader->info.num_ubos = 1;

   /* Destination texel coordinates arrive through the TEX0 varying. */
   nir_variable *coord_var =
      nir_variable_create(b.shader, nir_var_shader_in,
                          glsl_vector_type(GLSL_TYPE_FLOAT, 3),
                          "coord");
   coord_var->data.location = VARYING_SLOT_TEX0;
   nir_ssa_def *coord = nir_load_var(&b, coord_var);

   coord = nir_f2u32(&b, coord);

   nir_ssa_def *bufptr =
      panvk_meta_copy_buf2img_get_info_field(&b, buf.ptr);
   nir_ssa_def *buflinestride =
      panvk_meta_copy_buf2img_get_info_field(&b, buf.stride.line);
   nir_ssa_def *bufsurfstride =
      panvk_meta_copy_buf2img_get_info_field(&b, buf.stride.surf);

   unsigned imgtexelsz = util_format_get_blocksize(key.imgfmt);
   unsigned buftexelsz = panvk_meta_copy_buf_texelsize(key.imgfmt, key.mask);
   unsigned writemask = key.mask;

   /* Source byte offset = x * texelsize + y * line_stride + z * surf_stride. */
   nir_ssa_def *offset =
      nir_imul(&b, nir_channel(&b, coord, 0), nir_imm_int(&b, buftexelsz));
   offset = nir_iadd(&b, offset,
                     nir_imul(&b, nir_channel(&b, coord, 1), buflinestride));
   offset = nir_iadd(&b, offset,
                     nir_imul(&b, nir_channel(&b, coord, 2), bufsurfstride));
   bufptr = nir_iadd(&b, bufptr, nir_u2u64(&b, offset));

   /* Component size of the RT write: 8-bit for small (blendable) formats,
    * otherwise 16/32-bit; RGB565 is handled separately below.
    */
   unsigned imgcompsz =
      (imgtexelsz <= 4 && key.imgfmt != PIPE_FORMAT_R5G6B5_UNORM) ?
      1 : MIN2(1 << (ffs(imgtexelsz) - 1), 4);

   unsigned nimgcomps = imgtexelsz / imgcompsz;
   unsigned bufcompsz = MIN2(buftexelsz, imgcompsz);
   unsigned nbufcomps = buftexelsz / bufcompsz;

   assert(bufcompsz == 1 || bufcompsz == 2 || bufcompsz == 4);
   assert(nbufcomps <= 4 && nimgcomps <= 4);

   nir_ssa_def *texel =
      nir_load_global(&b, bufptr, bufcompsz, nbufcomps, bufcompsz * 8);

   enum glsl_base_type basetype;
   if (key.imgfmt == PIPE_FORMAT_R5G6B5_UNORM) {
      /* Unpack the 5/6/5 fields and normalize to [0,1] floats. */
      texel = nir_vec3(&b,
                       nir_iand_imm(&b, texel, BITFIELD_MASK(5)),
                       nir_iand_imm(&b, nir_ushr_imm(&b, texel, 5), BITFIELD_MASK(6)),
                       nir_iand_imm(&b, nir_ushr_imm(&b, texel, 11), BITFIELD_MASK(5)));
      texel = nir_fmul(&b,
                       nir_u2f32(&b, texel),
                       nir_vec3(&b,
                                nir_imm_float(&b, 1.0f / 31),
                                nir_imm_float(&b, 1.0f / 63),
                                nir_imm_float(&b, 1.0f / 31)));
      nimgcomps = 3;
      basetype = GLSL_TYPE_FLOAT;
   } else if (imgcompsz == 1) {
      assert(bufcompsz == 1);
      /* Blendable formats are unorm and the fixed-function blend unit
       * takes float values.
       */
      texel = nir_fmul(&b, nir_u2f32(&b, texel),
                       nir_imm_float(&b, 1.0f / 255));
      basetype = GLSL_TYPE_FLOAT;
   } else {
      texel = nir_u2uN(&b, texel, imgcompsz * 8);
      basetype = imgcompsz == 2 ? GLSL_TYPE_UINT16 : GLSL_TYPE_UINT;
   }

   /* We always pass the texel using 32-bit regs for now */
   nir_variable *out =
      nir_variable_create(b.shader, nir_var_shader_out,
                          glsl_vector_type(basetype, nimgcomps),
                          "out");
   out->data.location = FRAG_RESULT_DATA0;

   uint16_t fullmask = (1 << nimgcomps) - 1;

   assert(fullmask >= writemask);

   /* Partial write (e.g. stencil-only update of a packed depth/stencil
    * image): re-read the output variable and merge the untouched
    * components, or zero-fill when components are byte-sized.
    */
   if (fullmask != writemask) {
      unsigned first_written_comp = ffs(writemask) - 1;
      nir_ssa_def *oldtexel = NULL;
      if (imgcompsz > 1)
         oldtexel = nir_load_var(&b, out);

      nir_ssa_def *texel_comps[4];
      for (unsigned i = 0; i < nimgcomps; i++) {
         if (writemask & BITFIELD_BIT(i))
            texel_comps[i] = nir_channel(&b, texel, i - first_written_comp);
         else if (imgcompsz > 1)
            texel_comps[i] = nir_channel(&b, oldtexel, i);
         else
            texel_comps[i] = nir_imm_intN_t(&b, 0, texel->bit_size);
      }

      texel = nir_vec(&b, texel_comps, nimgcomps);
   }

   nir_store_var(&b, out, texel, 0xff);

   struct panfrost_compile_inputs inputs = {
      .gpu_id = pdev->gpu_id,
      .is_blit = true,
   };

#if PAN_ARCH >= 6
   /* Bifrost+: static render-target conversion descriptor matching the
    * register format chosen above.
    */
   pan_pack(&inputs.bifrost.rt_conv[0], INTERNAL_CONVERSION, cfg) {
      cfg.memory_format = (imgcompsz == 2 ? MALI_RG16UI : MALI_RG32UI) << 12;
      cfg.register_format = imgcompsz == 2 ?
                            MALI_REGISTER_FILE_FORMAT_U16 :
                            MALI_REGISTER_FILE_FORMAT_U32;
   }
   inputs.bifrost.static_rt_conv = true;
#endif

   struct util_dynarray binary;

   util_dynarray_init(&binary, NULL);
   GENX(pan_shader_compile)(b.shader, &inputs, &binary, shader_info);

   /* Make sure UBO words have been upgraded to push constants */
   assert(shader_info->ubo_count == 1);

   mali_ptr shader =
      pan_pool_upload_aligned(bin_pool, binary.data, binary.size,
                              PAN_ARCH >= 6 ? 128 : 64);

   util_dynarray_fini(&binary);
   ralloc_free(b.shader);

   return shader;
}
1103
1104 static unsigned
panvk_meta_copy_buf2img_format_idx(struct panvk_meta_copy_format_info key)1105 panvk_meta_copy_buf2img_format_idx(struct panvk_meta_copy_format_info key)
1106 {
1107 for (unsigned i = 0; i < ARRAY_SIZE(panvk_meta_copy_buf2img_fmts); i++) {
1108 if (!memcmp(&key, &panvk_meta_copy_buf2img_fmts[i], sizeof(key)))
1109 return i;
1110 }
1111
1112 unreachable("Invalid image format\n");
1113 }
1114
/*
 * Records one buffer -> image copy region: sets up a one-RT framebuffer
 * targeting the destination mip level and, for each destination layer,
 * opens a batch and emits a tiler job that rasterizes the copy rectangle
 * with the pre-built buf2img fragment shader.
 */
static void
panvk_meta_copy_buf2img(struct panvk_cmd_buffer *cmdbuf,
                        const struct panvk_buffer *buf,
                        const struct panvk_image *img,
                        const VkBufferImageCopy2 *region)
{
   struct panfrost_device *pdev = &cmdbuf->device->physical_device->pdev;
   struct pan_fb_info *fbinfo = &cmdbuf->state.fb.info;
   /* Destination rectangle, clamped to non-negative coordinates. */
   unsigned minx = MAX2(region->imageOffset.x, 0);
   unsigned miny = MAX2(region->imageOffset.y, 0);
   unsigned maxx = MAX2(region->imageOffset.x + region->imageExtent.width - 1, 0);
   unsigned maxy = MAX2(region->imageOffset.y + region->imageExtent.height - 1, 0);

   mali_ptr vpd =
      panvk_per_arch(meta_emit_viewport)(&cmdbuf->desc_pool.base,
                                         minx, miny, maxx, maxy);

   /* Two-triangle strip covering the destination rectangle. */
   float dst_rect[] = {
      minx, miny, 0.0, 1.0,
      maxx + 1, miny, 0.0, 1.0,
      minx, maxy + 1, 0.0, 1.0,
      maxx + 1, maxy + 1, 0.0, 1.0,
   };
   mali_ptr dst_coords =
      pan_pool_upload_aligned(&cmdbuf->desc_pool.base, dst_rect,
                              sizeof(dst_rect), 64);

   struct panvk_meta_copy_format_info key = {
      .imgfmt = panvk_meta_copy_buf2img_format(img->pimage.layout.format),
      .mask = panvk_meta_copy_img_mask(img->pimage.layout.format,
                                       region->imageSubresource.aspectMask),
   };

   unsigned fmtidx = panvk_meta_copy_buf2img_format_idx(key);

   /* RSD + push-constant map pre-built at physical-device init time. */
   mali_ptr rsd =
      cmdbuf->device->physical_device->meta.copy.buf2img[fmtidx].rsd;
   const struct panfrost_ubo_push *pushmap =
      &cmdbuf->device->physical_device->meta.copy.buf2img[fmtidx].pushmap;

   unsigned buftexelsz = panvk_meta_copy_buf_texelsize(key.imgfmt, key.mask);
   /* Vulkan: a zero bufferRowLength/bufferImageHeight means tightly packed
    * (use the image extent instead).
    */
   struct panvk_meta_copy_buf2img_info info = {
      .buf.ptr = buf->bo->ptr.gpu + buf->bo_offset + region->bufferOffset,
      .buf.stride.line = (region->bufferRowLength ? : region->imageExtent.width) * buftexelsz,
   };

   info.buf.stride.surf =
      (region->bufferImageHeight ? : region->imageExtent.height) * info.buf.stride.line;

   mali_ptr pushconsts =
      panvk_meta_copy_emit_push_constants(pdev, pushmap, &cmdbuf->desc_pool.base,
                                          &info, sizeof(info));
   mali_ptr ubo =
      panvk_meta_copy_emit_ubo(pdev, &cmdbuf->desc_pool.base, &info, sizeof(info));

   /* Render-target view of the destination mip level; the layer range is
    * filled in per-iteration below.
    */
   struct pan_image_view view = {
      .format = key.imgfmt,
      .dim = MALI_TEXTURE_DIMENSION_2D,
      .image = &img->pimage,
      .nr_samples = img->pimage.layout.nr_samples,
      .first_level = region->imageSubresource.mipLevel,
      .last_level = region->imageSubresource.mipLevel,
      .swizzle = { PIPE_SWIZZLE_X, PIPE_SWIZZLE_Y, PIPE_SWIZZLE_Z, PIPE_SWIZZLE_W },
   };

   /* TODO: don't force preloads of dst resources if unneeded */
   cmdbuf->state.fb.crc_valid[0] = false;
   *fbinfo = (struct pan_fb_info){
      .width = u_minify(img->pimage.layout.width, region->imageSubresource.mipLevel),
      .height = u_minify(img->pimage.layout.height, region->imageSubresource.mipLevel),
      .extent.minx = minx,
      .extent.maxx = maxx,
      .extent.miny = miny,
      .extent.maxy = maxy,
      .nr_samples = 1,
      .rt_count = 1,
      .rts[0].view = &view,
      .rts[0].preload = true,
      .rts[0].crc_valid = &cmdbuf->state.fb.crc_valid[0],
   };

   panvk_per_arch(cmd_close_batch)(cmdbuf);

   /* Either an array copy (layerCount > 1) or a 3D copy (depth > 1),
    * never both.
    */
   assert(region->imageSubresource.layerCount == 1 ||
          region->imageExtent.depth == 1);
   assert(region->imageOffset.z >= 0);
   unsigned first_layer = MAX2(region->imageSubresource.baseArrayLayer, region->imageOffset.z);
   unsigned nlayers = MAX2(region->imageSubresource.layerCount, region->imageExtent.depth);
   for (unsigned l = 0; l < nlayers; l++) {
      /* Source coords: z carries the buffer slice index. */
      float src_rect[] = {
         0, 0, l, 1.0,
         region->imageExtent.width, 0, l, 1.0,
         0, region->imageExtent.height, l, 1.0,
         region->imageExtent.width, region->imageExtent.height, l, 1.0,
      };

      mali_ptr src_coords =
         pan_pool_upload_aligned(&cmdbuf->desc_pool.base, src_rect,
                                 sizeof(src_rect), 64);

      struct panvk_batch *batch = panvk_cmd_open_batch(cmdbuf);

      view.first_layer = view.last_layer = l + first_layer;
      batch->blit.src = buf->bo;
      batch->blit.dst = img->pimage.data.bo;
      panvk_per_arch(cmd_alloc_tls_desc)(cmdbuf, true);
      panvk_per_arch(cmd_alloc_fb_desc)(cmdbuf);
      panvk_per_arch(cmd_prepare_tiler_context)(cmdbuf);

      mali_ptr tsd, tiler;

#if PAN_ARCH >= 6
      tsd = batch->tls.gpu;
      tiler = batch->tiler.descs.gpu;
#else
      tsd = batch->fb.desc.gpu;
      tiler = 0;
#endif

      struct panfrost_ptr job;

      job = panvk_meta_copy_emit_tiler_job(&cmdbuf->desc_pool.base,
                                           &batch->scoreboard,
                                           src_coords, dst_coords,
                                           0, 0, ubo, pushconsts,
                                           vpd, rsd, tsd, tiler);

      util_dynarray_append(&batch->jobs, void *, job.cpu);
      panvk_per_arch(cmd_close_batch)(cmdbuf);
   }
}
1246
1247 static void
panvk_meta_copy_buf2img_init(struct panvk_physical_device * dev)1248 panvk_meta_copy_buf2img_init(struct panvk_physical_device *dev)
1249 {
1250 STATIC_ASSERT(ARRAY_SIZE(panvk_meta_copy_buf2img_fmts) == PANVK_META_COPY_BUF2IMG_NUM_FORMATS);
1251
1252 for (unsigned i = 0; i < ARRAY_SIZE(panvk_meta_copy_buf2img_fmts); i++) {
1253 struct pan_shader_info shader_info;
1254 mali_ptr shader =
1255 panvk_meta_copy_buf2img_shader(&dev->pdev, &dev->meta.bin_pool.base,
1256 panvk_meta_copy_buf2img_fmts[i],
1257 &shader_info);
1258 dev->meta.copy.buf2img[i].pushmap = shader_info.push;
1259 dev->meta.copy.buf2img[i].rsd =
1260 panvk_meta_copy_to_img_emit_rsd(&dev->pdev, &dev->meta.desc_pool.base,
1261 shader, &shader_info,
1262 panvk_meta_copy_buf2img_fmts[i].imgfmt,
1263 panvk_meta_copy_buf2img_fmts[i].mask,
1264 false);
1265 }
1266 }
1267
1268 void
panvk_per_arch(CmdCopyBufferToImage2)1269 panvk_per_arch(CmdCopyBufferToImage2)(VkCommandBuffer commandBuffer,
1270 const VkCopyBufferToImageInfo2 *pCopyBufferToImageInfo)
1271 {
1272 VK_FROM_HANDLE(panvk_cmd_buffer, cmdbuf, commandBuffer);
1273 VK_FROM_HANDLE(panvk_buffer, buf, pCopyBufferToImageInfo->srcBuffer);
1274 VK_FROM_HANDLE(panvk_image, img, pCopyBufferToImageInfo->dstImage);
1275
1276 for (unsigned i = 0; i < pCopyBufferToImageInfo->regionCount; i++) {
1277 panvk_meta_copy_buf2img(cmdbuf, buf, img, &pCopyBufferToImageInfo->pRegions[i]);
1278 }
1279 }
1280
/* All (format, mask) keys supported by the image -> buffer copy path.
 * Entries must match what panvk_meta_copy_img2buf_format() +
 * panvk_meta_copy_img_mask() produce; the table position is the index
 * returned by panvk_meta_copy_img2buf_format_idx().
 */
static const struct panvk_meta_copy_format_info panvk_meta_copy_img2buf_fmts[] = {
   { PIPE_FORMAT_R8_UINT, 0x1 },
   { PIPE_FORMAT_R8G8_UINT, 0x3 },
   { PIPE_FORMAT_R5G6B5_UNORM, 0x7 },
   { PIPE_FORMAT_R8G8B8A8_UINT, 0xf },
   { PIPE_FORMAT_R16G16B16_UINT, 0x7 },
   { PIPE_FORMAT_R32G32_UINT, 0x3 },
   { PIPE_FORMAT_R32G32B32_UINT, 0x7 },
   { PIPE_FORMAT_R32G32B32A32_UINT, 0xf },
   /* S8 -> Z24S8 */
   { PIPE_FORMAT_R8G8B8A8_UINT, 0x8 },
   /* S8 -> Z32_S8X24 */
   { PIPE_FORMAT_R32G32_UINT, 0x2 },
   /* Z24X8 -> Z24S8 */
   { PIPE_FORMAT_R8G8B8A8_UINT, 0x7 },
   /* Z32 -> Z32_S8X24 */
   { PIPE_FORMAT_R32G32_UINT, 0x1 },
};
1299
1300 static enum pipe_format
panvk_meta_copy_img2buf_format(enum pipe_format imgfmt)1301 panvk_meta_copy_img2buf_format(enum pipe_format imgfmt)
1302 {
1303 /* Pick blendable formats when we can, and the FLOAT variant matching the
1304 * texelsize otherwise.
1305 */
1306 switch (util_format_get_blocksize(imgfmt)) {
1307 case 1: return PIPE_FORMAT_R8_UINT;
1308 /* AFBC stores things differently for RGB565,
1309 * we can't simply map to R8G8 in that case */
1310 case 2: return (imgfmt == PIPE_FORMAT_R5G6B5_UNORM ||
1311 imgfmt == PIPE_FORMAT_B5G6R5_UNORM) ?
1312 PIPE_FORMAT_R5G6B5_UNORM : PIPE_FORMAT_R8G8_UINT;
1313 case 4: return PIPE_FORMAT_R8G8B8A8_UINT;
1314 case 6: return PIPE_FORMAT_R16G16B16_UINT;
1315 case 8: return PIPE_FORMAT_R32G32_UINT;
1316 case 12: return PIPE_FORMAT_R32G32B32_UINT;
1317 case 16: return PIPE_FORMAT_R32G32B32A32_UINT;
1318 default: unreachable("Invalid format\n");
1319 }
1320 }
1321
/* Push-constant/UBO payload consumed by the img2buf compute shader. */
struct panvk_meta_copy_img2buf_info {
   struct {
      mali_ptr ptr;    /* GPU address of the destination buffer */
      struct {
         unsigned line; /* bytes per row in the buffer */
         unsigned surf; /* bytes per 2D slice in the buffer */
      } stride;
   } buf;
   struct {
      struct {
         /* copy origin in the image (x/y tile-aligned by the caller) */
         unsigned x, y, z;
      } offset;
      struct {
         /* inclusive bounds of the region actually copied */
         unsigned minx, miny, maxx, maxy;
      } extent;
   } img;
};
1339
/* Emits a nir_load_ubo reading one field of
 * struct panvk_meta_copy_img2buf_info from UBO 0, with the load width and
 * offset derived from the struct layout.
 */
#define panvk_meta_copy_img2buf_get_info_field(b, field) \
        nir_load_ubo((b), 1, \
                     sizeof(((struct panvk_meta_copy_img2buf_info *)0)->field) * 8, \
                     nir_imm_int(b, 0), \
                     nir_imm_int(b, offsetof(struct panvk_meta_copy_img2buf_info, field)), \
                     .align_mul = 4, \
                     .align_offset = 0, \
                     .range_base = 0, \
                     .range = ~0)
1349
1350 static mali_ptr
panvk_meta_copy_img2buf_shader(struct panfrost_device * pdev,struct pan_pool * bin_pool,struct panvk_meta_copy_format_info key,unsigned texdim,unsigned texisarray,struct pan_shader_info * shader_info)1351 panvk_meta_copy_img2buf_shader(struct panfrost_device *pdev,
1352 struct pan_pool *bin_pool,
1353 struct panvk_meta_copy_format_info key,
1354 unsigned texdim, unsigned texisarray,
1355 struct pan_shader_info *shader_info)
1356 {
1357 unsigned imgtexelsz = util_format_get_blocksize(key.imgfmt);
1358 unsigned buftexelsz = panvk_meta_copy_buf_texelsize(key.imgfmt, key.mask);
1359
1360 /* FIXME: Won't work on compute queues, but we can't do that with
1361 * a compute shader if the destination is an AFBC surface.
1362 */
1363 nir_builder b =
1364 nir_builder_init_simple_shader(MESA_SHADER_COMPUTE,
1365 GENX(pan_shader_get_compiler_options)(),
1366 "panvk_meta_copy_img2buf(dim=%dD%s,imgfmt=%s,mask=%x)",
1367 texdim, texisarray ? "[]" : "",
1368 util_format_name(key.imgfmt),
1369 key.mask);
1370
1371 b.shader->info.num_ubos = 1;
1372
1373 nir_ssa_def *coord = nir_load_global_invocation_id(&b, 32);
1374 nir_ssa_def *bufptr =
1375 panvk_meta_copy_img2buf_get_info_field(&b, buf.ptr);
1376 nir_ssa_def *buflinestride =
1377 panvk_meta_copy_img2buf_get_info_field(&b, buf.stride.line);
1378 nir_ssa_def *bufsurfstride =
1379 panvk_meta_copy_img2buf_get_info_field(&b, buf.stride.surf);
1380
1381 nir_ssa_def *imgminx =
1382 panvk_meta_copy_img2buf_get_info_field(&b, img.extent.minx);
1383 nir_ssa_def *imgminy =
1384 panvk_meta_copy_img2buf_get_info_field(&b, img.extent.miny);
1385 nir_ssa_def *imgmaxx =
1386 panvk_meta_copy_img2buf_get_info_field(&b, img.extent.maxx);
1387 nir_ssa_def *imgmaxy =
1388 panvk_meta_copy_img2buf_get_info_field(&b, img.extent.maxy);
1389
1390 nir_ssa_def *imgcoords, *inbounds;
1391
1392 switch (texdim + texisarray) {
1393 case 1:
1394 imgcoords =
1395 nir_iadd(&b,
1396 nir_channel(&b, coord, 0),
1397 panvk_meta_copy_img2buf_get_info_field(&b, img.offset.x));
1398 inbounds =
1399 nir_iand(&b,
1400 nir_uge(&b, imgmaxx, nir_channel(&b, imgcoords, 0)),
1401 nir_uge(&b, nir_channel(&b, imgcoords, 0), imgminx));
1402 break;
1403 case 2:
1404 imgcoords =
1405 nir_vec2(&b,
1406 nir_iadd(&b,
1407 nir_channel(&b, coord, 0),
1408 panvk_meta_copy_img2buf_get_info_field(&b, img.offset.x)),
1409 nir_iadd(&b,
1410 nir_channel(&b, coord, 1),
1411 panvk_meta_copy_img2buf_get_info_field(&b, img.offset.y)));
1412 inbounds =
1413 nir_iand(&b,
1414 nir_iand(&b,
1415 nir_uge(&b, imgmaxx, nir_channel(&b, imgcoords, 0)),
1416 nir_uge(&b, imgmaxy, nir_channel(&b, imgcoords, 1))),
1417 nir_iand(&b,
1418 nir_uge(&b, nir_channel(&b, imgcoords, 0), imgminx),
1419 nir_uge(&b, nir_channel(&b, imgcoords, 1), imgminy)));
1420 break;
1421 case 3:
1422 imgcoords =
1423 nir_vec3(&b,
1424 nir_iadd(&b,
1425 nir_channel(&b, coord, 0),
1426 panvk_meta_copy_img2buf_get_info_field(&b, img.offset.x)),
1427 nir_iadd(&b,
1428 nir_channel(&b, coord, 1),
1429 panvk_meta_copy_img2buf_get_info_field(&b, img.offset.y)),
1430 nir_iadd(&b,
1431 nir_channel(&b, coord, 2),
1432 panvk_meta_copy_img2buf_get_info_field(&b, img.offset.y)));
1433 inbounds =
1434 nir_iand(&b,
1435 nir_iand(&b,
1436 nir_uge(&b, imgmaxx, nir_channel(&b, imgcoords, 0)),
1437 nir_uge(&b, imgmaxy, nir_channel(&b, imgcoords, 1))),
1438 nir_iand(&b,
1439 nir_uge(&b, nir_channel(&b, imgcoords, 0), imgminx),
1440 nir_uge(&b, nir_channel(&b, imgcoords, 1), imgminy)));
1441 break;
1442 default:
1443 unreachable("Invalid texture dimension\n");
1444 }
1445
1446 nir_push_if(&b, inbounds);
1447
1448 /* FIXME: doesn't work for tiled+compressed formats since blocks are 4x4
1449 * blocks instead of 16x16 texels in that case, and there's nothing we can
1450 * do to force the tile size to 4x4 in the render path.
1451 * This being said, compressed textures are not compatible with AFBC, so we
1452 * could use a compute shader arranging the blocks properly.
1453 */
1454 nir_ssa_def *offset =
1455 nir_imul(&b, nir_channel(&b, coord, 0), nir_imm_int(&b, buftexelsz));
1456 offset = nir_iadd(&b, offset,
1457 nir_imul(&b, nir_channel(&b, coord, 1), buflinestride));
1458 offset = nir_iadd(&b, offset,
1459 nir_imul(&b, nir_channel(&b, coord, 2), bufsurfstride));
1460 bufptr = nir_iadd(&b, bufptr, nir_u2u64(&b, offset));
1461
1462 unsigned imgcompsz = imgtexelsz <= 4 ?
1463 1 : MIN2(1 << (ffs(imgtexelsz) - 1), 4);
1464 unsigned nimgcomps = imgtexelsz / imgcompsz;
1465 assert(nimgcomps <= 4);
1466
1467 nir_tex_instr *tex = nir_tex_instr_create(b.shader, 1);
1468 tex->op = nir_texop_txf;
1469 tex->texture_index = 0;
1470 tex->is_array = texisarray;
1471 tex->dest_type = util_format_is_unorm(key.imgfmt) ?
1472 nir_type_float32 : nir_type_uint32;
1473
1474 switch (texdim) {
1475 case 1: tex->sampler_dim = GLSL_SAMPLER_DIM_1D; break;
1476 case 2: tex->sampler_dim = GLSL_SAMPLER_DIM_2D; break;
1477 case 3: tex->sampler_dim = GLSL_SAMPLER_DIM_3D; break;
1478 default: unreachable("Invalid texture dimension");
1479 }
1480
1481 tex->src[0].src_type = nir_tex_src_coord;
1482 tex->src[0].src = nir_src_for_ssa(imgcoords);
1483 tex->coord_components = texdim + texisarray;
1484 nir_ssa_dest_init(&tex->instr, &tex->dest, 4,
1485 nir_alu_type_get_type_size(tex->dest_type), NULL);
1486 nir_builder_instr_insert(&b, &tex->instr);
1487
1488 nir_ssa_def *texel = &tex->dest.ssa;
1489
1490 unsigned fullmask = (1 << util_format_get_nr_components(key.imgfmt)) - 1;
1491 unsigned nbufcomps = util_bitcount(fullmask);
1492 if (key.mask != fullmask) {
1493 nir_ssa_def *bufcomps[4];
1494 nbufcomps = 0;
1495 for (unsigned i = 0; i < nimgcomps; i++) {
1496 if (key.mask & BITFIELD_BIT(i))
1497 bufcomps[nbufcomps++] = nir_channel(&b, texel, i);
1498 }
1499
1500 texel = nir_vec(&b, bufcomps, nbufcomps);
1501 }
1502
1503 unsigned bufcompsz = buftexelsz / nbufcomps;
1504
1505 if (key.imgfmt == PIPE_FORMAT_R5G6B5_UNORM) {
1506 texel = nir_fmul(&b, texel,
1507 nir_vec3(&b,
1508 nir_imm_float(&b, 31),
1509 nir_imm_float(&b, 63),
1510 nir_imm_float(&b, 31)));
1511 texel = nir_f2u16(&b, texel);
1512 texel = nir_ior(&b, nir_channel(&b, texel, 0),
1513 nir_ior(&b,
1514 nir_ishl(&b, nir_channel(&b, texel, 1), nir_imm_int(&b, 5)),
1515 nir_ishl(&b, nir_channel(&b, texel, 2), nir_imm_int(&b, 11))));
1516 imgcompsz = 2;
1517 bufcompsz = 2;
1518 nbufcomps = 1;
1519 nimgcomps = 1;
1520 } else if (imgcompsz == 1) {
1521 nir_ssa_def *packed = nir_channel(&b, texel, 0);
1522 for (unsigned i = 1; i < nbufcomps; i++) {
1523 packed = nir_ior(&b, packed,
1524 nir_ishl(&b, nir_iand_imm(&b, nir_channel(&b, texel, i), 0xff),
1525 nir_imm_int(&b, i * 8)));
1526 }
1527 texel = packed;
1528
1529 bufcompsz = nbufcomps == 3 ? 4 : nbufcomps;
1530 nbufcomps = 1;
1531 }
1532
1533 assert(bufcompsz == 1 || bufcompsz == 2 || bufcompsz == 4);
1534 assert(nbufcomps <= 4 && nimgcomps <= 4);
1535 texel = nir_u2uN(&b, texel, bufcompsz * 8);
1536
1537 nir_store_global(&b, bufptr, bufcompsz, texel, (1 << nbufcomps) - 1);
1538 nir_pop_if(&b, NULL);
1539
1540 struct panfrost_compile_inputs inputs = {
1541 .gpu_id = pdev->gpu_id,
1542 .is_blit = true,
1543 };
1544
1545 struct util_dynarray binary;
1546
1547 util_dynarray_init(&binary, NULL);
1548 GENX(pan_shader_compile)(b.shader, &inputs, &binary, shader_info);
1549
1550 /* Make sure UBO words have been upgraded to push constants and everything
1551 * is at the right place.
1552 */
1553 assert(shader_info->ubo_count == 1);
1554 assert(shader_info->push.count <= (sizeof(struct panvk_meta_copy_img2buf_info) / 4));
1555
1556 mali_ptr shader =
1557 pan_pool_upload_aligned(bin_pool, binary.data, binary.size,
1558 PAN_ARCH >= 6 ? 128 : 64);
1559
1560 util_dynarray_fini(&binary);
1561 ralloc_free(b.shader);
1562
1563 return shader;
1564 }
1565
1566 static unsigned
panvk_meta_copy_img2buf_format_idx(struct panvk_meta_copy_format_info key)1567 panvk_meta_copy_img2buf_format_idx(struct panvk_meta_copy_format_info key)
1568 {
1569 for (unsigned i = 0; i < ARRAY_SIZE(panvk_meta_copy_img2buf_fmts); i++) {
1570 if (!memcmp(&key, &panvk_meta_copy_img2buf_fmts[i], sizeof(key)))
1571 return i;
1572 }
1573
1574 unreachable("Invalid texel size\n");
1575 }
1576
/*
 * Records one image -> buffer copy region as a compute job: binds the
 * source image as a texture, fills the img2buf info UBO/push constants and
 * dispatches one 16x16 workgroup per destination tile.
 */
static void
panvk_meta_copy_img2buf(struct panvk_cmd_buffer *cmdbuf,
                        const struct panvk_buffer *buf,
                        const struct panvk_image *img,
                        const VkBufferImageCopy2 *region)
{
   struct panfrost_device *pdev = &cmdbuf->device->physical_device->pdev;
   struct panvk_meta_copy_format_info key = {
      .imgfmt = panvk_meta_copy_img2buf_format(img->pimage.layout.format),
      .mask = panvk_meta_copy_img_mask(img->pimage.layout.format,
                                       region->imageSubresource.aspectMask),
   };
   unsigned buftexelsz = panvk_meta_copy_buf_texelsize(key.imgfmt, key.mask);
   unsigned texdimidx =
      panvk_meta_copy_tex_type(img->pimage.layout.dim,
                               img->pimage.layout.array_size > 1);
   unsigned fmtidx = panvk_meta_copy_img2buf_format_idx(key);

   /* RSD + push-constant map pre-built at physical-device init time. */
   mali_ptr rsd =
      cmdbuf->device->physical_device->meta.copy.img2buf[texdimidx][fmtidx].rsd;
   const struct panfrost_ubo_push *pushmap =
      &cmdbuf->device->physical_device->meta.copy.img2buf[texdimidx][fmtidx].pushmap;

   /* Offsets are aligned down to the 16x16 dispatch tile; the shader
    * discards invocations outside img.extent. A zero bufferRowLength
    * means tightly packed (Vulkan convention).
    */
   struct panvk_meta_copy_img2buf_info info = {
      .buf.ptr = buf->bo->ptr.gpu + buf->bo_offset + region->bufferOffset,
      .buf.stride.line = (region->bufferRowLength ? : region->imageExtent.width) * buftexelsz,
      .img.offset.x = MAX2(region->imageOffset.x & ~15, 0),
      .img.extent.minx = MAX2(region->imageOffset.x, 0),
      .img.extent.maxx = MAX2(region->imageOffset.x + region->imageExtent.width - 1, 0),
   };

   if (img->pimage.layout.dim == MALI_TEXTURE_DIMENSION_1D) {
      /* 1D arrays: the y dispatch dimension indexes the layer. */
      info.img.extent.maxy = region->imageSubresource.layerCount - 1;
   } else {
      info.img.offset.y = MAX2(region->imageOffset.y & ~15, 0);
      info.img.offset.z = MAX2(region->imageOffset.z, 0);
      info.img.extent.miny = MAX2(region->imageOffset.y, 0);
      info.img.extent.maxy = MAX2(region->imageOffset.y + region->imageExtent.height - 1, 0);
   }

   info.buf.stride.surf = (region->bufferImageHeight ? : region->imageExtent.height) *
                          info.buf.stride.line;

   mali_ptr pushconsts =
      panvk_meta_copy_emit_push_constants(pdev, pushmap, &cmdbuf->desc_pool.base,
                                          &info, sizeof(info));
   mali_ptr ubo =
      panvk_meta_copy_emit_ubo(pdev, &cmdbuf->desc_pool.base, &info, sizeof(info));

   /* Texture view of the copied subresource; cube maps are sampled as 2D
    * arrays.
    */
   struct pan_image_view view = {
      .format = key.imgfmt,
      .dim = img->pimage.layout.dim == MALI_TEXTURE_DIMENSION_CUBE ?
             MALI_TEXTURE_DIMENSION_2D : img->pimage.layout.dim,
      .image = &img->pimage,
      .nr_samples = img->pimage.layout.nr_samples,
      .first_level = region->imageSubresource.mipLevel,
      .last_level = region->imageSubresource.mipLevel,
      .first_layer = region->imageSubresource.baseArrayLayer,
      .last_layer = region->imageSubresource.baseArrayLayer + region->imageSubresource.layerCount - 1,
      .swizzle = { PIPE_SWIZZLE_X, PIPE_SWIZZLE_Y, PIPE_SWIZZLE_Z, PIPE_SWIZZLE_W },
   };

   mali_ptr texture =
      panvk_meta_copy_img_emit_texture(pdev, &cmdbuf->desc_pool.base, &view);
   mali_ptr sampler =
      panvk_meta_copy_img_emit_sampler(pdev, &cmdbuf->desc_pool.base);

   panvk_per_arch(cmd_close_batch)(cmdbuf);

   struct panvk_batch *batch = panvk_cmd_open_batch(cmdbuf);

   struct pan_tls_info tlsinfo = { 0 };

   batch->blit.src = img->pimage.data.bo;
   batch->blit.dst = buf->bo;
   batch->tls =
      pan_pool_alloc_desc(&cmdbuf->desc_pool.base, LOCAL_STORAGE);
   GENX(pan_emit_tls)(&tlsinfo, batch->tls.cpu);

   mali_ptr tsd = batch->tls.gpu;

   /* 16x16 workgroups (16x1 for 1D): one invocation per texel. */
   struct pan_compute_dim wg_sz = {
      16,
      img->pimage.layout.dim == MALI_TEXTURE_DIMENSION_1D ? 1 : 16,
      1,
   };

   struct pan_compute_dim num_wg = {
      (ALIGN_POT(info.img.extent.maxx + 1, 16) - info.img.offset.x) / 16,
      img->pimage.layout.dim == MALI_TEXTURE_DIMENSION_1D ?
      region->imageSubresource.layerCount :
      (ALIGN_POT(info.img.extent.maxy + 1, 16) - info.img.offset.y) / 16,
      img->pimage.layout.dim != MALI_TEXTURE_DIMENSION_1D ?
      MAX2(region->imageSubresource.layerCount, region->imageExtent.depth) : 1,
   };

   struct panfrost_ptr job =
      panvk_meta_copy_emit_compute_job(&cmdbuf->desc_pool.base,
                                       &batch->scoreboard, &num_wg, &wg_sz,
                                       texture, sampler,
                                       ubo, pushconsts,
                                       rsd, tsd);

   util_dynarray_append(&batch->jobs, void *, job.cpu);

   panvk_per_arch(cmd_close_batch)(cmdbuf);
}
1684
1685 static void
panvk_meta_copy_img2buf_init(struct panvk_physical_device * dev)1686 panvk_meta_copy_img2buf_init(struct panvk_physical_device *dev)
1687 {
1688 STATIC_ASSERT(ARRAY_SIZE(panvk_meta_copy_img2buf_fmts) == PANVK_META_COPY_IMG2BUF_NUM_FORMATS);
1689
1690 for (unsigned i = 0; i < ARRAY_SIZE(panvk_meta_copy_img2buf_fmts); i++) {
1691 for (unsigned texdim = 1; texdim <= 3; texdim++) {
1692 unsigned texdimidx = panvk_meta_copy_tex_type(texdim, false);
1693 assert(texdimidx < ARRAY_SIZE(dev->meta.copy.img2buf));
1694
1695 struct pan_shader_info shader_info;
1696 mali_ptr shader =
1697 panvk_meta_copy_img2buf_shader(&dev->pdev, &dev->meta.bin_pool.base,
1698 panvk_meta_copy_img2buf_fmts[i],
1699 texdim, false, &shader_info);
1700 dev->meta.copy.img2buf[texdimidx][i].pushmap = shader_info.push;
1701 dev->meta.copy.img2buf[texdimidx][i].rsd =
1702 panvk_meta_copy_to_buf_emit_rsd(&dev->pdev,
1703 &dev->meta.desc_pool.base,
1704 shader, &shader_info, true);
1705
1706 if (texdim == 3)
1707 continue;
1708
1709 memset(&shader_info, 0, sizeof(shader_info));
1710 texdimidx = panvk_meta_copy_tex_type(texdim, true);
1711 assert(texdimidx < ARRAY_SIZE(dev->meta.copy.img2buf));
1712 shader =
1713 panvk_meta_copy_img2buf_shader(&dev->pdev, &dev->meta.bin_pool.base,
1714 panvk_meta_copy_img2buf_fmts[i],
1715 texdim, true, &shader_info);
1716 dev->meta.copy.img2buf[texdimidx][i].pushmap = shader_info.push;
1717 dev->meta.copy.img2buf[texdimidx][i].rsd =
1718 panvk_meta_copy_to_buf_emit_rsd(&dev->pdev,
1719 &dev->meta.desc_pool.base,
1720 shader, &shader_info, true);
1721 }
1722 }
1723 }
1724
1725 void
panvk_per_arch(CmdCopyImageToBuffer2)1726 panvk_per_arch(CmdCopyImageToBuffer2)(VkCommandBuffer commandBuffer,
1727 const VkCopyImageToBufferInfo2 *pCopyImageToBufferInfo)
1728 {
1729 VK_FROM_HANDLE(panvk_cmd_buffer, cmdbuf, commandBuffer);
1730 VK_FROM_HANDLE(panvk_buffer, buf, pCopyImageToBufferInfo->dstBuffer);
1731 VK_FROM_HANDLE(panvk_image, img, pCopyImageToBufferInfo->srcImage);
1732
1733 for (unsigned i = 0; i < pCopyImageToBufferInfo->regionCount; i++) {
1734 panvk_meta_copy_img2buf(cmdbuf, buf, img, &pCopyImageToBufferInfo->pRegions[i]);
1735 }
1736 }
1737
/* Push-constant/UBO payload consumed by the buf2buf compute shader. */
struct panvk_meta_copy_buf2buf_info {
   mali_ptr src; /* GPU address of the source buffer */
   mali_ptr dst; /* GPU address of the destination buffer */
};
1742
/* Emits a NIR UBO load of one field of panvk_meta_copy_buf2buf_info from
 * UBO binding 0. Bit size and byte offset are derived from the C struct
 * with sizeof/offsetof so the shader-side layout can never drift from
 * the CPU-side one.
 */
#define panvk_meta_copy_buf2buf_get_info_field(b, field) \
        nir_load_ubo((b), 1, \
                     sizeof(((struct panvk_meta_copy_buf2buf_info *)0)->field) * 8, \
                     nir_imm_int(b, 0), \
                     nir_imm_int(b, offsetof(struct panvk_meta_copy_buf2buf_info, field)), \
                     .align_mul = 4, \
                     .align_offset = 0, \
                     .range_base = 0, \
                     .range = ~0)
1752
/* Builds and uploads a compute shader that copies a buffer in fixed-size
 * blocks: invocation N loads the blksz-byte block at src + N * blksz and
 * stores it at dst + N * blksz. The src/dst pointers come from a single
 * UBO whose words the compiler is expected to promote to push constants.
 *
 * Returns the GPU address of the uploaded binary and fills *shader_info.
 */
static mali_ptr
panvk_meta_copy_buf2buf_shader(struct panfrost_device *pdev,
                               struct pan_pool *bin_pool,
                               unsigned blksz,
                               struct pan_shader_info *shader_info)
{
   /* FIXME: Won't work on compute queues, but we can't do that with
    * a compute shader if the destination is an AFBC surface.
    * NOTE(review): this remark looks copy-pasted from the image copy
    * paths — a buffer destination has no AFBC layout; confirm.
    */
   nir_builder b =
      nir_builder_init_simple_shader(MESA_SHADER_COMPUTE,
                                     GENX(pan_shader_get_compiler_options)(),
                                     "panvk_meta_copy_buf2buf(blksz=%d)",
                                     blksz);

   /* The copy info struct lives in UBO 0. */
   b.shader->info.num_ubos = 1;

   nir_ssa_def *coord = nir_load_global_invocation_id(&b, 32);

   /* Byte offset of this invocation's block: X invocation id * blksz. */
   nir_ssa_def *offset =
      nir_u2u64(&b, nir_imul(&b, nir_channel(&b, coord, 0), nir_imm_int(&b, blksz)));
   nir_ssa_def *srcptr =
      nir_iadd(&b, panvk_meta_copy_buf2buf_get_info_field(&b, src), offset);
   nir_ssa_def *dstptr =
      nir_iadd(&b, panvk_meta_copy_buf2buf_get_info_field(&b, dst), offset);

   /* Blocks of up to 4 bytes are moved as one component of blksz * 8
    * bits; bigger blocks are split into 32-bit components, with the
    * writemask covering all of them.
    */
   unsigned compsz = blksz < 4 ? blksz : 4;
   unsigned ncomps = blksz / compsz;
   nir_store_global(&b, dstptr, blksz,
                    nir_load_global(&b, srcptr, blksz, ncomps, compsz * 8),
                    (1 << ncomps) - 1);

   struct panfrost_compile_inputs inputs = {
      .gpu_id = pdev->gpu_id,
      .is_blit = true,
   };

   struct util_dynarray binary;

   util_dynarray_init(&binary, NULL);
   GENX(pan_shader_compile)(b.shader, &inputs, &binary, shader_info);

   /* Make sure UBO words have been upgraded to push constants and everything
    * is at the right place.
    */
   assert(shader_info->ubo_count == 1);
   assert(shader_info->push.count == (sizeof(struct panvk_meta_copy_buf2buf_info) / 4));

   /* Shader binaries need 128-byte alignment on v6+ (Bifrost), 64 before. */
   mali_ptr shader =
      pan_pool_upload_aligned(bin_pool, binary.data, binary.size,
                              PAN_ARCH >= 6 ? 128 : 64);

   util_dynarray_fini(&binary);
   ralloc_free(b.shader);

   return shader;
}
1810
1811 static void
panvk_meta_copy_buf2buf_init(struct panvk_physical_device * dev)1812 panvk_meta_copy_buf2buf_init(struct panvk_physical_device *dev)
1813 {
1814 for (unsigned i = 0; i < ARRAY_SIZE(dev->meta.copy.buf2buf); i++) {
1815 struct pan_shader_info shader_info;
1816 mali_ptr shader =
1817 panvk_meta_copy_buf2buf_shader(&dev->pdev, &dev->meta.bin_pool.base,
1818 1 << i, &shader_info);
1819 dev->meta.copy.buf2buf[i].pushmap = shader_info.push;
1820 dev->meta.copy.buf2buf[i].rsd =
1821 panvk_meta_copy_to_buf_emit_rsd(&dev->pdev, &dev->meta.desc_pool.base,
1822 shader, &shader_info, false);
1823 }
1824 }
1825
/* Records a compute-based copy of one VkBufferCopy2 region from src to
 * dst, in a batch of its own. The shader variant (block size) is picked
 * from the common power-of-two alignment of the source address, the
 * destination address and the copy size.
 */
static void
panvk_meta_copy_buf2buf(struct panvk_cmd_buffer *cmdbuf,
                        const struct panvk_buffer *src,
                        const struct panvk_buffer *dst,
                        const VkBufferCopy2 *region)
{
   struct panfrost_device *pdev = &cmdbuf->device->physical_device->pdev;

   struct panvk_meta_copy_buf2buf_info info = {
      .src = src->bo->ptr.gpu + src->bo_offset + region->srcOffset,
      .dst = dst->bo->ptr.gpu + dst->bo_offset + region->dstOffset,
   };

   /* ffs() of the low 4 bits returns the 1-based index of the first set
    * bit, i.e. alignment - 1 is the log2 of the largest block size (up
    * to 16 bytes) dividing both addresses and the size; 0 means all four
    * low bits are clear, so a 16-byte block is usable.
    */
   unsigned alignment = ffs((info.src | info.dst | region->size) & 15);
   unsigned log2blksz = alignment ? alignment - 1 : 4;

   assert(log2blksz < ARRAY_SIZE(cmdbuf->device->physical_device->meta.copy.buf2buf));
   mali_ptr rsd =
      cmdbuf->device->physical_device->meta.copy.buf2buf[log2blksz].rsd;
   const struct panfrost_ubo_push *pushmap =
      &cmdbuf->device->physical_device->meta.copy.buf2buf[log2blksz].pushmap;

   /* The copy info is exposed to the shader both as push constants and
    * as a UBO (the compiler promoted the UBO words, see the shader
    * builder asserts). */
   mali_ptr pushconsts =
      panvk_meta_copy_emit_push_constants(pdev, pushmap, &cmdbuf->desc_pool.base,
                                          &info, sizeof(info));
   mali_ptr ubo =
      panvk_meta_copy_emit_ubo(pdev, &cmdbuf->desc_pool.base, &info, sizeof(info));

   /* Flush whatever was being recorded and give the copy its own batch. */
   panvk_per_arch(cmd_close_batch)(cmdbuf);

   struct panvk_batch *batch = panvk_cmd_open_batch(cmdbuf);

   panvk_per_arch(cmd_alloc_tls_desc)(cmdbuf, false);

   mali_ptr tsd = batch->tls.gpu;

   /* One invocation per block. */
   unsigned nblocks = region->size >> log2blksz;
   struct pan_compute_dim num_wg = { nblocks, 1, 1 };
   struct pan_compute_dim wg_sz = { 1, 1, 1};
   struct panfrost_ptr job =
      panvk_meta_copy_emit_compute_job(&cmdbuf->desc_pool.base,
                                       &batch->scoreboard,
                                       &num_wg, &wg_sz,
                                       0, 0, ubo, pushconsts, rsd, tsd);

   util_dynarray_append(&batch->jobs, void *, job.cpu);

   /* Record the BOs touched by the job — presumably used for residency/
    * sync at submit time; confirm against the batch submission code. */
   batch->blit.src = src->bo;
   batch->blit.dst = dst->bo;
   panvk_per_arch(cmd_close_batch)(cmdbuf);
}
1877
1878 void
panvk_per_arch(CmdCopyBuffer2)1879 panvk_per_arch(CmdCopyBuffer2)(VkCommandBuffer commandBuffer,
1880 const VkCopyBufferInfo2 *pCopyBufferInfo)
1881 {
1882 VK_FROM_HANDLE(panvk_cmd_buffer, cmdbuf, commandBuffer);
1883 VK_FROM_HANDLE(panvk_buffer, src, pCopyBufferInfo->srcBuffer);
1884 VK_FROM_HANDLE(panvk_buffer, dst, pCopyBufferInfo->dstBuffer);
1885
1886 for (unsigned i = 0; i < pCopyBufferInfo->regionCount; i++) {
1887 panvk_meta_copy_buf2buf(cmdbuf, src, dst, &pCopyBufferInfo->pRegions[i]);
1888 }
1889 }
1890
/* Parameters consumed by the buffer-fill shader: GPU address of the
 * first word to write (buffer base + fill offset) and the 32-bit fill
 * pattern. Passed to the shader through UBO 0 / push constants.
 */
struct panvk_meta_fill_buf_info {
   mali_ptr start;
   uint32_t val;
};
1895
/* Emits a NIR UBO load of one field of panvk_meta_fill_buf_info from UBO
 * binding 0. Bit size and byte offset come from sizeof/offsetof on the C
 * struct so shader-side and CPU-side layouts cannot drift apart.
 */
#define panvk_meta_fill_buf_get_info_field(b, field) \
        nir_load_ubo((b), 1, \
                     sizeof(((struct panvk_meta_fill_buf_info *)0)->field) * 8, \
                     nir_imm_int(b, 0), \
                     nir_imm_int(b, offsetof(struct panvk_meta_fill_buf_info, field)), \
                     .align_mul = 4, \
                     .align_offset = 0, \
                     .range_base = 0, \
                     .range = ~0)
1905
/* Builds and uploads a compute shader that fills a buffer with a 32-bit
 * pattern: invocation N stores the pattern at start + N * 4. Start
 * pointer and pattern come from a single UBO whose words the compiler is
 * expected to promote to push constants.
 *
 * Returns the GPU address of the uploaded binary and fills *shader_info.
 */
static mali_ptr
panvk_meta_fill_buf_shader(struct panfrost_device *pdev,
                           struct pan_pool *bin_pool,
                           struct pan_shader_info *shader_info)
{
   /* FIXME: Won't work on compute queues, but we can't do that with
    * a compute shader if the destination is an AFBC surface.
    * NOTE(review): this remark looks copy-pasted from the image copy
    * paths — a buffer destination has no AFBC layout; confirm.
    */
   nir_builder b =
      nir_builder_init_simple_shader(MESA_SHADER_COMPUTE,
                                     GENX(pan_shader_get_compiler_options)(),
                                     "panvk_meta_fill_buf()");

   /* The fill info struct lives in UBO 0. */
   b.shader->info.num_ubos = 1;

   nir_ssa_def *coord = nir_load_global_invocation_id(&b, 32);

   /* Byte offset of this invocation's word: X invocation id * 4. */
   nir_ssa_def *offset =
      nir_u2u64(&b, nir_imul(&b, nir_channel(&b, coord, 0), nir_imm_int(&b, sizeof(uint32_t))));
   nir_ssa_def *ptr =
      nir_iadd(&b, panvk_meta_fill_buf_get_info_field(&b, start), offset);
   nir_ssa_def *val = panvk_meta_fill_buf_get_info_field(&b, val);

   nir_store_global(&b, ptr, sizeof(uint32_t), val, 1);

   struct panfrost_compile_inputs inputs = {
      .gpu_id = pdev->gpu_id,
      .is_blit = true,
   };

   struct util_dynarray binary;

   util_dynarray_init(&binary, NULL);
   GENX(pan_shader_compile)(b.shader, &inputs, &binary, shader_info);

   /* Make sure UBO words have been upgraded to push constants and everything
    * is at the right place. 3 words: 64-bit start pointer (2) + val (1).
    */
   assert(shader_info->ubo_count == 1);
   assert(shader_info->push.count == 3);

   /* Shader binaries need 128-byte alignment on v6+ (Bifrost), 64 before. */
   mali_ptr shader =
      pan_pool_upload_aligned(bin_pool, binary.data, binary.size,
                              PAN_ARCH >= 6 ? 128 : 64);

   util_dynarray_fini(&binary);
   ralloc_free(b.shader);

   return shader;
}
1956
1957 static mali_ptr
panvk_meta_fill_buf_emit_rsd(struct panfrost_device * pdev,struct pan_pool * bin_pool,struct pan_pool * desc_pool,struct panfrost_ubo_push * pushmap)1958 panvk_meta_fill_buf_emit_rsd(struct panfrost_device *pdev,
1959 struct pan_pool *bin_pool,
1960 struct pan_pool *desc_pool,
1961 struct panfrost_ubo_push *pushmap)
1962 {
1963 struct pan_shader_info shader_info;
1964
1965 mali_ptr shader =
1966 panvk_meta_fill_buf_shader(pdev, bin_pool, &shader_info);
1967
1968 struct panfrost_ptr rsd_ptr =
1969 pan_pool_alloc_desc_aggregate(desc_pool,
1970 PAN_DESC(RENDERER_STATE));
1971
1972 pan_pack(rsd_ptr.cpu, RENDERER_STATE, cfg) {
1973 pan_shader_prepare_rsd(&shader_info, shader, &cfg);
1974 }
1975
1976 *pushmap = shader_info.push;
1977 return rsd_ptr.gpu;
1978 }
1979
1980 static void
panvk_meta_fill_buf_init(struct panvk_physical_device * dev)1981 panvk_meta_fill_buf_init(struct panvk_physical_device *dev)
1982 {
1983 dev->meta.copy.fillbuf.rsd =
1984 panvk_meta_fill_buf_emit_rsd(&dev->pdev, &dev->meta.bin_pool.base,
1985 &dev->meta.desc_pool.base,
1986 &dev->meta.copy.fillbuf.pushmap);
1987 }
1988
/* Records a compute-based fill of `size` bytes of dst at `offset` with
 * the 32-bit pattern `val`, in a batch of its own. Offset and size must
 * be 4-byte aligned (Vulkan guarantees this for vkCmdFillBuffer);
 * VK_WHOLE_SIZE is resolved to the rest of the buffer rounded down to a
 * word boundary.
 */
static void
panvk_meta_fill_buf(struct panvk_cmd_buffer *cmdbuf,
                    const struct panvk_buffer *dst,
                    VkDeviceSize size, VkDeviceSize offset,
                    uint32_t val)
{
   struct panfrost_device *pdev = &cmdbuf->device->physical_device->pdev;

   if (size == VK_WHOLE_SIZE)
      size = (dst->size - offset) & ~3ULL;

   struct panvk_meta_fill_buf_info info = {
      .start = dst->bo->ptr.gpu + dst->bo_offset + offset,
      .val = val,
   };

   assert(!(offset & 3) && !(size & 3));

   /* One shader invocation per 32-bit word written. */
   unsigned nwords = size / sizeof(uint32_t);
   mali_ptr rsd =
      cmdbuf->device->physical_device->meta.copy.fillbuf.rsd;
   const struct panfrost_ubo_push *pushmap =
      &cmdbuf->device->physical_device->meta.copy.fillbuf.pushmap;

   /* The fill info is exposed to the shader both as push constants and
    * as a UBO (the compiler promoted the UBO words, see the shader
    * builder asserts). */
   mali_ptr pushconsts =
      panvk_meta_copy_emit_push_constants(pdev, pushmap, &cmdbuf->desc_pool.base,
                                          &info, sizeof(info));
   mali_ptr ubo =
      panvk_meta_copy_emit_ubo(pdev, &cmdbuf->desc_pool.base, &info, sizeof(info));

   /* Flush whatever was being recorded and give the fill its own batch. */
   panvk_per_arch(cmd_close_batch)(cmdbuf);

   struct panvk_batch *batch = panvk_cmd_open_batch(cmdbuf);

   panvk_per_arch(cmd_alloc_tls_desc)(cmdbuf, false);

   mali_ptr tsd = batch->tls.gpu;

   struct pan_compute_dim num_wg = { nwords, 1, 1 };
   struct pan_compute_dim wg_sz = { 1, 1, 1};
   struct panfrost_ptr job =
      panvk_meta_copy_emit_compute_job(&cmdbuf->desc_pool.base,
                                       &batch->scoreboard,
                                       &num_wg, &wg_sz,
                                       0, 0, ubo, pushconsts, rsd, tsd);

   util_dynarray_append(&batch->jobs, void *, job.cpu);

   /* Record the BO written by the job — presumably used for residency/
    * sync at submit time; confirm against the batch submission code. */
   batch->blit.dst = dst->bo;
   panvk_per_arch(cmd_close_batch)(cmdbuf);
}
2040
2041 void
panvk_per_arch(CmdFillBuffer)2042 panvk_per_arch(CmdFillBuffer)(VkCommandBuffer commandBuffer,
2043 VkBuffer dstBuffer,
2044 VkDeviceSize dstOffset,
2045 VkDeviceSize fillSize,
2046 uint32_t data)
2047 {
2048 VK_FROM_HANDLE(panvk_cmd_buffer, cmdbuf, commandBuffer);
2049 VK_FROM_HANDLE(panvk_buffer, dst, dstBuffer);
2050
2051 panvk_meta_fill_buf(cmdbuf, dst, fillSize, dstOffset, data);
2052 }
2053
/* Records an inline buffer update: the host data is first staged in the
 * command buffer's descriptor pool, then copied to dst with the 4-byte
 * buffer-to-buffer copy shader variant. Size is expected to be a
 * multiple of 4 (Vulkan guarantees this for vkCmdUpdateBuffer).
 */
static void
panvk_meta_update_buf(struct panvk_cmd_buffer *cmdbuf,
                      const struct panvk_buffer *dst, VkDeviceSize offset,
                      VkDeviceSize size, const void *data)
{
   struct panfrost_device *pdev = &cmdbuf->device->physical_device->pdev;

   struct panvk_meta_copy_buf2buf_info info = {
      /* Stage the host data on the GPU; the pool's lifetime covers the
       * command buffer's execution. */
      .src = pan_pool_upload_aligned(&cmdbuf->desc_pool.base, data, size, 4),
      .dst = dst->bo->ptr.gpu + dst->bo_offset + offset,
   };

   /* Always use the 32-bit block variant (log2(4) == 2). */
   unsigned log2blksz = ffs(sizeof(uint32_t)) - 1;

   mali_ptr rsd =
      cmdbuf->device->physical_device->meta.copy.buf2buf[log2blksz].rsd;
   const struct panfrost_ubo_push *pushmap =
      &cmdbuf->device->physical_device->meta.copy.buf2buf[log2blksz].pushmap;

   /* The copy info is exposed to the shader both as push constants and
    * as a UBO (the compiler promoted the UBO words, see the shader
    * builder asserts). */
   mali_ptr pushconsts =
      panvk_meta_copy_emit_push_constants(pdev, pushmap, &cmdbuf->desc_pool.base,
                                          &info, sizeof(info));
   mali_ptr ubo =
      panvk_meta_copy_emit_ubo(pdev, &cmdbuf->desc_pool.base, &info, sizeof(info));

   /* Flush whatever was being recorded and give the copy its own batch. */
   panvk_per_arch(cmd_close_batch)(cmdbuf);

   struct panvk_batch *batch = panvk_cmd_open_batch(cmdbuf);

   panvk_per_arch(cmd_alloc_tls_desc)(cmdbuf, false);

   mali_ptr tsd = batch->tls.gpu;

   /* One invocation per 4-byte block. */
   unsigned nblocks = size >> log2blksz;
   struct pan_compute_dim num_wg = { nblocks, 1, 1 };
   struct pan_compute_dim wg_sz = { 1, 1, 1};
   struct panfrost_ptr job =
      panvk_meta_copy_emit_compute_job(&cmdbuf->desc_pool.base,
                                       &batch->scoreboard,
                                       &num_wg, &wg_sz,
                                       0, 0, ubo, pushconsts, rsd, tsd);

   util_dynarray_append(&batch->jobs, void *, job.cpu);

   /* Record the BO written by the job — presumably used for residency/
    * sync at submit time; confirm against the batch submission code. */
   batch->blit.dst = dst->bo;
   panvk_per_arch(cmd_close_batch)(cmdbuf);
}
2101
2102 void
panvk_per_arch(CmdUpdateBuffer)2103 panvk_per_arch(CmdUpdateBuffer)(VkCommandBuffer commandBuffer,
2104 VkBuffer dstBuffer,
2105 VkDeviceSize dstOffset,
2106 VkDeviceSize dataSize,
2107 const void *pData)
2108 {
2109 VK_FROM_HANDLE(panvk_cmd_buffer, cmdbuf, commandBuffer);
2110 VK_FROM_HANDLE(panvk_buffer, dst, dstBuffer);
2111
2112 panvk_meta_update_buf(cmdbuf, dst, dstOffset, dataSize, pData);
2113 }
2114
/* Pre-compiles all meta copy/fill shader variants and bakes their
 * renderer state descriptors at physical-device init time, so command
 * buffer recording never has to compile anything.
 *
 * NOTE(review): the boolean passed to the img2img init selects between
 * two shader variants whose meaning is not visible in this chunk —
 * check panvk_meta_copy_img2img_init().
 */
void
panvk_per_arch(meta_copy_init)(struct panvk_physical_device *dev)
{
   panvk_meta_copy_img2img_init(dev, false);
   panvk_meta_copy_img2img_init(dev, true);
   panvk_meta_copy_buf2img_init(dev);
   panvk_meta_copy_img2buf_init(dev);
   panvk_meta_copy_buf2buf_init(dev);
   panvk_meta_fill_buf_init(dev);
}
2125