/*
 * Copyright © 2016 Dave Airlie
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include <assert.h>
#include <stdbool.h>

#include "nir/nir_builder.h"
#include "radv_meta.h"
#include "radv_private.h"
#include "sid.h"
#include "vk_format.h"

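/* Convert a linear-light resolve result to sRGB encoding by applying the
 * standard piecewise sRGB transfer function to each RGB channel:
 *
 *    out = 12.92 * c                      if c <= 0.0031308
 *    out = 1.055 * pow(c, 1/2.4) - 0.055  otherwise
 *
 * The comparison constant 0x3b4d2e1c is the IEEE-754 bit pattern of
 * ~0.0031308f, emitted via nir_imm_int() so nir_flt() compares against the
 * raw float value. Alpha is passed through unchanged.
 */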
static nir_ssa_def *
radv_meta_build_resolve_srgb_conversion(nir_builder *b, nir_ssa_def *input)
{
   unsigned i;

   nir_ssa_def *cmp[3];
   for (i = 0; i < 3; i++)
      cmp[i] = nir_flt(b, nir_channel(b, input, i), nir_imm_int(b, 0x3b4d2e1c));

   nir_ssa_def *ltvals[3];
   for (i = 0; i < 3; i++)
      ltvals[i] = nir_fmul(b, nir_channel(b, input, i), nir_imm_float(b, 12.92));

   nir_ssa_def *gtvals[3];

   for (i = 0; i < 3; i++) {
      gtvals[i] = nir_fpow(b, nir_channel(b, input, i), nir_imm_float(b, 1.0 / 2.4));
      gtvals[i] = nir_fmul(b, gtvals[i], nir_imm_float(b, 1.055));
      gtvals[i] = nir_fsub(b, gtvals[i], nir_imm_float(b, 0.055));
   }

   nir_ssa_def *comp[4];
   for (i = 0; i < 3; i++)
      comp[i] = nir_bcsel(b, cmp[i], ltvals[i], gtvals[i]);
   comp[3] = nir_channels(b, input, 1 << 3);
   return nir_vec(b, comp, 4);
}

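/* Build the color resolve compute shader: one invocation per pixel. Samples
 * are fetched from the multisampled source (set 0, binding 0) and the
 * resolved value is written to the single-sample storage image (set 0,
 * binding 1); the actual sample-combining logic lives in the shared helper
 * radv_meta_build_resolve_shader_core().
 */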
static nir_shader *
build_resolve_compute_shader(struct radv_device *dev, bool is_integer, bool is_srgb, int samples)
{
   const struct glsl_type *sampler_type =
      glsl_sampler_type(GLSL_SAMPLER_DIM_MS, false, false, GLSL_TYPE_FLOAT);
   const struct glsl_type *img_type = glsl_image_type(GLSL_SAMPLER_DIM_2D, false, GLSL_TYPE_FLOAT);
   nir_builder b =
      nir_builder_init_simple_shader(MESA_SHADER_COMPUTE, NULL, "meta_resolve_cs-%d-%s", samples,
                                     is_integer ? "int" : (is_srgb ? "srgb" : "float"));
   b.shader->info.workgroup_size[0] = 8;
   b.shader->info.workgroup_size[1] = 8;
   b.shader->info.workgroup_size[2] = 1;

   nir_variable *input_img = nir_variable_create(b.shader, nir_var_uniform, sampler_type, "s_tex");
   input_img->data.descriptor_set = 0;
   input_img->data.binding = 0;

   nir_variable *output_img = nir_variable_create(b.shader, nir_var_uniform, img_type, "out_img");
   output_img->data.descriptor_set = 0;
   output_img->data.binding = 1;

   nir_ssa_def *global_id = get_global_ids(&b, 2);

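   /* The 16-byte push-constant block laid out in create_layout() packs
    * src_offset.xy at byte offset 0 and dst_offset.xy at byte offset 8;
    * emit_resolve() pushes the four 32-bit values in that order.
    */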
   nir_ssa_def *src_offset = nir_load_push_constant(&b, 2, 32, nir_imm_int(&b, 0), .range = 16);
   nir_ssa_def *dst_offset = nir_load_push_constant(&b, 2, 32, nir_imm_int(&b, 8), .range = 16);

   nir_ssa_def *src_coord = nir_iadd(&b, global_id, src_offset);
   nir_ssa_def *dst_coord = nir_iadd(&b, global_id, dst_offset);

   nir_variable *color = nir_local_variable_create(b.impl, glsl_vec4_type(), "color");

   radv_meta_build_resolve_shader_core(&b, is_integer, samples, input_img, color, src_coord);

   nir_ssa_def *outval = nir_load_var(&b, color);
   if (is_srgb)
      outval = radv_meta_build_resolve_srgb_conversion(&b, outval);

   nir_ssa_def *img_coord = nir_vec4(&b, nir_channel(&b, dst_coord, 0),
                                     nir_channel(&b, dst_coord, 1),
                                     nir_ssa_undef(&b, 1, 32),
                                     nir_ssa_undef(&b, 1, 32));

   nir_image_deref_store(&b, &nir_build_deref_var(&b, output_img)->dest.ssa, img_coord,
                         nir_ssa_undef(&b, 1, 32), outval, nir_imm_int(&b, 0),
                         .image_dim = GLSL_SAMPLER_DIM_2D);
   return b.shader;
}

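/* Index used to select between the depth and stencil variants of the
 * depth/stencil resolve shaders and pipelines below.
 */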
enum {
   DEPTH_RESOLVE,
   STENCIL_RESOLVE,
};

static const char *
get_resolve_mode_str(VkResolveModeFlagBits resolve_mode)
{
   switch (resolve_mode) {
   case VK_RESOLVE_MODE_SAMPLE_ZERO_BIT_KHR:
      return "zero";
   case VK_RESOLVE_MODE_AVERAGE_BIT_KHR:
      return "average";
   case VK_RESOLVE_MODE_MIN_BIT_KHR:
      return "min";
   case VK_RESOLVE_MODE_MAX_BIT_KHR:
      return "max";
   default:
      unreachable("invalid resolve mode");
   }
}

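/* Build the depth/stencil resolve compute shader for one of the
 * VK_KHR_depth_stencil_resolve modes. The source is a multisampled 2D array
 * texture and the destination a 2D array storage image, so a single dispatch
 * can resolve all layers; the Z component of the global invocation ID selects
 * the layer.
 */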
static nir_shader *
build_depth_stencil_resolve_compute_shader(struct radv_device *dev, int samples, int index,
                                           VkResolveModeFlagBits resolve_mode)
{
   const struct glsl_type *sampler_type =
      glsl_sampler_type(GLSL_SAMPLER_DIM_MS, false, true, GLSL_TYPE_FLOAT);
   const struct glsl_type *img_type = glsl_image_type(GLSL_SAMPLER_DIM_2D, true, GLSL_TYPE_FLOAT);

   nir_builder b = nir_builder_init_simple_shader(
      MESA_SHADER_COMPUTE, NULL, "meta_resolve_cs_%s-%s-%d",
      index == DEPTH_RESOLVE ? "depth" : "stencil", get_resolve_mode_str(resolve_mode), samples);
   b.shader->info.workgroup_size[0] = 8;
   b.shader->info.workgroup_size[1] = 8;
   b.shader->info.workgroup_size[2] = 1;

   nir_variable *input_img = nir_variable_create(b.shader, nir_var_uniform, sampler_type, "s_tex");
   input_img->data.descriptor_set = 0;
   input_img->data.binding = 0;

   nir_variable *output_img = nir_variable_create(b.shader, nir_var_uniform, img_type, "out_img");
   output_img->data.descriptor_set = 0;
   output_img->data.binding = 1;

   nir_ssa_def *img_coord = get_global_ids(&b, 3);

   nir_ssa_def *input_img_deref = &nir_build_deref_var(&b, input_img)->dest.ssa;

   nir_alu_type type = index == DEPTH_RESOLVE ? nir_type_float32 : nir_type_uint32;

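   /* Fetch sample 0 unconditionally with txf_ms; for SAMPLE_ZERO this is the
    * final result, while the other modes fold the remaining samples into it
    * below (fadd for average, fmin/umin and fmax/umax for min/max).
    */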
   nir_tex_instr *tex = nir_tex_instr_create(b.shader, 3);
   tex->sampler_dim = GLSL_SAMPLER_DIM_MS;
   tex->op = nir_texop_txf_ms;
   tex->src[0].src_type = nir_tex_src_coord;
   tex->src[0].src = nir_src_for_ssa(img_coord);
   tex->src[1].src_type = nir_tex_src_ms_index;
   tex->src[1].src = nir_src_for_ssa(nir_imm_int(&b, 0));
   tex->src[2].src_type = nir_tex_src_texture_deref;
   tex->src[2].src = nir_src_for_ssa(input_img_deref);
   tex->dest_type = type;
   tex->is_array = true;
   tex->coord_components = 3;

   nir_ssa_dest_init(&tex->instr, &tex->dest, 4, 32, "tex");
   nir_builder_instr_insert(&b, &tex->instr);

   nir_ssa_def *outval = &tex->dest.ssa;

   if (resolve_mode != VK_RESOLVE_MODE_SAMPLE_ZERO_BIT_KHR) {
      for (int i = 1; i < samples; i++) {
         nir_tex_instr *tex_add = nir_tex_instr_create(b.shader, 3);
         tex_add->sampler_dim = GLSL_SAMPLER_DIM_MS;
         tex_add->op = nir_texop_txf_ms;
         tex_add->src[0].src_type = nir_tex_src_coord;
         tex_add->src[0].src = nir_src_for_ssa(img_coord);
         tex_add->src[1].src_type = nir_tex_src_ms_index;
         tex_add->src[1].src = nir_src_for_ssa(nir_imm_int(&b, i));
         tex_add->src[2].src_type = nir_tex_src_texture_deref;
         tex_add->src[2].src = nir_src_for_ssa(input_img_deref);
         tex_add->dest_type = type;
         tex_add->is_array = true;
         tex_add->coord_components = 3;

         nir_ssa_dest_init(&tex_add->instr, &tex_add->dest, 4, 32, "tex");
         nir_builder_instr_insert(&b, &tex_add->instr);

         switch (resolve_mode) {
         case VK_RESOLVE_MODE_AVERAGE_BIT_KHR:
            assert(index == DEPTH_RESOLVE);
            outval = nir_fadd(&b, outval, &tex_add->dest.ssa);
            break;
         case VK_RESOLVE_MODE_MIN_BIT_KHR:
            if (index == DEPTH_RESOLVE)
               outval = nir_fmin(&b, outval, &tex_add->dest.ssa);
            else
               outval = nir_umin(&b, outval, &tex_add->dest.ssa);
            break;
         case VK_RESOLVE_MODE_MAX_BIT_KHR:
            if (index == DEPTH_RESOLVE)
               outval = nir_fmax(&b, outval, &tex_add->dest.ssa);
            else
               outval = nir_umax(&b, outval, &tex_add->dest.ssa);
            break;
         default:
            unreachable("invalid resolve mode");
         }
      }

      if (resolve_mode == VK_RESOLVE_MODE_AVERAGE_BIT_KHR)
         outval = nir_fdiv(&b, outval, nir_imm_float(&b, samples));
   }

   nir_ssa_def *coord = nir_vec4(&b, nir_channel(&b, img_coord, 0), nir_channel(&b, img_coord, 1),
                                 nir_channel(&b, img_coord, 2), nir_ssa_undef(&b, 1, 32));
   nir_image_deref_store(&b, &nir_build_deref_var(&b, output_img)->dest.ssa, coord,
                         nir_ssa_undef(&b, 1, 32), outval, nir_imm_int(&b, 0),
                         .image_dim = GLSL_SAMPLER_DIM_2D, .image_array = true);
   return b.shader;
}

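/* Create the descriptor set layout and pipeline layout shared by every
 * resolve compute pipeline in this file: a push-descriptor set with the
 * sampled source image at binding 0 and the storage destination image at
 * binding 1, plus 16 bytes of push constants for the src/dst offsets.
 */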
static VkResult
create_layout(struct radv_device *device)
{
   VkResult result;
   /*
    * Two descriptors: one for the multisampled image being sampled and one
    * for the single-sample image being written.
    */
   VkDescriptorSetLayoutCreateInfo ds_create_info = {
      .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,
      .flags = VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR,
      .bindingCount = 2,
      .pBindings = (VkDescriptorSetLayoutBinding[]){
         {.binding = 0,
          .descriptorType = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE,
          .descriptorCount = 1,
          .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
          .pImmutableSamplers = NULL},
         {.binding = 1,
          .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
          .descriptorCount = 1,
          .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
          .pImmutableSamplers = NULL},
      }};

   result = radv_CreateDescriptorSetLayout(radv_device_to_handle(device), &ds_create_info,
                                           &device->meta_state.alloc,
                                           &device->meta_state.resolve_compute.ds_layout);
   if (result != VK_SUCCESS)
      goto fail;

   VkPipelineLayoutCreateInfo pl_create_info = {
      .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
      .setLayoutCount = 1,
      .pSetLayouts = &device->meta_state.resolve_compute.ds_layout,
      .pushConstantRangeCount = 1,
      .pPushConstantRanges = &(VkPushConstantRange){VK_SHADER_STAGE_COMPUTE_BIT, 0, 16},
   };

   result = radv_CreatePipelineLayout(radv_device_to_handle(device), &pl_create_info,
                                      &device->meta_state.alloc,
                                      &device->meta_state.resolve_compute.p_layout);
   if (result != VK_SUCCESS)
      goto fail;
   return VK_SUCCESS;
fail:
   return result;
}

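/* Compile one color resolve pipeline variant on demand. Creation runs under
 * the meta-state mutex, and the early *pipeline check makes the call a no-op
 * when another thread already built this variant.
 */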
static VkResult
create_resolve_pipeline(struct radv_device *device, int samples, bool is_integer, bool is_srgb,
                        VkPipeline *pipeline)
{
   VkResult result;

   mtx_lock(&device->meta_state.mtx);
   if (*pipeline) {
      mtx_unlock(&device->meta_state.mtx);
      return VK_SUCCESS;
   }

   nir_shader *cs = build_resolve_compute_shader(device, is_integer, is_srgb, samples);

   /* compute shader */

   VkPipelineShaderStageCreateInfo pipeline_shader_stage = {
      .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
      .stage = VK_SHADER_STAGE_COMPUTE_BIT,
      .module = vk_shader_module_handle_from_nir(cs),
      .pName = "main",
      .pSpecializationInfo = NULL,
   };

   VkComputePipelineCreateInfo vk_pipeline_info = {
      .sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,
      .stage = pipeline_shader_stage,
      .flags = 0,
      .layout = device->meta_state.resolve_compute.p_layout,
   };

   result = radv_CreateComputePipelines(radv_device_to_handle(device),
                                        radv_pipeline_cache_to_handle(&device->meta_state.cache), 1,
                                        &vk_pipeline_info, NULL, pipeline);
   if (result != VK_SUCCESS)
      goto fail;

   ralloc_free(cs);
   mtx_unlock(&device->meta_state.mtx);
   return VK_SUCCESS;
fail:
   ralloc_free(cs);
   mtx_unlock(&device->meta_state.mtx);
   return result;
}

static VkResult
create_depth_stencil_resolve_pipeline(struct radv_device *device, int samples, int index,
                                      VkResolveModeFlagBits resolve_mode, VkPipeline *pipeline)
{
   VkResult result;

   mtx_lock(&device->meta_state.mtx);
   if (*pipeline) {
      mtx_unlock(&device->meta_state.mtx);
      return VK_SUCCESS;
   }

   nir_shader *cs =
      build_depth_stencil_resolve_compute_shader(device, samples, index, resolve_mode);

   /* compute shader */
   VkPipelineShaderStageCreateInfo pipeline_shader_stage = {
      .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
      .stage = VK_SHADER_STAGE_COMPUTE_BIT,
      .module = vk_shader_module_handle_from_nir(cs),
      .pName = "main",
      .pSpecializationInfo = NULL,
   };

   VkComputePipelineCreateInfo vk_pipeline_info = {
      .sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,
      .stage = pipeline_shader_stage,
      .flags = 0,
      .layout = device->meta_state.resolve_compute.p_layout,
   };

   result = radv_CreateComputePipelines(radv_device_to_handle(device),
                                        radv_pipeline_cache_to_handle(&device->meta_state.cache), 1,
                                        &vk_pipeline_info, NULL, pipeline);
   if (result != VK_SUCCESS)
      goto fail;

   ralloc_free(cs);
   mtx_unlock(&device->meta_state.mtx);
   return VK_SUCCESS;
fail:
   ralloc_free(cs);
   mtx_unlock(&device->meta_state.mtx);
   return result;
}

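/* Unless pipelines are created on demand, precompile every variant up front:
 * for each sample count, the float/integer/sRGB color pipelines and the
 * average/min/max depth and min/max stencil pipelines, plus the
 * sample-count-independent SAMPLE_ZERO pipelines.
 */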
VkResult
radv_device_init_meta_resolve_compute_state(struct radv_device *device, bool on_demand)
{
   struct radv_meta_state *state = &device->meta_state;
   VkResult res;

   res = create_layout(device);
   if (res != VK_SUCCESS)
      goto fail;

   if (on_demand)
      return VK_SUCCESS;

   for (uint32_t i = 0; i < MAX_SAMPLES_LOG2; ++i) {
      uint32_t samples = 1 << i;

      res = create_resolve_pipeline(device, samples, false, false,
                                    &state->resolve_compute.rc[i].pipeline);
      if (res != VK_SUCCESS)
         goto fail;

      res = create_resolve_pipeline(device, samples, true, false,
                                    &state->resolve_compute.rc[i].i_pipeline);
      if (res != VK_SUCCESS)
         goto fail;

      res = create_resolve_pipeline(device, samples, false, true,
                                    &state->resolve_compute.rc[i].srgb_pipeline);
      if (res != VK_SUCCESS)
         goto fail;

      res = create_depth_stencil_resolve_pipeline(
         device, samples, DEPTH_RESOLVE, VK_RESOLVE_MODE_AVERAGE_BIT_KHR,
         &state->resolve_compute.depth[i].average_pipeline);
      if (res != VK_SUCCESS)
         goto fail;

      res = create_depth_stencil_resolve_pipeline(device, samples, DEPTH_RESOLVE,
                                                  VK_RESOLVE_MODE_MAX_BIT_KHR,
                                                  &state->resolve_compute.depth[i].max_pipeline);
      if (res != VK_SUCCESS)
         goto fail;

      res = create_depth_stencil_resolve_pipeline(device, samples, DEPTH_RESOLVE,
                                                  VK_RESOLVE_MODE_MIN_BIT_KHR,
                                                  &state->resolve_compute.depth[i].min_pipeline);
      if (res != VK_SUCCESS)
         goto fail;

      res = create_depth_stencil_resolve_pipeline(device, samples, STENCIL_RESOLVE,
                                                  VK_RESOLVE_MODE_MAX_BIT_KHR,
                                                  &state->resolve_compute.stencil[i].max_pipeline);
      if (res != VK_SUCCESS)
         goto fail;

      res = create_depth_stencil_resolve_pipeline(device, samples, STENCIL_RESOLVE,
                                                  VK_RESOLVE_MODE_MIN_BIT_KHR,
                                                  &state->resolve_compute.stencil[i].min_pipeline);
      if (res != VK_SUCCESS)
         goto fail;
   }

   res = create_depth_stencil_resolve_pipeline(device, 0, DEPTH_RESOLVE,
                                               VK_RESOLVE_MODE_SAMPLE_ZERO_BIT_KHR,
                                               &state->resolve_compute.depth_zero_pipeline);
   if (res != VK_SUCCESS)
      goto fail;

   res = create_depth_stencil_resolve_pipeline(device, 0, STENCIL_RESOLVE,
                                               VK_RESOLVE_MODE_SAMPLE_ZERO_BIT_KHR,
                                               &state->resolve_compute.stencil_zero_pipeline);
   if (res != VK_SUCCESS)
      goto fail;

   return VK_SUCCESS;
fail:
   radv_device_finish_meta_resolve_compute_state(device);
   return res;
}

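/* Tear down everything created above. Vulkan destroy entry points accept
 * VK_NULL_HANDLE, so this is safe to call on partially initialized state;
 * the init function above uses it as its failure path.
 */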
void
radv_device_finish_meta_resolve_compute_state(struct radv_device *device)
{
   struct radv_meta_state *state = &device->meta_state;
   for (uint32_t i = 0; i < MAX_SAMPLES_LOG2; ++i) {
      radv_DestroyPipeline(radv_device_to_handle(device), state->resolve_compute.rc[i].pipeline,
                           &state->alloc);

      radv_DestroyPipeline(radv_device_to_handle(device), state->resolve_compute.rc[i].i_pipeline,
                           &state->alloc);

      radv_DestroyPipeline(radv_device_to_handle(device),
                           state->resolve_compute.rc[i].srgb_pipeline, &state->alloc);

      radv_DestroyPipeline(radv_device_to_handle(device),
                           state->resolve_compute.depth[i].average_pipeline, &state->alloc);

      radv_DestroyPipeline(radv_device_to_handle(device),
                           state->resolve_compute.depth[i].max_pipeline, &state->alloc);

      radv_DestroyPipeline(radv_device_to_handle(device),
                           state->resolve_compute.depth[i].min_pipeline, &state->alloc);

      radv_DestroyPipeline(radv_device_to_handle(device),
                           state->resolve_compute.stencil[i].max_pipeline, &state->alloc);

      radv_DestroyPipeline(radv_device_to_handle(device),
                           state->resolve_compute.stencil[i].min_pipeline, &state->alloc);
   }

   radv_DestroyPipeline(radv_device_to_handle(device), state->resolve_compute.depth_zero_pipeline,
                        &state->alloc);

   radv_DestroyPipeline(radv_device_to_handle(device), state->resolve_compute.stencil_zero_pipeline,
                        &state->alloc);

   radv_DestroyDescriptorSetLayout(radv_device_to_handle(device), state->resolve_compute.ds_layout,
                                   &state->alloc);
   radv_DestroyPipelineLayout(radv_device_to_handle(device), state->resolve_compute.p_layout,
                              &state->alloc);
}

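/* Pick the color resolve pipeline matching the source format class and
 * sample count. ffs(samples) - 1 maps the power-of-two sample count to its
 * log2 table index; pipelines missing at this point (on-demand mode) are
 * compiled here.
 */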
static VkPipeline *
radv_get_resolve_pipeline(struct radv_cmd_buffer *cmd_buffer, struct radv_image_view *src_iview)
{
   struct radv_device *device = cmd_buffer->device;
   struct radv_meta_state *state = &device->meta_state;
   uint32_t samples = src_iview->image->info.samples;
   uint32_t samples_log2 = ffs(samples) - 1;
   VkPipeline *pipeline;

   if (vk_format_is_int(src_iview->vk_format))
      pipeline = &state->resolve_compute.rc[samples_log2].i_pipeline;
   else if (vk_format_is_srgb(src_iview->vk_format))
      pipeline = &state->resolve_compute.rc[samples_log2].srgb_pipeline;
   else
      pipeline = &state->resolve_compute.rc[samples_log2].pipeline;

   if (!*pipeline) {
      VkResult ret;

      ret = create_resolve_pipeline(device, samples, vk_format_is_int(src_iview->vk_format),
                                    vk_format_is_srgb(src_iview->vk_format), pipeline);
      if (ret != VK_SUCCESS) {
         cmd_buffer->record_result = ret;
         return NULL;
      }
   }

   return pipeline;
}

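/* Record one 2D color resolve: push the source/destination image descriptors,
 * bind the matching pipeline, push the 16-byte offset block consumed by
 * build_resolve_compute_shader(), and dispatch one thread per pixel of the
 * resolve area.
 */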
static void
emit_resolve(struct radv_cmd_buffer *cmd_buffer, struct radv_image_view *src_iview,
             struct radv_image_view *dest_iview, const VkOffset2D *src_offset,
             const VkOffset2D *dest_offset, const VkExtent2D *resolve_extent)
{
   struct radv_device *device = cmd_buffer->device;
   VkPipeline *pipeline;

   radv_meta_push_descriptor_set(
      cmd_buffer, VK_PIPELINE_BIND_POINT_COMPUTE, device->meta_state.resolve_compute.p_layout,
      0, /* set */
      2, /* descriptorWriteCount */
      (VkWriteDescriptorSet[]){{.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
                                .dstBinding = 0,
                                .dstArrayElement = 0,
                                .descriptorCount = 1,
                                .descriptorType = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE,
                                .pImageInfo =
                                   (VkDescriptorImageInfo[]){
                                      {.sampler = VK_NULL_HANDLE,
                                       .imageView = radv_image_view_to_handle(src_iview),
                                       .imageLayout = VK_IMAGE_LAYOUT_GENERAL},
                                   }},
                               {.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
                                .dstBinding = 1,
                                .dstArrayElement = 0,
                                .descriptorCount = 1,
                                .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
                                .pImageInfo = (VkDescriptorImageInfo[]){
                                   {
                                      .sampler = VK_NULL_HANDLE,
                                      .imageView = radv_image_view_to_handle(dest_iview),
                                      .imageLayout = VK_IMAGE_LAYOUT_GENERAL,
                                   },
                                }}});

   pipeline = radv_get_resolve_pipeline(cmd_buffer, src_iview);
   if (!pipeline)
      return; /* Pipeline creation failed; record_result already holds the error. */

   radv_CmdBindPipeline(radv_cmd_buffer_to_handle(cmd_buffer), VK_PIPELINE_BIND_POINT_COMPUTE,
                        *pipeline);

   unsigned push_constants[4] = {
      src_offset->x,
      src_offset->y,
      dest_offset->x,
      dest_offset->y,
   };
   radv_CmdPushConstants(radv_cmd_buffer_to_handle(cmd_buffer),
                         device->meta_state.resolve_compute.p_layout, VK_SHADER_STAGE_COMPUTE_BIT,
                         0, 16, push_constants);
   radv_unaligned_dispatch(cmd_buffer, resolve_extent->width, resolve_extent->height, 1);
}

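/* Record a depth/stencil resolve. Unlike emit_resolve(), the views cover all
 * layers and the shader indexes the layer with the Z dimension of the
 * dispatch, so no offsets or push constants are needed here.
 */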
static void
emit_depth_stencil_resolve(struct radv_cmd_buffer *cmd_buffer, struct radv_image_view *src_iview,
                           struct radv_image_view *dest_iview, const VkExtent3D *resolve_extent,
                           VkImageAspectFlags aspects, VkResolveModeFlagBits resolve_mode)
{
   struct radv_device *device = cmd_buffer->device;
   const uint32_t samples = src_iview->image->info.samples;
   const uint32_t samples_log2 = ffs(samples) - 1;
   VkPipeline *pipeline;

   radv_meta_push_descriptor_set(
      cmd_buffer, VK_PIPELINE_BIND_POINT_COMPUTE, device->meta_state.resolve_compute.p_layout,
      0, /* set */
      2, /* descriptorWriteCount */
      (VkWriteDescriptorSet[]){{.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
                                .dstBinding = 0,
                                .dstArrayElement = 0,
                                .descriptorCount = 1,
                                .descriptorType = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE,
                                .pImageInfo =
                                   (VkDescriptorImageInfo[]){
                                      {.sampler = VK_NULL_HANDLE,
                                       .imageView = radv_image_view_to_handle(src_iview),
                                       .imageLayout = VK_IMAGE_LAYOUT_GENERAL},
                                   }},
                               {.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
                                .dstBinding = 1,
                                .dstArrayElement = 0,
                                .descriptorCount = 1,
                                .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
                                .pImageInfo = (VkDescriptorImageInfo[]){
                                   {
                                      .sampler = VK_NULL_HANDLE,
                                      .imageView = radv_image_view_to_handle(dest_iview),
                                      .imageLayout = VK_IMAGE_LAYOUT_GENERAL,
                                   },
                                }}});

   switch (resolve_mode) {
   case VK_RESOLVE_MODE_SAMPLE_ZERO_BIT_KHR:
      if (aspects == VK_IMAGE_ASPECT_DEPTH_BIT)
         pipeline = &device->meta_state.resolve_compute.depth_zero_pipeline;
      else
         pipeline = &device->meta_state.resolve_compute.stencil_zero_pipeline;
      break;
   case VK_RESOLVE_MODE_AVERAGE_BIT_KHR:
      assert(aspects == VK_IMAGE_ASPECT_DEPTH_BIT);
      pipeline = &device->meta_state.resolve_compute.depth[samples_log2].average_pipeline;
      break;
   case VK_RESOLVE_MODE_MIN_BIT_KHR:
      if (aspects == VK_IMAGE_ASPECT_DEPTH_BIT)
         pipeline = &device->meta_state.resolve_compute.depth[samples_log2].min_pipeline;
      else
         pipeline = &device->meta_state.resolve_compute.stencil[samples_log2].min_pipeline;
      break;
   case VK_RESOLVE_MODE_MAX_BIT_KHR:
      if (aspects == VK_IMAGE_ASPECT_DEPTH_BIT)
         pipeline = &device->meta_state.resolve_compute.depth[samples_log2].max_pipeline;
      else
         pipeline = &device->meta_state.resolve_compute.stencil[samples_log2].max_pipeline;
      break;
   default:
      unreachable("invalid resolve mode");
   }

   if (!*pipeline) {
      int index = aspects == VK_IMAGE_ASPECT_DEPTH_BIT ? DEPTH_RESOLVE : STENCIL_RESOLVE;
      VkResult ret;

      ret = create_depth_stencil_resolve_pipeline(device, samples, index, resolve_mode, pipeline);
      if (ret != VK_SUCCESS) {
         cmd_buffer->record_result = ret;
         return;
      }
   }

   radv_CmdBindPipeline(radv_cmd_buffer_to_handle(cmd_buffer), VK_PIPELINE_BIND_POINT_COMPUTE,
                        *pipeline);

   radv_unaligned_dispatch(cmd_buffer, resolve_extent->width, resolve_extent->height,
                           resolve_extent->depth);
}

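/* Resolve a multisampled color image into a single-sample image on the
 * compute path. This handles pre-resolve decompression of the source, a DCC
 * decompress of the destination for partial resolves, and a DCC
 * re-initialization afterwards when the destination stays compressed.
 */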
void
radv_meta_resolve_compute_image(struct radv_cmd_buffer *cmd_buffer, struct radv_image *src_image,
                                VkFormat src_format, VkImageLayout src_image_layout,
                                struct radv_image *dest_image, VkFormat dest_format,
                                VkImageLayout dest_image_layout, const VkImageResolve2KHR *region)
{
   struct radv_meta_saved_state saved_state;

   radv_decompress_resolve_src(cmd_buffer, src_image, src_image_layout, region);

   /* For partial resolves, DCC must be decompressed before resolving because
    * the metadata is re-initialized to the uncompressed state afterwards,
    * which would misinterpret the still-compressed texels outside the
    * resolve area.
    */
   uint32_t queue_mask = radv_image_queue_family_mask(dest_image, cmd_buffer->queue_family_index,
                                                      cmd_buffer->queue_family_index);

   if (!radv_image_use_dcc_image_stores(cmd_buffer->device, dest_image) &&
       radv_layout_dcc_compressed(cmd_buffer->device, dest_image, region->dstSubresource.mipLevel,
                                  dest_image_layout, false, queue_mask) &&
       (region->dstOffset.x || region->dstOffset.y || region->dstOffset.z ||
        region->extent.width != dest_image->info.width ||
        region->extent.height != dest_image->info.height ||
        region->extent.depth != dest_image->info.depth)) {
      radv_decompress_dcc(cmd_buffer, dest_image,
                          &(VkImageSubresourceRange){
                             .aspectMask = region->dstSubresource.aspectMask,
                             .baseMipLevel = region->dstSubresource.mipLevel,
                             .levelCount = 1,
                             .baseArrayLayer = region->dstSubresource.baseArrayLayer,
                             .layerCount = region->dstSubresource.layerCount,
                          });
   }

   radv_meta_save(
      &saved_state, cmd_buffer,
      RADV_META_SAVE_COMPUTE_PIPELINE | RADV_META_SAVE_CONSTANTS | RADV_META_SAVE_DESCRIPTORS);

   assert(region->srcSubresource.aspectMask == VK_IMAGE_ASPECT_COLOR_BIT);
   assert(region->dstSubresource.aspectMask == VK_IMAGE_ASPECT_COLOR_BIT);
   assert(region->srcSubresource.layerCount == region->dstSubresource.layerCount);

   const uint32_t src_base_layer =
      radv_meta_get_iview_layer(src_image, &region->srcSubresource, &region->srcOffset);

   const uint32_t dest_base_layer =
      radv_meta_get_iview_layer(dest_image, &region->dstSubresource, &region->dstOffset);

   const struct VkExtent3D extent = radv_sanitize_image_extent(src_image->type, region->extent);
   const struct VkOffset3D srcOffset =
      radv_sanitize_image_offset(src_image->type, region->srcOffset);
   const struct VkOffset3D dstOffset =
      radv_sanitize_image_offset(dest_image->type, region->dstOffset);

   for (uint32_t layer = 0; layer < region->srcSubresource.layerCount; ++layer) {

      struct radv_image_view src_iview;
      radv_image_view_init(&src_iview, cmd_buffer->device,
                           &(VkImageViewCreateInfo){
                              .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
                              .image = radv_image_to_handle(src_image),
                              .viewType = radv_meta_get_view_type(src_image),
                              .format = src_format,
                              .subresourceRange =
                                 {
                                    .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
                                    .baseMipLevel = region->srcSubresource.mipLevel,
                                    .levelCount = 1,
                                    .baseArrayLayer = src_base_layer + layer,
                                    .layerCount = 1,
                                 },
                           },
                           NULL);

      struct radv_image_view dest_iview;
      radv_image_view_init(&dest_iview, cmd_buffer->device,
                           &(VkImageViewCreateInfo){
                              .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
                              .image = radv_image_to_handle(dest_image),
                              .viewType = radv_meta_get_view_type(dest_image),
                              .format = vk_to_non_srgb_format(dest_format),
                              .subresourceRange =
                                 {
                                    .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
                                    .baseMipLevel = region->dstSubresource.mipLevel,
                                    .levelCount = 1,
                                    .baseArrayLayer = dest_base_layer + layer,
                                    .layerCount = 1,
                                 },
                           },
                           NULL);

      emit_resolve(cmd_buffer, &src_iview, &dest_iview, &(VkOffset2D){srcOffset.x, srcOffset.y},
                   &(VkOffset2D){dstOffset.x, dstOffset.y},
                   &(VkExtent2D){extent.width, extent.height});

      radv_image_view_finish(&src_iview);
      radv_image_view_finish(&dest_iview);
   }

   radv_meta_restore(&saved_state, cmd_buffer);

   if (!radv_image_use_dcc_image_stores(cmd_buffer->device, dest_image) &&
       radv_layout_dcc_compressed(cmd_buffer->device, dest_image, region->dstSubresource.mipLevel,
                                  dest_image_layout, false, queue_mask)) {

      cmd_buffer->state.flush_bits |= RADV_CMD_FLAG_CS_PARTIAL_FLUSH | RADV_CMD_FLAG_INV_VCACHE;

      VkImageSubresourceRange range = {
         .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
         .baseMipLevel = region->dstSubresource.mipLevel,
         .levelCount = 1,
         .baseArrayLayer = dest_base_layer,
         .layerCount = region->dstSubresource.layerCount,
      };

      cmd_buffer->state.flush_bits |= radv_init_dcc(cmd_buffer, dest_image, &range, 0xffffffff);
   }
}

/**
 * Emit any needed resolves for the current subpass.
 */
void
radv_cmd_buffer_resolve_subpass_cs(struct radv_cmd_buffer *cmd_buffer)
{
   struct radv_framebuffer *fb = cmd_buffer->state.framebuffer;
   const struct radv_subpass *subpass = cmd_buffer->state.subpass;
   struct radv_subpass_barrier barrier;
   uint32_t layer_count = fb->layers;

   if (subpass->view_mask)
      layer_count = util_last_bit(subpass->view_mask);

   /* Resolves happen before the end-of-subpass barriers get executed, so
    * we have to make the attachment shader-readable.
    */
   barrier.src_stage_mask = VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT;
   barrier.src_access_mask = VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT;
   barrier.dst_access_mask = VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT;
   radv_emit_subpass_barrier(cmd_buffer, &barrier);

   for (uint32_t i = 0; i < subpass->color_count; ++i) {
      struct radv_subpass_attachment src_att = subpass->color_attachments[i];
      struct radv_subpass_attachment dst_att = subpass->resolve_attachments[i];

      if (dst_att.attachment == VK_ATTACHMENT_UNUSED)
         continue;

      struct radv_image_view *src_iview = cmd_buffer->state.attachments[src_att.attachment].iview;
      struct radv_image_view *dst_iview = cmd_buffer->state.attachments[dst_att.attachment].iview;

      VkImageResolve2KHR region = {
         .sType = VK_STRUCTURE_TYPE_IMAGE_RESOLVE_2_KHR,
         .extent = (VkExtent3D){fb->width, fb->height, 1},
         .srcSubresource =
            (VkImageSubresourceLayers){
               .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
               .mipLevel = src_iview->base_mip,
               .baseArrayLayer = src_iview->base_layer,
               .layerCount = layer_count,
            },
         .dstSubresource =
            (VkImageSubresourceLayers){
               .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
               .mipLevel = dst_iview->base_mip,
               .baseArrayLayer = dst_iview->base_layer,
               .layerCount = layer_count,
            },
         .srcOffset = (VkOffset3D){0, 0, 0},
         .dstOffset = (VkOffset3D){0, 0, 0},
      };

      radv_meta_resolve_compute_image(cmd_buffer, src_iview->image, src_iview->vk_format,
                                      src_att.layout, dst_iview->image, dst_iview->vk_format,
                                      dst_att.layout, &region);
   }

   cmd_buffer->state.flush_bits |=
      RADV_CMD_FLAG_CS_PARTIAL_FLUSH | RADV_CMD_FLAG_INV_VCACHE |
      radv_src_access_flush(cmd_buffer, VK_ACCESS_SHADER_WRITE_BIT, NULL);
}

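/**
 * Emit the depth/stencil resolve for the current subpass on the compute
 * path, then re-initialize HTILE for the destination when it remains
 * compressed in its final layout.
 */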
void
radv_depth_stencil_resolve_subpass_cs(struct radv_cmd_buffer *cmd_buffer,
                                      VkImageAspectFlags aspects,
                                      VkResolveModeFlagBits resolve_mode)
{
   struct radv_framebuffer *fb = cmd_buffer->state.framebuffer;
   const struct radv_subpass *subpass = cmd_buffer->state.subpass;
   struct radv_meta_saved_state saved_state;
   uint32_t layer_count = fb->layers;

   if (subpass->view_mask)
      layer_count = util_last_bit(subpass->view_mask);

   /* Resolves happen before the end-of-subpass barriers get executed, so
    * we have to make the attachment shader-readable.
    */
   cmd_buffer->state.flush_bits |=
      radv_src_access_flush(cmd_buffer, VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT, NULL) |
      radv_dst_access_flush(cmd_buffer, VK_ACCESS_SHADER_READ_BIT, NULL) |
      radv_dst_access_flush(cmd_buffer, VK_ACCESS_SHADER_WRITE_BIT, NULL);

   struct radv_subpass_attachment src_att = *subpass->depth_stencil_attachment;
   struct radv_image_view *src_iview = cmd_buffer->state.attachments[src_att.attachment].iview;
   struct radv_image *src_image = src_iview->image;

   VkImageResolve2KHR region = {0};
   region.sType = VK_STRUCTURE_TYPE_IMAGE_RESOLVE_2_KHR;
   region.srcSubresource.aspectMask = aspects;
   region.srcSubresource.mipLevel = 0;
   region.srcSubresource.baseArrayLayer = src_iview->base_layer;
   region.srcSubresource.layerCount = layer_count;

   radv_decompress_resolve_src(cmd_buffer, src_image, src_att.layout, &region);

   radv_meta_save(&saved_state, cmd_buffer,
                  RADV_META_SAVE_COMPUTE_PIPELINE | RADV_META_SAVE_DESCRIPTORS);

   struct radv_subpass_attachment dest_att = *subpass->ds_resolve_attachment;
   struct radv_image_view *dst_iview = cmd_buffer->state.attachments[dest_att.attachment].iview;
   struct radv_image *dst_image = dst_iview->image;

   struct radv_image_view tsrc_iview;
   radv_image_view_init(&tsrc_iview, cmd_buffer->device,
                        &(VkImageViewCreateInfo){
                           .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
                           .image = radv_image_to_handle(src_image),
                           .viewType = radv_meta_get_view_type(src_image),
                           .format = src_iview->vk_format,
                           .subresourceRange =
                              {
                                 .aspectMask = aspects,
                                 .baseMipLevel = src_iview->base_mip,
                                 .levelCount = 1,
                                 .baseArrayLayer = src_iview->base_layer,
                                 .layerCount = layer_count,
                              },
                        },
                        NULL);

   struct radv_image_view tdst_iview;
   radv_image_view_init(&tdst_iview, cmd_buffer->device,
                        &(VkImageViewCreateInfo){
                           .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
                           .image = radv_image_to_handle(dst_image),
                           .viewType = radv_meta_get_view_type(dst_image),
                           .format = dst_iview->vk_format,
                           .subresourceRange =
                              {
                                 .aspectMask = aspects,
                                 .baseMipLevel = dst_iview->base_mip,
                                 .levelCount = 1,
                                 .baseArrayLayer = dst_iview->base_layer,
                                 .layerCount = layer_count,
                              },
                        },
                        NULL);

   emit_depth_stencil_resolve(cmd_buffer, &tsrc_iview, &tdst_iview,
                              &(VkExtent3D){fb->width, fb->height, layer_count}, aspects,
                              resolve_mode);

   cmd_buffer->state.flush_bits |=
      RADV_CMD_FLAG_CS_PARTIAL_FLUSH | RADV_CMD_FLAG_INV_VCACHE |
      radv_src_access_flush(cmd_buffer, VK_ACCESS_SHADER_WRITE_BIT, NULL);

   VkImageLayout layout = cmd_buffer->state.attachments[dest_att.attachment].current_layout;
   uint32_t queue_mask = radv_image_queue_family_mask(dst_image, cmd_buffer->queue_family_index,
                                                      cmd_buffer->queue_family_index);

   if (radv_layout_is_htile_compressed(cmd_buffer->device, dst_image, layout, false, queue_mask)) {
      VkImageSubresourceRange range = {0};
      range.aspectMask = aspects;
      range.baseMipLevel = dst_iview->base_mip;
      range.levelCount = 1;
      range.baseArrayLayer = dst_iview->base_layer;
      range.layerCount = layer_count;

      uint32_t htile_value = radv_get_htile_initial_value(cmd_buffer->device, dst_image);

      cmd_buffer->state.flush_bits |= radv_clear_htile(cmd_buffer, dst_image, &range, htile_value);
   }

   radv_image_view_finish(&tsrc_iview);
   radv_image_view_finish(&tdst_iview);

   radv_meta_restore(&saved_state, cmd_buffer);
}