1 /*
2  * Copyright © 2021 Valve Corporation
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21  * IN THE SOFTWARE.
22  */
23 #include "nir/nir_builder.h"
24 #include "radv_meta.h"
25 
26 static nir_shader *
build_fmask_copy_compute_shader(struct radv_device * dev,int samples)27 build_fmask_copy_compute_shader(struct radv_device *dev, int samples)
28 {
29    const struct glsl_type *sampler_type = glsl_sampler_type(GLSL_SAMPLER_DIM_MS, false, false, GLSL_TYPE_FLOAT);
30    const struct glsl_type *img_type = glsl_image_type(GLSL_SAMPLER_DIM_MS, false, GLSL_TYPE_FLOAT);
31 
32    nir_builder b = radv_meta_init_shader(MESA_SHADER_COMPUTE, "meta_fmask_copy_cs_-%d", samples);
33 
34    b.shader->info.workgroup_size[0] = 8;
35    b.shader->info.workgroup_size[1] = 8;
36 
37    nir_variable *input_img = nir_variable_create(b.shader, nir_var_uniform, sampler_type, "s_tex");
38    input_img->data.descriptor_set = 0;
39    input_img->data.binding = 0;
40 
41    nir_variable *output_img = nir_variable_create(b.shader, nir_var_uniform, img_type, "out_img");
42    output_img->data.descriptor_set = 0;
43    output_img->data.binding = 1;
44 
45    nir_ssa_def *invoc_id = nir_load_local_invocation_id(&b);
46    nir_ssa_def *wg_id = nir_load_workgroup_id(&b, 32);
47    nir_ssa_def *block_size =
48       nir_imm_ivec3(&b, b.shader->info.workgroup_size[0], b.shader->info.workgroup_size[1],
49                     b.shader->info.workgroup_size[2]);
50 
51    nir_ssa_def *global_id = nir_iadd(&b, nir_imul(&b, wg_id, block_size), invoc_id);
52 
53    /* Get coordinates. */
54    nir_ssa_def *src_coord = nir_channels(&b, global_id, 0x3);
55    nir_ssa_def *dst_coord = nir_vec4(&b, nir_channel(&b, src_coord, 0),
56                                          nir_channel(&b, src_coord, 1),
57                                          nir_ssa_undef(&b, 1, 32),
58                                          nir_ssa_undef(&b, 1, 32));
59 
60    nir_ssa_def *input_img_deref = &nir_build_deref_var(&b, input_img)->dest.ssa;
61 
62    /* Fetch the mask for this fragment. */
63    nir_tex_instr *frag_mask_fetch = nir_tex_instr_create(b.shader, 3);
64    frag_mask_fetch->sampler_dim = GLSL_SAMPLER_DIM_MS;
65    frag_mask_fetch->op = nir_texop_fragment_mask_fetch_amd;
66    frag_mask_fetch->src[0].src_type = nir_tex_src_coord;
67    frag_mask_fetch->src[0].src = nir_src_for_ssa(src_coord);
68    frag_mask_fetch->src[1].src_type = nir_tex_src_lod;
69    frag_mask_fetch->src[1].src = nir_src_for_ssa(nir_imm_int(&b, 0));
70    frag_mask_fetch->src[2].src_type = nir_tex_src_texture_deref;
71    frag_mask_fetch->src[2].src = nir_src_for_ssa(input_img_deref);
72    frag_mask_fetch->dest_type = nir_type_uint32;
73    frag_mask_fetch->is_array = false;
74    frag_mask_fetch->coord_components = 2;
75 
76    nir_ssa_dest_init(&frag_mask_fetch->instr, &frag_mask_fetch->dest, 1, 32, "frag_mask_fetch");
77    nir_builder_instr_insert(&b, &frag_mask_fetch->instr);
78 
79    nir_ssa_def *frag_mask = &frag_mask_fetch->dest.ssa;
80 
81    /* Get the maximum sample used in this fragment. */
82    nir_ssa_def *max_sample_index = nir_imm_int(&b, 0);
83    for (uint32_t s = 0; s < samples; s++) {
84       /* max_sample_index = MAX2(max_sample_index, (frag_mask >> (s * 4)) & 0xf) */
85       max_sample_index = nir_umax(&b, max_sample_index,
86                               nir_ubitfield_extract(&b, frag_mask, nir_imm_int(&b, 4 * s),
87                                                     nir_imm_int(&b, 4)));
88    }
89 
90    nir_variable *counter = nir_local_variable_create(b.impl, glsl_int_type(), "counter");
91    nir_store_var(&b, counter, nir_imm_int(&b, 0), 0x1);
92 
93    nir_loop *loop = nir_push_loop(&b);
94    {
95       nir_ssa_def *sample_id = nir_load_var(&b, counter);
96 
97       nir_tex_instr *frag_fetch = nir_tex_instr_create(b.shader, 4);
98       frag_fetch->sampler_dim = GLSL_SAMPLER_DIM_MS;
99       frag_fetch->op = nir_texop_fragment_fetch_amd;
100       frag_fetch->src[0].src_type = nir_tex_src_coord;
101       frag_fetch->src[0].src = nir_src_for_ssa(src_coord);
102       frag_fetch->src[1].src_type = nir_tex_src_lod;
103       frag_fetch->src[1].src = nir_src_for_ssa(nir_imm_int(&b, 0));
104       frag_fetch->src[2].src_type = nir_tex_src_texture_deref;
105       frag_fetch->src[2].src = nir_src_for_ssa(input_img_deref);
106       frag_fetch->src[3].src_type = nir_tex_src_ms_index;
107       frag_fetch->src[3].src = nir_src_for_ssa(sample_id);
108       frag_fetch->dest_type = nir_type_uint32;
109       frag_fetch->is_array = false;
110       frag_fetch->coord_components = 2;
111 
112       nir_ssa_dest_init(&frag_fetch->instr, &frag_fetch->dest, 4, 32, "frag_fetch");
113       nir_builder_instr_insert(&b, &frag_fetch->instr);
114 
115       nir_ssa_def *outval = &frag_fetch->dest.ssa;
116       nir_image_deref_store(&b, &nir_build_deref_var(&b, output_img)->dest.ssa, dst_coord,
117                             sample_id, outval, nir_imm_int(&b, 0),
118                             .image_dim = GLSL_SAMPLER_DIM_MS);
119 
120       radv_break_on_count(&b, counter, max_sample_index);
121    }
122    nir_pop_loop(&b, loop);
123 
124    return b.shader;
125 }
126 
127 void
radv_device_finish_meta_fmask_copy_state(struct radv_device * device)128 radv_device_finish_meta_fmask_copy_state(struct radv_device *device)
129 {
130    struct radv_meta_state *state = &device->meta_state;
131 
132    radv_DestroyPipelineLayout(radv_device_to_handle(device), state->fmask_copy.p_layout,
133                               &state->alloc);
134    radv_DestroyDescriptorSetLayout(radv_device_to_handle(device), state->fmask_copy.ds_layout,
135                                    &state->alloc);
136 
137    for (uint32_t i = 0; i < MAX_SAMPLES_LOG2; ++i) {
138       radv_DestroyPipeline(radv_device_to_handle(device), state->fmask_copy.pipeline[i], &state->alloc);
139    }
140 }
141 
142 static VkResult
create_fmask_copy_pipeline(struct radv_device * device,int samples,VkPipeline * pipeline)143 create_fmask_copy_pipeline(struct radv_device *device, int samples, VkPipeline *pipeline)
144 {
145    struct radv_meta_state *state = &device->meta_state;
146    nir_shader *cs = build_fmask_copy_compute_shader(device, samples);
147    VkResult result;
148 
149    VkPipelineShaderStageCreateInfo pipeline_shader_stage = {
150       .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
151       .stage = VK_SHADER_STAGE_COMPUTE_BIT,
152       .module = vk_shader_module_handle_from_nir(cs),
153       .pName = "main",
154       .pSpecializationInfo = NULL,
155    };
156 
157    VkComputePipelineCreateInfo vk_pipeline_info = {
158       .sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,
159       .stage = pipeline_shader_stage,
160       .flags = 0,
161       .layout = state->fmask_copy.p_layout,
162    };
163 
164    result = radv_CreateComputePipelines(radv_device_to_handle(device),
165                                         radv_pipeline_cache_to_handle(&state->cache), 1,
166                                         &vk_pipeline_info, NULL, pipeline);
167    ralloc_free(cs);
168    return result;
169 }
170 
171 VkResult
radv_device_init_meta_fmask_copy_state(struct radv_device * device)172 radv_device_init_meta_fmask_copy_state(struct radv_device *device)
173 {
174    VkResult result;
175 
176    VkDescriptorSetLayoutCreateInfo ds_create_info = {
177       .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,
178       .flags = VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR,
179       .bindingCount = 2,
180       .pBindings = (VkDescriptorSetLayoutBinding[]){
181          {.binding = 0,
182           .descriptorType = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE,
183           .descriptorCount = 1,
184           .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
185           .pImmutableSamplers = NULL},
186          {.binding = 1,
187           .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
188           .descriptorCount = 1,
189           .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
190           .pImmutableSamplers = NULL},
191       }};
192 
193    result = radv_CreateDescriptorSetLayout(radv_device_to_handle(device), &ds_create_info,
194                                            &device->meta_state.alloc,
195                                            &device->meta_state.fmask_copy.ds_layout);
196    if (result != VK_SUCCESS)
197       goto fail;
198 
199    VkPipelineLayoutCreateInfo pl_create_info = {
200       .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
201       .setLayoutCount = 1,
202       .pSetLayouts = &device->meta_state.fmask_copy.ds_layout,
203       .pushConstantRangeCount = 0,
204       .pPushConstantRanges = NULL
205    };
206 
207    result =
208       radv_CreatePipelineLayout(radv_device_to_handle(device), &pl_create_info,
209                                 &device->meta_state.alloc, &device->meta_state.fmask_copy.p_layout);
210    if (result != VK_SUCCESS)
211       goto fail;
212 
213    for (uint32_t i = 0; i < MAX_SAMPLES_LOG2; i++) {
214       uint32_t samples = 1 << i;
215       result = create_fmask_copy_pipeline(device, samples, &device->meta_state.fmask_copy.pipeline[i]);
216       if (result != VK_SUCCESS)
217          goto fail;
218    }
219 
220    return VK_SUCCESS;
221 fail:
222    radv_device_finish_meta_fmask_copy_state(device);
223    return result;
224 }
225 
226 static void
radv_fixup_copy_dst_metadata(struct radv_cmd_buffer * cmd_buffer,const struct radv_image * src_image,const struct radv_image * dst_image)227 radv_fixup_copy_dst_metadata(struct radv_cmd_buffer *cmd_buffer, const struct radv_image *src_image,
228                              const struct radv_image *dst_image)
229 {
230    uint64_t src_offset, dst_offset, size;
231 
232    assert(src_image->planes[0].surface.cmask_size == dst_image->planes[0].surface.cmask_size &&
233           src_image->planes[0].surface.fmask_size == dst_image->planes[0].surface.fmask_size);
234    assert(src_image->planes[0].surface.fmask_offset + src_image->planes[0].surface.fmask_size ==
235           src_image->planes[0].surface.cmask_offset &&
236           dst_image->planes[0].surface.fmask_offset + dst_image->planes[0].surface.fmask_size ==
237           dst_image->planes[0].surface.cmask_offset);
238 
239    /* Copy CMASK+FMASK. */
240    size = src_image->planes[0].surface.cmask_size + src_image->planes[0].surface.fmask_size;
241    src_offset = src_image->offset + src_image->planes[0].surface.fmask_offset;
242    dst_offset = dst_image->offset + dst_image->planes[0].surface.fmask_offset;
243 
244    radv_copy_buffer(cmd_buffer, src_image->bo, dst_image->bo, src_offset, dst_offset, size);
245 }
246 
247 bool
radv_can_use_fmask_copy(struct radv_cmd_buffer * cmd_buffer,const struct radv_image * src_image,const struct radv_image * dst_image,unsigned num_rects,const struct radv_meta_blit2d_rect * rects)248 radv_can_use_fmask_copy(struct radv_cmd_buffer *cmd_buffer,
249                         const struct radv_image *src_image, const struct radv_image *dst_image,
250                         unsigned num_rects, const struct radv_meta_blit2d_rect *rects)
251 {
252    /* TODO: Test on pre GFX10 chips. */
253    if (cmd_buffer->device->physical_device->rad_info.chip_class < GFX10)
254       return false;
255 
256    /* TODO: Add support for layers. */
257    if (src_image->info.array_size != 1 || dst_image->info.array_size != 1)
258       return false;
259 
260    /* Source/destination images must have FMASK. */
261    if (!radv_image_has_fmask(src_image) || !radv_image_has_fmask(dst_image))
262       return false;
263 
264    /* Source/destination images must have identical TC-compat mode. */
265    if (radv_image_is_tc_compat_cmask(src_image) != radv_image_is_tc_compat_cmask(dst_image))
266       return false;
267 
268    /* The region must be a whole image copy. */
269    if (num_rects != 1 ||
270        (rects[0].src_x || rects[0].src_y || rects[0].dst_x || rects[0].dst_y ||
271         rects[0].width != src_image->info.width || rects[0].height != src_image->info.height))
272       return false;
273 
274    /* Source/destination images must have identical size. */
275    if (src_image->info.width != dst_image->info.width ||
276        src_image->info.height != dst_image->info.height)
277       return false;
278 
279    /* Source/destination images must have identical swizzle. */
280    if (src_image->planes[0].surface.fmask_tile_swizzle !=
281        dst_image->planes[0].surface.fmask_tile_swizzle ||
282        src_image->planes[0].surface.u.gfx9.color.fmask_swizzle_mode !=
283        dst_image->planes[0].surface.u.gfx9.color.fmask_swizzle_mode)
284       return false;
285 
286    return true;
287 }
288 
289 void
radv_fmask_copy(struct radv_cmd_buffer * cmd_buffer,struct radv_meta_blit2d_surf * src,struct radv_meta_blit2d_surf * dst)290 radv_fmask_copy(struct radv_cmd_buffer *cmd_buffer, struct radv_meta_blit2d_surf *src,
291                 struct radv_meta_blit2d_surf *dst)
292 {
293    struct radv_device *device = cmd_buffer->device;
294    struct radv_image_view src_iview, dst_iview;
295    uint32_t samples = src->image->info.samples;
296    uint32_t samples_log2 = ffs(samples) - 1;
297 
298    radv_CmdBindPipeline(radv_cmd_buffer_to_handle(cmd_buffer), VK_PIPELINE_BIND_POINT_COMPUTE,
299                         cmd_buffer->device->meta_state.fmask_copy.pipeline[samples_log2]);
300 
301    radv_image_view_init(&src_iview, device,
302                         &(VkImageViewCreateInfo){
303                            .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
304                            .image = radv_image_to_handle(src->image),
305                            .viewType = radv_meta_get_view_type(src->image),
306                            .format = vk_format_no_srgb(src->image->vk_format),
307                            .subresourceRange =
308                               {
309                                  .aspectMask = src->aspect_mask,
310                                  .baseMipLevel = 0,
311                                  .levelCount = 1,
312                                  .baseArrayLayer = 0,
313                                  .layerCount = 1,
314                               },
315                         },
316                         NULL);
317 
318    radv_image_view_init(&dst_iview, device,
319                         &(VkImageViewCreateInfo){
320                            .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
321                            .image = radv_image_to_handle(dst->image),
322                            .viewType = radv_meta_get_view_type(dst->image),
323                            .format = vk_format_no_srgb(dst->image->vk_format),
324                            .subresourceRange =
325                               {
326                                  .aspectMask = dst->aspect_mask,
327                                  .baseMipLevel = 0,
328                                  .levelCount = 1,
329                                  .baseArrayLayer = 0,
330                                  .layerCount = 1,
331                               },
332                         },
333                         NULL);
334 
335    radv_meta_push_descriptor_set(
336       cmd_buffer, VK_PIPELINE_BIND_POINT_COMPUTE,
337       cmd_buffer->device->meta_state.fmask_copy.p_layout, 0, /* set */
338       2,                                                     /* descriptorWriteCount */
339       (VkWriteDescriptorSet[]){{.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
340                                 .dstBinding = 0,
341                                 .dstArrayElement = 0,
342                                 .descriptorCount = 1,
343                                 .descriptorType = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE,
344                                 .pImageInfo =
345                                    (VkDescriptorImageInfo[]){
346                                       {.sampler = VK_NULL_HANDLE,
347                                        .imageView = radv_image_view_to_handle(&src_iview),
348                                        .imageLayout = VK_IMAGE_LAYOUT_GENERAL},
349                                    }},
350                                {.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
351                                 .dstBinding = 1,
352                                 .dstArrayElement = 0,
353                                 .descriptorCount = 1,
354                                 .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
355                                 .pImageInfo = (VkDescriptorImageInfo[]){
356                                    {.sampler = VK_NULL_HANDLE,
357                                     .imageView = radv_image_view_to_handle(&dst_iview),
358                                     .imageLayout = VK_IMAGE_LAYOUT_GENERAL},
359                                 }}});
360 
361    radv_unaligned_dispatch(cmd_buffer, src->image->info.width, src->image->info.height, 1);
362 
363    /* Fixup destination image metadata by copying CMASK/FMASK from the source image. */
364    radv_fixup_copy_dst_metadata(cmd_buffer, src->image, dst->image);
365 }
366