1 /*
2  * Copyright © 2021 Google
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21  * IN THE SOFTWARE.
22  */
23 
24 #define AC_SURFACE_INCLUDE_NIR
25 #include "ac_surface.h"
26 
27 #include "radv_meta.h"
28 #include "radv_private.h"
29 
30 static nir_shader *
build_dcc_retile_compute_shader(struct radv_device * dev,struct radeon_surf * surf)31 build_dcc_retile_compute_shader(struct radv_device *dev, struct radeon_surf *surf)
32 {
33    enum glsl_sampler_dim dim = GLSL_SAMPLER_DIM_BUF;
34    const struct glsl_type *buf_type = glsl_image_type(dim, false, GLSL_TYPE_UINT);
35    nir_builder b = nir_builder_init_simple_shader(MESA_SHADER_COMPUTE, NULL, "dcc_retile_compute");
36 
37    b.shader->info.workgroup_size[0] = 8;
38    b.shader->info.workgroup_size[1] = 8;
39    b.shader->info.workgroup_size[2] = 1;
40 
41    nir_ssa_def *src_dcc_size = nir_load_push_constant(&b, 2, 32, nir_imm_int(&b, 0), .range = 8);
42    nir_ssa_def *src_dcc_pitch = nir_channels(&b, src_dcc_size, 1);
43    nir_ssa_def *src_dcc_height = nir_channels(&b, src_dcc_size, 2);
44 
45    nir_ssa_def *dst_dcc_size = nir_load_push_constant(&b, 2, 32, nir_imm_int(&b, 8), .range = 8);
46    nir_ssa_def *dst_dcc_pitch = nir_channels(&b, dst_dcc_size, 1);
47    nir_ssa_def *dst_dcc_height = nir_channels(&b, dst_dcc_size, 2);
48    nir_variable *input_dcc = nir_variable_create(b.shader, nir_var_uniform, buf_type, "dcc_in");
49    input_dcc->data.descriptor_set = 0;
50    input_dcc->data.binding = 0;
51    nir_variable *output_dcc = nir_variable_create(b.shader, nir_var_uniform, buf_type, "dcc_out");
52    output_dcc->data.descriptor_set = 0;
53    output_dcc->data.binding = 1;
54 
55    nir_ssa_def *input_dcc_ref = &nir_build_deref_var(&b, input_dcc)->dest.ssa;
56    nir_ssa_def *output_dcc_ref = &nir_build_deref_var(&b, output_dcc)->dest.ssa;
57 
58    nir_ssa_def *coord = get_global_ids(&b, 2);
59    nir_ssa_def *zero = nir_imm_int(&b, 0);
60    coord = nir_imul(
61       &b, coord,
62       nir_imm_ivec2(&b, surf->u.gfx9.color.dcc_block_width, surf->u.gfx9.color.dcc_block_height));
63 
64    nir_ssa_def *src = ac_nir_dcc_addr_from_coord(&b, &dev->physical_device->rad_info, surf->bpe,
65                                                  &surf->u.gfx9.color.dcc_equation, src_dcc_pitch,
66                                                  src_dcc_height, zero, nir_channel(&b, coord, 0),
67                                                  nir_channel(&b, coord, 1), zero, zero, zero);
68    nir_ssa_def *dst = ac_nir_dcc_addr_from_coord(
69       &b, &dev->physical_device->rad_info, surf->bpe, &surf->u.gfx9.color.display_dcc_equation,
70       dst_dcc_pitch, dst_dcc_height, zero, nir_channel(&b, coord, 0), nir_channel(&b, coord, 1),
71       zero, zero, zero);
72 
73    nir_ssa_def *dcc_val = nir_image_deref_load(&b, 1, 32, input_dcc_ref,
74                                                nir_vec4(&b, src, src, src, src),
75                                                nir_ssa_undef(&b, 1, 32), nir_imm_int(&b, 0),
76                                                .image_dim = dim);
77 
78    nir_image_deref_store(&b, output_dcc_ref, nir_vec4(&b, dst, dst, dst, dst),
79                          nir_ssa_undef(&b, 1, 32), dcc_val, nir_imm_int(&b, 0), .image_dim = dim);
80 
81    return b.shader;
82 }
83 
84 void
radv_device_finish_meta_dcc_retile_state(struct radv_device * device)85 radv_device_finish_meta_dcc_retile_state(struct radv_device *device)
86 {
87    struct radv_meta_state *state = &device->meta_state;
88 
89    for (unsigned i = 0; i < ARRAY_SIZE(state->dcc_retile.pipeline); i++) {
90       radv_DestroyPipeline(radv_device_to_handle(device), state->dcc_retile.pipeline[i],
91                            &state->alloc);
92    }
93    radv_DestroyPipelineLayout(radv_device_to_handle(device), state->dcc_retile.p_layout,
94                               &state->alloc);
95    radv_DestroyDescriptorSetLayout(radv_device_to_handle(device), state->dcc_retile.ds_layout,
96                                    &state->alloc);
97 
98    /* Reset for next finish. */
99    memset(&state->dcc_retile, 0, sizeof(state->dcc_retile));
100 }
101 
102 /*
103  * This take a surface, but the only things used are:
104  * - BPE
105  * - DCC equations
106  * - DCC block size
107  *
108  * BPE is always 4 at the moment and the rest is derived from the tilemode.
109  */
110 static VkResult
radv_device_init_meta_dcc_retile_state(struct radv_device * device,struct radeon_surf * surf)111 radv_device_init_meta_dcc_retile_state(struct radv_device *device, struct radeon_surf *surf)
112 {
113    VkResult result = VK_SUCCESS;
114    nir_shader *cs = build_dcc_retile_compute_shader(device, surf);
115 
116    VkDescriptorSetLayoutCreateInfo ds_create_info = {
117       .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,
118       .flags = VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR,
119       .bindingCount = 2,
120       .pBindings = (VkDescriptorSetLayoutBinding[]){
121          {.binding = 0,
122           .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER,
123           .descriptorCount = 1,
124           .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
125           .pImmutableSamplers = NULL},
126          {.binding = 1,
127           .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER,
128           .descriptorCount = 1,
129           .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
130           .pImmutableSamplers = NULL},
131       }};
132 
133    result = radv_CreateDescriptorSetLayout(radv_device_to_handle(device), &ds_create_info,
134                                            &device->meta_state.alloc,
135                                            &device->meta_state.dcc_retile.ds_layout);
136    if (result != VK_SUCCESS)
137       goto cleanup;
138 
139    VkPipelineLayoutCreateInfo pl_create_info = {
140       .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
141       .setLayoutCount = 1,
142       .pSetLayouts = &device->meta_state.dcc_retile.ds_layout,
143       .pushConstantRangeCount = 1,
144       .pPushConstantRanges = &(VkPushConstantRange){VK_SHADER_STAGE_COMPUTE_BIT, 0, 16},
145    };
146 
147    result =
148       radv_CreatePipelineLayout(radv_device_to_handle(device), &pl_create_info,
149                                 &device->meta_state.alloc, &device->meta_state.dcc_retile.p_layout);
150    if (result != VK_SUCCESS)
151       goto cleanup;
152 
153    /* compute shader */
154 
155    VkPipelineShaderStageCreateInfo pipeline_shader_stage = {
156       .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
157       .stage = VK_SHADER_STAGE_COMPUTE_BIT,
158       .module = vk_shader_module_handle_from_nir(cs),
159       .pName = "main",
160       .pSpecializationInfo = NULL,
161    };
162 
163    VkComputePipelineCreateInfo vk_pipeline_info = {
164       .sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,
165       .stage = pipeline_shader_stage,
166       .flags = 0,
167       .layout = device->meta_state.dcc_retile.p_layout,
168    };
169 
170    result = radv_CreateComputePipelines(
171       radv_device_to_handle(device), radv_pipeline_cache_to_handle(&device->meta_state.cache), 1,
172       &vk_pipeline_info, NULL, &device->meta_state.dcc_retile.pipeline[surf->u.gfx9.swizzle_mode]);
173    if (result != VK_SUCCESS)
174       goto cleanup;
175 
176 cleanup:
177    if (result != VK_SUCCESS)
178       radv_device_finish_meta_dcc_retile_state(device);
179    ralloc_free(cs);
180    return result;
181 }
182 
183 void
radv_retile_dcc(struct radv_cmd_buffer * cmd_buffer,struct radv_image * image)184 radv_retile_dcc(struct radv_cmd_buffer *cmd_buffer, struct radv_image *image)
185 {
186    struct radv_meta_saved_state saved_state;
187    struct radv_device *device = cmd_buffer->device;
188    struct radv_buffer buffer;
189 
190    assert(image->type == VK_IMAGE_TYPE_2D);
191    assert(image->info.array_size == 1 && image->info.levels == 1);
192 
193    struct radv_cmd_state *state = &cmd_buffer->state;
194 
195    state->flush_bits |= radv_dst_access_flush(cmd_buffer, VK_ACCESS_SHADER_READ_BIT, image) |
196                         radv_dst_access_flush(cmd_buffer, VK_ACCESS_SHADER_WRITE_BIT, image);
197 
198    unsigned swizzle_mode = image->planes[0].surface.u.gfx9.swizzle_mode;
199 
200    /* Compile pipelines if not already done so. */
201    if (!cmd_buffer->device->meta_state.dcc_retile.pipeline[swizzle_mode]) {
202       VkResult ret =
203          radv_device_init_meta_dcc_retile_state(cmd_buffer->device, &image->planes[0].surface);
204       if (ret != VK_SUCCESS) {
205          cmd_buffer->record_result = ret;
206          return;
207       }
208    }
209 
210    radv_meta_save(
211       &saved_state, cmd_buffer,
212       RADV_META_SAVE_DESCRIPTORS | RADV_META_SAVE_COMPUTE_PIPELINE | RADV_META_SAVE_CONSTANTS);
213 
214    radv_CmdBindPipeline(radv_cmd_buffer_to_handle(cmd_buffer), VK_PIPELINE_BIND_POINT_COMPUTE,
215                         device->meta_state.dcc_retile.pipeline[swizzle_mode]);
216 
217    radv_buffer_init(&buffer, device, image->bo, image->size, image->offset);
218 
219    struct radv_buffer_view views[2];
220    VkBufferView view_handles[2];
221    radv_buffer_view_init(views, cmd_buffer->device,
222                          &(VkBufferViewCreateInfo){
223                             .sType = VK_STRUCTURE_TYPE_BUFFER_VIEW_CREATE_INFO,
224                             .buffer = radv_buffer_to_handle(&buffer),
225                             .offset = image->planes[0].surface.meta_offset,
226                             .range = image->planes[0].surface.meta_size,
227                             .format = VK_FORMAT_R8_UINT,
228                          });
229    radv_buffer_view_init(views + 1, cmd_buffer->device,
230                          &(VkBufferViewCreateInfo){
231                             .sType = VK_STRUCTURE_TYPE_BUFFER_VIEW_CREATE_INFO,
232                             .buffer = radv_buffer_to_handle(&buffer),
233                             .offset = image->planes[0].surface.display_dcc_offset,
234                             .range = image->planes[0].surface.u.gfx9.color.display_dcc_size,
235                             .format = VK_FORMAT_R8_UINT,
236                          });
237    for (unsigned i = 0; i < 2; ++i)
238       view_handles[i] = radv_buffer_view_to_handle(&views[i]);
239 
240    radv_meta_push_descriptor_set(cmd_buffer, VK_PIPELINE_BIND_POINT_COMPUTE,
241                                  device->meta_state.dcc_retile.p_layout, 0, /* set */
242                                  2, /* descriptorWriteCount */
243                                  (VkWriteDescriptorSet[]){
244                                     {
245                                        .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
246                                        .dstBinding = 0,
247                                        .dstArrayElement = 0,
248                                        .descriptorCount = 1,
249                                        .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER,
250                                        .pTexelBufferView = &view_handles[0],
251                                     },
252                                     {
253                                        .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
254                                        .dstBinding = 1,
255                                        .dstArrayElement = 0,
256                                        .descriptorCount = 1,
257                                        .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER,
258                                        .pTexelBufferView = &view_handles[1],
259                                     },
260                                  });
261 
262    unsigned width = DIV_ROUND_UP(image->info.width, vk_format_get_blockwidth(image->vk_format));
263    unsigned height = DIV_ROUND_UP(image->info.height, vk_format_get_blockheight(image->vk_format));
264 
265    unsigned dcc_width = DIV_ROUND_UP(width, image->planes[0].surface.u.gfx9.color.dcc_block_width);
266    unsigned dcc_height =
267       DIV_ROUND_UP(height, image->planes[0].surface.u.gfx9.color.dcc_block_height);
268 
269    uint32_t constants[] = {
270       image->planes[0].surface.u.gfx9.color.dcc_pitch_max + 1,
271       image->planes[0].surface.u.gfx9.color.dcc_height,
272       image->planes[0].surface.u.gfx9.color.display_dcc_pitch_max + 1,
273       image->planes[0].surface.u.gfx9.color.display_dcc_height,
274    };
275    radv_CmdPushConstants(radv_cmd_buffer_to_handle(cmd_buffer),
276                          device->meta_state.dcc_retile.p_layout, VK_SHADER_STAGE_COMPUTE_BIT, 0, 16,
277                          constants);
278 
279    radv_unaligned_dispatch(cmd_buffer, dcc_width, dcc_height, 1);
280 
281    radv_buffer_view_finish(views);
282    radv_buffer_view_finish(views + 1);
283    radv_buffer_finish(&buffer);
284 
285    radv_meta_restore(&saved_state, cmd_buffer);
286 
287    state->flush_bits |= RADV_CMD_FLAG_CS_PARTIAL_FLUSH |
288                         radv_src_access_flush(cmd_buffer, VK_ACCESS_SHADER_WRITE_BIT, image);
289 }
290