/*
 * Copyright © 2016 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include <assert.h>
#include <stdbool.h>

#include "radv_meta.h"
#include "radv_private.h"
#include "sid.h"

enum radv_depth_op {
   DEPTH_DECOMPRESS,
   DEPTH_RESUMMARIZE,
};

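/* Builds a compute shader that copies every texel of a 2D depth/stencil image
 * from the storage image at binding 0 to the storage image at binding 1. The
 * caller binds a compressed (HTILE-enabled) view as the source and an
 * uncompressed view as the destination, so the copy expands the image in
 * place.
 */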
static nir_shader *
build_expand_depth_stencil_compute_shader(struct radv_device *dev)
{
   const struct glsl_type *img_type = glsl_image_type(GLSL_SAMPLER_DIM_2D, false, GLSL_TYPE_FLOAT);

   nir_builder b =
      nir_builder_init_simple_shader(MESA_SHADER_COMPUTE, NULL, "expand_depth_stencil_compute");

   /* We need at least 8/8/1 to cover an entire HTILE block in a single workgroup. */
   b.shader->info.workgroup_size[0] = 8;
   b.shader->info.workgroup_size[1] = 8;
   b.shader->info.workgroup_size[2] = 1;
   nir_variable *input_img = nir_variable_create(b.shader, nir_var_uniform, img_type, "in_img");
   input_img->data.descriptor_set = 0;
   input_img->data.binding = 0;

   nir_variable *output_img = nir_variable_create(b.shader, nir_var_uniform, img_type, "out_img");
   output_img->data.descriptor_set = 0;
   output_img->data.binding = 1;

   nir_ssa_def *invoc_id = nir_load_local_invocation_id(&b);
   nir_ssa_def *wg_id = nir_load_workgroup_id(&b, 32);
   nir_ssa_def *block_size =
      nir_imm_ivec4(&b, b.shader->info.workgroup_size[0], b.shader->info.workgroup_size[1],
                    b.shader->info.workgroup_size[2], 0);

   nir_ssa_def *global_id = nir_iadd(&b, nir_imul(&b, wg_id, block_size), invoc_id);

   nir_ssa_def *data = nir_image_deref_load(
      &b, 4, 32, &nir_build_deref_var(&b, input_img)->dest.ssa, global_id, nir_ssa_undef(&b, 1, 32),
      nir_imm_int(&b, 0), .image_dim = GLSL_SAMPLER_DIM_2D);

   /* We need a NIR_SCOPE_DEVICE memory_scope because ACO will avoid
    * creating a vmcnt(0) because it expects the L1 cache to keep memory
    * operations in-order for the same workgroup. The vmcnt(0) seems
    * necessary however. */
   nir_scoped_barrier(&b, .execution_scope = NIR_SCOPE_WORKGROUP, .memory_scope = NIR_SCOPE_DEVICE,
                      .memory_semantics = NIR_MEMORY_ACQ_REL, .memory_modes = nir_var_mem_ssbo);

   nir_image_deref_store(&b, &nir_build_deref_var(&b, output_img)->dest.ssa, global_id,
                         nir_ssa_undef(&b, 1, 32), data, nir_imm_int(&b, 0),
                         .image_dim = GLSL_SAMPLER_DIM_2D);
   return b.shader;
}

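/* Creates the descriptor set layout, pipeline layout and compute pipeline used
 * by the compute-based HTILE expand path.
 */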
static VkResult
create_expand_depth_stencil_compute(struct radv_device *device)
{
   VkResult result = VK_SUCCESS;
   nir_shader *cs = build_expand_depth_stencil_compute_shader(device);

   VkDescriptorSetLayoutCreateInfo ds_create_info = {
      .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,
      .flags = VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR,
      .bindingCount = 2,
      .pBindings = (VkDescriptorSetLayoutBinding[]){
         {.binding = 0,
          .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
          .descriptorCount = 1,
          .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
          .pImmutableSamplers = NULL},
         {.binding = 1,
          .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
          .descriptorCount = 1,
          .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
          .pImmutableSamplers = NULL},
      }};

   result = radv_CreateDescriptorSetLayout(
      radv_device_to_handle(device), &ds_create_info, &device->meta_state.alloc,
      &device->meta_state.expand_depth_stencil_compute_ds_layout);
   if (result != VK_SUCCESS)
      goto cleanup;

   VkPipelineLayoutCreateInfo pl_create_info = {
      .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
      .setLayoutCount = 1,
      .pSetLayouts = &device->meta_state.expand_depth_stencil_compute_ds_layout,
      .pushConstantRangeCount = 0,
      .pPushConstantRanges = NULL,
   };

   result = radv_CreatePipelineLayout(
      radv_device_to_handle(device), &pl_create_info, &device->meta_state.alloc,
      &device->meta_state.expand_depth_stencil_compute_p_layout);
   if (result != VK_SUCCESS)
      goto cleanup;

   /* compute shader */

   VkPipelineShaderStageCreateInfo pipeline_shader_stage = {
      .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
      .stage = VK_SHADER_STAGE_COMPUTE_BIT,
      .module = vk_shader_module_handle_from_nir(cs),
      .pName = "main",
      .pSpecializationInfo = NULL,
   };

   VkComputePipelineCreateInfo vk_pipeline_info = {
      .sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,
      .stage = pipeline_shader_stage,
      .flags = 0,
      .layout = device->meta_state.expand_depth_stencil_compute_p_layout,
   };

   result = radv_CreateComputePipelines(
      radv_device_to_handle(device), radv_pipeline_cache_to_handle(&device->meta_state.cache), 1,
      &vk_pipeline_info, NULL,
      &device->meta_state.expand_depth_stencil_compute_pipeline);
   if (result != VK_SUCCESS)
      goto cleanup;

cleanup:
   ralloc_free(cs);
   return result;
}

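/* Creates the single-subpass render pass (one depth/stencil attachment, loaded
 * and stored) used by the graphics decompress/resummarize pipelines.
 */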
static VkResult
create_pass(struct radv_device *device, uint32_t samples, VkRenderPass *pass)
{
   VkResult result;
   VkDevice device_h = radv_device_to_handle(device);
   const VkAllocationCallbacks *alloc = &device->meta_state.alloc;
   VkAttachmentDescription2 attachment;

   attachment.sType = VK_STRUCTURE_TYPE_ATTACHMENT_DESCRIPTION_2;
   attachment.pNext = NULL;
   attachment.flags = 0;
   attachment.format = VK_FORMAT_D32_SFLOAT_S8_UINT;
   attachment.samples = samples;
   attachment.loadOp = VK_ATTACHMENT_LOAD_OP_LOAD;
   attachment.storeOp = VK_ATTACHMENT_STORE_OP_STORE;
   attachment.stencilLoadOp = VK_ATTACHMENT_LOAD_OP_LOAD;
   attachment.stencilStoreOp = VK_ATTACHMENT_STORE_OP_STORE;
   attachment.initialLayout = VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL;
   attachment.finalLayout = VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL;

   result = radv_CreateRenderPass2(
      device_h,
      &(VkRenderPassCreateInfo2){
         .sType = VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO_2,
         .attachmentCount = 1,
         .pAttachments = &attachment,
         .subpassCount = 1,
         .pSubpasses =
            &(VkSubpassDescription2){
               .sType = VK_STRUCTURE_TYPE_SUBPASS_DESCRIPTION_2,
               .pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS,
               .inputAttachmentCount = 0,
               .colorAttachmentCount = 0,
               .pColorAttachments = NULL,
               .pResolveAttachments = NULL,
               .pDepthStencilAttachment =
                  &(VkAttachmentReference2){
                     .sType = VK_STRUCTURE_TYPE_ATTACHMENT_REFERENCE_2,
                     .attachment = 0,
                     .layout = VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL,
                  },
               .preserveAttachmentCount = 0,
               .pPreserveAttachments = NULL,
            },
         .dependencyCount = 2,
         .pDependencies =
            (VkSubpassDependency2[]){{.sType = VK_STRUCTURE_TYPE_SUBPASS_DEPENDENCY_2,
                                      .srcSubpass = VK_SUBPASS_EXTERNAL,
                                      .dstSubpass = 0,
                                      .srcStageMask = VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT,
                                      .dstStageMask = VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT,
                                      .srcAccessMask = 0,
                                      .dstAccessMask = 0,
                                      .dependencyFlags = 0},
                                     {.sType = VK_STRUCTURE_TYPE_SUBPASS_DEPENDENCY_2,
                                      .srcSubpass = 0,
                                      .dstSubpass = VK_SUBPASS_EXTERNAL,
                                      .srcStageMask = VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT,
                                      .dstStageMask = VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT,
                                      .srcAccessMask = 0,
                                      .dstAccessMask = 0,
                                      .dependencyFlags = 0}},
      },
      alloc, pass);

   return result;
}

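/* The graphics pipelines use no descriptors or push constants, so their
 * pipeline layout is empty.
 */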
static VkResult
create_pipeline_layout(struct radv_device *device, VkPipelineLayout *layout)
{
   VkPipelineLayoutCreateInfo pl_create_info = {
      .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
      .setLayoutCount = 0,
      .pSetLayouts = NULL,
      .pushConstantRangeCount = 0,
      .pPushConstantRanges = NULL,
   };

   return radv_CreatePipelineLayout(radv_device_to_handle(device), &pl_create_info,
                                    &device->meta_state.alloc, layout);
}

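/* Creates the graphics pipeline for the given sample count and operation. The
 * pipeline draws a rectangle with a no-op fragment shader; the actual work is
 * done by the hardware state requested through the extra create info
 * (depth/stencil compression disabled, or resummarization enabled for
 * DEPTH_RESUMMARIZE). Pipelines can be created on demand, so creation is
 * guarded by the meta state mutex.
 */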
static VkResult
create_pipeline(struct radv_device *device, uint32_t samples, VkRenderPass pass,
                VkPipelineLayout layout, enum radv_depth_op op, VkPipeline *pipeline)
{
   VkResult result;
   VkDevice device_h = radv_device_to_handle(device);

   mtx_lock(&device->meta_state.mtx);
   if (*pipeline) {
      mtx_unlock(&device->meta_state.mtx);
      return VK_SUCCESS;
   }

   nir_shader *vs_module = radv_meta_build_nir_vs_generate_vertices();
   nir_shader *fs_module = radv_meta_build_nir_fs_noop();

   if (!vs_module || !fs_module) {
      /* XXX: Need more accurate error */
      result = VK_ERROR_OUT_OF_HOST_MEMORY;
      goto cleanup;
   }

   const VkPipelineSampleLocationsStateCreateInfoEXT sample_locs_create_info = {
      .sType = VK_STRUCTURE_TYPE_PIPELINE_SAMPLE_LOCATIONS_STATE_CREATE_INFO_EXT,
      .sampleLocationsEnable = false,
   };

   const VkGraphicsPipelineCreateInfo pipeline_create_info = {
      .sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO,
      .stageCount = 2,
      .pStages =
         (VkPipelineShaderStageCreateInfo[]){
            {
               .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
               .stage = VK_SHADER_STAGE_VERTEX_BIT,
               .module = vk_shader_module_handle_from_nir(vs_module),
               .pName = "main",
            },
            {
               .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
               .stage = VK_SHADER_STAGE_FRAGMENT_BIT,
               .module = vk_shader_module_handle_from_nir(fs_module),
               .pName = "main",
            },
         },
      .pVertexInputState =
         &(VkPipelineVertexInputStateCreateInfo){
            .sType = VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO,
            .vertexBindingDescriptionCount = 0,
            .vertexAttributeDescriptionCount = 0,
         },
      .pInputAssemblyState =
         &(VkPipelineInputAssemblyStateCreateInfo){
            .sType = VK_STRUCTURE_TYPE_PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO,
            .topology = VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP,
            .primitiveRestartEnable = false,
         },
      .pViewportState =
         &(VkPipelineViewportStateCreateInfo){
            .sType = VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO,
            .viewportCount = 1,
            .scissorCount = 1,
         },
      .pRasterizationState =
         &(VkPipelineRasterizationStateCreateInfo){
            .sType = VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_CREATE_INFO,
            .depthClampEnable = false,
            .rasterizerDiscardEnable = false,
            .polygonMode = VK_POLYGON_MODE_FILL,
            .cullMode = VK_CULL_MODE_NONE,
            .frontFace = VK_FRONT_FACE_COUNTER_CLOCKWISE,
         },
      .pMultisampleState =
         &(VkPipelineMultisampleStateCreateInfo){
            .sType = VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO,
            .pNext = &sample_locs_create_info,
            .rasterizationSamples = samples,
            .sampleShadingEnable = false,
            .pSampleMask = NULL,
            .alphaToCoverageEnable = false,
            .alphaToOneEnable = false,
         },
      .pColorBlendState =
         &(VkPipelineColorBlendStateCreateInfo){
            .sType = VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO,
            .logicOpEnable = false,
            .attachmentCount = 0,
            .pAttachments = NULL,
         },
      .pDepthStencilState =
         &(VkPipelineDepthStencilStateCreateInfo){
            .sType = VK_STRUCTURE_TYPE_PIPELINE_DEPTH_STENCIL_STATE_CREATE_INFO,
            .depthTestEnable = false,
            .depthWriteEnable = false,
            .depthBoundsTestEnable = false,
            .stencilTestEnable = false,
         },
      .pDynamicState =
         &(VkPipelineDynamicStateCreateInfo){
            .sType = VK_STRUCTURE_TYPE_PIPELINE_DYNAMIC_STATE_CREATE_INFO,
            .dynamicStateCount = 3,
            .pDynamicStates =
               (VkDynamicState[]){
                  VK_DYNAMIC_STATE_VIEWPORT,
                  VK_DYNAMIC_STATE_SCISSOR,
                  VK_DYNAMIC_STATE_SAMPLE_LOCATIONS_EXT,
               },
         },
      .layout = layout,
      .renderPass = pass,
      .subpass = 0,
   };

   struct radv_graphics_pipeline_create_info extra = {
      .use_rectlist = true,
      .depth_compress_disable = true,
      .stencil_compress_disable = true,
      .resummarize_enable = op == DEPTH_RESUMMARIZE,
   };

   result = radv_graphics_pipeline_create(
      device_h, radv_pipeline_cache_to_handle(&device->meta_state.cache), &pipeline_create_info,
      &extra, &device->meta_state.alloc, pipeline);

cleanup:
   ralloc_free(fs_module);
   ralloc_free(vs_module);
   mtx_unlock(&device->meta_state.mtx);
   return result;
}

void
radv_device_finish_meta_depth_decomp_state(struct radv_device *device)
{
   struct radv_meta_state *state = &device->meta_state;

   for (uint32_t i = 0; i < ARRAY_SIZE(state->depth_decomp); ++i) {
      radv_DestroyRenderPass(radv_device_to_handle(device), state->depth_decomp[i].pass,
                             &state->alloc);
      radv_DestroyPipelineLayout(radv_device_to_handle(device), state->depth_decomp[i].p_layout,
                                 &state->alloc);

      radv_DestroyPipeline(radv_device_to_handle(device),
                           state->depth_decomp[i].decompress_pipeline, &state->alloc);
      radv_DestroyPipeline(radv_device_to_handle(device),
                           state->depth_decomp[i].resummarize_pipeline, &state->alloc);
   }

   radv_DestroyPipeline(radv_device_to_handle(device),
                        state->expand_depth_stencil_compute_pipeline, &state->alloc);
   radv_DestroyPipelineLayout(radv_device_to_handle(device),
                              state->expand_depth_stencil_compute_p_layout, &state->alloc);
   radv_DestroyDescriptorSetLayout(radv_device_to_handle(device),
                                   state->expand_depth_stencil_compute_ds_layout, &state->alloc);
}

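/* Creates the render pass and pipeline layout for every supported sample
 * count, plus the compute expand pipeline. When on_demand is set, the graphics
 * pipelines are skipped here and created lazily by radv_get_depth_pipeline().
 */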
VkResult
radv_device_init_meta_depth_decomp_state(struct radv_device *device, bool on_demand)
{
   struct radv_meta_state *state = &device->meta_state;
   VkResult res = VK_SUCCESS;

   for (uint32_t i = 0; i < ARRAY_SIZE(state->depth_decomp); ++i) {
      uint32_t samples = 1 << i;

      res = create_pass(device, samples, &state->depth_decomp[i].pass);
      if (res != VK_SUCCESS)
         goto fail;

      res = create_pipeline_layout(device, &state->depth_decomp[i].p_layout);
      if (res != VK_SUCCESS)
         goto fail;

      if (on_demand)
         continue;

      res = create_pipeline(device, samples, state->depth_decomp[i].pass,
                            state->depth_decomp[i].p_layout, DEPTH_DECOMPRESS,
                            &state->depth_decomp[i].decompress_pipeline);
      if (res != VK_SUCCESS)
         goto fail;

      res = create_pipeline(device, samples, state->depth_decomp[i].pass,
                            state->depth_decomp[i].p_layout, DEPTH_RESUMMARIZE,
                            &state->depth_decomp[i].resummarize_pipeline);
      if (res != VK_SUCCESS)
         goto fail;
   }

   res = create_expand_depth_stencil_compute(device);
   if (res != VK_SUCCESS)
      goto fail;

   return VK_SUCCESS;

fail:
   radv_device_finish_meta_depth_decomp_state(device);
   return res;
}

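/* Returns the graphics pipeline for the image's sample count and the requested
 * operation, creating both the decompress and resummarize pipelines on first
 * use when the device was initialized on demand.
 */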
static VkPipeline *
radv_get_depth_pipeline(struct radv_cmd_buffer *cmd_buffer, struct radv_image *image,
                        const VkImageSubresourceRange *subresourceRange, enum radv_depth_op op)
{
   struct radv_meta_state *state = &cmd_buffer->device->meta_state;
   uint32_t samples = image->info.samples;
   uint32_t samples_log2 = ffs(samples) - 1;
   VkPipeline *pipeline;

   if (!state->depth_decomp[samples_log2].decompress_pipeline) {
      VkResult ret;

      ret = create_pipeline(cmd_buffer->device, samples, state->depth_decomp[samples_log2].pass,
                            state->depth_decomp[samples_log2].p_layout, DEPTH_DECOMPRESS,
                            &state->depth_decomp[samples_log2].decompress_pipeline);
      if (ret != VK_SUCCESS) {
         cmd_buffer->record_result = ret;
         return NULL;
      }

      ret = create_pipeline(cmd_buffer->device, samples, state->depth_decomp[samples_log2].pass,
                            state->depth_decomp[samples_log2].p_layout, DEPTH_RESUMMARIZE,
                            &state->depth_decomp[samples_log2].resummarize_pipeline);
      if (ret != VK_SUCCESS) {
         cmd_buffer->record_result = ret;
         return NULL;
      }
   }

   switch (op) {
   case DEPTH_DECOMPRESS:
      pipeline = &state->depth_decomp[samples_log2].decompress_pipeline;
      break;
   case DEPTH_RESUMMARIZE:
      pipeline = &state->depth_decomp[samples_log2].resummarize_pipeline;
      break;
   default:
      unreachable("unknown operation");
   }

   return pipeline;
}

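/* Decompresses or resummarizes a single mip level / array layer by drawing a
 * rectangle covering it inside the meta render pass.
 */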
static void
radv_process_depth_image_layer(struct radv_cmd_buffer *cmd_buffer, struct radv_image *image,
                               const VkImageSubresourceRange *range, int level, int layer)
{
   struct radv_device *device = cmd_buffer->device;
   struct radv_meta_state *state = &device->meta_state;
   uint32_t samples_log2 = ffs(image->info.samples) - 1;
   struct radv_image_view iview;
   uint32_t width, height;

   width = radv_minify(image->info.width, range->baseMipLevel + level);
   height = radv_minify(image->info.height, range->baseMipLevel + level);

   radv_image_view_init(&iview, device,
                        &(VkImageViewCreateInfo){
                           .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
                           .image = radv_image_to_handle(image),
                           .viewType = radv_meta_get_view_type(image),
                           .format = image->vk_format,
                           .subresourceRange =
                              {
                                 .aspectMask = VK_IMAGE_ASPECT_DEPTH_BIT,
                                 .baseMipLevel = range->baseMipLevel + level,
                                 .levelCount = 1,
                                 .baseArrayLayer = range->baseArrayLayer + layer,
                                 .layerCount = 1,
                              },
                        },
                        NULL);

   VkFramebuffer fb_h;
   radv_CreateFramebuffer(
      radv_device_to_handle(device),
      &(VkFramebufferCreateInfo){.sType = VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO,
                                 .attachmentCount = 1,
                                 .pAttachments = (VkImageView[]){radv_image_view_to_handle(&iview)},
                                 .width = width,
                                 .height = height,
                                 .layers = 1},
      &cmd_buffer->pool->alloc, &fb_h);

   radv_cmd_buffer_begin_render_pass(cmd_buffer,
                                     &(VkRenderPassBeginInfo){
                                        .sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO,
                                        .renderPass = state->depth_decomp[samples_log2].pass,
                                        .framebuffer = fb_h,
                                        .renderArea = {.offset =
                                                          {
                                                             0,
                                                             0,
                                                          },
                                                       .extent =
                                                          {
                                                             width,
                                                             height,
                                                          }},
                                        .clearValueCount = 0,
                                        .pClearValues = NULL,
                                     },
                                     NULL);
   radv_cmd_buffer_set_subpass(cmd_buffer, &cmd_buffer->state.pass->subpasses[0]);

   radv_CmdDraw(radv_cmd_buffer_to_handle(cmd_buffer), 3, 1, 0, 0);
   radv_cmd_buffer_end_render_pass(cmd_buffer);

   radv_image_view_finish(&iview);
   radv_DestroyFramebuffer(radv_device_to_handle(device), fb_h, &cmd_buffer->pool->alloc);
}

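/* Graphics path: binds the decompress/resummarize pipeline and draws every
 * layer of every HTILE-enabled mip level in the subresource range, optionally
 * programming the sample locations used during the layout transition.
 */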
static void
radv_process_depth_stencil(struct radv_cmd_buffer *cmd_buffer, struct radv_image *image,
                           const VkImageSubresourceRange *subresourceRange,
                           struct radv_sample_locations_state *sample_locs, enum radv_depth_op op)
{
   struct radv_meta_saved_state saved_state;
   VkCommandBuffer cmd_buffer_h = radv_cmd_buffer_to_handle(cmd_buffer);
   VkPipeline *pipeline;

   radv_meta_save(
      &saved_state, cmd_buffer,
      RADV_META_SAVE_GRAPHICS_PIPELINE | RADV_META_SAVE_SAMPLE_LOCATIONS | RADV_META_SAVE_PASS);

   pipeline = radv_get_depth_pipeline(cmd_buffer, image, subresourceRange, op);

   radv_CmdBindPipeline(radv_cmd_buffer_to_handle(cmd_buffer), VK_PIPELINE_BIND_POINT_GRAPHICS,
                        *pipeline);

   if (sample_locs) {
      assert(image->flags & VK_IMAGE_CREATE_SAMPLE_LOCATIONS_COMPATIBLE_DEPTH_BIT_EXT);

      /* Set the sample locations specified during explicit or
       * automatic layout transitions, otherwise the depth decompress
       * pass uses the default HW locations.
       */
      radv_CmdSetSampleLocationsEXT(cmd_buffer_h,
                                    &(VkSampleLocationsInfoEXT){
                                       .sampleLocationsPerPixel = sample_locs->per_pixel,
                                       .sampleLocationGridSize = sample_locs->grid_size,
                                       .sampleLocationsCount = sample_locs->count,
                                       .pSampleLocations = sample_locs->locations,
                                    });
   }

   for (uint32_t l = 0; l < radv_get_levelCount(image, subresourceRange); ++l) {

      /* Do not decompress levels without HTILE. */
      if (!radv_htile_enabled(image, subresourceRange->baseMipLevel + l))
         continue;

      uint32_t width = radv_minify(image->info.width, subresourceRange->baseMipLevel + l);
      uint32_t height = radv_minify(image->info.height, subresourceRange->baseMipLevel + l);

      radv_CmdSetViewport(cmd_buffer_h, 0, 1,
                          &(VkViewport){.x = 0,
                                        .y = 0,
                                        .width = width,
                                        .height = height,
                                        .minDepth = 0.0f,
                                        .maxDepth = 1.0f});

      radv_CmdSetScissor(cmd_buffer_h, 0, 1,
                         &(VkRect2D){
                            .offset = {0, 0},
                            .extent = {width, height},
                         });

      for (uint32_t s = 0; s < radv_get_layerCount(image, subresourceRange); s++) {
         radv_process_depth_image_layer(cmd_buffer, image, subresourceRange, l, s);
      }
   }

   radv_meta_restore(&saved_state, cmd_buffer);
}

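/* Compute path, used on queues without graphics support: copies each
 * HTILE-enabled level/layer from a compressed view to an uncompressed view of
 * the same image, then reinitializes the HTILE metadata to the fully expanded
 * state.
 */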
static void
radv_expand_depth_stencil_compute(struct radv_cmd_buffer *cmd_buffer, struct radv_image *image,
                                  const VkImageSubresourceRange *subresourceRange)
{
   struct radv_meta_saved_state saved_state;
   struct radv_image_view load_iview = {0};
   struct radv_image_view store_iview = {0};
   struct radv_device *device = cmd_buffer->device;

   assert(radv_image_is_tc_compat_htile(image));

   cmd_buffer->state.flush_bits |=
      radv_dst_access_flush(cmd_buffer, VK_ACCESS_SHADER_WRITE_BIT, image);

   radv_meta_save(&saved_state, cmd_buffer,
                  RADV_META_SAVE_DESCRIPTORS | RADV_META_SAVE_COMPUTE_PIPELINE);

   radv_CmdBindPipeline(radv_cmd_buffer_to_handle(cmd_buffer), VK_PIPELINE_BIND_POINT_COMPUTE,
                        device->meta_state.expand_depth_stencil_compute_pipeline);

   for (uint32_t l = 0; l < radv_get_levelCount(image, subresourceRange); l++) {
      uint32_t width, height;

      /* Do not decompress levels without HTILE. */
      if (!radv_htile_enabled(image, subresourceRange->baseMipLevel + l))
         continue;

      width = radv_minify(image->info.width, subresourceRange->baseMipLevel + l);
      height = radv_minify(image->info.height, subresourceRange->baseMipLevel + l);

      for (uint32_t s = 0; s < radv_get_layerCount(image, subresourceRange); s++) {
         radv_image_view_init(
            &load_iview, cmd_buffer->device,
            &(VkImageViewCreateInfo){
               .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
               .image = radv_image_to_handle(image),
               .viewType = VK_IMAGE_VIEW_TYPE_2D,
               .format = image->vk_format,
               .subresourceRange = {.aspectMask = subresourceRange->aspectMask,
                                    .baseMipLevel = subresourceRange->baseMipLevel + l,
                                    .levelCount = 1,
                                    .baseArrayLayer = subresourceRange->baseArrayLayer + s,
                                    .layerCount = 1},
            },
            &(struct radv_image_view_extra_create_info){.enable_compression = true});
         radv_image_view_init(
            &store_iview, cmd_buffer->device,
            &(VkImageViewCreateInfo){
               .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
               .image = radv_image_to_handle(image),
               .viewType = VK_IMAGE_VIEW_TYPE_2D,
               .format = image->vk_format,
               .subresourceRange = {.aspectMask = subresourceRange->aspectMask,
                                    .baseMipLevel = subresourceRange->baseMipLevel + l,
                                    .levelCount = 1,
                                    .baseArrayLayer = subresourceRange->baseArrayLayer + s,
                                    .layerCount = 1},
            },
            &(struct radv_image_view_extra_create_info){.disable_compression = true});

         radv_meta_push_descriptor_set(
            cmd_buffer, VK_PIPELINE_BIND_POINT_COMPUTE,
            device->meta_state.expand_depth_stencil_compute_p_layout, 0, /* set */
            2, /* descriptorWriteCount */
            (VkWriteDescriptorSet[]){{.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
                                      .dstBinding = 0,
                                      .dstArrayElement = 0,
                                      .descriptorCount = 1,
                                      .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
                                      .pImageInfo =
                                         (VkDescriptorImageInfo[]){
                                            {
                                               .sampler = VK_NULL_HANDLE,
                                               .imageView = radv_image_view_to_handle(&load_iview),
                                               .imageLayout = VK_IMAGE_LAYOUT_GENERAL,
                                            },
                                         }},
                                     {.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
                                      .dstBinding = 1,
                                      .dstArrayElement = 0,
                                      .descriptorCount = 1,
                                      .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
                                      .pImageInfo = (VkDescriptorImageInfo[]){
                                         {
                                            .sampler = VK_NULL_HANDLE,
                                            .imageView = radv_image_view_to_handle(&store_iview),
                                            .imageLayout = VK_IMAGE_LAYOUT_GENERAL,
                                         },
                                      }}});

         radv_unaligned_dispatch(cmd_buffer, width, height, 1);

         radv_image_view_finish(&load_iview);
         radv_image_view_finish(&store_iview);
      }
   }

   radv_meta_restore(&saved_state, cmd_buffer);

   cmd_buffer->state.flush_bits |=
      RADV_CMD_FLAG_CS_PARTIAL_FLUSH | RADV_CMD_FLAG_INV_VCACHE |
      radv_src_access_flush(cmd_buffer, VK_ACCESS_SHADER_WRITE_BIT, image);

   /* Initialize the HTILE metadata as "fully expanded". */
   uint32_t htile_value = radv_get_htile_initial_value(cmd_buffer->device, image);

   cmd_buffer->state.flush_bits |= radv_clear_htile(cmd_buffer, image, subresourceRange, htile_value);
}

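/* Expands (decompresses) HTILE for a subresource range, using the graphics
 * path on the general queue and the compute path otherwise.
 */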
void
radv_expand_depth_stencil(struct radv_cmd_buffer *cmd_buffer, struct radv_image *image,
                          const VkImageSubresourceRange *subresourceRange,
                          struct radv_sample_locations_state *sample_locs)
{
   struct radv_barrier_data barrier = {0};

   barrier.layout_transitions.depth_stencil_expand = 1;
   radv_describe_layout_transition(cmd_buffer, &barrier);

   if (cmd_buffer->queue_family_index == RADV_QUEUE_GENERAL) {
      radv_process_depth_stencil(cmd_buffer, image, subresourceRange, sample_locs, DEPTH_DECOMPRESS);
   } else {
      radv_expand_depth_stencil_compute(cmd_buffer, image, subresourceRange);
   }
}

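/* Resummarizes HTILE from the current depth/stencil contents; this is only
 * supported on the general (graphics) queue.
 */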
void
radv_resummarize_depth_stencil(struct radv_cmd_buffer *cmd_buffer, struct radv_image *image,
                               const VkImageSubresourceRange *subresourceRange,
                               struct radv_sample_locations_state *sample_locs)
{
   struct radv_barrier_data barrier = {0};

   barrier.layout_transitions.depth_stencil_resummarize = 1;
   radv_describe_layout_transition(cmd_buffer, &barrier);

   assert(cmd_buffer->queue_family_index == RADV_QUEUE_GENERAL);
   radv_process_depth_stencil(cmd_buffer, image, subresourceRange, sample_locs, DEPTH_RESUMMARIZE);
}