1 /*
2  * Copyright © 2020 Raspberry Pi Ltd
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21  * IN THE SOFTWARE.
22  */
23 
24 #include "v3dv_private.h"
25 #include "v3dv_meta_common.h"
26 
27 #include "compiler/nir/nir_builder.h"
28 #include "util/u_pack_color.h"
29 
30 static void
get_hw_clear_color(struct v3dv_device * device,const VkClearColorValue * color,VkFormat fb_format,VkFormat image_format,uint32_t internal_type,uint32_t internal_bpp,uint32_t * hw_color)31 get_hw_clear_color(struct v3dv_device *device,
32                    const VkClearColorValue *color,
33                    VkFormat fb_format,
34                    VkFormat image_format,
35                    uint32_t internal_type,
36                    uint32_t internal_bpp,
37                    uint32_t *hw_color)
38 {
39    const uint32_t internal_size = 4 << internal_bpp;
40 
41    /* If the image format doesn't match the framebuffer format, then we are
42     * trying to clear an unsupported tlb format using a compatible
43     * format for the framebuffer. In this case, we want to make sure that
44     * we pack the clear value according to the original format semantics,
45     * not the compatible format.
46     */
47    if (fb_format == image_format) {
48       v3dv_X(device, get_hw_clear_color)(color, internal_type, internal_size,
49                                          hw_color);
50    } else {
51       union util_color uc;
52       enum pipe_format pipe_image_format =
53          vk_format_to_pipe_format(image_format);
54       util_pack_color(color->float32, pipe_image_format, &uc);
55       memcpy(hw_color, uc.ui, internal_size);
56    }
57 }
58 
59 /* Returns true if the implementation is able to handle the case, false
60  * otherwise.
61 */
62 static bool
clear_image_tlb(struct v3dv_cmd_buffer * cmd_buffer,struct v3dv_image * image,const VkClearValue * clear_value,const VkImageSubresourceRange * range)63 clear_image_tlb(struct v3dv_cmd_buffer *cmd_buffer,
64                 struct v3dv_image *image,
65                 const VkClearValue *clear_value,
66                 const VkImageSubresourceRange *range)
67 {
68    const VkOffset3D origin = { 0, 0, 0 };
69    VkFormat fb_format;
70    if (!v3dv_meta_can_use_tlb(image, &origin, &fb_format))
71       return false;
72 
73    uint32_t internal_type, internal_bpp;
74    v3dv_X(cmd_buffer->device, get_internal_type_bpp_for_image_aspects)
75       (fb_format, range->aspectMask,
76        &internal_type, &internal_bpp);
77 
78    union v3dv_clear_value hw_clear_value = { 0 };
79    if (range->aspectMask & VK_IMAGE_ASPECT_COLOR_BIT) {
80       get_hw_clear_color(cmd_buffer->device, &clear_value->color, fb_format,
81                          image->vk.format, internal_type, internal_bpp,
82                          &hw_clear_value.color[0]);
83    } else {
84       assert((range->aspectMask & VK_IMAGE_ASPECT_DEPTH_BIT) ||
85              (range->aspectMask & VK_IMAGE_ASPECT_STENCIL_BIT));
86       hw_clear_value.z = clear_value->depthStencil.depth;
87       hw_clear_value.s = clear_value->depthStencil.stencil;
88    }
89 
90    uint32_t level_count = vk_image_subresource_level_count(&image->vk, range);
91    uint32_t min_level = range->baseMipLevel;
92    uint32_t max_level = range->baseMipLevel + level_count;
93 
94    /* For 3D images baseArrayLayer and layerCount must be 0 and 1 respectively.
95     * Instead, we need to consider the full depth dimension of the image, which
96     * goes from 0 up to the level's depth extent.
97     */
98    uint32_t min_layer;
99    uint32_t max_layer;
100    if (image->vk.image_type != VK_IMAGE_TYPE_3D) {
101       min_layer = range->baseArrayLayer;
102       max_layer = range->baseArrayLayer +
103                   vk_image_subresource_layer_count(&image->vk, range);
104    } else {
105       min_layer = 0;
106       max_layer = 0;
107    }
108 
109    for (uint32_t level = min_level; level < max_level; level++) {
110       if (image->vk.image_type == VK_IMAGE_TYPE_3D)
111          max_layer = u_minify(image->vk.extent.depth, level);
112 
113       uint32_t width = u_minify(image->vk.extent.width, level);
114       uint32_t height = u_minify(image->vk.extent.height, level);
115 
116       struct v3dv_job *job =
117          v3dv_cmd_buffer_start_job(cmd_buffer, -1, V3DV_JOB_TYPE_GPU_CL);
118 
119       if (!job)
120          return true;
121 
122       v3dv_job_start_frame(job, width, height, max_layer, false,
123                            1, internal_bpp,
124                            image->vk.samples > VK_SAMPLE_COUNT_1_BIT);
125 
126       struct v3dv_meta_framebuffer framebuffer;
127       v3dv_X(job->device, meta_framebuffer_init)(&framebuffer, fb_format,
128                                                  internal_type,
129                                                  &job->frame_tiling);
130 
131       v3dv_X(job->device, job_emit_binning_flush)(job);
132 
133       /* If this triggers it is an application bug: the spec requires
134        * that any aspects to clear are present in the image.
135        */
136       assert(range->aspectMask & image->vk.aspects);
137 
138       v3dv_X(job->device, meta_emit_clear_image_rcl)
139          (job, image, &framebuffer, &hw_clear_value,
140           range->aspectMask, min_layer, max_layer, level);
141 
142       v3dv_cmd_buffer_finish_job(cmd_buffer);
143    }
144 
145    return true;
146 }
147 
148 VKAPI_ATTR void VKAPI_CALL
v3dv_CmdClearColorImage(VkCommandBuffer commandBuffer,VkImage _image,VkImageLayout imageLayout,const VkClearColorValue * pColor,uint32_t rangeCount,const VkImageSubresourceRange * pRanges)149 v3dv_CmdClearColorImage(VkCommandBuffer commandBuffer,
150                         VkImage _image,
151                         VkImageLayout imageLayout,
152                         const VkClearColorValue *pColor,
153                         uint32_t rangeCount,
154                         const VkImageSubresourceRange *pRanges)
155 {
156    V3DV_FROM_HANDLE(v3dv_cmd_buffer, cmd_buffer, commandBuffer);
157    V3DV_FROM_HANDLE(v3dv_image, image, _image);
158 
159    const VkClearValue clear_value = {
160       .color = *pColor,
161    };
162 
163    for (uint32_t i = 0; i < rangeCount; i++) {
164       if (clear_image_tlb(cmd_buffer, image, &clear_value, &pRanges[i]))
165          continue;
166       unreachable("Unsupported color clear.");
167    }
168 }
169 
170 VKAPI_ATTR void VKAPI_CALL
v3dv_CmdClearDepthStencilImage(VkCommandBuffer commandBuffer,VkImage _image,VkImageLayout imageLayout,const VkClearDepthStencilValue * pDepthStencil,uint32_t rangeCount,const VkImageSubresourceRange * pRanges)171 v3dv_CmdClearDepthStencilImage(VkCommandBuffer commandBuffer,
172                                VkImage _image,
173                                VkImageLayout imageLayout,
174                                const VkClearDepthStencilValue *pDepthStencil,
175                                uint32_t rangeCount,
176                                const VkImageSubresourceRange *pRanges)
177 {
178    V3DV_FROM_HANDLE(v3dv_cmd_buffer, cmd_buffer, commandBuffer);
179    V3DV_FROM_HANDLE(v3dv_image, image, _image);
180 
181    const VkClearValue clear_value = {
182       .depthStencil = *pDepthStencil,
183    };
184 
185    for (uint32_t i = 0; i < rangeCount; i++) {
186       if (clear_image_tlb(cmd_buffer, image, &clear_value, &pRanges[i]))
187          continue;
188       unreachable("Unsupported depth/stencil clear.");
189    }
190 }
191 
192 static void
destroy_color_clear_pipeline(VkDevice _device,uint64_t pipeline,VkAllocationCallbacks * alloc)193 destroy_color_clear_pipeline(VkDevice _device,
194                              uint64_t pipeline,
195                              VkAllocationCallbacks *alloc)
196 {
197    struct v3dv_meta_color_clear_pipeline *p =
198       (struct v3dv_meta_color_clear_pipeline *) (uintptr_t) pipeline;
199    v3dv_DestroyPipeline(_device, p->pipeline, alloc);
200    if (p->cached)
201       v3dv_DestroyRenderPass(_device, p->pass, alloc);
202    vk_free(alloc, p);
203 }
204 
205 static void
destroy_depth_clear_pipeline(VkDevice _device,struct v3dv_meta_depth_clear_pipeline * p,VkAllocationCallbacks * alloc)206 destroy_depth_clear_pipeline(VkDevice _device,
207                              struct v3dv_meta_depth_clear_pipeline *p,
208                              VkAllocationCallbacks *alloc)
209 {
210    v3dv_DestroyPipeline(_device, p->pipeline, alloc);
211    vk_free(alloc, p);
212 }
213 
214 static VkResult
create_color_clear_pipeline_layout(struct v3dv_device * device,VkPipelineLayout * pipeline_layout)215 create_color_clear_pipeline_layout(struct v3dv_device *device,
216                                    VkPipelineLayout *pipeline_layout)
217 {
218    /* FIXME: this is abusing a bit the API, since not all of our clear
219     * pipelines have a geometry shader. We could create 2 different pipeline
220     * layouts, but this works for us for now.
221     */
222    VkPushConstantRange ranges[2] = {
223       { VK_SHADER_STAGE_FRAGMENT_BIT, 0, 16 },
224       { VK_SHADER_STAGE_GEOMETRY_BIT, 16, 4 },
225    };
226 
227    VkPipelineLayoutCreateInfo info = {
228       .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
229       .setLayoutCount = 0,
230       .pushConstantRangeCount = 2,
231       .pPushConstantRanges = ranges,
232    };
233 
234    return v3dv_CreatePipelineLayout(v3dv_device_to_handle(device),
235                                     &info, &device->vk.alloc, pipeline_layout);
236 }
237 
238 static VkResult
create_depth_clear_pipeline_layout(struct v3dv_device * device,VkPipelineLayout * pipeline_layout)239 create_depth_clear_pipeline_layout(struct v3dv_device *device,
240                                    VkPipelineLayout *pipeline_layout)
241 {
242    /* FIXME: this is abusing a bit the API, since not all of our clear
243     * pipelines have a geometry shader. We could create 2 different pipeline
244     * layouts, but this works for us for now.
245     */
246    VkPushConstantRange ranges[2] = {
247       { VK_SHADER_STAGE_FRAGMENT_BIT, 0, 4 },
248       { VK_SHADER_STAGE_GEOMETRY_BIT, 4, 4 },
249    };
250 
251    VkPipelineLayoutCreateInfo info = {
252       .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
253       .setLayoutCount = 0,
254       .pushConstantRangeCount = 2,
255       .pPushConstantRanges = ranges
256    };
257 
258    return v3dv_CreatePipelineLayout(v3dv_device_to_handle(device),
259                                     &info, &device->vk.alloc, pipeline_layout);
260 }
261 
262 void
v3dv_meta_clear_init(struct v3dv_device * device)263 v3dv_meta_clear_init(struct v3dv_device *device)
264 {
265    device->meta.color_clear.cache =
266       _mesa_hash_table_create(NULL, u64_hash, u64_compare);
267 
268    create_color_clear_pipeline_layout(device,
269                                       &device->meta.color_clear.p_layout);
270 
271    device->meta.depth_clear.cache =
272       _mesa_hash_table_create(NULL, u64_hash, u64_compare);
273 
274    create_depth_clear_pipeline_layout(device,
275                                       &device->meta.depth_clear.p_layout);
276 }
277 
278 void
v3dv_meta_clear_finish(struct v3dv_device * device)279 v3dv_meta_clear_finish(struct v3dv_device *device)
280 {
281    VkDevice _device = v3dv_device_to_handle(device);
282 
283    hash_table_foreach(device->meta.color_clear.cache, entry) {
284       struct v3dv_meta_color_clear_pipeline *item = entry->data;
285       destroy_color_clear_pipeline(_device, (uintptr_t)item, &device->vk.alloc);
286    }
287    _mesa_hash_table_destroy(device->meta.color_clear.cache, NULL);
288 
289    if (device->meta.color_clear.p_layout) {
290       v3dv_DestroyPipelineLayout(_device, device->meta.color_clear.p_layout,
291                                  &device->vk.alloc);
292    }
293 
294    hash_table_foreach(device->meta.depth_clear.cache, entry) {
295       struct v3dv_meta_depth_clear_pipeline *item = entry->data;
296       destroy_depth_clear_pipeline(_device, item, &device->vk.alloc);
297    }
298    _mesa_hash_table_destroy(device->meta.depth_clear.cache, NULL);
299 
300    if (device->meta.depth_clear.p_layout) {
301       v3dv_DestroyPipelineLayout(_device, device->meta.depth_clear.p_layout,
302                                  &device->vk.alloc);
303    }
304 }
305 
306 static nir_ssa_def *
gen_rect_vertices(nir_builder * b)307 gen_rect_vertices(nir_builder *b)
308 {
309    nir_ssa_def *vertex_id = nir_load_vertex_id(b);
310 
311    /* vertex 0: -1.0, -1.0
312     * vertex 1: -1.0,  1.0
313     * vertex 2:  1.0, -1.0
314     * vertex 3:  1.0,  1.0
315     *
316     * so:
317     *
318     * channel 0 is vertex_id < 2 ? -1.0 :  1.0
319     * channel 1 is vertex id & 1 ?  1.0 : -1.0
320     */
321 
322    nir_ssa_def *one = nir_imm_int(b, 1);
323    nir_ssa_def *c0cmp = nir_ilt(b, vertex_id, nir_imm_int(b, 2));
324    nir_ssa_def *c1cmp = nir_ieq(b, nir_iand(b, vertex_id, one), one);
325 
326    nir_ssa_def *comp[4];
327    comp[0] = nir_bcsel(b, c0cmp,
328                        nir_imm_float(b, -1.0f),
329                        nir_imm_float(b, 1.0f));
330 
331    comp[1] = nir_bcsel(b, c1cmp,
332                        nir_imm_float(b, 1.0f),
333                        nir_imm_float(b, -1.0f));
334    comp[2] = nir_imm_float(b, 0.0f);
335    comp[3] = nir_imm_float(b, 1.0f);
336    return nir_vec(b, comp, 4);
337 }
338 
339 static nir_shader *
get_clear_rect_vs()340 get_clear_rect_vs()
341 {
342    const nir_shader_compiler_options *options = v3dv_pipeline_get_nir_options();
343    nir_builder b = nir_builder_init_simple_shader(MESA_SHADER_VERTEX, options,
344                                                   "meta clear vs");
345 
346    const struct glsl_type *vec4 = glsl_vec4_type();
347    nir_variable *vs_out_pos =
348       nir_variable_create(b.shader, nir_var_shader_out, vec4, "gl_Position");
349    vs_out_pos->data.location = VARYING_SLOT_POS;
350 
351    nir_ssa_def *pos = gen_rect_vertices(&b);
352    nir_store_var(&b, vs_out_pos, pos, 0xf);
353 
354    return b.shader;
355 }
356 
357 static nir_shader *
get_clear_rect_gs(uint32_t push_constant_layer_base)358 get_clear_rect_gs(uint32_t push_constant_layer_base)
359 {
360    /* FIXME: this creates a geometry shader that takes the index of a single
361     * layer to clear from push constants, so we need to emit a draw call for
362     * each layer that we want to clear. We could actually do better and have it
363     * take a range of layers and then emit one triangle per layer to clear,
364     * however, if we were to do this we would need to be careful not to exceed
365     * the maximum number of output vertices allowed in a geometry shader.
366     */
367    const nir_shader_compiler_options *options = v3dv_pipeline_get_nir_options();
368    nir_builder b = nir_builder_init_simple_shader(MESA_SHADER_GEOMETRY, options,
369                                                   "meta clear gs");
370    nir_shader *nir = b.shader;
371    nir->info.inputs_read = 1ull << VARYING_SLOT_POS;
372    nir->info.outputs_written = (1ull << VARYING_SLOT_POS) |
373                                (1ull << VARYING_SLOT_LAYER);
374    nir->info.gs.input_primitive = SHADER_PRIM_TRIANGLES;
375    nir->info.gs.output_primitive = SHADER_PRIM_TRIANGLE_STRIP;
376    nir->info.gs.vertices_in = 3;
377    nir->info.gs.vertices_out = 3;
378    nir->info.gs.invocations = 1;
379    nir->info.gs.active_stream_mask = 0x1;
380 
381    /* in vec4 gl_Position[3] */
382    nir_variable *gs_in_pos =
383       nir_variable_create(b.shader, nir_var_shader_in,
384                           glsl_array_type(glsl_vec4_type(), 3, 0),
385                           "in_gl_Position");
386    gs_in_pos->data.location = VARYING_SLOT_POS;
387 
388    /* out vec4 gl_Position */
389    nir_variable *gs_out_pos =
390       nir_variable_create(b.shader, nir_var_shader_out, glsl_vec4_type(),
391                           "out_gl_Position");
392    gs_out_pos->data.location = VARYING_SLOT_POS;
393 
394    /* out float gl_Layer */
395    nir_variable *gs_out_layer =
396       nir_variable_create(b.shader, nir_var_shader_out, glsl_float_type(),
397                           "out_gl_Layer");
398    gs_out_layer->data.location = VARYING_SLOT_LAYER;
399 
400    /* Emit output triangle */
401    for (uint32_t i = 0; i < 3; i++) {
402       /* gl_Position from shader input */
403       nir_deref_instr *in_pos_i =
404          nir_build_deref_array_imm(&b, nir_build_deref_var(&b, gs_in_pos), i);
405       nir_copy_deref(&b, nir_build_deref_var(&b, gs_out_pos), in_pos_i);
406 
407       /* gl_Layer from push constants */
408       nir_ssa_def *layer =
409          nir_load_push_constant(&b, 1, 32, nir_imm_int(&b, 0),
410                                 .base = push_constant_layer_base, .range = 4);
411       nir_store_var(&b, gs_out_layer, layer, 0x1);
412 
413       nir_emit_vertex(&b, 0);
414    }
415 
416    nir_end_primitive(&b, 0);
417 
418    return nir;
419 }
420 
421 static nir_shader *
get_color_clear_rect_fs(uint32_t rt_idx,VkFormat format)422 get_color_clear_rect_fs(uint32_t rt_idx, VkFormat format)
423 {
424    const nir_shader_compiler_options *options = v3dv_pipeline_get_nir_options();
425    nir_builder b = nir_builder_init_simple_shader(MESA_SHADER_FRAGMENT, options,
426                                                   "meta clear fs");
427 
428    enum pipe_format pformat = vk_format_to_pipe_format(format);
429    const struct glsl_type *fs_out_type =
430       util_format_is_float(pformat) ? glsl_vec4_type() : glsl_uvec4_type();
431 
432    nir_variable *fs_out_color =
433       nir_variable_create(b.shader, nir_var_shader_out, fs_out_type, "out_color");
434    fs_out_color->data.location = FRAG_RESULT_DATA0 + rt_idx;
435 
436    nir_ssa_def *color_load = nir_load_push_constant(&b, 4, 32, nir_imm_int(&b, 0), .base = 0, .range = 16);
437    nir_store_var(&b, fs_out_color, color_load, 0xf);
438 
439    return b.shader;
440 }
441 
442 static nir_shader *
get_depth_clear_rect_fs()443 get_depth_clear_rect_fs()
444 {
445    const nir_shader_compiler_options *options = v3dv_pipeline_get_nir_options();
446    nir_builder b = nir_builder_init_simple_shader(MESA_SHADER_FRAGMENT, options,
447                                                   "meta depth clear fs");
448 
449    nir_variable *fs_out_depth =
450       nir_variable_create(b.shader, nir_var_shader_out, glsl_float_type(),
451                           "out_depth");
452    fs_out_depth->data.location = FRAG_RESULT_DEPTH;
453 
454    nir_ssa_def *depth_load =
455       nir_load_push_constant(&b, 1, 32, nir_imm_int(&b, 0), .base = 0, .range = 4);
456 
457    nir_store_var(&b, fs_out_depth, depth_load, 0x1);
458 
459    return b.shader;
460 }
461 
462 static VkResult
create_pipeline(struct v3dv_device * device,struct v3dv_render_pass * pass,uint32_t subpass_idx,uint32_t samples,struct nir_shader * vs_nir,struct nir_shader * gs_nir,struct nir_shader * fs_nir,const VkPipelineVertexInputStateCreateInfo * vi_state,const VkPipelineDepthStencilStateCreateInfo * ds_state,const VkPipelineColorBlendStateCreateInfo * cb_state,const VkPipelineLayout layout,VkPipeline * pipeline)463 create_pipeline(struct v3dv_device *device,
464                 struct v3dv_render_pass *pass,
465                 uint32_t subpass_idx,
466                 uint32_t samples,
467                 struct nir_shader *vs_nir,
468                 struct nir_shader *gs_nir,
469                 struct nir_shader *fs_nir,
470                 const VkPipelineVertexInputStateCreateInfo *vi_state,
471                 const VkPipelineDepthStencilStateCreateInfo *ds_state,
472                 const VkPipelineColorBlendStateCreateInfo *cb_state,
473                 const VkPipelineLayout layout,
474                 VkPipeline *pipeline)
475 {
476    VkPipelineShaderStageCreateInfo stages[3] = { 0 };
477    struct vk_shader_module vs_m;
478    struct vk_shader_module gs_m;
479    struct vk_shader_module fs_m;
480 
481    uint32_t stage_count = 0;
482    v3dv_shader_module_internal_init(device, &vs_m, vs_nir);
483    stages[stage_count].sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO;
484    stages[stage_count].stage = VK_SHADER_STAGE_VERTEX_BIT;
485    stages[stage_count].module = vk_shader_module_to_handle(&vs_m);
486    stages[stage_count].pName = "main";
487    stage_count++;
488 
489    if (gs_nir) {
490       v3dv_shader_module_internal_init(device, &gs_m, gs_nir);
491       stages[stage_count].sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO;
492       stages[stage_count].stage = VK_SHADER_STAGE_GEOMETRY_BIT;
493       stages[stage_count].module = vk_shader_module_to_handle(&gs_m);
494       stages[stage_count].pName = "main";
495       stage_count++;
496    }
497 
498    if (fs_nir) {
499       v3dv_shader_module_internal_init(device, &fs_m, fs_nir);
500       stages[stage_count].sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO;
501       stages[stage_count].stage = VK_SHADER_STAGE_FRAGMENT_BIT;
502       stages[stage_count].module = vk_shader_module_to_handle(&fs_m);
503       stages[stage_count].pName = "main";
504       stage_count++;
505    }
506 
507    VkGraphicsPipelineCreateInfo info = {
508       .sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO,
509 
510       .stageCount = stage_count,
511       .pStages = stages,
512 
513       .pVertexInputState = vi_state,
514 
515       .pInputAssemblyState = &(VkPipelineInputAssemblyStateCreateInfo) {
516          .sType = VK_STRUCTURE_TYPE_PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO,
517          .topology = VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP,
518          .primitiveRestartEnable = false,
519       },
520 
521       .pViewportState = &(VkPipelineViewportStateCreateInfo) {
522          .sType = VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO,
523          .viewportCount = 1,
524          .scissorCount = 1,
525       },
526 
527       .pRasterizationState = &(VkPipelineRasterizationStateCreateInfo) {
528          .sType = VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_CREATE_INFO,
529          .rasterizerDiscardEnable = false,
530          .polygonMode = VK_POLYGON_MODE_FILL,
531          .cullMode = VK_CULL_MODE_NONE,
532          .frontFace = VK_FRONT_FACE_COUNTER_CLOCKWISE,
533          .depthBiasEnable = false,
534       },
535 
536       .pMultisampleState = &(VkPipelineMultisampleStateCreateInfo) {
537          .sType = VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO,
538          .rasterizationSamples = samples,
539          .sampleShadingEnable = false,
540          .pSampleMask = NULL,
541          .alphaToCoverageEnable = false,
542          .alphaToOneEnable = false,
543       },
544 
545       .pDepthStencilState = ds_state,
546 
547       .pColorBlendState = cb_state,
548 
549       /* The meta clear pipeline declares all state as dynamic.
550        * As a consequence, vkCmdBindPipeline writes no dynamic state
551        * to the cmd buffer. Therefore, at the end of the meta clear,
552        * we need only restore dynamic state that was vkCmdSet.
553        */
554       .pDynamicState = &(VkPipelineDynamicStateCreateInfo) {
555          .sType = VK_STRUCTURE_TYPE_PIPELINE_DYNAMIC_STATE_CREATE_INFO,
556          .dynamicStateCount = 6,
557          .pDynamicStates = (VkDynamicState[]) {
558             VK_DYNAMIC_STATE_VIEWPORT,
559             VK_DYNAMIC_STATE_SCISSOR,
560             VK_DYNAMIC_STATE_STENCIL_COMPARE_MASK,
561             VK_DYNAMIC_STATE_STENCIL_WRITE_MASK,
562             VK_DYNAMIC_STATE_STENCIL_REFERENCE,
563             VK_DYNAMIC_STATE_BLEND_CONSTANTS,
564             VK_DYNAMIC_STATE_DEPTH_BIAS,
565             VK_DYNAMIC_STATE_LINE_WIDTH,
566          },
567       },
568 
569       .flags = 0,
570       .layout = layout,
571       .renderPass = v3dv_render_pass_to_handle(pass),
572       .subpass = subpass_idx,
573    };
574 
575    VkResult result =
576       v3dv_CreateGraphicsPipelines(v3dv_device_to_handle(device),
577                                    VK_NULL_HANDLE,
578                                    1, &info,
579                                    &device->vk.alloc,
580                                    pipeline);
581 
582    ralloc_free(vs_nir);
583    ralloc_free(fs_nir);
584 
585    return result;
586 }
587 
588 static VkResult
create_color_clear_pipeline(struct v3dv_device * device,struct v3dv_render_pass * pass,uint32_t subpass_idx,uint32_t rt_idx,VkFormat format,uint32_t samples,uint32_t components,bool is_layered,VkPipelineLayout pipeline_layout,VkPipeline * pipeline)589 create_color_clear_pipeline(struct v3dv_device *device,
590                             struct v3dv_render_pass *pass,
591                             uint32_t subpass_idx,
592                             uint32_t rt_idx,
593                             VkFormat format,
594                             uint32_t samples,
595                             uint32_t components,
596                             bool is_layered,
597                             VkPipelineLayout pipeline_layout,
598                             VkPipeline *pipeline)
599 {
600    nir_shader *vs_nir = get_clear_rect_vs();
601    nir_shader *fs_nir = get_color_clear_rect_fs(rt_idx, format);
602    nir_shader *gs_nir = is_layered ? get_clear_rect_gs(16) : NULL;
603 
604    const VkPipelineVertexInputStateCreateInfo vi_state = {
605       .sType = VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO,
606       .vertexBindingDescriptionCount = 0,
607       .vertexAttributeDescriptionCount = 0,
608    };
609 
610    const VkPipelineDepthStencilStateCreateInfo ds_state = {
611       .sType = VK_STRUCTURE_TYPE_PIPELINE_DEPTH_STENCIL_STATE_CREATE_INFO,
612       .depthTestEnable = false,
613       .depthWriteEnable = false,
614       .depthBoundsTestEnable = false,
615       .stencilTestEnable = false,
616    };
617 
618    assert(subpass_idx < pass->subpass_count);
619    const uint32_t color_count = pass->subpasses[subpass_idx].color_count;
620    assert(rt_idx < color_count);
621 
622    VkPipelineColorBlendAttachmentState blend_att_state[V3D_MAX_DRAW_BUFFERS];
623    for (uint32_t i = 0; i < color_count; i++) {
624       blend_att_state[i] = (VkPipelineColorBlendAttachmentState) {
625          .blendEnable = false,
626          .colorWriteMask = i == rt_idx ? components : 0,
627       };
628    }
629 
630    const VkPipelineColorBlendStateCreateInfo cb_state = {
631       .sType = VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO,
632       .logicOpEnable = false,
633       .attachmentCount = color_count,
634       .pAttachments = blend_att_state
635    };
636 
637    return create_pipeline(device,
638                           pass, subpass_idx,
639                           samples,
640                           vs_nir, gs_nir, fs_nir,
641                           &vi_state,
642                           &ds_state,
643                           &cb_state,
644                           pipeline_layout,
645                           pipeline);
646 }
647 
648 static VkResult
create_depth_clear_pipeline(struct v3dv_device * device,VkImageAspectFlags aspects,struct v3dv_render_pass * pass,uint32_t subpass_idx,uint32_t samples,bool is_layered,VkPipelineLayout pipeline_layout,VkPipeline * pipeline)649 create_depth_clear_pipeline(struct v3dv_device *device,
650                             VkImageAspectFlags aspects,
651                             struct v3dv_render_pass *pass,
652                             uint32_t subpass_idx,
653                             uint32_t samples,
654                             bool is_layered,
655                             VkPipelineLayout pipeline_layout,
656                             VkPipeline *pipeline)
657 {
658    const bool has_depth = aspects & VK_IMAGE_ASPECT_DEPTH_BIT;
659    const bool has_stencil = aspects & VK_IMAGE_ASPECT_STENCIL_BIT;
660    assert(has_depth || has_stencil);
661 
662    nir_shader *vs_nir = get_clear_rect_vs();
663    nir_shader *fs_nir = has_depth ? get_depth_clear_rect_fs() : NULL;
664    nir_shader *gs_nir = is_layered ? get_clear_rect_gs(4) : NULL;
665 
666    const VkPipelineVertexInputStateCreateInfo vi_state = {
667       .sType = VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO,
668       .vertexBindingDescriptionCount = 0,
669       .vertexAttributeDescriptionCount = 0,
670    };
671 
672    const VkPipelineDepthStencilStateCreateInfo ds_state = {
673       .sType = VK_STRUCTURE_TYPE_PIPELINE_DEPTH_STENCIL_STATE_CREATE_INFO,
674       .depthTestEnable = has_depth,
675       .depthWriteEnable = has_depth,
676       .depthCompareOp = VK_COMPARE_OP_ALWAYS,
677       .depthBoundsTestEnable = false,
678       .stencilTestEnable = has_stencil,
679       .front = {
680          .passOp = VK_STENCIL_OP_REPLACE,
681          .compareOp = VK_COMPARE_OP_ALWAYS,
682          /* compareMask, writeMask and reference are dynamic state */
683       },
684       .back = { 0 },
685    };
686 
687    assert(subpass_idx < pass->subpass_count);
688    VkPipelineColorBlendAttachmentState blend_att_state[V3D_MAX_DRAW_BUFFERS] = { 0 };
689    const VkPipelineColorBlendStateCreateInfo cb_state = {
690       .sType = VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO,
691       .logicOpEnable = false,
692       .attachmentCount = pass->subpasses[subpass_idx].color_count,
693       .pAttachments = blend_att_state,
694    };
695 
696    return create_pipeline(device,
697                           pass, subpass_idx,
698                           samples,
699                           vs_nir, gs_nir, fs_nir,
700                           &vi_state,
701                           &ds_state,
702                           &cb_state,
703                           pipeline_layout,
704                           pipeline);
705 }
706 
707 static VkResult
create_color_clear_render_pass(struct v3dv_device * device,uint32_t rt_idx,VkFormat format,uint32_t samples,VkRenderPass * pass)708 create_color_clear_render_pass(struct v3dv_device *device,
709                                uint32_t rt_idx,
710                                VkFormat format,
711                                uint32_t samples,
712                                VkRenderPass *pass)
713 {
714    VkAttachmentDescription2 att = {
715       .sType = VK_STRUCTURE_TYPE_ATTACHMENT_DESCRIPTION_2,
716       .format = format,
717       .samples = samples,
718       .loadOp = VK_ATTACHMENT_LOAD_OP_LOAD,
719       .storeOp = VK_ATTACHMENT_STORE_OP_STORE,
720       .initialLayout = VK_IMAGE_LAYOUT_GENERAL,
721       .finalLayout = VK_IMAGE_LAYOUT_GENERAL,
722    };
723 
724    VkAttachmentReference2 att_ref = {
725       .sType = VK_STRUCTURE_TYPE_ATTACHMENT_REFERENCE_2,
726       .attachment = rt_idx,
727       .layout = VK_IMAGE_LAYOUT_GENERAL,
728    };
729 
730    VkSubpassDescription2 subpass = {
731       .sType = VK_STRUCTURE_TYPE_SUBPASS_DESCRIPTION_2,
732       .pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS,
733       .inputAttachmentCount = 0,
734       .colorAttachmentCount = 1,
735       .pColorAttachments = &att_ref,
736       .pResolveAttachments = NULL,
737       .pDepthStencilAttachment = NULL,
738       .preserveAttachmentCount = 0,
739       .pPreserveAttachments = NULL,
740    };
741 
742    VkRenderPassCreateInfo2 info = {
743       .sType = VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO_2,
744       .attachmentCount = 1,
745       .pAttachments = &att,
746       .subpassCount = 1,
747       .pSubpasses = &subpass,
748       .dependencyCount = 0,
749       .pDependencies = NULL,
750    };
751 
752    return v3dv_CreateRenderPass2(v3dv_device_to_handle(device),
753                                  &info, &device->vk.alloc, pass);
754 }
755 
756 static inline uint64_t
get_color_clear_pipeline_cache_key(uint32_t rt_idx,VkFormat format,uint32_t samples,uint32_t components,bool is_layered)757 get_color_clear_pipeline_cache_key(uint32_t rt_idx,
758                                    VkFormat format,
759                                    uint32_t samples,
760                                    uint32_t components,
761                                    bool is_layered)
762 {
763    assert(rt_idx < V3D_MAX_DRAW_BUFFERS);
764 
765    uint64_t key = 0;
766    uint32_t bit_offset = 0;
767 
768    key |= rt_idx;
769    bit_offset += 2;
770 
771    key |= ((uint64_t) format) << bit_offset;
772    bit_offset += 32;
773 
774    key |= ((uint64_t) samples) << bit_offset;
775    bit_offset += 4;
776 
777    key |= ((uint64_t) components) << bit_offset;
778    bit_offset += 4;
779 
780    key |= (is_layered ? 1ull : 0ull) << bit_offset;
781    bit_offset += 1;
782 
783    assert(bit_offset <= 64);
784    return key;
785 }
786 
787 static inline uint64_t
get_depth_clear_pipeline_cache_key(VkImageAspectFlags aspects,VkFormat format,uint32_t samples,bool is_layered)788 get_depth_clear_pipeline_cache_key(VkImageAspectFlags aspects,
789                                    VkFormat format,
790                                    uint32_t samples,
791                                    bool is_layered)
792 {
793    uint64_t key = 0;
794    uint32_t bit_offset = 0;
795 
796    key |= format;
797    bit_offset += 32;
798 
799    key |= ((uint64_t) samples) << bit_offset;
800    bit_offset += 4;
801 
802    const bool has_depth = (aspects & VK_IMAGE_ASPECT_DEPTH_BIT) ? 1 : 0;
803    key |= ((uint64_t) has_depth) << bit_offset;
804    bit_offset++;
805 
806    const bool has_stencil = (aspects & VK_IMAGE_ASPECT_STENCIL_BIT) ? 1 : 0;
807    key |= ((uint64_t) has_stencil) << bit_offset;
808    bit_offset++;;
809 
810    key |= (is_layered ? 1ull : 0ull) << bit_offset;
811    bit_offset += 1;
812 
813    assert(bit_offset <= 64);
814    return key;
815 }
816 
817 static VkResult
get_color_clear_pipeline(struct v3dv_device * device,struct v3dv_render_pass * pass,uint32_t subpass_idx,uint32_t rt_idx,uint32_t attachment_idx,VkFormat format,uint32_t samples,uint32_t components,bool is_layered,struct v3dv_meta_color_clear_pipeline ** pipeline)818 get_color_clear_pipeline(struct v3dv_device *device,
819                          struct v3dv_render_pass *pass,
820                          uint32_t subpass_idx,
821                          uint32_t rt_idx,
822                          uint32_t attachment_idx,
823                          VkFormat format,
824                          uint32_t samples,
825                          uint32_t components,
826                          bool is_layered,
827                          struct v3dv_meta_color_clear_pipeline **pipeline)
828 {
829    assert(vk_format_is_color(format));
830 
831    VkResult result = VK_SUCCESS;
832 
833    /* If pass != NULL it means that we are emitting the clear as a draw call
834     * in the current pass bound by the application. In that case, we can't
835     * cache the pipeline, since it will be referencing that pass and the
836     * application could be destroying it at any point. Hopefully, the perf
837     * impact is not too big since we still have the device pipeline cache
838     * around and we won't end up re-compiling the clear shader.
839     *
840     * FIXME: alternatively, we could refcount (or maybe clone) the render pass
841     * provided by the application and include it in the pipeline key setup
842     * to make caching safe in this scenario, however, based on tests with
843     * vkQuake3, the fact that we are not caching here doesn't seem to have
844     * any significant impact in performance, so it might not be worth it.
845     */
846    const bool can_cache_pipeline = (pass == NULL);
847 
848    uint64_t key;
849    if (can_cache_pipeline) {
850       key = get_color_clear_pipeline_cache_key(rt_idx, format, samples,
851                                                components, is_layered);
852       mtx_lock(&device->meta.mtx);
853       struct hash_entry *entry =
854          _mesa_hash_table_search(device->meta.color_clear.cache, &key);
855       if (entry) {
856          mtx_unlock(&device->meta.mtx);
857          *pipeline = entry->data;
858          return VK_SUCCESS;
859       }
860    }
861 
862    *pipeline = vk_zalloc2(&device->vk.alloc, NULL, sizeof(**pipeline), 8,
863                           VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
864 
865    if (*pipeline == NULL) {
866       result = VK_ERROR_OUT_OF_HOST_MEMORY;
867       goto fail;
868    }
869 
870    if (!pass) {
871       result = create_color_clear_render_pass(device,
872                                               rt_idx,
873                                               format,
874                                               samples,
875                                               &(*pipeline)->pass);
876       if (result != VK_SUCCESS)
877          goto fail;
878 
879       pass = v3dv_render_pass_from_handle((*pipeline)->pass);
880    } else {
881       (*pipeline)->pass = v3dv_render_pass_to_handle(pass);
882    }
883 
884    result = create_color_clear_pipeline(device,
885                                         pass,
886                                         subpass_idx,
887                                         rt_idx,
888                                         format,
889                                         samples,
890                                         components,
891                                         is_layered,
892                                         device->meta.color_clear.p_layout,
893                                         &(*pipeline)->pipeline);
894    if (result != VK_SUCCESS)
895       goto fail;
896 
897    if (can_cache_pipeline) {
898       (*pipeline)->key = key;
899       (*pipeline)->cached = true;
900       _mesa_hash_table_insert(device->meta.color_clear.cache,
901                               &(*pipeline)->key, *pipeline);
902 
903       mtx_unlock(&device->meta.mtx);
904    }
905 
906    return VK_SUCCESS;
907 
908 fail:
909    if (can_cache_pipeline)
910       mtx_unlock(&device->meta.mtx);
911 
912    VkDevice _device = v3dv_device_to_handle(device);
913    if (*pipeline) {
914       if ((*pipeline)->cached)
915          v3dv_DestroyRenderPass(_device, (*pipeline)->pass, &device->vk.alloc);
916       if ((*pipeline)->pipeline)
917          v3dv_DestroyPipeline(_device, (*pipeline)->pipeline, &device->vk.alloc);
918       vk_free(&device->vk.alloc, *pipeline);
919       *pipeline = NULL;
920    }
921 
922    return result;
923 }
924 
925 static VkResult
get_depth_clear_pipeline(struct v3dv_device * device,VkImageAspectFlags aspects,struct v3dv_render_pass * pass,uint32_t subpass_idx,uint32_t attachment_idx,bool is_layered,struct v3dv_meta_depth_clear_pipeline ** pipeline)926 get_depth_clear_pipeline(struct v3dv_device *device,
927                          VkImageAspectFlags aspects,
928                          struct v3dv_render_pass *pass,
929                          uint32_t subpass_idx,
930                          uint32_t attachment_idx,
931                          bool is_layered,
932                          struct v3dv_meta_depth_clear_pipeline **pipeline)
933 {
934    assert(subpass_idx < pass->subpass_count);
935    assert(attachment_idx != VK_ATTACHMENT_UNUSED);
936    assert(attachment_idx < pass->attachment_count);
937 
938    VkResult result = VK_SUCCESS;
939 
940    const uint32_t samples = pass->attachments[attachment_idx].desc.samples;
941    const VkFormat format = pass->attachments[attachment_idx].desc.format;
942    assert(vk_format_is_depth_or_stencil(format));
943 
944    const uint64_t key =
945       get_depth_clear_pipeline_cache_key(aspects, format, samples, is_layered);
946    mtx_lock(&device->meta.mtx);
947    struct hash_entry *entry =
948       _mesa_hash_table_search(device->meta.depth_clear.cache, &key);
949    if (entry) {
950       mtx_unlock(&device->meta.mtx);
951       *pipeline = entry->data;
952       return VK_SUCCESS;
953    }
954 
955    *pipeline = vk_zalloc2(&device->vk.alloc, NULL, sizeof(**pipeline), 8,
956                           VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
957 
958    if (*pipeline == NULL) {
959       result = VK_ERROR_OUT_OF_HOST_MEMORY;
960       goto fail;
961    }
962 
963    result = create_depth_clear_pipeline(device,
964                                         aspects,
965                                         pass,
966                                         subpass_idx,
967                                         samples,
968                                         is_layered,
969                                         device->meta.depth_clear.p_layout,
970                                         &(*pipeline)->pipeline);
971    if (result != VK_SUCCESS)
972       goto fail;
973 
974    (*pipeline)->key = key;
975    _mesa_hash_table_insert(device->meta.depth_clear.cache,
976                            &(*pipeline)->key, *pipeline);
977 
978    mtx_unlock(&device->meta.mtx);
979    return VK_SUCCESS;
980 
981 fail:
982    mtx_unlock(&device->meta.mtx);
983 
984    VkDevice _device = v3dv_device_to_handle(device);
985    if (*pipeline) {
986       if ((*pipeline)->pipeline)
987          v3dv_DestroyPipeline(_device, (*pipeline)->pipeline, &device->vk.alloc);
988       vk_free(&device->vk.alloc, *pipeline);
989       *pipeline = NULL;
990    }
991 
992    return result;
993 }
994 
995 /* Emits a scissored quad in the clear color */
996 static void
emit_subpass_color_clear_rects(struct v3dv_cmd_buffer * cmd_buffer,struct v3dv_render_pass * pass,struct v3dv_subpass * subpass,uint32_t rt_idx,const VkClearColorValue * clear_color,bool is_layered,bool all_rects_same_layers,uint32_t rect_count,const VkClearRect * rects)997 emit_subpass_color_clear_rects(struct v3dv_cmd_buffer *cmd_buffer,
998                                struct v3dv_render_pass *pass,
999                                struct v3dv_subpass *subpass,
1000                                uint32_t rt_idx,
1001                                const VkClearColorValue *clear_color,
1002                                bool is_layered,
1003                                bool all_rects_same_layers,
1004                                uint32_t rect_count,
1005                                const VkClearRect *rects)
1006 {
1007    /* Skip if attachment is unused in the current subpass */
1008    assert(rt_idx < subpass->color_count);
1009    const uint32_t attachment_idx = subpass->color_attachments[rt_idx].attachment;
1010    if (attachment_idx == VK_ATTACHMENT_UNUSED)
1011       return;
1012 
1013    /* Obtain a pipeline for this clear */
1014    assert(attachment_idx < cmd_buffer->state.pass->attachment_count);
1015    const VkFormat format =
1016       cmd_buffer->state.pass->attachments[attachment_idx].desc.format;
1017    const VkFormat samples =
1018       cmd_buffer->state.pass->attachments[attachment_idx].desc.samples;
1019    const uint32_t components = VK_COLOR_COMPONENT_R_BIT |
1020                                VK_COLOR_COMPONENT_G_BIT |
1021                                VK_COLOR_COMPONENT_B_BIT |
1022                                VK_COLOR_COMPONENT_A_BIT;
1023    struct v3dv_meta_color_clear_pipeline *pipeline = NULL;
1024    VkResult result = get_color_clear_pipeline(cmd_buffer->device,
1025                                               pass,
1026                                               cmd_buffer->state.subpass_idx,
1027                                               rt_idx,
1028                                               attachment_idx,
1029                                               format,
1030                                               samples,
1031                                               components,
1032                                               is_layered,
1033                                               &pipeline);
1034    if (result != VK_SUCCESS) {
1035       if (result == VK_ERROR_OUT_OF_HOST_MEMORY)
1036          v3dv_flag_oom(cmd_buffer, NULL);
1037       return;
1038    }
1039    assert(pipeline && pipeline->pipeline);
1040 
1041    /* Emit clear rects */
1042    v3dv_cmd_buffer_meta_state_push(cmd_buffer, false);
1043 
1044    VkCommandBuffer cmd_buffer_handle = v3dv_cmd_buffer_to_handle(cmd_buffer);
1045    v3dv_CmdPushConstants(cmd_buffer_handle,
1046                          cmd_buffer->device->meta.depth_clear.p_layout,
1047                          VK_SHADER_STAGE_FRAGMENT_BIT, 0, 16,
1048                          clear_color->float32);
1049 
1050    v3dv_CmdBindPipeline(cmd_buffer_handle,
1051                         VK_PIPELINE_BIND_POINT_GRAPHICS,
1052                         pipeline->pipeline);
1053 
1054    uint32_t dynamic_states = V3DV_CMD_DIRTY_VIEWPORT | V3DV_CMD_DIRTY_SCISSOR;
1055 
1056    for (uint32_t i = 0; i < rect_count; i++) {
1057       const VkViewport viewport = {
1058          .x = rects[i].rect.offset.x,
1059          .y = rects[i].rect.offset.y,
1060          .width = rects[i].rect.extent.width,
1061          .height = rects[i].rect.extent.height,
1062          .minDepth = 0.0f,
1063          .maxDepth = 1.0f
1064       };
1065       v3dv_CmdSetViewport(cmd_buffer_handle, 0, 1, &viewport);
1066       v3dv_CmdSetScissor(cmd_buffer_handle, 0, 1, &rects[i].rect);
1067 
1068       if (is_layered) {
1069          for (uint32_t layer_offset = 0; layer_offset < rects[i].layerCount;
1070               layer_offset++) {
1071             uint32_t layer = rects[i].baseArrayLayer + layer_offset;
1072             v3dv_CmdPushConstants(cmd_buffer_handle,
1073                                   cmd_buffer->device->meta.depth_clear.p_layout,
1074                                   VK_SHADER_STAGE_GEOMETRY_BIT, 16, 4, &layer);
1075             v3dv_CmdDraw(cmd_buffer_handle, 4, 1, 0, 0);
1076          }
1077       } else {
1078          assert(rects[i].baseArrayLayer == 0 && rects[i].layerCount == 1);
1079          v3dv_CmdDraw(cmd_buffer_handle, 4, 1, 0, 0);
1080       }
1081    }
1082 
1083    /* Subpass pipelines can't be cached because they include a reference to the
1084     * render pass currently bound by the application, which means that we need
1085     * to destroy them manually here.
1086     */
1087    assert(!pipeline->cached);
1088    v3dv_cmd_buffer_add_private_obj(
1089       cmd_buffer, (uintptr_t)pipeline,
1090       (v3dv_cmd_buffer_private_obj_destroy_cb) destroy_color_clear_pipeline);
1091 
1092    v3dv_cmd_buffer_meta_state_pop(cmd_buffer, dynamic_states, false);
1093 }
1094 
1095 /* Emits a scissored quad, clearing the depth aspect by writing to gl_FragDepth
1096  * and the stencil aspect by using stencil testing.
1097  */
1098 static void
emit_subpass_ds_clear_rects(struct v3dv_cmd_buffer * cmd_buffer,struct v3dv_render_pass * pass,struct v3dv_subpass * subpass,VkImageAspectFlags aspects,const VkClearDepthStencilValue * clear_ds,bool is_layered,bool all_rects_same_layers,uint32_t rect_count,const VkClearRect * rects)1099 emit_subpass_ds_clear_rects(struct v3dv_cmd_buffer *cmd_buffer,
1100                             struct v3dv_render_pass *pass,
1101                             struct v3dv_subpass *subpass,
1102                             VkImageAspectFlags aspects,
1103                             const VkClearDepthStencilValue *clear_ds,
1104                             bool is_layered,
1105                             bool all_rects_same_layers,
1106                             uint32_t rect_count,
1107                             const VkClearRect *rects)
1108 {
1109    /* Skip if attachment is unused in the current subpass */
1110    const uint32_t attachment_idx = subpass->ds_attachment.attachment;
1111    if (attachment_idx == VK_ATTACHMENT_UNUSED)
1112       return;
1113 
1114    /* Obtain a pipeline for this clear */
1115    assert(attachment_idx < cmd_buffer->state.pass->attachment_count);
1116    struct v3dv_meta_depth_clear_pipeline *pipeline = NULL;
1117    VkResult result = get_depth_clear_pipeline(cmd_buffer->device,
1118                                               aspects,
1119                                               pass,
1120                                               cmd_buffer->state.subpass_idx,
1121                                               attachment_idx,
1122                                               is_layered,
1123                                               &pipeline);
1124    if (result != VK_SUCCESS) {
1125       if (result == VK_ERROR_OUT_OF_HOST_MEMORY)
1126          v3dv_flag_oom(cmd_buffer, NULL);
1127       return;
1128    }
1129    assert(pipeline && pipeline->pipeline);
1130 
1131    /* Emit clear rects */
1132    v3dv_cmd_buffer_meta_state_push(cmd_buffer, false);
1133 
1134    VkCommandBuffer cmd_buffer_handle = v3dv_cmd_buffer_to_handle(cmd_buffer);
1135    v3dv_CmdPushConstants(cmd_buffer_handle,
1136                          cmd_buffer->device->meta.depth_clear.p_layout,
1137                          VK_SHADER_STAGE_FRAGMENT_BIT, 0, 4,
1138                          &clear_ds->depth);
1139 
1140    v3dv_CmdBindPipeline(cmd_buffer_handle,
1141                         VK_PIPELINE_BIND_POINT_GRAPHICS,
1142                         pipeline->pipeline);
1143 
1144    uint32_t dynamic_states = V3DV_CMD_DIRTY_VIEWPORT | V3DV_CMD_DIRTY_SCISSOR;
1145    if (aspects & VK_IMAGE_ASPECT_STENCIL_BIT) {
1146       v3dv_CmdSetStencilReference(cmd_buffer_handle,
1147                                   VK_STENCIL_FACE_FRONT_AND_BACK,
1148                                   clear_ds->stencil);
1149       v3dv_CmdSetStencilWriteMask(cmd_buffer_handle,
1150                                   VK_STENCIL_FACE_FRONT_AND_BACK, 0xff);
1151       v3dv_CmdSetStencilCompareMask(cmd_buffer_handle,
1152                                     VK_STENCIL_FACE_FRONT_AND_BACK, 0xff);
1153       dynamic_states |= VK_DYNAMIC_STATE_STENCIL_COMPARE_MASK |
1154                         VK_DYNAMIC_STATE_STENCIL_WRITE_MASK |
1155                         VK_DYNAMIC_STATE_STENCIL_REFERENCE;
1156    }
1157 
1158    for (uint32_t i = 0; i < rect_count; i++) {
1159       const VkViewport viewport = {
1160          .x = rects[i].rect.offset.x,
1161          .y = rects[i].rect.offset.y,
1162          .width = rects[i].rect.extent.width,
1163          .height = rects[i].rect.extent.height,
1164          .minDepth = 0.0f,
1165          .maxDepth = 1.0f
1166       };
1167       v3dv_CmdSetViewport(cmd_buffer_handle, 0, 1, &viewport);
1168       v3dv_CmdSetScissor(cmd_buffer_handle, 0, 1, &rects[i].rect);
1169       if (is_layered) {
1170          for (uint32_t layer_offset = 0; layer_offset < rects[i].layerCount;
1171               layer_offset++) {
1172             uint32_t layer = rects[i].baseArrayLayer + layer_offset;
1173             v3dv_CmdPushConstants(cmd_buffer_handle,
1174                                   cmd_buffer->device->meta.depth_clear.p_layout,
1175                                   VK_SHADER_STAGE_GEOMETRY_BIT, 4, 4, &layer);
1176             v3dv_CmdDraw(cmd_buffer_handle, 4, 1, 0, 0);
1177          }
1178       } else {
1179          assert(rects[i].baseArrayLayer == 0 && rects[i].layerCount == 1);
1180          v3dv_CmdDraw(cmd_buffer_handle, 4, 1, 0, 0);
1181       }
1182    }
1183 
1184    v3dv_cmd_buffer_meta_state_pop(cmd_buffer, dynamic_states, false);
1185 }
1186 
1187 static void
gather_layering_info(uint32_t rect_count,const VkClearRect * rects,bool * is_layered,bool * all_rects_same_layers)1188 gather_layering_info(uint32_t rect_count, const VkClearRect *rects,
1189                      bool *is_layered, bool *all_rects_same_layers)
1190 {
1191    *all_rects_same_layers = true;
1192 
1193    uint32_t min_layer = rects[0].baseArrayLayer;
1194    uint32_t max_layer = rects[0].baseArrayLayer + rects[0].layerCount - 1;
1195    for (uint32_t i = 1; i < rect_count; i++) {
1196       if (rects[i].baseArrayLayer != rects[i - 1].baseArrayLayer ||
1197           rects[i].layerCount != rects[i - 1].layerCount) {
1198          *all_rects_same_layers = false;
1199          min_layer = MIN2(min_layer, rects[i].baseArrayLayer);
1200          max_layer = MAX2(max_layer, rects[i].baseArrayLayer +
1201                                      rects[i].layerCount - 1);
1202       }
1203    }
1204 
1205    *is_layered = !(min_layer == 0 && max_layer == 0);
1206 }
1207 
1208 VKAPI_ATTR void VKAPI_CALL
v3dv_CmdClearAttachments(VkCommandBuffer commandBuffer,uint32_t attachmentCount,const VkClearAttachment * pAttachments,uint32_t rectCount,const VkClearRect * pRects)1209 v3dv_CmdClearAttachments(VkCommandBuffer commandBuffer,
1210                          uint32_t attachmentCount,
1211                          const VkClearAttachment *pAttachments,
1212                          uint32_t rectCount,
1213                          const VkClearRect *pRects)
1214 {
1215    V3DV_FROM_HANDLE(v3dv_cmd_buffer, cmd_buffer, commandBuffer);
1216 
1217    /* We can only clear attachments in the current subpass */
1218    assert(attachmentCount <= 5); /* 4 color + D/S */
1219 
1220    struct v3dv_render_pass *pass = cmd_buffer->state.pass;
1221 
1222    assert(cmd_buffer->state.subpass_idx < pass->subpass_count);
1223    struct v3dv_subpass *subpass =
1224       &cmd_buffer->state.pass->subpasses[cmd_buffer->state.subpass_idx];
1225 
1226    /* Emit a clear rect inside the current job for this subpass. For layered
1227     * framebuffers, we use a geometry shader to redirect clears to the
1228     * appropriate layers.
1229     */
1230    bool is_layered, all_rects_same_layers;
1231    gather_layering_info(rectCount, pRects, &is_layered, &all_rects_same_layers);
1232    for (uint32_t i = 0; i < attachmentCount; i++) {
1233       if (pAttachments[i].aspectMask & VK_IMAGE_ASPECT_COLOR_BIT) {
1234          emit_subpass_color_clear_rects(cmd_buffer, pass, subpass,
1235                                         pAttachments[i].colorAttachment,
1236                                         &pAttachments[i].clearValue.color,
1237                                         is_layered, all_rects_same_layers,
1238                                         rectCount, pRects);
1239       } else {
1240          emit_subpass_ds_clear_rects(cmd_buffer, pass, subpass,
1241                                      pAttachments[i].aspectMask,
1242                                      &pAttachments[i].clearValue.depthStencil,
1243                                      is_layered, all_rects_same_layers,
1244                                      rectCount, pRects);
1245       }
1246    }
1247 }
1248