1 /*
2  * Copyright © 2020 Raspberry Pi
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21  * IN THE SOFTWARE.
22  */
23 
24 #include "v3dv_private.h"
25 #include "v3dv_meta_common.h"
26 
27 #include "compiler/nir/nir_builder.h"
28 #include "vk_format_info.h"
29 #include "util/u_pack_color.h"
30 
31 static void
get_hw_clear_color(struct v3dv_device * device,const VkClearColorValue * color,VkFormat fb_format,VkFormat image_format,uint32_t internal_type,uint32_t internal_bpp,uint32_t * hw_color)32 get_hw_clear_color(struct v3dv_device *device,
33                    const VkClearColorValue *color,
34                    VkFormat fb_format,
35                    VkFormat image_format,
36                    uint32_t internal_type,
37                    uint32_t internal_bpp,
38                    uint32_t *hw_color)
39 {
40    const uint32_t internal_size = 4 << internal_bpp;
41 
42    /* If the image format doesn't match the framebuffer format, then we are
43     * trying to clear an unsupported tlb format using a compatible
44     * format for the framebuffer. In this case, we want to make sure that
45     * we pack the clear value according to the original format semantics,
46     * not the compatible format.
47     */
48    if (fb_format == image_format) {
49       v3dv_X(device, get_hw_clear_color)(color, internal_type, internal_size,
50                                          hw_color);
51    } else {
52       union util_color uc;
53       enum pipe_format pipe_image_format =
54          vk_format_to_pipe_format(image_format);
55       util_pack_color(color->float32, pipe_image_format, &uc);
56       memcpy(hw_color, uc.ui, internal_size);
57    }
58 }
59 
60 /* Returns true if the implementation is able to handle the case, false
61  * otherwise.
62 */
63 static bool
clear_image_tlb(struct v3dv_cmd_buffer * cmd_buffer,struct v3dv_image * image,const VkClearValue * clear_value,const VkImageSubresourceRange * range)64 clear_image_tlb(struct v3dv_cmd_buffer *cmd_buffer,
65                 struct v3dv_image *image,
66                 const VkClearValue *clear_value,
67                 const VkImageSubresourceRange *range)
68 {
69    const VkOffset3D origin = { 0, 0, 0 };
70    VkFormat fb_format;
71    if (!v3dv_meta_can_use_tlb(image, &origin, &fb_format))
72       return false;
73 
74    uint32_t internal_type, internal_bpp;
75    v3dv_X(cmd_buffer->device, get_internal_type_bpp_for_image_aspects)
76       (fb_format, range->aspectMask,
77        &internal_type, &internal_bpp);
78 
79    union v3dv_clear_value hw_clear_value = { 0 };
80    if (range->aspectMask & VK_IMAGE_ASPECT_COLOR_BIT) {
81       get_hw_clear_color(cmd_buffer->device, &clear_value->color, fb_format,
82                          image->vk.format, internal_type, internal_bpp,
83                          &hw_clear_value.color[0]);
84    } else {
85       assert((range->aspectMask & VK_IMAGE_ASPECT_DEPTH_BIT) ||
86              (range->aspectMask & VK_IMAGE_ASPECT_STENCIL_BIT));
87       hw_clear_value.z = clear_value->depthStencil.depth;
88       hw_clear_value.s = clear_value->depthStencil.stencil;
89    }
90 
91    uint32_t level_count = vk_image_subresource_level_count(&image->vk, range);
92    uint32_t min_level = range->baseMipLevel;
93    uint32_t max_level = range->baseMipLevel + level_count;
94 
95    /* For 3D images baseArrayLayer and layerCount must be 0 and 1 respectively.
96     * Instead, we need to consider the full depth dimension of the image, which
97     * goes from 0 up to the level's depth extent.
98     */
99    uint32_t min_layer;
100    uint32_t max_layer;
101    if (image->vk.image_type != VK_IMAGE_TYPE_3D) {
102       min_layer = range->baseArrayLayer;
103       max_layer = range->baseArrayLayer +
104                   vk_image_subresource_layer_count(&image->vk, range);
105    } else {
106       min_layer = 0;
107       max_layer = 0;
108    }
109 
110    for (uint32_t level = min_level; level < max_level; level++) {
111       if (image->vk.image_type == VK_IMAGE_TYPE_3D)
112          max_layer = u_minify(image->vk.extent.depth, level);
113 
114       uint32_t width = u_minify(image->vk.extent.width, level);
115       uint32_t height = u_minify(image->vk.extent.height, level);
116 
117       struct v3dv_job *job =
118          v3dv_cmd_buffer_start_job(cmd_buffer, -1, V3DV_JOB_TYPE_GPU_CL);
119 
120       if (!job)
121          return true;
122 
123       v3dv_job_start_frame(job, width, height, max_layer, false,
124                            1, internal_bpp,
125                            image->vk.samples > VK_SAMPLE_COUNT_1_BIT);
126 
127       struct v3dv_meta_framebuffer framebuffer;
128       v3dv_X(job->device, meta_framebuffer_init)(&framebuffer, fb_format,
129                                                  internal_type,
130                                                  &job->frame_tiling);
131 
132       v3dv_X(job->device, job_emit_binning_flush)(job);
133 
134       /* If this triggers it is an application bug: the spec requires
135        * that any aspects to clear are present in the image.
136        */
137       assert(range->aspectMask & image->vk.aspects);
138 
139       v3dv_X(job->device, meta_emit_clear_image_rcl)
140          (job, image, &framebuffer, &hw_clear_value,
141           range->aspectMask, min_layer, max_layer, level);
142 
143       v3dv_cmd_buffer_finish_job(cmd_buffer);
144    }
145 
146    return true;
147 }
148 
149 VKAPI_ATTR void VKAPI_CALL
v3dv_CmdClearColorImage(VkCommandBuffer commandBuffer,VkImage _image,VkImageLayout imageLayout,const VkClearColorValue * pColor,uint32_t rangeCount,const VkImageSubresourceRange * pRanges)150 v3dv_CmdClearColorImage(VkCommandBuffer commandBuffer,
151                         VkImage _image,
152                         VkImageLayout imageLayout,
153                         const VkClearColorValue *pColor,
154                         uint32_t rangeCount,
155                         const VkImageSubresourceRange *pRanges)
156 {
157    V3DV_FROM_HANDLE(v3dv_cmd_buffer, cmd_buffer, commandBuffer);
158    V3DV_FROM_HANDLE(v3dv_image, image, _image);
159 
160    const VkClearValue clear_value = {
161       .color = *pColor,
162    };
163 
164    for (uint32_t i = 0; i < rangeCount; i++) {
165       if (clear_image_tlb(cmd_buffer, image, &clear_value, &pRanges[i]))
166          continue;
167       unreachable("Unsupported color clear.");
168    }
169 }
170 
171 VKAPI_ATTR void VKAPI_CALL
v3dv_CmdClearDepthStencilImage(VkCommandBuffer commandBuffer,VkImage _image,VkImageLayout imageLayout,const VkClearDepthStencilValue * pDepthStencil,uint32_t rangeCount,const VkImageSubresourceRange * pRanges)172 v3dv_CmdClearDepthStencilImage(VkCommandBuffer commandBuffer,
173                                VkImage _image,
174                                VkImageLayout imageLayout,
175                                const VkClearDepthStencilValue *pDepthStencil,
176                                uint32_t rangeCount,
177                                const VkImageSubresourceRange *pRanges)
178 {
179    V3DV_FROM_HANDLE(v3dv_cmd_buffer, cmd_buffer, commandBuffer);
180    V3DV_FROM_HANDLE(v3dv_image, image, _image);
181 
182    const VkClearValue clear_value = {
183       .depthStencil = *pDepthStencil,
184    };
185 
186    for (uint32_t i = 0; i < rangeCount; i++) {
187       if (clear_image_tlb(cmd_buffer, image, &clear_value, &pRanges[i]))
188          continue;
189       unreachable("Unsupported depth/stencil clear.");
190    }
191 }
192 
193 static void
destroy_color_clear_pipeline(VkDevice _device,uint64_t pipeline,VkAllocationCallbacks * alloc)194 destroy_color_clear_pipeline(VkDevice _device,
195                              uint64_t pipeline,
196                              VkAllocationCallbacks *alloc)
197 {
198    struct v3dv_meta_color_clear_pipeline *p =
199       (struct v3dv_meta_color_clear_pipeline *) (uintptr_t) pipeline;
200    v3dv_DestroyPipeline(_device, p->pipeline, alloc);
201    if (p->cached)
202       v3dv_DestroyRenderPass(_device, p->pass, alloc);
203    vk_free(alloc, p);
204 }
205 
206 static void
destroy_depth_clear_pipeline(VkDevice _device,struct v3dv_meta_depth_clear_pipeline * p,VkAllocationCallbacks * alloc)207 destroy_depth_clear_pipeline(VkDevice _device,
208                              struct v3dv_meta_depth_clear_pipeline *p,
209                              VkAllocationCallbacks *alloc)
210 {
211    v3dv_DestroyPipeline(_device, p->pipeline, alloc);
212    vk_free(alloc, p);
213 }
214 
215 static VkResult
create_color_clear_pipeline_layout(struct v3dv_device * device,VkPipelineLayout * pipeline_layout)216 create_color_clear_pipeline_layout(struct v3dv_device *device,
217                                    VkPipelineLayout *pipeline_layout)
218 {
219    /* FIXME: this is abusing a bit the API, since not all of our clear
220     * pipelines have a geometry shader. We could create 2 different pipeline
221     * layouts, but this works for us for now.
222     */
223    VkPushConstantRange ranges[2] = {
224       { VK_SHADER_STAGE_FRAGMENT_BIT, 0, 16 },
225       { VK_SHADER_STAGE_GEOMETRY_BIT, 16, 4 },
226    };
227 
228    VkPipelineLayoutCreateInfo info = {
229       .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
230       .setLayoutCount = 0,
231       .pushConstantRangeCount = 2,
232       .pPushConstantRanges = ranges,
233    };
234 
235    return v3dv_CreatePipelineLayout(v3dv_device_to_handle(device),
236                                     &info, &device->vk.alloc, pipeline_layout);
237 }
238 
239 static VkResult
create_depth_clear_pipeline_layout(struct v3dv_device * device,VkPipelineLayout * pipeline_layout)240 create_depth_clear_pipeline_layout(struct v3dv_device *device,
241                                    VkPipelineLayout *pipeline_layout)
242 {
243    /* FIXME: this is abusing a bit the API, since not all of our clear
244     * pipelines have a geometry shader. We could create 2 different pipeline
245     * layouts, but this works for us for now.
246     */
247    VkPushConstantRange ranges[2] = {
248       { VK_SHADER_STAGE_FRAGMENT_BIT, 0, 4 },
249       { VK_SHADER_STAGE_GEOMETRY_BIT, 4, 4 },
250    };
251 
252    VkPipelineLayoutCreateInfo info = {
253       .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
254       .setLayoutCount = 0,
255       .pushConstantRangeCount = 2,
256       .pPushConstantRanges = ranges
257    };
258 
259    return v3dv_CreatePipelineLayout(v3dv_device_to_handle(device),
260                                     &info, &device->vk.alloc, pipeline_layout);
261 }
262 
263 void
v3dv_meta_clear_init(struct v3dv_device * device)264 v3dv_meta_clear_init(struct v3dv_device *device)
265 {
266    device->meta.color_clear.cache =
267       _mesa_hash_table_create(NULL, u64_hash, u64_compare);
268 
269    create_color_clear_pipeline_layout(device,
270                                       &device->meta.color_clear.p_layout);
271 
272    device->meta.depth_clear.cache =
273       _mesa_hash_table_create(NULL, u64_hash, u64_compare);
274 
275    create_depth_clear_pipeline_layout(device,
276                                       &device->meta.depth_clear.p_layout);
277 }
278 
279 void
v3dv_meta_clear_finish(struct v3dv_device * device)280 v3dv_meta_clear_finish(struct v3dv_device *device)
281 {
282    VkDevice _device = v3dv_device_to_handle(device);
283 
284    hash_table_foreach(device->meta.color_clear.cache, entry) {
285       struct v3dv_meta_color_clear_pipeline *item = entry->data;
286       destroy_color_clear_pipeline(_device, (uintptr_t)item, &device->vk.alloc);
287    }
288    _mesa_hash_table_destroy(device->meta.color_clear.cache, NULL);
289 
290    if (device->meta.color_clear.p_layout) {
291       v3dv_DestroyPipelineLayout(_device, device->meta.color_clear.p_layout,
292                                  &device->vk.alloc);
293    }
294 
295    hash_table_foreach(device->meta.depth_clear.cache, entry) {
296       struct v3dv_meta_depth_clear_pipeline *item = entry->data;
297       destroy_depth_clear_pipeline(_device, item, &device->vk.alloc);
298    }
299    _mesa_hash_table_destroy(device->meta.depth_clear.cache, NULL);
300 
301    if (device->meta.depth_clear.p_layout) {
302       v3dv_DestroyPipelineLayout(_device, device->meta.depth_clear.p_layout,
303                                  &device->vk.alloc);
304    }
305 }
306 
307 static nir_ssa_def *
gen_rect_vertices(nir_builder * b)308 gen_rect_vertices(nir_builder *b)
309 {
310    nir_ssa_def *vertex_id = nir_load_vertex_id(b);
311 
312    /* vertex 0: -1.0, -1.0
313     * vertex 1: -1.0,  1.0
314     * vertex 2:  1.0, -1.0
315     * vertex 3:  1.0,  1.0
316     *
317     * so:
318     *
319     * channel 0 is vertex_id < 2 ? -1.0 :  1.0
320     * channel 1 is vertex id & 1 ?  1.0 : -1.0
321     */
322 
323    nir_ssa_def *one = nir_imm_int(b, 1);
324    nir_ssa_def *c0cmp = nir_ilt(b, vertex_id, nir_imm_int(b, 2));
325    nir_ssa_def *c1cmp = nir_ieq(b, nir_iand(b, vertex_id, one), one);
326 
327    nir_ssa_def *comp[4];
328    comp[0] = nir_bcsel(b, c0cmp,
329                        nir_imm_float(b, -1.0f),
330                        nir_imm_float(b, 1.0f));
331 
332    comp[1] = nir_bcsel(b, c1cmp,
333                        nir_imm_float(b, 1.0f),
334                        nir_imm_float(b, -1.0f));
335    comp[2] = nir_imm_float(b, 0.0f);
336    comp[3] = nir_imm_float(b, 1.0f);
337    return nir_vec(b, comp, 4);
338 }
339 
340 static nir_shader *
get_clear_rect_vs()341 get_clear_rect_vs()
342 {
343    const nir_shader_compiler_options *options = v3dv_pipeline_get_nir_options();
344    nir_builder b = nir_builder_init_simple_shader(MESA_SHADER_VERTEX, options,
345                                                   "meta clear vs");
346 
347    const struct glsl_type *vec4 = glsl_vec4_type();
348    nir_variable *vs_out_pos =
349       nir_variable_create(b.shader, nir_var_shader_out, vec4, "gl_Position");
350    vs_out_pos->data.location = VARYING_SLOT_POS;
351 
352    nir_ssa_def *pos = gen_rect_vertices(&b);
353    nir_store_var(&b, vs_out_pos, pos, 0xf);
354 
355    return b.shader;
356 }
357 
358 static nir_shader *
get_clear_rect_gs(uint32_t push_constant_layer_base)359 get_clear_rect_gs(uint32_t push_constant_layer_base)
360 {
361    /* FIXME: this creates a geometry shader that takes the index of a single
362     * layer to clear from push constants, so we need to emit a draw call for
363     * each layer that we want to clear. We could actually do better and have it
364     * take a range of layers and then emit one triangle per layer to clear,
365     * however, if we were to do this we would need to be careful not to exceed
366     * the maximum number of output vertices allowed in a geometry shader.
367     */
368    const nir_shader_compiler_options *options = v3dv_pipeline_get_nir_options();
369    nir_builder b = nir_builder_init_simple_shader(MESA_SHADER_GEOMETRY, options,
370                                                   "meta clear gs");
371    nir_shader *nir = b.shader;
372    nir->info.inputs_read = 1ull << VARYING_SLOT_POS;
373    nir->info.outputs_written = (1ull << VARYING_SLOT_POS) |
374                                (1ull << VARYING_SLOT_LAYER);
375    nir->info.gs.input_primitive = GL_TRIANGLES;
376    nir->info.gs.output_primitive = GL_TRIANGLE_STRIP;
377    nir->info.gs.vertices_in = 3;
378    nir->info.gs.vertices_out = 3;
379    nir->info.gs.invocations = 1;
380    nir->info.gs.active_stream_mask = 0x1;
381 
382    /* in vec4 gl_Position[3] */
383    nir_variable *gs_in_pos =
384       nir_variable_create(b.shader, nir_var_shader_in,
385                           glsl_array_type(glsl_vec4_type(), 3, 0),
386                           "in_gl_Position");
387    gs_in_pos->data.location = VARYING_SLOT_POS;
388 
389    /* out vec4 gl_Position */
390    nir_variable *gs_out_pos =
391       nir_variable_create(b.shader, nir_var_shader_out, glsl_vec4_type(),
392                           "out_gl_Position");
393    gs_out_pos->data.location = VARYING_SLOT_POS;
394 
395    /* out float gl_Layer */
396    nir_variable *gs_out_layer =
397       nir_variable_create(b.shader, nir_var_shader_out, glsl_float_type(),
398                           "out_gl_Layer");
399    gs_out_layer->data.location = VARYING_SLOT_LAYER;
400 
401    /* Emit output triangle */
402    for (uint32_t i = 0; i < 3; i++) {
403       /* gl_Position from shader input */
404       nir_deref_instr *in_pos_i =
405          nir_build_deref_array_imm(&b, nir_build_deref_var(&b, gs_in_pos), i);
406       nir_copy_deref(&b, nir_build_deref_var(&b, gs_out_pos), in_pos_i);
407 
408       /* gl_Layer from push constants */
409       nir_ssa_def *layer =
410          nir_load_push_constant(&b, 1, 32, nir_imm_int(&b, 0),
411                                 .base = push_constant_layer_base, .range = 4);
412       nir_store_var(&b, gs_out_layer, layer, 0x1);
413 
414       nir_emit_vertex(&b, 0);
415    }
416 
417    nir_end_primitive(&b, 0);
418 
419    return nir;
420 }
421 
422 static nir_shader *
get_color_clear_rect_fs(uint32_t rt_idx,VkFormat format)423 get_color_clear_rect_fs(uint32_t rt_idx, VkFormat format)
424 {
425    const nir_shader_compiler_options *options = v3dv_pipeline_get_nir_options();
426    nir_builder b = nir_builder_init_simple_shader(MESA_SHADER_FRAGMENT, options,
427                                                   "meta clear fs");
428 
429    enum pipe_format pformat = vk_format_to_pipe_format(format);
430    const struct glsl_type *fs_out_type =
431       util_format_is_float(pformat) ? glsl_vec4_type() : glsl_uvec4_type();
432 
433    nir_variable *fs_out_color =
434       nir_variable_create(b.shader, nir_var_shader_out, fs_out_type, "out_color");
435    fs_out_color->data.location = FRAG_RESULT_DATA0 + rt_idx;
436 
437    nir_ssa_def *color_load = nir_load_push_constant(&b, 4, 32, nir_imm_int(&b, 0), .base = 0, .range = 16);
438    nir_store_var(&b, fs_out_color, color_load, 0xf);
439 
440    return b.shader;
441 }
442 
443 static nir_shader *
get_depth_clear_rect_fs()444 get_depth_clear_rect_fs()
445 {
446    const nir_shader_compiler_options *options = v3dv_pipeline_get_nir_options();
447    nir_builder b = nir_builder_init_simple_shader(MESA_SHADER_FRAGMENT, options,
448                                                   "meta depth clear fs");
449 
450    nir_variable *fs_out_depth =
451       nir_variable_create(b.shader, nir_var_shader_out, glsl_float_type(),
452                           "out_depth");
453    fs_out_depth->data.location = FRAG_RESULT_DEPTH;
454 
455    nir_ssa_def *depth_load =
456       nir_load_push_constant(&b, 1, 32, nir_imm_int(&b, 0), .base = 0, .range = 4);
457 
458    nir_store_var(&b, fs_out_depth, depth_load, 0x1);
459 
460    return b.shader;
461 }
462 
463 static VkResult
create_pipeline(struct v3dv_device * device,struct v3dv_render_pass * pass,uint32_t subpass_idx,uint32_t samples,struct nir_shader * vs_nir,struct nir_shader * gs_nir,struct nir_shader * fs_nir,const VkPipelineVertexInputStateCreateInfo * vi_state,const VkPipelineDepthStencilStateCreateInfo * ds_state,const VkPipelineColorBlendStateCreateInfo * cb_state,const VkPipelineLayout layout,VkPipeline * pipeline)464 create_pipeline(struct v3dv_device *device,
465                 struct v3dv_render_pass *pass,
466                 uint32_t subpass_idx,
467                 uint32_t samples,
468                 struct nir_shader *vs_nir,
469                 struct nir_shader *gs_nir,
470                 struct nir_shader *fs_nir,
471                 const VkPipelineVertexInputStateCreateInfo *vi_state,
472                 const VkPipelineDepthStencilStateCreateInfo *ds_state,
473                 const VkPipelineColorBlendStateCreateInfo *cb_state,
474                 const VkPipelineLayout layout,
475                 VkPipeline *pipeline)
476 {
477    VkPipelineShaderStageCreateInfo stages[3] = { 0 };
478    struct vk_shader_module vs_m;
479    struct vk_shader_module gs_m;
480    struct vk_shader_module fs_m;
481 
482    uint32_t stage_count = 0;
483    v3dv_shader_module_internal_init(device, &vs_m, vs_nir);
484    stages[stage_count].sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO;
485    stages[stage_count].stage = VK_SHADER_STAGE_VERTEX_BIT;
486    stages[stage_count].module = vk_shader_module_to_handle(&vs_m);
487    stages[stage_count].pName = "main";
488    stage_count++;
489 
490    if (gs_nir) {
491       v3dv_shader_module_internal_init(device, &gs_m, gs_nir);
492       stages[stage_count].sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO;
493       stages[stage_count].stage = VK_SHADER_STAGE_GEOMETRY_BIT;
494       stages[stage_count].module = vk_shader_module_to_handle(&gs_m);
495       stages[stage_count].pName = "main";
496       stage_count++;
497    }
498 
499    if (fs_nir) {
500       v3dv_shader_module_internal_init(device, &fs_m, fs_nir);
501       stages[stage_count].sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO;
502       stages[stage_count].stage = VK_SHADER_STAGE_FRAGMENT_BIT;
503       stages[stage_count].module = vk_shader_module_to_handle(&fs_m);
504       stages[stage_count].pName = "main";
505       stage_count++;
506    }
507 
508    VkGraphicsPipelineCreateInfo info = {
509       .sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO,
510 
511       .stageCount = stage_count,
512       .pStages = stages,
513 
514       .pVertexInputState = vi_state,
515 
516       .pInputAssemblyState = &(VkPipelineInputAssemblyStateCreateInfo) {
517          .sType = VK_STRUCTURE_TYPE_PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO,
518          .topology = VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP,
519          .primitiveRestartEnable = false,
520       },
521 
522       .pViewportState = &(VkPipelineViewportStateCreateInfo) {
523          .sType = VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO,
524          .viewportCount = 1,
525          .scissorCount = 1,
526       },
527 
528       .pRasterizationState = &(VkPipelineRasterizationStateCreateInfo) {
529          .sType = VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_CREATE_INFO,
530          .rasterizerDiscardEnable = false,
531          .polygonMode = VK_POLYGON_MODE_FILL,
532          .cullMode = VK_CULL_MODE_NONE,
533          .frontFace = VK_FRONT_FACE_COUNTER_CLOCKWISE,
534          .depthBiasEnable = false,
535       },
536 
537       .pMultisampleState = &(VkPipelineMultisampleStateCreateInfo) {
538          .sType = VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO,
539          .rasterizationSamples = samples,
540          .sampleShadingEnable = false,
541          .pSampleMask = NULL,
542          .alphaToCoverageEnable = false,
543          .alphaToOneEnable = false,
544       },
545 
546       .pDepthStencilState = ds_state,
547 
548       .pColorBlendState = cb_state,
549 
550       /* The meta clear pipeline declares all state as dynamic.
551        * As a consequence, vkCmdBindPipeline writes no dynamic state
552        * to the cmd buffer. Therefore, at the end of the meta clear,
553        * we need only restore dynamic state that was vkCmdSet.
554        */
555       .pDynamicState = &(VkPipelineDynamicStateCreateInfo) {
556          .sType = VK_STRUCTURE_TYPE_PIPELINE_DYNAMIC_STATE_CREATE_INFO,
557          .dynamicStateCount = 6,
558          .pDynamicStates = (VkDynamicState[]) {
559             VK_DYNAMIC_STATE_VIEWPORT,
560             VK_DYNAMIC_STATE_SCISSOR,
561             VK_DYNAMIC_STATE_STENCIL_COMPARE_MASK,
562             VK_DYNAMIC_STATE_STENCIL_WRITE_MASK,
563             VK_DYNAMIC_STATE_STENCIL_REFERENCE,
564             VK_DYNAMIC_STATE_BLEND_CONSTANTS,
565             VK_DYNAMIC_STATE_DEPTH_BIAS,
566             VK_DYNAMIC_STATE_LINE_WIDTH,
567          },
568       },
569 
570       .flags = 0,
571       .layout = layout,
572       .renderPass = v3dv_render_pass_to_handle(pass),
573       .subpass = subpass_idx,
574    };
575 
576    VkResult result =
577       v3dv_CreateGraphicsPipelines(v3dv_device_to_handle(device),
578                                    VK_NULL_HANDLE,
579                                    1, &info,
580                                    &device->vk.alloc,
581                                    pipeline);
582 
583    ralloc_free(vs_nir);
584    ralloc_free(fs_nir);
585 
586    return result;
587 }
588 
589 static VkResult
create_color_clear_pipeline(struct v3dv_device * device,struct v3dv_render_pass * pass,uint32_t subpass_idx,uint32_t rt_idx,VkFormat format,uint32_t samples,uint32_t components,bool is_layered,VkPipelineLayout pipeline_layout,VkPipeline * pipeline)590 create_color_clear_pipeline(struct v3dv_device *device,
591                             struct v3dv_render_pass *pass,
592                             uint32_t subpass_idx,
593                             uint32_t rt_idx,
594                             VkFormat format,
595                             uint32_t samples,
596                             uint32_t components,
597                             bool is_layered,
598                             VkPipelineLayout pipeline_layout,
599                             VkPipeline *pipeline)
600 {
601    nir_shader *vs_nir = get_clear_rect_vs();
602    nir_shader *fs_nir = get_color_clear_rect_fs(rt_idx, format);
603    nir_shader *gs_nir = is_layered ? get_clear_rect_gs(16) : NULL;
604 
605    const VkPipelineVertexInputStateCreateInfo vi_state = {
606       .sType = VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO,
607       .vertexBindingDescriptionCount = 0,
608       .vertexAttributeDescriptionCount = 0,
609    };
610 
611    const VkPipelineDepthStencilStateCreateInfo ds_state = {
612       .sType = VK_STRUCTURE_TYPE_PIPELINE_DEPTH_STENCIL_STATE_CREATE_INFO,
613       .depthTestEnable = false,
614       .depthWriteEnable = false,
615       .depthBoundsTestEnable = false,
616       .stencilTestEnable = false,
617    };
618 
619    assert(subpass_idx < pass->subpass_count);
620    const uint32_t color_count = pass->subpasses[subpass_idx].color_count;
621    assert(rt_idx < color_count);
622 
623    VkPipelineColorBlendAttachmentState blend_att_state[V3D_MAX_DRAW_BUFFERS];
624    for (uint32_t i = 0; i < color_count; i++) {
625       blend_att_state[i] = (VkPipelineColorBlendAttachmentState) {
626          .blendEnable = false,
627          .colorWriteMask = i == rt_idx ? components : 0,
628       };
629    }
630 
631    const VkPipelineColorBlendStateCreateInfo cb_state = {
632       .sType = VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO,
633       .logicOpEnable = false,
634       .attachmentCount = color_count,
635       .pAttachments = blend_att_state
636    };
637 
638    return create_pipeline(device,
639                           pass, subpass_idx,
640                           samples,
641                           vs_nir, gs_nir, fs_nir,
642                           &vi_state,
643                           &ds_state,
644                           &cb_state,
645                           pipeline_layout,
646                           pipeline);
647 }
648 
649 static VkResult
create_depth_clear_pipeline(struct v3dv_device * device,VkImageAspectFlags aspects,struct v3dv_render_pass * pass,uint32_t subpass_idx,uint32_t samples,bool is_layered,VkPipelineLayout pipeline_layout,VkPipeline * pipeline)650 create_depth_clear_pipeline(struct v3dv_device *device,
651                             VkImageAspectFlags aspects,
652                             struct v3dv_render_pass *pass,
653                             uint32_t subpass_idx,
654                             uint32_t samples,
655                             bool is_layered,
656                             VkPipelineLayout pipeline_layout,
657                             VkPipeline *pipeline)
658 {
659    const bool has_depth = aspects & VK_IMAGE_ASPECT_DEPTH_BIT;
660    const bool has_stencil = aspects & VK_IMAGE_ASPECT_STENCIL_BIT;
661    assert(has_depth || has_stencil);
662 
663    nir_shader *vs_nir = get_clear_rect_vs();
664    nir_shader *fs_nir = has_depth ? get_depth_clear_rect_fs() : NULL;
665    nir_shader *gs_nir = is_layered ? get_clear_rect_gs(4) : NULL;
666 
667    const VkPipelineVertexInputStateCreateInfo vi_state = {
668       .sType = VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO,
669       .vertexBindingDescriptionCount = 0,
670       .vertexAttributeDescriptionCount = 0,
671    };
672 
673    const VkPipelineDepthStencilStateCreateInfo ds_state = {
674       .sType = VK_STRUCTURE_TYPE_PIPELINE_DEPTH_STENCIL_STATE_CREATE_INFO,
675       .depthTestEnable = has_depth,
676       .depthWriteEnable = has_depth,
677       .depthCompareOp = VK_COMPARE_OP_ALWAYS,
678       .depthBoundsTestEnable = false,
679       .stencilTestEnable = has_stencil,
680       .front = {
681          .passOp = VK_STENCIL_OP_REPLACE,
682          .compareOp = VK_COMPARE_OP_ALWAYS,
683          /* compareMask, writeMask and reference are dynamic state */
684       },
685       .back = { 0 },
686    };
687 
688    assert(subpass_idx < pass->subpass_count);
689    VkPipelineColorBlendAttachmentState blend_att_state[V3D_MAX_DRAW_BUFFERS] = { 0 };
690    const VkPipelineColorBlendStateCreateInfo cb_state = {
691       .sType = VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO,
692       .logicOpEnable = false,
693       .attachmentCount = pass->subpasses[subpass_idx].color_count,
694       .pAttachments = blend_att_state,
695    };
696 
697    return create_pipeline(device,
698                           pass, subpass_idx,
699                           samples,
700                           vs_nir, gs_nir, fs_nir,
701                           &vi_state,
702                           &ds_state,
703                           &cb_state,
704                           pipeline_layout,
705                           pipeline);
706 }
707 
708 static VkResult
create_color_clear_render_pass(struct v3dv_device * device,uint32_t rt_idx,VkFormat format,uint32_t samples,VkRenderPass * pass)709 create_color_clear_render_pass(struct v3dv_device *device,
710                                uint32_t rt_idx,
711                                VkFormat format,
712                                uint32_t samples,
713                                VkRenderPass *pass)
714 {
715    VkAttachmentDescription att = {
716       .format = format,
717       .samples = samples,
718       .loadOp = VK_ATTACHMENT_LOAD_OP_LOAD,
719       .storeOp = VK_ATTACHMENT_STORE_OP_STORE,
720       .initialLayout = VK_IMAGE_LAYOUT_GENERAL,
721       .finalLayout = VK_IMAGE_LAYOUT_GENERAL,
722    };
723 
724    VkAttachmentReference att_ref = {
725       .attachment = rt_idx,
726       .layout = VK_IMAGE_LAYOUT_GENERAL,
727    };
728 
729    VkSubpassDescription subpass = {
730       .pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS,
731       .inputAttachmentCount = 0,
732       .colorAttachmentCount = 1,
733       .pColorAttachments = &att_ref,
734       .pResolveAttachments = NULL,
735       .pDepthStencilAttachment = NULL,
736       .preserveAttachmentCount = 0,
737       .pPreserveAttachments = NULL,
738    };
739 
740    VkRenderPassCreateInfo info = {
741       .sType = VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO,
742       .attachmentCount = 1,
743       .pAttachments = &att,
744       .subpassCount = 1,
745       .pSubpasses = &subpass,
746       .dependencyCount = 0,
747       .pDependencies = NULL,
748    };
749 
750    return v3dv_CreateRenderPass(v3dv_device_to_handle(device),
751                                 &info, &device->vk.alloc, pass);
752 }
753 
754 static inline uint64_t
get_color_clear_pipeline_cache_key(uint32_t rt_idx,VkFormat format,uint32_t samples,uint32_t components,bool is_layered)755 get_color_clear_pipeline_cache_key(uint32_t rt_idx,
756                                    VkFormat format,
757                                    uint32_t samples,
758                                    uint32_t components,
759                                    bool is_layered)
760 {
761    assert(rt_idx < V3D_MAX_DRAW_BUFFERS);
762 
763    uint64_t key = 0;
764    uint32_t bit_offset = 0;
765 
766    key |= rt_idx;
767    bit_offset += 2;
768 
769    key |= ((uint64_t) format) << bit_offset;
770    bit_offset += 32;
771 
772    key |= ((uint64_t) samples) << bit_offset;
773    bit_offset += 4;
774 
775    key |= ((uint64_t) components) << bit_offset;
776    bit_offset += 4;
777 
778    key |= (is_layered ? 1ull : 0ull) << bit_offset;
779    bit_offset += 1;
780 
781    assert(bit_offset <= 64);
782    return key;
783 }
784 
785 static inline uint64_t
get_depth_clear_pipeline_cache_key(VkImageAspectFlags aspects,VkFormat format,uint32_t samples,bool is_layered)786 get_depth_clear_pipeline_cache_key(VkImageAspectFlags aspects,
787                                    VkFormat format,
788                                    uint32_t samples,
789                                    bool is_layered)
790 {
791    uint64_t key = 0;
792    uint32_t bit_offset = 0;
793 
794    key |= format;
795    bit_offset += 32;
796 
797    key |= ((uint64_t) samples) << bit_offset;
798    bit_offset += 4;
799 
800    const bool has_depth = (aspects & VK_IMAGE_ASPECT_DEPTH_BIT) ? 1 : 0;
801    key |= ((uint64_t) has_depth) << bit_offset;
802    bit_offset++;
803 
804    const bool has_stencil = (aspects & VK_IMAGE_ASPECT_STENCIL_BIT) ? 1 : 0;
805    key |= ((uint64_t) has_stencil) << bit_offset;
806    bit_offset++;;
807 
808    key |= (is_layered ? 1ull : 0ull) << bit_offset;
809    bit_offset += 1;
810 
811    assert(bit_offset <= 64);
812    return key;
813 }
814 
815 static VkResult
get_color_clear_pipeline(struct v3dv_device * device,struct v3dv_render_pass * pass,uint32_t subpass_idx,uint32_t rt_idx,uint32_t attachment_idx,VkFormat format,uint32_t samples,uint32_t components,bool is_layered,struct v3dv_meta_color_clear_pipeline ** pipeline)816 get_color_clear_pipeline(struct v3dv_device *device,
817                          struct v3dv_render_pass *pass,
818                          uint32_t subpass_idx,
819                          uint32_t rt_idx,
820                          uint32_t attachment_idx,
821                          VkFormat format,
822                          uint32_t samples,
823                          uint32_t components,
824                          bool is_layered,
825                          struct v3dv_meta_color_clear_pipeline **pipeline)
826 {
827    assert(vk_format_is_color(format));
828 
829    VkResult result = VK_SUCCESS;
830 
831    /* If pass != NULL it means that we are emitting the clear as a draw call
832     * in the current pass bound by the application. In that case, we can't
833     * cache the pipeline, since it will be referencing that pass and the
834     * application could be destroying it at any point. Hopefully, the perf
835     * impact is not too big since we still have the device pipeline cache
836     * around and we won't end up re-compiling the clear shader.
837     *
838     * FIXME: alternatively, we could refcount (or maybe clone) the render pass
839     * provided by the application and include it in the pipeline key setup
840     * to make caching safe in this scenario, however, based on tests with
841     * vkQuake3, the fact that we are not caching here doesn't seem to have
842     * any significant impact in performance, so it might not be worth it.
843     */
844    const bool can_cache_pipeline = (pass == NULL);
845 
846    uint64_t key;
847    if (can_cache_pipeline) {
848       key = get_color_clear_pipeline_cache_key(rt_idx, format, samples,
849                                                components, is_layered);
850       mtx_lock(&device->meta.mtx);
851       struct hash_entry *entry =
852          _mesa_hash_table_search(device->meta.color_clear.cache, &key);
853       if (entry) {
854          mtx_unlock(&device->meta.mtx);
855          *pipeline = entry->data;
856          return VK_SUCCESS;
857       }
858    }
859 
860    *pipeline = vk_zalloc2(&device->vk.alloc, NULL, sizeof(**pipeline), 8,
861                           VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
862 
863    if (*pipeline == NULL) {
864       result = VK_ERROR_OUT_OF_HOST_MEMORY;
865       goto fail;
866    }
867 
868    if (!pass) {
869       result = create_color_clear_render_pass(device,
870                                               rt_idx,
871                                               format,
872                                               samples,
873                                               &(*pipeline)->pass);
874       if (result != VK_SUCCESS)
875          goto fail;
876 
877       pass = v3dv_render_pass_from_handle((*pipeline)->pass);
878    } else {
879       (*pipeline)->pass = v3dv_render_pass_to_handle(pass);
880    }
881 
882    result = create_color_clear_pipeline(device,
883                                         pass,
884                                         subpass_idx,
885                                         rt_idx,
886                                         format,
887                                         samples,
888                                         components,
889                                         is_layered,
890                                         device->meta.color_clear.p_layout,
891                                         &(*pipeline)->pipeline);
892    if (result != VK_SUCCESS)
893       goto fail;
894 
895    if (can_cache_pipeline) {
896       (*pipeline)->key = key;
897       (*pipeline)->cached = true;
898       _mesa_hash_table_insert(device->meta.color_clear.cache,
899                               &(*pipeline)->key, *pipeline);
900 
901       mtx_unlock(&device->meta.mtx);
902    }
903 
904    return VK_SUCCESS;
905 
906 fail:
907    if (can_cache_pipeline)
908       mtx_unlock(&device->meta.mtx);
909 
910    VkDevice _device = v3dv_device_to_handle(device);
911    if (*pipeline) {
912       if ((*pipeline)->cached)
913          v3dv_DestroyRenderPass(_device, (*pipeline)->pass, &device->vk.alloc);
914       if ((*pipeline)->pipeline)
915          v3dv_DestroyPipeline(_device, (*pipeline)->pipeline, &device->vk.alloc);
916       vk_free(&device->vk.alloc, *pipeline);
917       *pipeline = NULL;
918    }
919 
920    return result;
921 }
922 
923 static VkResult
get_depth_clear_pipeline(struct v3dv_device * device,VkImageAspectFlags aspects,struct v3dv_render_pass * pass,uint32_t subpass_idx,uint32_t attachment_idx,bool is_layered,struct v3dv_meta_depth_clear_pipeline ** pipeline)924 get_depth_clear_pipeline(struct v3dv_device *device,
925                          VkImageAspectFlags aspects,
926                          struct v3dv_render_pass *pass,
927                          uint32_t subpass_idx,
928                          uint32_t attachment_idx,
929                          bool is_layered,
930                          struct v3dv_meta_depth_clear_pipeline **pipeline)
931 {
932    assert(subpass_idx < pass->subpass_count);
933    assert(attachment_idx != VK_ATTACHMENT_UNUSED);
934    assert(attachment_idx < pass->attachment_count);
935 
936    VkResult result = VK_SUCCESS;
937 
938    const uint32_t samples = pass->attachments[attachment_idx].desc.samples;
939    const VkFormat format = pass->attachments[attachment_idx].desc.format;
940    assert(vk_format_is_depth_or_stencil(format));
941 
942    const uint64_t key =
943       get_depth_clear_pipeline_cache_key(aspects, format, samples, is_layered);
944    mtx_lock(&device->meta.mtx);
945    struct hash_entry *entry =
946       _mesa_hash_table_search(device->meta.depth_clear.cache, &key);
947    if (entry) {
948       mtx_unlock(&device->meta.mtx);
949       *pipeline = entry->data;
950       return VK_SUCCESS;
951    }
952 
953    *pipeline = vk_zalloc2(&device->vk.alloc, NULL, sizeof(**pipeline), 8,
954                           VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
955 
956    if (*pipeline == NULL) {
957       result = VK_ERROR_OUT_OF_HOST_MEMORY;
958       goto fail;
959    }
960 
961    result = create_depth_clear_pipeline(device,
962                                         aspects,
963                                         pass,
964                                         subpass_idx,
965                                         samples,
966                                         is_layered,
967                                         device->meta.depth_clear.p_layout,
968                                         &(*pipeline)->pipeline);
969    if (result != VK_SUCCESS)
970       goto fail;
971 
972    (*pipeline)->key = key;
973    _mesa_hash_table_insert(device->meta.depth_clear.cache,
974                            &(*pipeline)->key, *pipeline);
975 
976    mtx_unlock(&device->meta.mtx);
977    return VK_SUCCESS;
978 
979 fail:
980    mtx_unlock(&device->meta.mtx);
981 
982    VkDevice _device = v3dv_device_to_handle(device);
983    if (*pipeline) {
984       if ((*pipeline)->pipeline)
985          v3dv_DestroyPipeline(_device, (*pipeline)->pipeline, &device->vk.alloc);
986       vk_free(&device->vk.alloc, *pipeline);
987       *pipeline = NULL;
988    }
989 
990    return result;
991 }
992 
993 /* Emits a scissored quad in the clear color */
994 static void
emit_subpass_color_clear_rects(struct v3dv_cmd_buffer * cmd_buffer,struct v3dv_render_pass * pass,struct v3dv_subpass * subpass,uint32_t rt_idx,const VkClearColorValue * clear_color,bool is_layered,bool all_rects_same_layers,uint32_t rect_count,const VkClearRect * rects)995 emit_subpass_color_clear_rects(struct v3dv_cmd_buffer *cmd_buffer,
996                                struct v3dv_render_pass *pass,
997                                struct v3dv_subpass *subpass,
998                                uint32_t rt_idx,
999                                const VkClearColorValue *clear_color,
1000                                bool is_layered,
1001                                bool all_rects_same_layers,
1002                                uint32_t rect_count,
1003                                const VkClearRect *rects)
1004 {
1005    /* Skip if attachment is unused in the current subpass */
1006    assert(rt_idx < subpass->color_count);
1007    const uint32_t attachment_idx = subpass->color_attachments[rt_idx].attachment;
1008    if (attachment_idx == VK_ATTACHMENT_UNUSED)
1009       return;
1010 
1011    /* Obtain a pipeline for this clear */
1012    assert(attachment_idx < cmd_buffer->state.pass->attachment_count);
1013    const VkFormat format =
1014       cmd_buffer->state.pass->attachments[attachment_idx].desc.format;
1015    const VkFormat samples =
1016       cmd_buffer->state.pass->attachments[attachment_idx].desc.samples;
1017    const uint32_t components = VK_COLOR_COMPONENT_R_BIT |
1018                                VK_COLOR_COMPONENT_G_BIT |
1019                                VK_COLOR_COMPONENT_B_BIT |
1020                                VK_COLOR_COMPONENT_A_BIT;
1021    struct v3dv_meta_color_clear_pipeline *pipeline = NULL;
1022    VkResult result = get_color_clear_pipeline(cmd_buffer->device,
1023                                               pass,
1024                                               cmd_buffer->state.subpass_idx,
1025                                               rt_idx,
1026                                               attachment_idx,
1027                                               format,
1028                                               samples,
1029                                               components,
1030                                               is_layered,
1031                                               &pipeline);
1032    if (result != VK_SUCCESS) {
1033       if (result == VK_ERROR_OUT_OF_HOST_MEMORY)
1034          v3dv_flag_oom(cmd_buffer, NULL);
1035       return;
1036    }
1037    assert(pipeline && pipeline->pipeline);
1038 
1039    /* Emit clear rects */
1040    v3dv_cmd_buffer_meta_state_push(cmd_buffer, false);
1041 
1042    VkCommandBuffer cmd_buffer_handle = v3dv_cmd_buffer_to_handle(cmd_buffer);
1043    v3dv_CmdPushConstants(cmd_buffer_handle,
1044                          cmd_buffer->device->meta.depth_clear.p_layout,
1045                          VK_SHADER_STAGE_FRAGMENT_BIT, 0, 16,
1046                          clear_color->float32);
1047 
1048    v3dv_CmdBindPipeline(cmd_buffer_handle,
1049                         VK_PIPELINE_BIND_POINT_GRAPHICS,
1050                         pipeline->pipeline);
1051 
1052    uint32_t dynamic_states = V3DV_CMD_DIRTY_VIEWPORT | V3DV_CMD_DIRTY_SCISSOR;
1053 
1054    for (uint32_t i = 0; i < rect_count; i++) {
1055       const VkViewport viewport = {
1056          .x = rects[i].rect.offset.x,
1057          .y = rects[i].rect.offset.y,
1058          .width = rects[i].rect.extent.width,
1059          .height = rects[i].rect.extent.height,
1060          .minDepth = 0.0f,
1061          .maxDepth = 1.0f
1062       };
1063       v3dv_CmdSetViewport(cmd_buffer_handle, 0, 1, &viewport);
1064       v3dv_CmdSetScissor(cmd_buffer_handle, 0, 1, &rects[i].rect);
1065 
1066       if (is_layered) {
1067          for (uint32_t layer_offset = 0; layer_offset < rects[i].layerCount;
1068               layer_offset++) {
1069             uint32_t layer = rects[i].baseArrayLayer + layer_offset;
1070             v3dv_CmdPushConstants(cmd_buffer_handle,
1071                                   cmd_buffer->device->meta.depth_clear.p_layout,
1072                                   VK_SHADER_STAGE_GEOMETRY_BIT, 16, 4, &layer);
1073             v3dv_CmdDraw(cmd_buffer_handle, 4, 1, 0, 0);
1074          }
1075       } else {
1076          assert(rects[i].baseArrayLayer == 0 && rects[i].layerCount == 1);
1077          v3dv_CmdDraw(cmd_buffer_handle, 4, 1, 0, 0);
1078       }
1079    }
1080 
1081    /* Subpass pipelines can't be cached because they include a reference to the
1082     * render pass currently bound by the application, which means that we need
1083     * to destroy them manually here.
1084     */
1085    assert(!pipeline->cached);
1086    v3dv_cmd_buffer_add_private_obj(
1087       cmd_buffer, (uintptr_t)pipeline,
1088       (v3dv_cmd_buffer_private_obj_destroy_cb) destroy_color_clear_pipeline);
1089 
1090    v3dv_cmd_buffer_meta_state_pop(cmd_buffer, dynamic_states, false);
1091 }
1092 
1093 /* Emits a scissored quad, clearing the depth aspect by writing to gl_FragDepth
1094  * and the stencil aspect by using stencil testing.
1095  */
1096 static void
emit_subpass_ds_clear_rects(struct v3dv_cmd_buffer * cmd_buffer,struct v3dv_render_pass * pass,struct v3dv_subpass * subpass,VkImageAspectFlags aspects,const VkClearDepthStencilValue * clear_ds,bool is_layered,bool all_rects_same_layers,uint32_t rect_count,const VkClearRect * rects)1097 emit_subpass_ds_clear_rects(struct v3dv_cmd_buffer *cmd_buffer,
1098                             struct v3dv_render_pass *pass,
1099                             struct v3dv_subpass *subpass,
1100                             VkImageAspectFlags aspects,
1101                             const VkClearDepthStencilValue *clear_ds,
1102                             bool is_layered,
1103                             bool all_rects_same_layers,
1104                             uint32_t rect_count,
1105                             const VkClearRect *rects)
1106 {
1107    /* Skip if attachment is unused in the current subpass */
1108    const uint32_t attachment_idx = subpass->ds_attachment.attachment;
1109    if (attachment_idx == VK_ATTACHMENT_UNUSED)
1110       return;
1111 
1112    /* Obtain a pipeline for this clear */
1113    assert(attachment_idx < cmd_buffer->state.pass->attachment_count);
1114    struct v3dv_meta_depth_clear_pipeline *pipeline = NULL;
1115    VkResult result = get_depth_clear_pipeline(cmd_buffer->device,
1116                                               aspects,
1117                                               pass,
1118                                               cmd_buffer->state.subpass_idx,
1119                                               attachment_idx,
1120                                               is_layered,
1121                                               &pipeline);
1122    if (result != VK_SUCCESS) {
1123       if (result == VK_ERROR_OUT_OF_HOST_MEMORY)
1124          v3dv_flag_oom(cmd_buffer, NULL);
1125       return;
1126    }
1127    assert(pipeline && pipeline->pipeline);
1128 
1129    /* Emit clear rects */
1130    v3dv_cmd_buffer_meta_state_push(cmd_buffer, false);
1131 
1132    VkCommandBuffer cmd_buffer_handle = v3dv_cmd_buffer_to_handle(cmd_buffer);
1133    v3dv_CmdPushConstants(cmd_buffer_handle,
1134                          cmd_buffer->device->meta.depth_clear.p_layout,
1135                          VK_SHADER_STAGE_FRAGMENT_BIT, 0, 4,
1136                          &clear_ds->depth);
1137 
1138    v3dv_CmdBindPipeline(cmd_buffer_handle,
1139                         VK_PIPELINE_BIND_POINT_GRAPHICS,
1140                         pipeline->pipeline);
1141 
1142    uint32_t dynamic_states = V3DV_CMD_DIRTY_VIEWPORT | V3DV_CMD_DIRTY_SCISSOR;
1143    if (aspects & VK_IMAGE_ASPECT_STENCIL_BIT) {
1144       v3dv_CmdSetStencilReference(cmd_buffer_handle,
1145                                   VK_STENCIL_FACE_FRONT_AND_BACK,
1146                                   clear_ds->stencil);
1147       v3dv_CmdSetStencilWriteMask(cmd_buffer_handle,
1148                                   VK_STENCIL_FACE_FRONT_AND_BACK, 0xff);
1149       v3dv_CmdSetStencilCompareMask(cmd_buffer_handle,
1150                                     VK_STENCIL_FACE_FRONT_AND_BACK, 0xff);
1151       dynamic_states |= VK_DYNAMIC_STATE_STENCIL_COMPARE_MASK |
1152                         VK_DYNAMIC_STATE_STENCIL_WRITE_MASK |
1153                         VK_DYNAMIC_STATE_STENCIL_REFERENCE;
1154    }
1155 
1156    for (uint32_t i = 0; i < rect_count; i++) {
1157       const VkViewport viewport = {
1158          .x = rects[i].rect.offset.x,
1159          .y = rects[i].rect.offset.y,
1160          .width = rects[i].rect.extent.width,
1161          .height = rects[i].rect.extent.height,
1162          .minDepth = 0.0f,
1163          .maxDepth = 1.0f
1164       };
1165       v3dv_CmdSetViewport(cmd_buffer_handle, 0, 1, &viewport);
1166       v3dv_CmdSetScissor(cmd_buffer_handle, 0, 1, &rects[i].rect);
1167       if (is_layered) {
1168          for (uint32_t layer_offset = 0; layer_offset < rects[i].layerCount;
1169               layer_offset++) {
1170             uint32_t layer = rects[i].baseArrayLayer + layer_offset;
1171             v3dv_CmdPushConstants(cmd_buffer_handle,
1172                                   cmd_buffer->device->meta.depth_clear.p_layout,
1173                                   VK_SHADER_STAGE_GEOMETRY_BIT, 4, 4, &layer);
1174             v3dv_CmdDraw(cmd_buffer_handle, 4, 1, 0, 0);
1175          }
1176       } else {
1177          assert(rects[i].baseArrayLayer == 0 && rects[i].layerCount == 1);
1178          v3dv_CmdDraw(cmd_buffer_handle, 4, 1, 0, 0);
1179       }
1180    }
1181 
1182    v3dv_cmd_buffer_meta_state_pop(cmd_buffer, dynamic_states, false);
1183 }
1184 
1185 static void
gather_layering_info(uint32_t rect_count,const VkClearRect * rects,bool * is_layered,bool * all_rects_same_layers)1186 gather_layering_info(uint32_t rect_count, const VkClearRect *rects,
1187                      bool *is_layered, bool *all_rects_same_layers)
1188 {
1189    *all_rects_same_layers = true;
1190 
1191    uint32_t min_layer = rects[0].baseArrayLayer;
1192    uint32_t max_layer = rects[0].baseArrayLayer + rects[0].layerCount - 1;
1193    for (uint32_t i = 1; i < rect_count; i++) {
1194       if (rects[i].baseArrayLayer != rects[i - 1].baseArrayLayer ||
1195           rects[i].layerCount != rects[i - 1].layerCount) {
1196          *all_rects_same_layers = false;
1197          min_layer = MIN2(min_layer, rects[i].baseArrayLayer);
1198          max_layer = MAX2(max_layer, rects[i].baseArrayLayer +
1199                                      rects[i].layerCount - 1);
1200       }
1201    }
1202 
1203    *is_layered = !(min_layer == 0 && max_layer == 0);
1204 }
1205 
1206 VKAPI_ATTR void VKAPI_CALL
v3dv_CmdClearAttachments(VkCommandBuffer commandBuffer,uint32_t attachmentCount,const VkClearAttachment * pAttachments,uint32_t rectCount,const VkClearRect * pRects)1207 v3dv_CmdClearAttachments(VkCommandBuffer commandBuffer,
1208                          uint32_t attachmentCount,
1209                          const VkClearAttachment *pAttachments,
1210                          uint32_t rectCount,
1211                          const VkClearRect *pRects)
1212 {
1213    V3DV_FROM_HANDLE(v3dv_cmd_buffer, cmd_buffer, commandBuffer);
1214 
1215    /* We can only clear attachments in the current subpass */
1216    assert(attachmentCount <= 5); /* 4 color + D/S */
1217 
1218    struct v3dv_render_pass *pass = cmd_buffer->state.pass;
1219 
1220    assert(cmd_buffer->state.subpass_idx < pass->subpass_count);
1221    struct v3dv_subpass *subpass =
1222       &cmd_buffer->state.pass->subpasses[cmd_buffer->state.subpass_idx];
1223 
1224    /* Emit a clear rect inside the current job for this subpass. For layered
1225     * framebuffers, we use a geometry shader to redirect clears to the
1226     * appropriate layers.
1227     */
1228    bool is_layered, all_rects_same_layers;
1229    gather_layering_info(rectCount, pRects, &is_layered, &all_rects_same_layers);
1230    for (uint32_t i = 0; i < attachmentCount; i++) {
1231       if (pAttachments[i].aspectMask & VK_IMAGE_ASPECT_COLOR_BIT) {
1232          emit_subpass_color_clear_rects(cmd_buffer, pass, subpass,
1233                                         pAttachments[i].colorAttachment,
1234                                         &pAttachments[i].clearValue.color,
1235                                         is_layered, all_rects_same_layers,
1236                                         rectCount, pRects);
1237       } else {
1238          emit_subpass_ds_clear_rects(cmd_buffer, pass, subpass,
1239                                      pAttachments[i].aspectMask,
1240                                      &pAttachments[i].clearValue.depthStencil,
1241                                      is_layered, all_rects_same_layers,
1242                                      rectCount, pRects);
1243       }
1244    }
1245 }
1246