/*
 * Copyright © 2020 Raspberry Pi Ltd
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */
23
24 #include "v3dv_private.h"
25 #include "v3dv_meta_common.h"
26
27 #include "compiler/nir/nir_builder.h"
28 #include "util/u_pack_color.h"
29
30 static void
get_hw_clear_color(struct v3dv_device * device,const VkClearColorValue * color,VkFormat fb_format,VkFormat image_format,uint32_t internal_type,uint32_t internal_bpp,uint32_t * hw_color)31 get_hw_clear_color(struct v3dv_device *device,
32 const VkClearColorValue *color,
33 VkFormat fb_format,
34 VkFormat image_format,
35 uint32_t internal_type,
36 uint32_t internal_bpp,
37 uint32_t *hw_color)
38 {
39 const uint32_t internal_size = 4 << internal_bpp;
40
41 /* If the image format doesn't match the framebuffer format, then we are
42 * trying to clear an unsupported tlb format using a compatible
43 * format for the framebuffer. In this case, we want to make sure that
44 * we pack the clear value according to the original format semantics,
45 * not the compatible format.
46 */
47 if (fb_format == image_format) {
48 v3dv_X(device, get_hw_clear_color)(color, internal_type, internal_size,
49 hw_color);
50 } else {
51 union util_color uc;
52 enum pipe_format pipe_image_format =
53 vk_format_to_pipe_format(image_format);
54 util_pack_color(color->float32, pipe_image_format, &uc);
55 memcpy(hw_color, uc.ui, internal_size);
56 }
57 }
58
/* Clears an image by emitting one TLB (tile buffer) job per mip level in the
 * requested subresource range, relying on the TLB clear value applied at
 * tile-store time instead of a draw-based clear.
 *
 * Returns true if the implementation is able to handle the case, false
 * otherwise.
 */
static bool
clear_image_tlb(struct v3dv_cmd_buffer *cmd_buffer,
                struct v3dv_image *image,
                const VkClearValue *clear_value,
                const VkImageSubresourceRange *range)
{
   /* Bail out (caller must use another clear path) if the image cannot be
    * bound to the TLB; on success this also gives us a framebuffer format
    * compatible with the image format.
    */
   const VkOffset3D origin = { 0, 0, 0 };
   VkFormat fb_format;
   if (!v3dv_meta_can_use_tlb(image, &origin, &fb_format))
      return false;

   uint32_t internal_type, internal_bpp;
   v3dv_X(cmd_buffer->device, get_internal_type_bpp_for_image_aspects)
      (fb_format, range->aspectMask,
       &internal_type, &internal_bpp);

   /* Pack the clear value: color goes through the format-aware packer,
    * depth/stencil is stored as-is in the union.
    */
   union v3dv_clear_value hw_clear_value = { 0 };
   if (range->aspectMask & VK_IMAGE_ASPECT_COLOR_BIT) {
      get_hw_clear_color(cmd_buffer->device, &clear_value->color, fb_format,
                         image->vk.format, internal_type, internal_bpp,
                         &hw_clear_value.color[0]);
   } else {
      assert((range->aspectMask & VK_IMAGE_ASPECT_DEPTH_BIT) ||
             (range->aspectMask & VK_IMAGE_ASPECT_STENCIL_BIT));
      hw_clear_value.z = clear_value->depthStencil.depth;
      hw_clear_value.s = clear_value->depthStencil.stencil;
   }

   uint32_t level_count = vk_image_subresource_level_count(&image->vk, range);
   uint32_t min_level = range->baseMipLevel;
   uint32_t max_level = range->baseMipLevel + level_count;

   /* For 3D images baseArrayLayer and layerCount must be 0 and 1 respectively.
    * Instead, we need to consider the full depth dimension of the image, which
    * goes from 0 up to the level's depth extent.
    */
   uint32_t min_layer;
   uint32_t max_layer;
   if (image->vk.image_type != VK_IMAGE_TYPE_3D) {
      min_layer = range->baseArrayLayer;
      max_layer = range->baseArrayLayer +
                  vk_image_subresource_layer_count(&image->vk, range);
   } else {
      min_layer = 0;
      max_layer = 0;   /* Recomputed per level below from the minified depth. */
   }

   for (uint32_t level = min_level; level < max_level; level++) {
      if (image->vk.image_type == VK_IMAGE_TYPE_3D)
         max_layer = u_minify(image->vk.extent.depth, level);

      uint32_t width = u_minify(image->vk.extent.width, level);
      uint32_t height = u_minify(image->vk.extent.height, level);

      struct v3dv_job *job =
         v3dv_cmd_buffer_start_job(cmd_buffer, -1, V3DV_JOB_TYPE_GPU_CL);

      /* NOTE(review): on job allocation failure we still return true —
       * presumably the error has already been recorded on the command buffer
       * and falling back to another clear path would not help. Confirm
       * against v3dv_cmd_buffer_start_job's error handling.
       */
      if (!job)
         return true;

      v3dv_job_start_frame(job, width, height, max_layer, false,
                           1, internal_bpp,
                           image->vk.samples > VK_SAMPLE_COUNT_1_BIT);

      struct v3dv_meta_framebuffer framebuffer;
      v3dv_X(job->device, meta_framebuffer_init)(&framebuffer, fb_format,
                                                 internal_type,
                                                 &job->frame_tiling);

      v3dv_X(job->device, job_emit_binning_flush)(job);

      /* If this triggers it is an application bug: the spec requires
       * that any aspects to clear are present in the image.
       */
      assert(range->aspectMask & image->vk.aspects);

      v3dv_X(job->device, meta_emit_clear_image_rcl)
         (job, image, &framebuffer, &hw_clear_value,
          range->aspectMask, min_layer, max_layer, level);

      v3dv_cmd_buffer_finish_job(cmd_buffer);
   }

   return true;
}
147
148 VKAPI_ATTR void VKAPI_CALL
v3dv_CmdClearColorImage(VkCommandBuffer commandBuffer,VkImage _image,VkImageLayout imageLayout,const VkClearColorValue * pColor,uint32_t rangeCount,const VkImageSubresourceRange * pRanges)149 v3dv_CmdClearColorImage(VkCommandBuffer commandBuffer,
150 VkImage _image,
151 VkImageLayout imageLayout,
152 const VkClearColorValue *pColor,
153 uint32_t rangeCount,
154 const VkImageSubresourceRange *pRanges)
155 {
156 V3DV_FROM_HANDLE(v3dv_cmd_buffer, cmd_buffer, commandBuffer);
157 V3DV_FROM_HANDLE(v3dv_image, image, _image);
158
159 const VkClearValue clear_value = {
160 .color = *pColor,
161 };
162
163 for (uint32_t i = 0; i < rangeCount; i++) {
164 if (clear_image_tlb(cmd_buffer, image, &clear_value, &pRanges[i]))
165 continue;
166 unreachable("Unsupported color clear.");
167 }
168 }
169
170 VKAPI_ATTR void VKAPI_CALL
v3dv_CmdClearDepthStencilImage(VkCommandBuffer commandBuffer,VkImage _image,VkImageLayout imageLayout,const VkClearDepthStencilValue * pDepthStencil,uint32_t rangeCount,const VkImageSubresourceRange * pRanges)171 v3dv_CmdClearDepthStencilImage(VkCommandBuffer commandBuffer,
172 VkImage _image,
173 VkImageLayout imageLayout,
174 const VkClearDepthStencilValue *pDepthStencil,
175 uint32_t rangeCount,
176 const VkImageSubresourceRange *pRanges)
177 {
178 V3DV_FROM_HANDLE(v3dv_cmd_buffer, cmd_buffer, commandBuffer);
179 V3DV_FROM_HANDLE(v3dv_image, image, _image);
180
181 const VkClearValue clear_value = {
182 .depthStencil = *pDepthStencil,
183 };
184
185 for (uint32_t i = 0; i < rangeCount; i++) {
186 if (clear_image_tlb(cmd_buffer, image, &clear_value, &pRanges[i]))
187 continue;
188 unreachable("Unsupported depth/stencil clear.");
189 }
190 }
191
192 static void
destroy_color_clear_pipeline(VkDevice _device,uint64_t pipeline,VkAllocationCallbacks * alloc)193 destroy_color_clear_pipeline(VkDevice _device,
194 uint64_t pipeline,
195 VkAllocationCallbacks *alloc)
196 {
197 struct v3dv_meta_color_clear_pipeline *p =
198 (struct v3dv_meta_color_clear_pipeline *) (uintptr_t) pipeline;
199 v3dv_DestroyPipeline(_device, p->pipeline, alloc);
200 if (p->cached)
201 v3dv_DestroyRenderPass(_device, p->pass, alloc);
202 vk_free(alloc, p);
203 }
204
205 static void
destroy_depth_clear_pipeline(VkDevice _device,struct v3dv_meta_depth_clear_pipeline * p,VkAllocationCallbacks * alloc)206 destroy_depth_clear_pipeline(VkDevice _device,
207 struct v3dv_meta_depth_clear_pipeline *p,
208 VkAllocationCallbacks *alloc)
209 {
210 v3dv_DestroyPipeline(_device, p->pipeline, alloc);
211 vk_free(alloc, p);
212 }
213
214 static VkResult
create_color_clear_pipeline_layout(struct v3dv_device * device,VkPipelineLayout * pipeline_layout)215 create_color_clear_pipeline_layout(struct v3dv_device *device,
216 VkPipelineLayout *pipeline_layout)
217 {
218 /* FIXME: this is abusing a bit the API, since not all of our clear
219 * pipelines have a geometry shader. We could create 2 different pipeline
220 * layouts, but this works for us for now.
221 */
222 VkPushConstantRange ranges[2] = {
223 { VK_SHADER_STAGE_FRAGMENT_BIT, 0, 16 },
224 { VK_SHADER_STAGE_GEOMETRY_BIT, 16, 4 },
225 };
226
227 VkPipelineLayoutCreateInfo info = {
228 .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
229 .setLayoutCount = 0,
230 .pushConstantRangeCount = 2,
231 .pPushConstantRanges = ranges,
232 };
233
234 return v3dv_CreatePipelineLayout(v3dv_device_to_handle(device),
235 &info, &device->vk.alloc, pipeline_layout);
236 }
237
238 static VkResult
create_depth_clear_pipeline_layout(struct v3dv_device * device,VkPipelineLayout * pipeline_layout)239 create_depth_clear_pipeline_layout(struct v3dv_device *device,
240 VkPipelineLayout *pipeline_layout)
241 {
242 /* FIXME: this is abusing a bit the API, since not all of our clear
243 * pipelines have a geometry shader. We could create 2 different pipeline
244 * layouts, but this works for us for now.
245 */
246 VkPushConstantRange ranges[2] = {
247 { VK_SHADER_STAGE_FRAGMENT_BIT, 0, 4 },
248 { VK_SHADER_STAGE_GEOMETRY_BIT, 4, 4 },
249 };
250
251 VkPipelineLayoutCreateInfo info = {
252 .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
253 .setLayoutCount = 0,
254 .pushConstantRangeCount = 2,
255 .pPushConstantRanges = ranges
256 };
257
258 return v3dv_CreatePipelineLayout(v3dv_device_to_handle(device),
259 &info, &device->vk.alloc, pipeline_layout);
260 }
261
262 void
v3dv_meta_clear_init(struct v3dv_device * device)263 v3dv_meta_clear_init(struct v3dv_device *device)
264 {
265 device->meta.color_clear.cache =
266 _mesa_hash_table_create(NULL, u64_hash, u64_compare);
267
268 create_color_clear_pipeline_layout(device,
269 &device->meta.color_clear.p_layout);
270
271 device->meta.depth_clear.cache =
272 _mesa_hash_table_create(NULL, u64_hash, u64_compare);
273
274 create_depth_clear_pipeline_layout(device,
275 &device->meta.depth_clear.p_layout);
276 }
277
278 void
v3dv_meta_clear_finish(struct v3dv_device * device)279 v3dv_meta_clear_finish(struct v3dv_device *device)
280 {
281 VkDevice _device = v3dv_device_to_handle(device);
282
283 hash_table_foreach(device->meta.color_clear.cache, entry) {
284 struct v3dv_meta_color_clear_pipeline *item = entry->data;
285 destroy_color_clear_pipeline(_device, (uintptr_t)item, &device->vk.alloc);
286 }
287 _mesa_hash_table_destroy(device->meta.color_clear.cache, NULL);
288
289 if (device->meta.color_clear.p_layout) {
290 v3dv_DestroyPipelineLayout(_device, device->meta.color_clear.p_layout,
291 &device->vk.alloc);
292 }
293
294 hash_table_foreach(device->meta.depth_clear.cache, entry) {
295 struct v3dv_meta_depth_clear_pipeline *item = entry->data;
296 destroy_depth_clear_pipeline(_device, item, &device->vk.alloc);
297 }
298 _mesa_hash_table_destroy(device->meta.depth_clear.cache, NULL);
299
300 if (device->meta.depth_clear.p_layout) {
301 v3dv_DestroyPipelineLayout(_device, device->meta.depth_clear.p_layout,
302 &device->vk.alloc);
303 }
304 }
305
306 static nir_ssa_def *
gen_rect_vertices(nir_builder * b)307 gen_rect_vertices(nir_builder *b)
308 {
309 nir_ssa_def *vertex_id = nir_load_vertex_id(b);
310
311 /* vertex 0: -1.0, -1.0
312 * vertex 1: -1.0, 1.0
313 * vertex 2: 1.0, -1.0
314 * vertex 3: 1.0, 1.0
315 *
316 * so:
317 *
318 * channel 0 is vertex_id < 2 ? -1.0 : 1.0
319 * channel 1 is vertex id & 1 ? 1.0 : -1.0
320 */
321
322 nir_ssa_def *one = nir_imm_int(b, 1);
323 nir_ssa_def *c0cmp = nir_ilt(b, vertex_id, nir_imm_int(b, 2));
324 nir_ssa_def *c1cmp = nir_ieq(b, nir_iand(b, vertex_id, one), one);
325
326 nir_ssa_def *comp[4];
327 comp[0] = nir_bcsel(b, c0cmp,
328 nir_imm_float(b, -1.0f),
329 nir_imm_float(b, 1.0f));
330
331 comp[1] = nir_bcsel(b, c1cmp,
332 nir_imm_float(b, 1.0f),
333 nir_imm_float(b, -1.0f));
334 comp[2] = nir_imm_float(b, 0.0f);
335 comp[3] = nir_imm_float(b, 1.0f);
336 return nir_vec(b, comp, 4);
337 }
338
339 static nir_shader *
get_clear_rect_vs()340 get_clear_rect_vs()
341 {
342 const nir_shader_compiler_options *options = v3dv_pipeline_get_nir_options();
343 nir_builder b = nir_builder_init_simple_shader(MESA_SHADER_VERTEX, options,
344 "meta clear vs");
345
346 const struct glsl_type *vec4 = glsl_vec4_type();
347 nir_variable *vs_out_pos =
348 nir_variable_create(b.shader, nir_var_shader_out, vec4, "gl_Position");
349 vs_out_pos->data.location = VARYING_SLOT_POS;
350
351 nir_ssa_def *pos = gen_rect_vertices(&b);
352 nir_store_var(&b, vs_out_pos, pos, 0xf);
353
354 return b.shader;
355 }
356
/* Builds a geometry shader used for layered clears: it passes the incoming
 * triangle through unchanged and tags every output vertex with a layer index
 * read from push constants at offset 'push_constant_layer_base'.
 * Caller owns the returned shader.
 */
static nir_shader *
get_clear_rect_gs(uint32_t push_constant_layer_base)
{
   /* FIXME: this creates a geometry shader that takes the index of a single
    * layer to clear from push constants, so we need to emit a draw call for
    * each layer that we want to clear. We could actually do better and have it
    * take a range of layers and then emit one triangle per layer to clear,
    * however, if we were to do this we would need to be careful not to exceed
    * the maximum number of output vertices allowed in a geometry shader.
    */
   const nir_shader_compiler_options *options = v3dv_pipeline_get_nir_options();
   nir_builder b = nir_builder_init_simple_shader(MESA_SHADER_GEOMETRY, options,
                                                  "meta clear gs");
   nir_shader *nir = b.shader;

   /* Shader info: one input triangle, one output triangle (as a strip),
    * consuming the position and producing position + layer.
    */
   nir->info.inputs_read = 1ull << VARYING_SLOT_POS;
   nir->info.outputs_written = (1ull << VARYING_SLOT_POS) |
                               (1ull << VARYING_SLOT_LAYER);
   nir->info.gs.input_primitive = SHADER_PRIM_TRIANGLES;
   nir->info.gs.output_primitive = SHADER_PRIM_TRIANGLE_STRIP;
   nir->info.gs.vertices_in = 3;
   nir->info.gs.vertices_out = 3;
   nir->info.gs.invocations = 1;
   nir->info.gs.active_stream_mask = 0x1;

   /* in vec4 gl_Position[3] */
   nir_variable *gs_in_pos =
      nir_variable_create(b.shader, nir_var_shader_in,
                          glsl_array_type(glsl_vec4_type(), 3, 0),
                          "in_gl_Position");
   gs_in_pos->data.location = VARYING_SLOT_POS;

   /* out vec4 gl_Position */
   nir_variable *gs_out_pos =
      nir_variable_create(b.shader, nir_var_shader_out, glsl_vec4_type(),
                          "out_gl_Position");
   gs_out_pos->data.location = VARYING_SLOT_POS;

   /* out float gl_Layer */
   nir_variable *gs_out_layer =
      nir_variable_create(b.shader, nir_var_shader_out, glsl_float_type(),
                          "out_gl_Layer");
   gs_out_layer->data.location = VARYING_SLOT_LAYER;

   /* Emit output triangle */
   for (uint32_t i = 0; i < 3; i++) {
      /* gl_Position from shader input */
      nir_deref_instr *in_pos_i =
         nir_build_deref_array_imm(&b, nir_build_deref_var(&b, gs_in_pos), i);
      nir_copy_deref(&b, nir_build_deref_var(&b, gs_out_pos), in_pos_i);

      /* gl_Layer from push constants */
      nir_ssa_def *layer =
         nir_load_push_constant(&b, 1, 32, nir_imm_int(&b, 0),
                                .base = push_constant_layer_base, .range = 4);
      nir_store_var(&b, gs_out_layer, layer, 0x1);

      nir_emit_vertex(&b, 0);
   }

   nir_end_primitive(&b, 0);

   return nir;
}
420
421 static nir_shader *
get_color_clear_rect_fs(uint32_t rt_idx,VkFormat format)422 get_color_clear_rect_fs(uint32_t rt_idx, VkFormat format)
423 {
424 const nir_shader_compiler_options *options = v3dv_pipeline_get_nir_options();
425 nir_builder b = nir_builder_init_simple_shader(MESA_SHADER_FRAGMENT, options,
426 "meta clear fs");
427
428 enum pipe_format pformat = vk_format_to_pipe_format(format);
429 const struct glsl_type *fs_out_type =
430 util_format_is_float(pformat) ? glsl_vec4_type() : glsl_uvec4_type();
431
432 nir_variable *fs_out_color =
433 nir_variable_create(b.shader, nir_var_shader_out, fs_out_type, "out_color");
434 fs_out_color->data.location = FRAG_RESULT_DATA0 + rt_idx;
435
436 nir_ssa_def *color_load = nir_load_push_constant(&b, 4, 32, nir_imm_int(&b, 0), .base = 0, .range = 16);
437 nir_store_var(&b, fs_out_color, color_load, 0xf);
438
439 return b.shader;
440 }
441
442 static nir_shader *
get_depth_clear_rect_fs()443 get_depth_clear_rect_fs()
444 {
445 const nir_shader_compiler_options *options = v3dv_pipeline_get_nir_options();
446 nir_builder b = nir_builder_init_simple_shader(MESA_SHADER_FRAGMENT, options,
447 "meta depth clear fs");
448
449 nir_variable *fs_out_depth =
450 nir_variable_create(b.shader, nir_var_shader_out, glsl_float_type(),
451 "out_depth");
452 fs_out_depth->data.location = FRAG_RESULT_DEPTH;
453
454 nir_ssa_def *depth_load =
455 nir_load_push_constant(&b, 1, 32, nir_imm_int(&b, 0), .base = 0, .range = 4);
456
457 nir_store_var(&b, fs_out_depth, depth_load, 0x1);
458
459 return b.shader;
460 }
461
462 static VkResult
create_pipeline(struct v3dv_device * device,struct v3dv_render_pass * pass,uint32_t subpass_idx,uint32_t samples,struct nir_shader * vs_nir,struct nir_shader * gs_nir,struct nir_shader * fs_nir,const VkPipelineVertexInputStateCreateInfo * vi_state,const VkPipelineDepthStencilStateCreateInfo * ds_state,const VkPipelineColorBlendStateCreateInfo * cb_state,const VkPipelineLayout layout,VkPipeline * pipeline)463 create_pipeline(struct v3dv_device *device,
464 struct v3dv_render_pass *pass,
465 uint32_t subpass_idx,
466 uint32_t samples,
467 struct nir_shader *vs_nir,
468 struct nir_shader *gs_nir,
469 struct nir_shader *fs_nir,
470 const VkPipelineVertexInputStateCreateInfo *vi_state,
471 const VkPipelineDepthStencilStateCreateInfo *ds_state,
472 const VkPipelineColorBlendStateCreateInfo *cb_state,
473 const VkPipelineLayout layout,
474 VkPipeline *pipeline)
475 {
476 VkPipelineShaderStageCreateInfo stages[3] = { 0 };
477 struct vk_shader_module vs_m;
478 struct vk_shader_module gs_m;
479 struct vk_shader_module fs_m;
480
481 uint32_t stage_count = 0;
482 v3dv_shader_module_internal_init(device, &vs_m, vs_nir);
483 stages[stage_count].sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO;
484 stages[stage_count].stage = VK_SHADER_STAGE_VERTEX_BIT;
485 stages[stage_count].module = vk_shader_module_to_handle(&vs_m);
486 stages[stage_count].pName = "main";
487 stage_count++;
488
489 if (gs_nir) {
490 v3dv_shader_module_internal_init(device, &gs_m, gs_nir);
491 stages[stage_count].sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO;
492 stages[stage_count].stage = VK_SHADER_STAGE_GEOMETRY_BIT;
493 stages[stage_count].module = vk_shader_module_to_handle(&gs_m);
494 stages[stage_count].pName = "main";
495 stage_count++;
496 }
497
498 if (fs_nir) {
499 v3dv_shader_module_internal_init(device, &fs_m, fs_nir);
500 stages[stage_count].sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO;
501 stages[stage_count].stage = VK_SHADER_STAGE_FRAGMENT_BIT;
502 stages[stage_count].module = vk_shader_module_to_handle(&fs_m);
503 stages[stage_count].pName = "main";
504 stage_count++;
505 }
506
507 VkGraphicsPipelineCreateInfo info = {
508 .sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO,
509
510 .stageCount = stage_count,
511 .pStages = stages,
512
513 .pVertexInputState = vi_state,
514
515 .pInputAssemblyState = &(VkPipelineInputAssemblyStateCreateInfo) {
516 .sType = VK_STRUCTURE_TYPE_PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO,
517 .topology = VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP,
518 .primitiveRestartEnable = false,
519 },
520
521 .pViewportState = &(VkPipelineViewportStateCreateInfo) {
522 .sType = VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO,
523 .viewportCount = 1,
524 .scissorCount = 1,
525 },
526
527 .pRasterizationState = &(VkPipelineRasterizationStateCreateInfo) {
528 .sType = VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_CREATE_INFO,
529 .rasterizerDiscardEnable = false,
530 .polygonMode = VK_POLYGON_MODE_FILL,
531 .cullMode = VK_CULL_MODE_NONE,
532 .frontFace = VK_FRONT_FACE_COUNTER_CLOCKWISE,
533 .depthBiasEnable = false,
534 },
535
536 .pMultisampleState = &(VkPipelineMultisampleStateCreateInfo) {
537 .sType = VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO,
538 .rasterizationSamples = samples,
539 .sampleShadingEnable = false,
540 .pSampleMask = NULL,
541 .alphaToCoverageEnable = false,
542 .alphaToOneEnable = false,
543 },
544
545 .pDepthStencilState = ds_state,
546
547 .pColorBlendState = cb_state,
548
549 /* The meta clear pipeline declares all state as dynamic.
550 * As a consequence, vkCmdBindPipeline writes no dynamic state
551 * to the cmd buffer. Therefore, at the end of the meta clear,
552 * we need only restore dynamic state that was vkCmdSet.
553 */
554 .pDynamicState = &(VkPipelineDynamicStateCreateInfo) {
555 .sType = VK_STRUCTURE_TYPE_PIPELINE_DYNAMIC_STATE_CREATE_INFO,
556 .dynamicStateCount = 6,
557 .pDynamicStates = (VkDynamicState[]) {
558 VK_DYNAMIC_STATE_VIEWPORT,
559 VK_DYNAMIC_STATE_SCISSOR,
560 VK_DYNAMIC_STATE_STENCIL_COMPARE_MASK,
561 VK_DYNAMIC_STATE_STENCIL_WRITE_MASK,
562 VK_DYNAMIC_STATE_STENCIL_REFERENCE,
563 VK_DYNAMIC_STATE_BLEND_CONSTANTS,
564 VK_DYNAMIC_STATE_DEPTH_BIAS,
565 VK_DYNAMIC_STATE_LINE_WIDTH,
566 },
567 },
568
569 .flags = 0,
570 .layout = layout,
571 .renderPass = v3dv_render_pass_to_handle(pass),
572 .subpass = subpass_idx,
573 };
574
575 VkResult result =
576 v3dv_CreateGraphicsPipelines(v3dv_device_to_handle(device),
577 VK_NULL_HANDLE,
578 1, &info,
579 &device->vk.alloc,
580 pipeline);
581
582 ralloc_free(vs_nir);
583 ralloc_free(fs_nir);
584
585 return result;
586 }
587
588 static VkResult
create_color_clear_pipeline(struct v3dv_device * device,struct v3dv_render_pass * pass,uint32_t subpass_idx,uint32_t rt_idx,VkFormat format,uint32_t samples,uint32_t components,bool is_layered,VkPipelineLayout pipeline_layout,VkPipeline * pipeline)589 create_color_clear_pipeline(struct v3dv_device *device,
590 struct v3dv_render_pass *pass,
591 uint32_t subpass_idx,
592 uint32_t rt_idx,
593 VkFormat format,
594 uint32_t samples,
595 uint32_t components,
596 bool is_layered,
597 VkPipelineLayout pipeline_layout,
598 VkPipeline *pipeline)
599 {
600 nir_shader *vs_nir = get_clear_rect_vs();
601 nir_shader *fs_nir = get_color_clear_rect_fs(rt_idx, format);
602 nir_shader *gs_nir = is_layered ? get_clear_rect_gs(16) : NULL;
603
604 const VkPipelineVertexInputStateCreateInfo vi_state = {
605 .sType = VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO,
606 .vertexBindingDescriptionCount = 0,
607 .vertexAttributeDescriptionCount = 0,
608 };
609
610 const VkPipelineDepthStencilStateCreateInfo ds_state = {
611 .sType = VK_STRUCTURE_TYPE_PIPELINE_DEPTH_STENCIL_STATE_CREATE_INFO,
612 .depthTestEnable = false,
613 .depthWriteEnable = false,
614 .depthBoundsTestEnable = false,
615 .stencilTestEnable = false,
616 };
617
618 assert(subpass_idx < pass->subpass_count);
619 const uint32_t color_count = pass->subpasses[subpass_idx].color_count;
620 assert(rt_idx < color_count);
621
622 VkPipelineColorBlendAttachmentState blend_att_state[V3D_MAX_DRAW_BUFFERS];
623 for (uint32_t i = 0; i < color_count; i++) {
624 blend_att_state[i] = (VkPipelineColorBlendAttachmentState) {
625 .blendEnable = false,
626 .colorWriteMask = i == rt_idx ? components : 0,
627 };
628 }
629
630 const VkPipelineColorBlendStateCreateInfo cb_state = {
631 .sType = VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO,
632 .logicOpEnable = false,
633 .attachmentCount = color_count,
634 .pAttachments = blend_att_state
635 };
636
637 return create_pipeline(device,
638 pass, subpass_idx,
639 samples,
640 vs_nir, gs_nir, fs_nir,
641 &vi_state,
642 &ds_state,
643 &cb_state,
644 pipeline_layout,
645 pipeline);
646 }
647
/* Creates a pipeline that clears the depth and/or stencil aspects of the
 * given subpass by drawing a full-screen quad. Depth is written from the
 * fragment shader (push constant at offset 0); stencil is written through
 * the REPLACE op using the dynamic stencil reference value. Color writes
 * are disabled on all attachments. Layered framebuffers additionally get a
 * geometry shader that routes the quad to the layer indicated in push
 * constants (at offset 4).
 */
static VkResult
create_depth_clear_pipeline(struct v3dv_device *device,
                            VkImageAspectFlags aspects,
                            struct v3dv_render_pass *pass,
                            uint32_t subpass_idx,
                            uint32_t samples,
                            bool is_layered,
                            VkPipelineLayout pipeline_layout,
                            VkPipeline *pipeline)
{
   const bool has_depth = aspects & VK_IMAGE_ASPECT_DEPTH_BIT;
   const bool has_stencil = aspects & VK_IMAGE_ASPECT_STENCIL_BIT;
   assert(has_depth || has_stencil);

   /* A fragment shader is only needed to write depth; stencil-only clears
    * rely entirely on the stencil REPLACE op.
    */
   nir_shader *vs_nir = get_clear_rect_vs();
   nir_shader *fs_nir = has_depth ? get_depth_clear_rect_fs() : NULL;
   nir_shader *gs_nir = is_layered ? get_clear_rect_gs(4) : NULL;

   /* No vertex buffers: positions are generated from gl_VertexIndex. */
   const VkPipelineVertexInputStateCreateInfo vi_state = {
      .sType = VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO,
      .vertexBindingDescriptionCount = 0,
      .vertexAttributeDescriptionCount = 0,
   };

   /* Depth/stencil tests always pass so every covered pixel is written. */
   const VkPipelineDepthStencilStateCreateInfo ds_state = {
      .sType = VK_STRUCTURE_TYPE_PIPELINE_DEPTH_STENCIL_STATE_CREATE_INFO,
      .depthTestEnable = has_depth,
      .depthWriteEnable = has_depth,
      .depthCompareOp = VK_COMPARE_OP_ALWAYS,
      .depthBoundsTestEnable = false,
      .stencilTestEnable = has_stencil,
      .front = {
         .passOp = VK_STENCIL_OP_REPLACE,
         .compareOp = VK_COMPARE_OP_ALWAYS,
         /* compareMask, writeMask and reference are dynamic state */
      },
      .back = { 0 },
   };

   assert(subpass_idx < pass->subpass_count);
   /* Zero-initialized attachment states: colorWriteMask == 0 disables all
    * color writes during the depth/stencil clear.
    */
   VkPipelineColorBlendAttachmentState blend_att_state[V3D_MAX_DRAW_BUFFERS] = { 0 };
   const VkPipelineColorBlendStateCreateInfo cb_state = {
      .sType = VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO,
      .logicOpEnable = false,
      .attachmentCount = pass->subpasses[subpass_idx].color_count,
      .pAttachments = blend_att_state,
   };

   return create_pipeline(device,
                          pass, subpass_idx,
                          samples,
                          vs_nir, gs_nir, fs_nir,
                          &vi_state,
                          &ds_state,
                          &cb_state,
                          pipeline_layout,
                          pipeline);
}
706
707 static VkResult
create_color_clear_render_pass(struct v3dv_device * device,uint32_t rt_idx,VkFormat format,uint32_t samples,VkRenderPass * pass)708 create_color_clear_render_pass(struct v3dv_device *device,
709 uint32_t rt_idx,
710 VkFormat format,
711 uint32_t samples,
712 VkRenderPass *pass)
713 {
714 VkAttachmentDescription2 att = {
715 .sType = VK_STRUCTURE_TYPE_ATTACHMENT_DESCRIPTION_2,
716 .format = format,
717 .samples = samples,
718 .loadOp = VK_ATTACHMENT_LOAD_OP_LOAD,
719 .storeOp = VK_ATTACHMENT_STORE_OP_STORE,
720 .initialLayout = VK_IMAGE_LAYOUT_GENERAL,
721 .finalLayout = VK_IMAGE_LAYOUT_GENERAL,
722 };
723
724 VkAttachmentReference2 att_ref = {
725 .sType = VK_STRUCTURE_TYPE_ATTACHMENT_REFERENCE_2,
726 .attachment = rt_idx,
727 .layout = VK_IMAGE_LAYOUT_GENERAL,
728 };
729
730 VkSubpassDescription2 subpass = {
731 .sType = VK_STRUCTURE_TYPE_SUBPASS_DESCRIPTION_2,
732 .pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS,
733 .inputAttachmentCount = 0,
734 .colorAttachmentCount = 1,
735 .pColorAttachments = &att_ref,
736 .pResolveAttachments = NULL,
737 .pDepthStencilAttachment = NULL,
738 .preserveAttachmentCount = 0,
739 .pPreserveAttachments = NULL,
740 };
741
742 VkRenderPassCreateInfo2 info = {
743 .sType = VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO_2,
744 .attachmentCount = 1,
745 .pAttachments = &att,
746 .subpassCount = 1,
747 .pSubpasses = &subpass,
748 .dependencyCount = 0,
749 .pDependencies = NULL,
750 };
751
752 return v3dv_CreateRenderPass2(v3dv_device_to_handle(device),
753 &info, &device->vk.alloc, pass);
754 }
755
756 static inline uint64_t
get_color_clear_pipeline_cache_key(uint32_t rt_idx,VkFormat format,uint32_t samples,uint32_t components,bool is_layered)757 get_color_clear_pipeline_cache_key(uint32_t rt_idx,
758 VkFormat format,
759 uint32_t samples,
760 uint32_t components,
761 bool is_layered)
762 {
763 assert(rt_idx < V3D_MAX_DRAW_BUFFERS);
764
765 uint64_t key = 0;
766 uint32_t bit_offset = 0;
767
768 key |= rt_idx;
769 bit_offset += 2;
770
771 key |= ((uint64_t) format) << bit_offset;
772 bit_offset += 32;
773
774 key |= ((uint64_t) samples) << bit_offset;
775 bit_offset += 4;
776
777 key |= ((uint64_t) components) << bit_offset;
778 bit_offset += 4;
779
780 key |= (is_layered ? 1ull : 0ull) << bit_offset;
781 bit_offset += 1;
782
783 assert(bit_offset <= 64);
784 return key;
785 }
786
787 static inline uint64_t
get_depth_clear_pipeline_cache_key(VkImageAspectFlags aspects,VkFormat format,uint32_t samples,bool is_layered)788 get_depth_clear_pipeline_cache_key(VkImageAspectFlags aspects,
789 VkFormat format,
790 uint32_t samples,
791 bool is_layered)
792 {
793 uint64_t key = 0;
794 uint32_t bit_offset = 0;
795
796 key |= format;
797 bit_offset += 32;
798
799 key |= ((uint64_t) samples) << bit_offset;
800 bit_offset += 4;
801
802 const bool has_depth = (aspects & VK_IMAGE_ASPECT_DEPTH_BIT) ? 1 : 0;
803 key |= ((uint64_t) has_depth) << bit_offset;
804 bit_offset++;
805
806 const bool has_stencil = (aspects & VK_IMAGE_ASPECT_STENCIL_BIT) ? 1 : 0;
807 key |= ((uint64_t) has_stencil) << bit_offset;
808 bit_offset++;;
809
810 key |= (is_layered ? 1ull : 0ull) << bit_offset;
811 bit_offset += 1;
812
813 assert(bit_offset <= 64);
814 return key;
815 }
816
817 static VkResult
get_color_clear_pipeline(struct v3dv_device * device,struct v3dv_render_pass * pass,uint32_t subpass_idx,uint32_t rt_idx,uint32_t attachment_idx,VkFormat format,uint32_t samples,uint32_t components,bool is_layered,struct v3dv_meta_color_clear_pipeline ** pipeline)818 get_color_clear_pipeline(struct v3dv_device *device,
819 struct v3dv_render_pass *pass,
820 uint32_t subpass_idx,
821 uint32_t rt_idx,
822 uint32_t attachment_idx,
823 VkFormat format,
824 uint32_t samples,
825 uint32_t components,
826 bool is_layered,
827 struct v3dv_meta_color_clear_pipeline **pipeline)
828 {
829 assert(vk_format_is_color(format));
830
831 VkResult result = VK_SUCCESS;
832
833 /* If pass != NULL it means that we are emitting the clear as a draw call
834 * in the current pass bound by the application. In that case, we can't
835 * cache the pipeline, since it will be referencing that pass and the
836 * application could be destroying it at any point. Hopefully, the perf
837 * impact is not too big since we still have the device pipeline cache
838 * around and we won't end up re-compiling the clear shader.
839 *
840 * FIXME: alternatively, we could refcount (or maybe clone) the render pass
841 * provided by the application and include it in the pipeline key setup
842 * to make caching safe in this scenario, however, based on tests with
843 * vkQuake3, the fact that we are not caching here doesn't seem to have
844 * any significant impact in performance, so it might not be worth it.
845 */
846 const bool can_cache_pipeline = (pass == NULL);
847
848 uint64_t key;
849 if (can_cache_pipeline) {
850 key = get_color_clear_pipeline_cache_key(rt_idx, format, samples,
851 components, is_layered);
852 mtx_lock(&device->meta.mtx);
853 struct hash_entry *entry =
854 _mesa_hash_table_search(device->meta.color_clear.cache, &key);
855 if (entry) {
856 mtx_unlock(&device->meta.mtx);
857 *pipeline = entry->data;
858 return VK_SUCCESS;
859 }
860 }
861
862 *pipeline = vk_zalloc2(&device->vk.alloc, NULL, sizeof(**pipeline), 8,
863 VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
864
865 if (*pipeline == NULL) {
866 result = VK_ERROR_OUT_OF_HOST_MEMORY;
867 goto fail;
868 }
869
870 if (!pass) {
871 result = create_color_clear_render_pass(device,
872 rt_idx,
873 format,
874 samples,
875 &(*pipeline)->pass);
876 if (result != VK_SUCCESS)
877 goto fail;
878
879 pass = v3dv_render_pass_from_handle((*pipeline)->pass);
880 } else {
881 (*pipeline)->pass = v3dv_render_pass_to_handle(pass);
882 }
883
884 result = create_color_clear_pipeline(device,
885 pass,
886 subpass_idx,
887 rt_idx,
888 format,
889 samples,
890 components,
891 is_layered,
892 device->meta.color_clear.p_layout,
893 &(*pipeline)->pipeline);
894 if (result != VK_SUCCESS)
895 goto fail;
896
897 if (can_cache_pipeline) {
898 (*pipeline)->key = key;
899 (*pipeline)->cached = true;
900 _mesa_hash_table_insert(device->meta.color_clear.cache,
901 &(*pipeline)->key, *pipeline);
902
903 mtx_unlock(&device->meta.mtx);
904 }
905
906 return VK_SUCCESS;
907
908 fail:
909 if (can_cache_pipeline)
910 mtx_unlock(&device->meta.mtx);
911
912 VkDevice _device = v3dv_device_to_handle(device);
913 if (*pipeline) {
914 if ((*pipeline)->cached)
915 v3dv_DestroyRenderPass(_device, (*pipeline)->pass, &device->vk.alloc);
916 if ((*pipeline)->pipeline)
917 v3dv_DestroyPipeline(_device, (*pipeline)->pipeline, &device->vk.alloc);
918 vk_free(&device->vk.alloc, *pipeline);
919 *pipeline = NULL;
920 }
921
922 return result;
923 }
924
/* Looks up (or creates and caches) a meta pipeline that clears the given
 * depth/stencil aspects of the subpass D/S attachment with a scissored draw.
 *
 * Unlike color clears, these pipelines are always cached: the cache key is
 * derived only from aspects/format/samples/layering, so no reference to the
 * application's render pass is baked into the key.
 *
 * On success returns VK_SUCCESS and stores the (cached) pipeline in
 * *pipeline. On failure returns an error result (e.g.
 * VK_ERROR_OUT_OF_HOST_MEMORY) and leaves *pipeline == NULL.
 */
static VkResult
get_depth_clear_pipeline(struct v3dv_device *device,
                         VkImageAspectFlags aspects,
                         struct v3dv_render_pass *pass,
                         uint32_t subpass_idx,
                         uint32_t attachment_idx,
                         bool is_layered,
                         struct v3dv_meta_depth_clear_pipeline **pipeline)
{
   assert(subpass_idx < pass->subpass_count);
   assert(attachment_idx != VK_ATTACHMENT_UNUSED);
   assert(attachment_idx < pass->attachment_count);

   VkResult result = VK_SUCCESS;

   const uint32_t samples = pass->attachments[attachment_idx].desc.samples;
   const VkFormat format = pass->attachments[attachment_idx].desc.format;
   assert(vk_format_is_depth_or_stencil(format));

   const uint64_t key =
      get_depth_clear_pipeline_cache_key(aspects, format, samples, is_layered);
   /* The meta mutex is held from the cache lookup until the new entry is
    * inserted (or until failure) so concurrent callers can't race to create
    * the same pipeline twice. Every exit path below must unlock it.
    */
   mtx_lock(&device->meta.mtx);
   struct hash_entry *entry =
      _mesa_hash_table_search(device->meta.depth_clear.cache, &key);
   if (entry) {
      /* Cache hit: return the previously created pipeline. */
      mtx_unlock(&device->meta.mtx);
      *pipeline = entry->data;
      return VK_SUCCESS;
   }

   *pipeline = vk_zalloc2(&device->vk.alloc, NULL, sizeof(**pipeline), 8,
                          VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);

   if (*pipeline == NULL) {
      result = VK_ERROR_OUT_OF_HOST_MEMORY;
      goto fail;
   }

   result = create_depth_clear_pipeline(device,
                                        aspects,
                                        pass,
                                        subpass_idx,
                                        samples,
                                        is_layered,
                                        device->meta.depth_clear.p_layout,
                                        &(*pipeline)->pipeline);
   if (result != VK_SUCCESS)
      goto fail;

   /* The hash table stores a pointer to the key, so the key must live as
    * long as the entry: keep it inside the pipeline object itself.
    */
   (*pipeline)->key = key;
   _mesa_hash_table_insert(device->meta.depth_clear.cache,
                           &(*pipeline)->key, *pipeline);

   mtx_unlock(&device->meta.mtx);
   return VK_SUCCESS;

fail:
   mtx_unlock(&device->meta.mtx);

   /* Undo any partial construction; *pipeline is zero-initialized, so only
    * the members that were actually created are destroyed.
    */
   VkDevice _device = v3dv_device_to_handle(device);
   if (*pipeline) {
      if ((*pipeline)->pipeline)
         v3dv_DestroyPipeline(_device, (*pipeline)->pipeline, &device->vk.alloc);
      vk_free(&device->vk.alloc, *pipeline);
      *pipeline = NULL;
   }

   return result;
}
994
995 /* Emits a scissored quad in the clear color */
996 static void
emit_subpass_color_clear_rects(struct v3dv_cmd_buffer * cmd_buffer,struct v3dv_render_pass * pass,struct v3dv_subpass * subpass,uint32_t rt_idx,const VkClearColorValue * clear_color,bool is_layered,bool all_rects_same_layers,uint32_t rect_count,const VkClearRect * rects)997 emit_subpass_color_clear_rects(struct v3dv_cmd_buffer *cmd_buffer,
998 struct v3dv_render_pass *pass,
999 struct v3dv_subpass *subpass,
1000 uint32_t rt_idx,
1001 const VkClearColorValue *clear_color,
1002 bool is_layered,
1003 bool all_rects_same_layers,
1004 uint32_t rect_count,
1005 const VkClearRect *rects)
1006 {
1007 /* Skip if attachment is unused in the current subpass */
1008 assert(rt_idx < subpass->color_count);
1009 const uint32_t attachment_idx = subpass->color_attachments[rt_idx].attachment;
1010 if (attachment_idx == VK_ATTACHMENT_UNUSED)
1011 return;
1012
1013 /* Obtain a pipeline for this clear */
1014 assert(attachment_idx < cmd_buffer->state.pass->attachment_count);
1015 const VkFormat format =
1016 cmd_buffer->state.pass->attachments[attachment_idx].desc.format;
1017 const VkFormat samples =
1018 cmd_buffer->state.pass->attachments[attachment_idx].desc.samples;
1019 const uint32_t components = VK_COLOR_COMPONENT_R_BIT |
1020 VK_COLOR_COMPONENT_G_BIT |
1021 VK_COLOR_COMPONENT_B_BIT |
1022 VK_COLOR_COMPONENT_A_BIT;
1023 struct v3dv_meta_color_clear_pipeline *pipeline = NULL;
1024 VkResult result = get_color_clear_pipeline(cmd_buffer->device,
1025 pass,
1026 cmd_buffer->state.subpass_idx,
1027 rt_idx,
1028 attachment_idx,
1029 format,
1030 samples,
1031 components,
1032 is_layered,
1033 &pipeline);
1034 if (result != VK_SUCCESS) {
1035 if (result == VK_ERROR_OUT_OF_HOST_MEMORY)
1036 v3dv_flag_oom(cmd_buffer, NULL);
1037 return;
1038 }
1039 assert(pipeline && pipeline->pipeline);
1040
1041 /* Emit clear rects */
1042 v3dv_cmd_buffer_meta_state_push(cmd_buffer, false);
1043
1044 VkCommandBuffer cmd_buffer_handle = v3dv_cmd_buffer_to_handle(cmd_buffer);
1045 v3dv_CmdPushConstants(cmd_buffer_handle,
1046 cmd_buffer->device->meta.depth_clear.p_layout,
1047 VK_SHADER_STAGE_FRAGMENT_BIT, 0, 16,
1048 clear_color->float32);
1049
1050 v3dv_CmdBindPipeline(cmd_buffer_handle,
1051 VK_PIPELINE_BIND_POINT_GRAPHICS,
1052 pipeline->pipeline);
1053
1054 uint32_t dynamic_states = V3DV_CMD_DIRTY_VIEWPORT | V3DV_CMD_DIRTY_SCISSOR;
1055
1056 for (uint32_t i = 0; i < rect_count; i++) {
1057 const VkViewport viewport = {
1058 .x = rects[i].rect.offset.x,
1059 .y = rects[i].rect.offset.y,
1060 .width = rects[i].rect.extent.width,
1061 .height = rects[i].rect.extent.height,
1062 .minDepth = 0.0f,
1063 .maxDepth = 1.0f
1064 };
1065 v3dv_CmdSetViewport(cmd_buffer_handle, 0, 1, &viewport);
1066 v3dv_CmdSetScissor(cmd_buffer_handle, 0, 1, &rects[i].rect);
1067
1068 if (is_layered) {
1069 for (uint32_t layer_offset = 0; layer_offset < rects[i].layerCount;
1070 layer_offset++) {
1071 uint32_t layer = rects[i].baseArrayLayer + layer_offset;
1072 v3dv_CmdPushConstants(cmd_buffer_handle,
1073 cmd_buffer->device->meta.depth_clear.p_layout,
1074 VK_SHADER_STAGE_GEOMETRY_BIT, 16, 4, &layer);
1075 v3dv_CmdDraw(cmd_buffer_handle, 4, 1, 0, 0);
1076 }
1077 } else {
1078 assert(rects[i].baseArrayLayer == 0 && rects[i].layerCount == 1);
1079 v3dv_CmdDraw(cmd_buffer_handle, 4, 1, 0, 0);
1080 }
1081 }
1082
1083 /* Subpass pipelines can't be cached because they include a reference to the
1084 * render pass currently bound by the application, which means that we need
1085 * to destroy them manually here.
1086 */
1087 assert(!pipeline->cached);
1088 v3dv_cmd_buffer_add_private_obj(
1089 cmd_buffer, (uintptr_t)pipeline,
1090 (v3dv_cmd_buffer_private_obj_destroy_cb) destroy_color_clear_pipeline);
1091
1092 v3dv_cmd_buffer_meta_state_pop(cmd_buffer, dynamic_states, false);
1093 }
1094
1095 /* Emits a scissored quad, clearing the depth aspect by writing to gl_FragDepth
1096 * and the stencil aspect by using stencil testing.
1097 */
1098 static void
emit_subpass_ds_clear_rects(struct v3dv_cmd_buffer * cmd_buffer,struct v3dv_render_pass * pass,struct v3dv_subpass * subpass,VkImageAspectFlags aspects,const VkClearDepthStencilValue * clear_ds,bool is_layered,bool all_rects_same_layers,uint32_t rect_count,const VkClearRect * rects)1099 emit_subpass_ds_clear_rects(struct v3dv_cmd_buffer *cmd_buffer,
1100 struct v3dv_render_pass *pass,
1101 struct v3dv_subpass *subpass,
1102 VkImageAspectFlags aspects,
1103 const VkClearDepthStencilValue *clear_ds,
1104 bool is_layered,
1105 bool all_rects_same_layers,
1106 uint32_t rect_count,
1107 const VkClearRect *rects)
1108 {
1109 /* Skip if attachment is unused in the current subpass */
1110 const uint32_t attachment_idx = subpass->ds_attachment.attachment;
1111 if (attachment_idx == VK_ATTACHMENT_UNUSED)
1112 return;
1113
1114 /* Obtain a pipeline for this clear */
1115 assert(attachment_idx < cmd_buffer->state.pass->attachment_count);
1116 struct v3dv_meta_depth_clear_pipeline *pipeline = NULL;
1117 VkResult result = get_depth_clear_pipeline(cmd_buffer->device,
1118 aspects,
1119 pass,
1120 cmd_buffer->state.subpass_idx,
1121 attachment_idx,
1122 is_layered,
1123 &pipeline);
1124 if (result != VK_SUCCESS) {
1125 if (result == VK_ERROR_OUT_OF_HOST_MEMORY)
1126 v3dv_flag_oom(cmd_buffer, NULL);
1127 return;
1128 }
1129 assert(pipeline && pipeline->pipeline);
1130
1131 /* Emit clear rects */
1132 v3dv_cmd_buffer_meta_state_push(cmd_buffer, false);
1133
1134 VkCommandBuffer cmd_buffer_handle = v3dv_cmd_buffer_to_handle(cmd_buffer);
1135 v3dv_CmdPushConstants(cmd_buffer_handle,
1136 cmd_buffer->device->meta.depth_clear.p_layout,
1137 VK_SHADER_STAGE_FRAGMENT_BIT, 0, 4,
1138 &clear_ds->depth);
1139
1140 v3dv_CmdBindPipeline(cmd_buffer_handle,
1141 VK_PIPELINE_BIND_POINT_GRAPHICS,
1142 pipeline->pipeline);
1143
1144 uint32_t dynamic_states = V3DV_CMD_DIRTY_VIEWPORT | V3DV_CMD_DIRTY_SCISSOR;
1145 if (aspects & VK_IMAGE_ASPECT_STENCIL_BIT) {
1146 v3dv_CmdSetStencilReference(cmd_buffer_handle,
1147 VK_STENCIL_FACE_FRONT_AND_BACK,
1148 clear_ds->stencil);
1149 v3dv_CmdSetStencilWriteMask(cmd_buffer_handle,
1150 VK_STENCIL_FACE_FRONT_AND_BACK, 0xff);
1151 v3dv_CmdSetStencilCompareMask(cmd_buffer_handle,
1152 VK_STENCIL_FACE_FRONT_AND_BACK, 0xff);
1153 dynamic_states |= VK_DYNAMIC_STATE_STENCIL_COMPARE_MASK |
1154 VK_DYNAMIC_STATE_STENCIL_WRITE_MASK |
1155 VK_DYNAMIC_STATE_STENCIL_REFERENCE;
1156 }
1157
1158 for (uint32_t i = 0; i < rect_count; i++) {
1159 const VkViewport viewport = {
1160 .x = rects[i].rect.offset.x,
1161 .y = rects[i].rect.offset.y,
1162 .width = rects[i].rect.extent.width,
1163 .height = rects[i].rect.extent.height,
1164 .minDepth = 0.0f,
1165 .maxDepth = 1.0f
1166 };
1167 v3dv_CmdSetViewport(cmd_buffer_handle, 0, 1, &viewport);
1168 v3dv_CmdSetScissor(cmd_buffer_handle, 0, 1, &rects[i].rect);
1169 if (is_layered) {
1170 for (uint32_t layer_offset = 0; layer_offset < rects[i].layerCount;
1171 layer_offset++) {
1172 uint32_t layer = rects[i].baseArrayLayer + layer_offset;
1173 v3dv_CmdPushConstants(cmd_buffer_handle,
1174 cmd_buffer->device->meta.depth_clear.p_layout,
1175 VK_SHADER_STAGE_GEOMETRY_BIT, 4, 4, &layer);
1176 v3dv_CmdDraw(cmd_buffer_handle, 4, 1, 0, 0);
1177 }
1178 } else {
1179 assert(rects[i].baseArrayLayer == 0 && rects[i].layerCount == 1);
1180 v3dv_CmdDraw(cmd_buffer_handle, 4, 1, 0, 0);
1181 }
1182 }
1183
1184 v3dv_cmd_buffer_meta_state_pop(cmd_buffer, dynamic_states, false);
1185 }
1186
1187 static void
gather_layering_info(uint32_t rect_count,const VkClearRect * rects,bool * is_layered,bool * all_rects_same_layers)1188 gather_layering_info(uint32_t rect_count, const VkClearRect *rects,
1189 bool *is_layered, bool *all_rects_same_layers)
1190 {
1191 *all_rects_same_layers = true;
1192
1193 uint32_t min_layer = rects[0].baseArrayLayer;
1194 uint32_t max_layer = rects[0].baseArrayLayer + rects[0].layerCount - 1;
1195 for (uint32_t i = 1; i < rect_count; i++) {
1196 if (rects[i].baseArrayLayer != rects[i - 1].baseArrayLayer ||
1197 rects[i].layerCount != rects[i - 1].layerCount) {
1198 *all_rects_same_layers = false;
1199 min_layer = MIN2(min_layer, rects[i].baseArrayLayer);
1200 max_layer = MAX2(max_layer, rects[i].baseArrayLayer +
1201 rects[i].layerCount - 1);
1202 }
1203 }
1204
1205 *is_layered = !(min_layer == 0 && max_layer == 0);
1206 }
1207
1208 VKAPI_ATTR void VKAPI_CALL
v3dv_CmdClearAttachments(VkCommandBuffer commandBuffer,uint32_t attachmentCount,const VkClearAttachment * pAttachments,uint32_t rectCount,const VkClearRect * pRects)1209 v3dv_CmdClearAttachments(VkCommandBuffer commandBuffer,
1210 uint32_t attachmentCount,
1211 const VkClearAttachment *pAttachments,
1212 uint32_t rectCount,
1213 const VkClearRect *pRects)
1214 {
1215 V3DV_FROM_HANDLE(v3dv_cmd_buffer, cmd_buffer, commandBuffer);
1216
1217 /* We can only clear attachments in the current subpass */
1218 assert(attachmentCount <= 5); /* 4 color + D/S */
1219
1220 struct v3dv_render_pass *pass = cmd_buffer->state.pass;
1221
1222 assert(cmd_buffer->state.subpass_idx < pass->subpass_count);
1223 struct v3dv_subpass *subpass =
1224 &cmd_buffer->state.pass->subpasses[cmd_buffer->state.subpass_idx];
1225
1226 /* Emit a clear rect inside the current job for this subpass. For layered
1227 * framebuffers, we use a geometry shader to redirect clears to the
1228 * appropriate layers.
1229 */
1230 bool is_layered, all_rects_same_layers;
1231 gather_layering_info(rectCount, pRects, &is_layered, &all_rects_same_layers);
1232 for (uint32_t i = 0; i < attachmentCount; i++) {
1233 if (pAttachments[i].aspectMask & VK_IMAGE_ASPECT_COLOR_BIT) {
1234 emit_subpass_color_clear_rects(cmd_buffer, pass, subpass,
1235 pAttachments[i].colorAttachment,
1236 &pAttachments[i].clearValue.color,
1237 is_layered, all_rects_same_layers,
1238 rectCount, pRects);
1239 } else {
1240 emit_subpass_ds_clear_rects(cmd_buffer, pass, subpass,
1241 pAttachments[i].aspectMask,
1242 &pAttachments[i].clearValue.depthStencil,
1243 is_layered, all_rects_same_layers,
1244 rectCount, pRects);
1245 }
1246 }
1247 }
1248