1 /*
2 * Copyright © 2020 Raspberry Pi
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 */
23
24 #include "v3dv_private.h"
25 #include "v3dv_meta_common.h"
26
27 #include "compiler/nir/nir_builder.h"
28 #include "vk_format_info.h"
29 #include "util/u_pack_color.h"
30
31 static void
get_hw_clear_color(struct v3dv_device * device,const VkClearColorValue * color,VkFormat fb_format,VkFormat image_format,uint32_t internal_type,uint32_t internal_bpp,uint32_t * hw_color)32 get_hw_clear_color(struct v3dv_device *device,
33 const VkClearColorValue *color,
34 VkFormat fb_format,
35 VkFormat image_format,
36 uint32_t internal_type,
37 uint32_t internal_bpp,
38 uint32_t *hw_color)
39 {
40 const uint32_t internal_size = 4 << internal_bpp;
41
42 /* If the image format doesn't match the framebuffer format, then we are
43 * trying to clear an unsupported tlb format using a compatible
44 * format for the framebuffer. In this case, we want to make sure that
45 * we pack the clear value according to the original format semantics,
46 * not the compatible format.
47 */
48 if (fb_format == image_format) {
49 v3dv_X(device, get_hw_clear_color)(color, internal_type, internal_size,
50 hw_color);
51 } else {
52 union util_color uc;
53 enum pipe_format pipe_image_format =
54 vk_format_to_pipe_format(image_format);
55 util_pack_color(color->float32, pipe_image_format, &uc);
56 memcpy(hw_color, uc.ui, internal_size);
57 }
58 }
59
/* Returns true if the implementation is able to handle the case, false
 * otherwise.
 *
 * Clears an image through the TLB by emitting one render job per mip level
 * in the subresource range, covering the selected layers (or the level's
 * full depth extent for 3D images).
 */
static bool
clear_image_tlb(struct v3dv_cmd_buffer *cmd_buffer,
                struct v3dv_image *image,
                const VkClearValue *clear_value,
                const VkImageSubresourceRange *range)
{
   /* v3dv_meta_can_use_tlb decides whether the TLB path can handle this
    * image and picks fb_format, which may be a compatible format rather
    * than the image's own format.
    */
   const VkOffset3D origin = { 0, 0, 0 };
   VkFormat fb_format;
   if (!v3dv_meta_can_use_tlb(image, &origin, &fb_format))
      return false;

   uint32_t internal_type, internal_bpp;
   v3dv_X(cmd_buffer->device, get_internal_type_bpp_for_image_aspects)
      (fb_format, range->aspectMask,
       &internal_type, &internal_bpp);

   /* Pack the clear value in the representation the TLB stores for the
    * aspects being cleared.
    */
   union v3dv_clear_value hw_clear_value = { 0 };
   if (range->aspectMask & VK_IMAGE_ASPECT_COLOR_BIT) {
      get_hw_clear_color(cmd_buffer->device, &clear_value->color, fb_format,
                         image->vk.format, internal_type, internal_bpp,
                         &hw_clear_value.color[0]);
   } else {
      assert((range->aspectMask & VK_IMAGE_ASPECT_DEPTH_BIT) ||
             (range->aspectMask & VK_IMAGE_ASPECT_STENCIL_BIT));
      hw_clear_value.z = clear_value->depthStencil.depth;
      hw_clear_value.s = clear_value->depthStencil.stencil;
   }

   uint32_t level_count = vk_image_subresource_level_count(&image->vk, range);
   uint32_t min_level = range->baseMipLevel;
   uint32_t max_level = range->baseMipLevel + level_count;

   /* For 3D images baseArrayLayer and layerCount must be 0 and 1 respectively.
    * Instead, we need to consider the full depth dimension of the image, which
    * goes from 0 up to the level's depth extent.
    */
   uint32_t min_layer;
   uint32_t max_layer;
   if (image->vk.image_type != VK_IMAGE_TYPE_3D) {
      min_layer = range->baseArrayLayer;
      max_layer = range->baseArrayLayer +
                  vk_image_subresource_layer_count(&image->vk, range);
   } else {
      min_layer = 0;
      max_layer = 0; /* Recomputed per level inside the loop below. */
   }

   for (uint32_t level = min_level; level < max_level; level++) {
      /* 3D images: the layer count for this job is this level's depth. */
      if (image->vk.image_type == VK_IMAGE_TYPE_3D)
         max_layer = u_minify(image->vk.extent.depth, level);

      uint32_t width = u_minify(image->vk.extent.width, level);
      uint32_t height = u_minify(image->vk.extent.height, level);

      struct v3dv_job *job =
         v3dv_cmd_buffer_start_job(cmd_buffer, -1, V3DV_JOB_TYPE_GPU_CL);

      /* NOTE(review): returning true on a NULL job — presumably
       * v3dv_cmd_buffer_start_job already recorded the OOM error in the
       * command buffer, so we must not fall back to another clear path;
       * confirm against its implementation.
       */
      if (!job)
         return true;

      v3dv_job_start_frame(job, width, height, max_layer, false,
                           1, internal_bpp,
                           image->vk.samples > VK_SAMPLE_COUNT_1_BIT);

      struct v3dv_meta_framebuffer framebuffer;
      v3dv_X(job->device, meta_framebuffer_init)(&framebuffer, fb_format,
                                                 internal_type,
                                                 &job->frame_tiling);

      v3dv_X(job->device, job_emit_binning_flush)(job);

      /* If this triggers it is an application bug: the spec requires
       * that any aspects to clear are present in the image.
       */
      assert(range->aspectMask & image->vk.aspects);

      v3dv_X(job->device, meta_emit_clear_image_rcl)
         (job, image, &framebuffer, &hw_clear_value,
          range->aspectMask, min_layer, max_layer, level);

      v3dv_cmd_buffer_finish_job(cmd_buffer);
   }

   return true;
}
148
149 VKAPI_ATTR void VKAPI_CALL
v3dv_CmdClearColorImage(VkCommandBuffer commandBuffer,VkImage _image,VkImageLayout imageLayout,const VkClearColorValue * pColor,uint32_t rangeCount,const VkImageSubresourceRange * pRanges)150 v3dv_CmdClearColorImage(VkCommandBuffer commandBuffer,
151 VkImage _image,
152 VkImageLayout imageLayout,
153 const VkClearColorValue *pColor,
154 uint32_t rangeCount,
155 const VkImageSubresourceRange *pRanges)
156 {
157 V3DV_FROM_HANDLE(v3dv_cmd_buffer, cmd_buffer, commandBuffer);
158 V3DV_FROM_HANDLE(v3dv_image, image, _image);
159
160 const VkClearValue clear_value = {
161 .color = *pColor,
162 };
163
164 for (uint32_t i = 0; i < rangeCount; i++) {
165 if (clear_image_tlb(cmd_buffer, image, &clear_value, &pRanges[i]))
166 continue;
167 unreachable("Unsupported color clear.");
168 }
169 }
170
171 VKAPI_ATTR void VKAPI_CALL
v3dv_CmdClearDepthStencilImage(VkCommandBuffer commandBuffer,VkImage _image,VkImageLayout imageLayout,const VkClearDepthStencilValue * pDepthStencil,uint32_t rangeCount,const VkImageSubresourceRange * pRanges)172 v3dv_CmdClearDepthStencilImage(VkCommandBuffer commandBuffer,
173 VkImage _image,
174 VkImageLayout imageLayout,
175 const VkClearDepthStencilValue *pDepthStencil,
176 uint32_t rangeCount,
177 const VkImageSubresourceRange *pRanges)
178 {
179 V3DV_FROM_HANDLE(v3dv_cmd_buffer, cmd_buffer, commandBuffer);
180 V3DV_FROM_HANDLE(v3dv_image, image, _image);
181
182 const VkClearValue clear_value = {
183 .depthStencil = *pDepthStencil,
184 };
185
186 for (uint32_t i = 0; i < rangeCount; i++) {
187 if (clear_image_tlb(cmd_buffer, image, &clear_value, &pRanges[i]))
188 continue;
189 unreachable("Unsupported depth/stencil clear.");
190 }
191 }
192
193 static void
destroy_color_clear_pipeline(VkDevice _device,uint64_t pipeline,VkAllocationCallbacks * alloc)194 destroy_color_clear_pipeline(VkDevice _device,
195 uint64_t pipeline,
196 VkAllocationCallbacks *alloc)
197 {
198 struct v3dv_meta_color_clear_pipeline *p =
199 (struct v3dv_meta_color_clear_pipeline *) (uintptr_t) pipeline;
200 v3dv_DestroyPipeline(_device, p->pipeline, alloc);
201 if (p->cached)
202 v3dv_DestroyRenderPass(_device, p->pass, alloc);
203 vk_free(alloc, p);
204 }
205
206 static void
destroy_depth_clear_pipeline(VkDevice _device,struct v3dv_meta_depth_clear_pipeline * p,VkAllocationCallbacks * alloc)207 destroy_depth_clear_pipeline(VkDevice _device,
208 struct v3dv_meta_depth_clear_pipeline *p,
209 VkAllocationCallbacks *alloc)
210 {
211 v3dv_DestroyPipeline(_device, p->pipeline, alloc);
212 vk_free(alloc, p);
213 }
214
215 static VkResult
create_color_clear_pipeline_layout(struct v3dv_device * device,VkPipelineLayout * pipeline_layout)216 create_color_clear_pipeline_layout(struct v3dv_device *device,
217 VkPipelineLayout *pipeline_layout)
218 {
219 /* FIXME: this is abusing a bit the API, since not all of our clear
220 * pipelines have a geometry shader. We could create 2 different pipeline
221 * layouts, but this works for us for now.
222 */
223 VkPushConstantRange ranges[2] = {
224 { VK_SHADER_STAGE_FRAGMENT_BIT, 0, 16 },
225 { VK_SHADER_STAGE_GEOMETRY_BIT, 16, 4 },
226 };
227
228 VkPipelineLayoutCreateInfo info = {
229 .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
230 .setLayoutCount = 0,
231 .pushConstantRangeCount = 2,
232 .pPushConstantRanges = ranges,
233 };
234
235 return v3dv_CreatePipelineLayout(v3dv_device_to_handle(device),
236 &info, &device->vk.alloc, pipeline_layout);
237 }
238
239 static VkResult
create_depth_clear_pipeline_layout(struct v3dv_device * device,VkPipelineLayout * pipeline_layout)240 create_depth_clear_pipeline_layout(struct v3dv_device *device,
241 VkPipelineLayout *pipeline_layout)
242 {
243 /* FIXME: this is abusing a bit the API, since not all of our clear
244 * pipelines have a geometry shader. We could create 2 different pipeline
245 * layouts, but this works for us for now.
246 */
247 VkPushConstantRange ranges[2] = {
248 { VK_SHADER_STAGE_FRAGMENT_BIT, 0, 4 },
249 { VK_SHADER_STAGE_GEOMETRY_BIT, 4, 4 },
250 };
251
252 VkPipelineLayoutCreateInfo info = {
253 .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
254 .setLayoutCount = 0,
255 .pushConstantRangeCount = 2,
256 .pPushConstantRanges = ranges
257 };
258
259 return v3dv_CreatePipelineLayout(v3dv_device_to_handle(device),
260 &info, &device->vk.alloc, pipeline_layout);
261 }
262
263 void
v3dv_meta_clear_init(struct v3dv_device * device)264 v3dv_meta_clear_init(struct v3dv_device *device)
265 {
266 device->meta.color_clear.cache =
267 _mesa_hash_table_create(NULL, u64_hash, u64_compare);
268
269 create_color_clear_pipeline_layout(device,
270 &device->meta.color_clear.p_layout);
271
272 device->meta.depth_clear.cache =
273 _mesa_hash_table_create(NULL, u64_hash, u64_compare);
274
275 create_depth_clear_pipeline_layout(device,
276 &device->meta.depth_clear.p_layout);
277 }
278
279 void
v3dv_meta_clear_finish(struct v3dv_device * device)280 v3dv_meta_clear_finish(struct v3dv_device *device)
281 {
282 VkDevice _device = v3dv_device_to_handle(device);
283
284 hash_table_foreach(device->meta.color_clear.cache, entry) {
285 struct v3dv_meta_color_clear_pipeline *item = entry->data;
286 destroy_color_clear_pipeline(_device, (uintptr_t)item, &device->vk.alloc);
287 }
288 _mesa_hash_table_destroy(device->meta.color_clear.cache, NULL);
289
290 if (device->meta.color_clear.p_layout) {
291 v3dv_DestroyPipelineLayout(_device, device->meta.color_clear.p_layout,
292 &device->vk.alloc);
293 }
294
295 hash_table_foreach(device->meta.depth_clear.cache, entry) {
296 struct v3dv_meta_depth_clear_pipeline *item = entry->data;
297 destroy_depth_clear_pipeline(_device, item, &device->vk.alloc);
298 }
299 _mesa_hash_table_destroy(device->meta.depth_clear.cache, NULL);
300
301 if (device->meta.depth_clear.p_layout) {
302 v3dv_DestroyPipelineLayout(_device, device->meta.depth_clear.p_layout,
303 &device->vk.alloc);
304 }
305 }
306
307 static nir_ssa_def *
gen_rect_vertices(nir_builder * b)308 gen_rect_vertices(nir_builder *b)
309 {
310 nir_ssa_def *vertex_id = nir_load_vertex_id(b);
311
312 /* vertex 0: -1.0, -1.0
313 * vertex 1: -1.0, 1.0
314 * vertex 2: 1.0, -1.0
315 * vertex 3: 1.0, 1.0
316 *
317 * so:
318 *
319 * channel 0 is vertex_id < 2 ? -1.0 : 1.0
320 * channel 1 is vertex id & 1 ? 1.0 : -1.0
321 */
322
323 nir_ssa_def *one = nir_imm_int(b, 1);
324 nir_ssa_def *c0cmp = nir_ilt(b, vertex_id, nir_imm_int(b, 2));
325 nir_ssa_def *c1cmp = nir_ieq(b, nir_iand(b, vertex_id, one), one);
326
327 nir_ssa_def *comp[4];
328 comp[0] = nir_bcsel(b, c0cmp,
329 nir_imm_float(b, -1.0f),
330 nir_imm_float(b, 1.0f));
331
332 comp[1] = nir_bcsel(b, c1cmp,
333 nir_imm_float(b, 1.0f),
334 nir_imm_float(b, -1.0f));
335 comp[2] = nir_imm_float(b, 0.0f);
336 comp[3] = nir_imm_float(b, 1.0f);
337 return nir_vec(b, comp, 4);
338 }
339
340 static nir_shader *
get_clear_rect_vs()341 get_clear_rect_vs()
342 {
343 const nir_shader_compiler_options *options = v3dv_pipeline_get_nir_options();
344 nir_builder b = nir_builder_init_simple_shader(MESA_SHADER_VERTEX, options,
345 "meta clear vs");
346
347 const struct glsl_type *vec4 = glsl_vec4_type();
348 nir_variable *vs_out_pos =
349 nir_variable_create(b.shader, nir_var_shader_out, vec4, "gl_Position");
350 vs_out_pos->data.location = VARYING_SLOT_POS;
351
352 nir_ssa_def *pos = gen_rect_vertices(&b);
353 nir_store_var(&b, vs_out_pos, pos, 0xf);
354
355 return b.shader;
356 }
357
/* Builds a geometry shader for layered clears: it passes the incoming
 * triangle through unchanged and writes gl_Layer on every vertex from a
 * 4-byte push constant at offset push_constant_layer_base, so each draw
 * clears exactly one layer.
 */
static nir_shader *
get_clear_rect_gs(uint32_t push_constant_layer_base)
{
   /* FIXME: this creates a geometry shader that takes the index of a single
    * layer to clear from push constants, so we need to emit a draw call for
    * each layer that we want to clear. We could actually do better and have it
    * take a range of layers and then emit one triangle per layer to clear,
    * however, if we were to do this we would need to be careful not to exceed
    * the maximum number of output vertices allowed in a geometry shader.
    */
   const nir_shader_compiler_options *options = v3dv_pipeline_get_nir_options();
   nir_builder b = nir_builder_init_simple_shader(MESA_SHADER_GEOMETRY, options,
                                                  "meta clear gs");
   nir_shader *nir = b.shader;
   /* Triangle in, 3-vertex triangle strip out, single invocation, stream 0. */
   nir->info.inputs_read = 1ull << VARYING_SLOT_POS;
   nir->info.outputs_written = (1ull << VARYING_SLOT_POS) |
                               (1ull << VARYING_SLOT_LAYER);
   nir->info.gs.input_primitive = GL_TRIANGLES;
   nir->info.gs.output_primitive = GL_TRIANGLE_STRIP;
   nir->info.gs.vertices_in = 3;
   nir->info.gs.vertices_out = 3;
   nir->info.gs.invocations = 1;
   nir->info.gs.active_stream_mask = 0x1;

   /* in vec4 gl_Position[3] */
   nir_variable *gs_in_pos =
      nir_variable_create(b.shader, nir_var_shader_in,
                          glsl_array_type(glsl_vec4_type(), 3, 0),
                          "in_gl_Position");
   gs_in_pos->data.location = VARYING_SLOT_POS;

   /* out vec4 gl_Position */
   nir_variable *gs_out_pos =
      nir_variable_create(b.shader, nir_var_shader_out, glsl_vec4_type(),
                          "out_gl_Position");
   gs_out_pos->data.location = VARYING_SLOT_POS;

   /* out float gl_Layer */
   nir_variable *gs_out_layer =
      nir_variable_create(b.shader, nir_var_shader_out, glsl_float_type(),
                          "out_gl_Layer");
   gs_out_layer->data.location = VARYING_SLOT_LAYER;

   /* Emit output triangle */
   for (uint32_t i = 0; i < 3; i++) {
      /* gl_Position from shader input */
      nir_deref_instr *in_pos_i =
         nir_build_deref_array_imm(&b, nir_build_deref_var(&b, gs_in_pos), i);
      nir_copy_deref(&b, nir_build_deref_var(&b, gs_out_pos), in_pos_i);

      /* gl_Layer from push constants */
      nir_ssa_def *layer =
         nir_load_push_constant(&b, 1, 32, nir_imm_int(&b, 0),
                                .base = push_constant_layer_base, .range = 4);
      nir_store_var(&b, gs_out_layer, layer, 0x1);

      nir_emit_vertex(&b, 0);
   }

   nir_end_primitive(&b, 0);

   return nir;
}
421
422 static nir_shader *
get_color_clear_rect_fs(uint32_t rt_idx,VkFormat format)423 get_color_clear_rect_fs(uint32_t rt_idx, VkFormat format)
424 {
425 const nir_shader_compiler_options *options = v3dv_pipeline_get_nir_options();
426 nir_builder b = nir_builder_init_simple_shader(MESA_SHADER_FRAGMENT, options,
427 "meta clear fs");
428
429 enum pipe_format pformat = vk_format_to_pipe_format(format);
430 const struct glsl_type *fs_out_type =
431 util_format_is_float(pformat) ? glsl_vec4_type() : glsl_uvec4_type();
432
433 nir_variable *fs_out_color =
434 nir_variable_create(b.shader, nir_var_shader_out, fs_out_type, "out_color");
435 fs_out_color->data.location = FRAG_RESULT_DATA0 + rt_idx;
436
437 nir_ssa_def *color_load = nir_load_push_constant(&b, 4, 32, nir_imm_int(&b, 0), .base = 0, .range = 16);
438 nir_store_var(&b, fs_out_color, color_load, 0xf);
439
440 return b.shader;
441 }
442
443 static nir_shader *
get_depth_clear_rect_fs()444 get_depth_clear_rect_fs()
445 {
446 const nir_shader_compiler_options *options = v3dv_pipeline_get_nir_options();
447 nir_builder b = nir_builder_init_simple_shader(MESA_SHADER_FRAGMENT, options,
448 "meta depth clear fs");
449
450 nir_variable *fs_out_depth =
451 nir_variable_create(b.shader, nir_var_shader_out, glsl_float_type(),
452 "out_depth");
453 fs_out_depth->data.location = FRAG_RESULT_DEPTH;
454
455 nir_ssa_def *depth_load =
456 nir_load_push_constant(&b, 1, 32, nir_imm_int(&b, 0), .base = 0, .range = 4);
457
458 nir_store_var(&b, fs_out_depth, depth_load, 0x1);
459
460 return b.shader;
461 }
462
463 static VkResult
create_pipeline(struct v3dv_device * device,struct v3dv_render_pass * pass,uint32_t subpass_idx,uint32_t samples,struct nir_shader * vs_nir,struct nir_shader * gs_nir,struct nir_shader * fs_nir,const VkPipelineVertexInputStateCreateInfo * vi_state,const VkPipelineDepthStencilStateCreateInfo * ds_state,const VkPipelineColorBlendStateCreateInfo * cb_state,const VkPipelineLayout layout,VkPipeline * pipeline)464 create_pipeline(struct v3dv_device *device,
465 struct v3dv_render_pass *pass,
466 uint32_t subpass_idx,
467 uint32_t samples,
468 struct nir_shader *vs_nir,
469 struct nir_shader *gs_nir,
470 struct nir_shader *fs_nir,
471 const VkPipelineVertexInputStateCreateInfo *vi_state,
472 const VkPipelineDepthStencilStateCreateInfo *ds_state,
473 const VkPipelineColorBlendStateCreateInfo *cb_state,
474 const VkPipelineLayout layout,
475 VkPipeline *pipeline)
476 {
477 VkPipelineShaderStageCreateInfo stages[3] = { 0 };
478 struct vk_shader_module vs_m;
479 struct vk_shader_module gs_m;
480 struct vk_shader_module fs_m;
481
482 uint32_t stage_count = 0;
483 v3dv_shader_module_internal_init(device, &vs_m, vs_nir);
484 stages[stage_count].sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO;
485 stages[stage_count].stage = VK_SHADER_STAGE_VERTEX_BIT;
486 stages[stage_count].module = vk_shader_module_to_handle(&vs_m);
487 stages[stage_count].pName = "main";
488 stage_count++;
489
490 if (gs_nir) {
491 v3dv_shader_module_internal_init(device, &gs_m, gs_nir);
492 stages[stage_count].sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO;
493 stages[stage_count].stage = VK_SHADER_STAGE_GEOMETRY_BIT;
494 stages[stage_count].module = vk_shader_module_to_handle(&gs_m);
495 stages[stage_count].pName = "main";
496 stage_count++;
497 }
498
499 if (fs_nir) {
500 v3dv_shader_module_internal_init(device, &fs_m, fs_nir);
501 stages[stage_count].sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO;
502 stages[stage_count].stage = VK_SHADER_STAGE_FRAGMENT_BIT;
503 stages[stage_count].module = vk_shader_module_to_handle(&fs_m);
504 stages[stage_count].pName = "main";
505 stage_count++;
506 }
507
508 VkGraphicsPipelineCreateInfo info = {
509 .sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO,
510
511 .stageCount = stage_count,
512 .pStages = stages,
513
514 .pVertexInputState = vi_state,
515
516 .pInputAssemblyState = &(VkPipelineInputAssemblyStateCreateInfo) {
517 .sType = VK_STRUCTURE_TYPE_PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO,
518 .topology = VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP,
519 .primitiveRestartEnable = false,
520 },
521
522 .pViewportState = &(VkPipelineViewportStateCreateInfo) {
523 .sType = VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO,
524 .viewportCount = 1,
525 .scissorCount = 1,
526 },
527
528 .pRasterizationState = &(VkPipelineRasterizationStateCreateInfo) {
529 .sType = VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_CREATE_INFO,
530 .rasterizerDiscardEnable = false,
531 .polygonMode = VK_POLYGON_MODE_FILL,
532 .cullMode = VK_CULL_MODE_NONE,
533 .frontFace = VK_FRONT_FACE_COUNTER_CLOCKWISE,
534 .depthBiasEnable = false,
535 },
536
537 .pMultisampleState = &(VkPipelineMultisampleStateCreateInfo) {
538 .sType = VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO,
539 .rasterizationSamples = samples,
540 .sampleShadingEnable = false,
541 .pSampleMask = NULL,
542 .alphaToCoverageEnable = false,
543 .alphaToOneEnable = false,
544 },
545
546 .pDepthStencilState = ds_state,
547
548 .pColorBlendState = cb_state,
549
550 /* The meta clear pipeline declares all state as dynamic.
551 * As a consequence, vkCmdBindPipeline writes no dynamic state
552 * to the cmd buffer. Therefore, at the end of the meta clear,
553 * we need only restore dynamic state that was vkCmdSet.
554 */
555 .pDynamicState = &(VkPipelineDynamicStateCreateInfo) {
556 .sType = VK_STRUCTURE_TYPE_PIPELINE_DYNAMIC_STATE_CREATE_INFO,
557 .dynamicStateCount = 6,
558 .pDynamicStates = (VkDynamicState[]) {
559 VK_DYNAMIC_STATE_VIEWPORT,
560 VK_DYNAMIC_STATE_SCISSOR,
561 VK_DYNAMIC_STATE_STENCIL_COMPARE_MASK,
562 VK_DYNAMIC_STATE_STENCIL_WRITE_MASK,
563 VK_DYNAMIC_STATE_STENCIL_REFERENCE,
564 VK_DYNAMIC_STATE_BLEND_CONSTANTS,
565 VK_DYNAMIC_STATE_DEPTH_BIAS,
566 VK_DYNAMIC_STATE_LINE_WIDTH,
567 },
568 },
569
570 .flags = 0,
571 .layout = layout,
572 .renderPass = v3dv_render_pass_to_handle(pass),
573 .subpass = subpass_idx,
574 };
575
576 VkResult result =
577 v3dv_CreateGraphicsPipelines(v3dv_device_to_handle(device),
578 VK_NULL_HANDLE,
579 1, &info,
580 &device->vk.alloc,
581 pipeline);
582
583 ralloc_free(vs_nir);
584 ralloc_free(fs_nir);
585
586 return result;
587 }
588
/* Creates a pipeline that clears render target rt_idx of the given subpass
 * by drawing a full-screen rect with the clear color taken from push
 * constants. 'components' is the color write mask for the target; all other
 * color attachments in the subpass have their writes fully masked. Layered
 * framebuffers get a geometry shader that selects the target layer (layer
 * index at push constant offset 16).
 */
static VkResult
create_color_clear_pipeline(struct v3dv_device *device,
                            struct v3dv_render_pass *pass,
                            uint32_t subpass_idx,
                            uint32_t rt_idx,
                            VkFormat format,
                            uint32_t samples,
                            uint32_t components,
                            bool is_layered,
                            VkPipelineLayout pipeline_layout,
                            VkPipeline *pipeline)
{
   nir_shader *vs_nir = get_clear_rect_vs();
   nir_shader *fs_nir = get_color_clear_rect_fs(rt_idx, format);
   nir_shader *gs_nir = is_layered ? get_clear_rect_gs(16) : NULL;

   /* No vertex buffers: the VS derives positions from the vertex id. */
   const VkPipelineVertexInputStateCreateInfo vi_state = {
      .sType = VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO,
      .vertexBindingDescriptionCount = 0,
      .vertexAttributeDescriptionCount = 0,
   };

   /* Color clears never touch depth/stencil. */
   const VkPipelineDepthStencilStateCreateInfo ds_state = {
      .sType = VK_STRUCTURE_TYPE_PIPELINE_DEPTH_STENCIL_STATE_CREATE_INFO,
      .depthTestEnable = false,
      .depthWriteEnable = false,
      .depthBoundsTestEnable = false,
      .stencilTestEnable = false,
   };

   assert(subpass_idx < pass->subpass_count);
   const uint32_t color_count = pass->subpasses[subpass_idx].color_count;
   assert(rt_idx < color_count);

   /* Write only the requested components of the target attachment; mask
    * out every other color attachment in the subpass.
    */
   VkPipelineColorBlendAttachmentState blend_att_state[V3D_MAX_DRAW_BUFFERS];
   for (uint32_t i = 0; i < color_count; i++) {
      blend_att_state[i] = (VkPipelineColorBlendAttachmentState) {
         .blendEnable = false,
         .colorWriteMask = i == rt_idx ? components : 0,
      };
   }

   const VkPipelineColorBlendStateCreateInfo cb_state = {
      .sType = VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO,
      .logicOpEnable = false,
      .attachmentCount = color_count,
      .pAttachments = blend_att_state
   };

   return create_pipeline(device,
                          pass, subpass_idx,
                          samples,
                          vs_nir, gs_nir, fs_nir,
                          &vi_state,
                          &ds_state,
                          &cb_state,
                          pipeline_layout,
                          pipeline);
}
648
/* Creates a pipeline that clears the depth and/or stencil aspects of a
 * subpass attachment with a draw call. Depth is written by the fragment
 * shader from push constant offset 0; stencil is written via the REPLACE
 * stencil op using the dynamic stencil reference value, so stencil-only
 * clears need no fragment shader at all. Layered framebuffers get a
 * geometry shader that selects the target layer (layer index at push
 * constant offset 4).
 */
static VkResult
create_depth_clear_pipeline(struct v3dv_device *device,
                            VkImageAspectFlags aspects,
                            struct v3dv_render_pass *pass,
                            uint32_t subpass_idx,
                            uint32_t samples,
                            bool is_layered,
                            VkPipelineLayout pipeline_layout,
                            VkPipeline *pipeline)
{
   const bool has_depth = aspects & VK_IMAGE_ASPECT_DEPTH_BIT;
   const bool has_stencil = aspects & VK_IMAGE_ASPECT_STENCIL_BIT;
   assert(has_depth || has_stencil);

   nir_shader *vs_nir = get_clear_rect_vs();
   nir_shader *fs_nir = has_depth ? get_depth_clear_rect_fs() : NULL;
   nir_shader *gs_nir = is_layered ? get_clear_rect_gs(4) : NULL;

   /* No vertex buffers: the VS derives positions from the vertex id. */
   const VkPipelineVertexInputStateCreateInfo vi_state = {
      .sType = VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO,
      .vertexBindingDescriptionCount = 0,
      .vertexAttributeDescriptionCount = 0,
   };

   /* Unconditionally overwrite the aspects being cleared (ALWAYS tests). */
   const VkPipelineDepthStencilStateCreateInfo ds_state = {
      .sType = VK_STRUCTURE_TYPE_PIPELINE_DEPTH_STENCIL_STATE_CREATE_INFO,
      .depthTestEnable = has_depth,
      .depthWriteEnable = has_depth,
      .depthCompareOp = VK_COMPARE_OP_ALWAYS,
      .depthBoundsTestEnable = false,
      .stencilTestEnable = has_stencil,
      .front = {
         .passOp = VK_STENCIL_OP_REPLACE,
         .compareOp = VK_COMPARE_OP_ALWAYS,
         /* compareMask, writeMask and reference are dynamic state */
      },
      .back = { 0 },
   };

   assert(subpass_idx < pass->subpass_count);
   /* Zeroed write masks: this pipeline must not touch any color attachment. */
   VkPipelineColorBlendAttachmentState blend_att_state[V3D_MAX_DRAW_BUFFERS] = { 0 };
   const VkPipelineColorBlendStateCreateInfo cb_state = {
      .sType = VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO,
      .logicOpEnable = false,
      .attachmentCount = pass->subpasses[subpass_idx].color_count,
      .pAttachments = blend_att_state,
   };

   return create_pipeline(device,
                          pass, subpass_idx,
                          samples,
                          vs_nir, gs_nir, fs_nir,
                          &vi_state,
                          &ds_state,
                          &cb_state,
                          pipeline_layout,
                          pipeline);
}
707
/* Creates a private single-attachment render pass used when a color clear
 * pipeline is built outside of an application render pass. LOAD/STORE ops
 * preserve existing contents outside the clear rect; GENERAL layouts avoid
 * any transitions.
 *
 * NOTE(review): the subpass attachment reference uses rt_idx while the pass
 * declares exactly one attachment, so this looks valid only for
 * rt_idx == 0 — confirm callers never create a private pass for a higher
 * render target index.
 */
static VkResult
create_color_clear_render_pass(struct v3dv_device *device,
                               uint32_t rt_idx,
                               VkFormat format,
                               uint32_t samples,
                               VkRenderPass *pass)
{
   VkAttachmentDescription att = {
      .format = format,
      .samples = samples,
      .loadOp = VK_ATTACHMENT_LOAD_OP_LOAD,
      .storeOp = VK_ATTACHMENT_STORE_OP_STORE,
      .initialLayout = VK_IMAGE_LAYOUT_GENERAL,
      .finalLayout = VK_IMAGE_LAYOUT_GENERAL,
   };

   VkAttachmentReference att_ref = {
      .attachment = rt_idx,
      .layout = VK_IMAGE_LAYOUT_GENERAL,
   };

   VkSubpassDescription subpass = {
      .pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS,
      .inputAttachmentCount = 0,
      .colorAttachmentCount = 1,
      .pColorAttachments = &att_ref,
      .pResolveAttachments = NULL,
      .pDepthStencilAttachment = NULL,
      .preserveAttachmentCount = 0,
      .pPreserveAttachments = NULL,
   };

   VkRenderPassCreateInfo info = {
      .sType = VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO,
      .attachmentCount = 1,
      .pAttachments = &att,
      .subpassCount = 1,
      .pSubpasses = &subpass,
      .dependencyCount = 0,
      .pDependencies = NULL,
   };

   return v3dv_CreateRenderPass(v3dv_device_to_handle(device),
                                &info, &device->vk.alloc, pass);
}
753
754 static inline uint64_t
get_color_clear_pipeline_cache_key(uint32_t rt_idx,VkFormat format,uint32_t samples,uint32_t components,bool is_layered)755 get_color_clear_pipeline_cache_key(uint32_t rt_idx,
756 VkFormat format,
757 uint32_t samples,
758 uint32_t components,
759 bool is_layered)
760 {
761 assert(rt_idx < V3D_MAX_DRAW_BUFFERS);
762
763 uint64_t key = 0;
764 uint32_t bit_offset = 0;
765
766 key |= rt_idx;
767 bit_offset += 2;
768
769 key |= ((uint64_t) format) << bit_offset;
770 bit_offset += 32;
771
772 key |= ((uint64_t) samples) << bit_offset;
773 bit_offset += 4;
774
775 key |= ((uint64_t) components) << bit_offset;
776 bit_offset += 4;
777
778 key |= (is_layered ? 1ull : 0ull) << bit_offset;
779 bit_offset += 1;
780
781 assert(bit_offset <= 64);
782 return key;
783 }
784
785 static inline uint64_t
get_depth_clear_pipeline_cache_key(VkImageAspectFlags aspects,VkFormat format,uint32_t samples,bool is_layered)786 get_depth_clear_pipeline_cache_key(VkImageAspectFlags aspects,
787 VkFormat format,
788 uint32_t samples,
789 bool is_layered)
790 {
791 uint64_t key = 0;
792 uint32_t bit_offset = 0;
793
794 key |= format;
795 bit_offset += 32;
796
797 key |= ((uint64_t) samples) << bit_offset;
798 bit_offset += 4;
799
800 const bool has_depth = (aspects & VK_IMAGE_ASPECT_DEPTH_BIT) ? 1 : 0;
801 key |= ((uint64_t) has_depth) << bit_offset;
802 bit_offset++;
803
804 const bool has_stencil = (aspects & VK_IMAGE_ASPECT_STENCIL_BIT) ? 1 : 0;
805 key |= ((uint64_t) has_stencil) << bit_offset;
806 bit_offset++;;
807
808 key |= (is_layered ? 1ull : 0ull) << bit_offset;
809 bit_offset += 1;
810
811 assert(bit_offset <= 64);
812 return key;
813 }
814
815 static VkResult
get_color_clear_pipeline(struct v3dv_device * device,struct v3dv_render_pass * pass,uint32_t subpass_idx,uint32_t rt_idx,uint32_t attachment_idx,VkFormat format,uint32_t samples,uint32_t components,bool is_layered,struct v3dv_meta_color_clear_pipeline ** pipeline)816 get_color_clear_pipeline(struct v3dv_device *device,
817 struct v3dv_render_pass *pass,
818 uint32_t subpass_idx,
819 uint32_t rt_idx,
820 uint32_t attachment_idx,
821 VkFormat format,
822 uint32_t samples,
823 uint32_t components,
824 bool is_layered,
825 struct v3dv_meta_color_clear_pipeline **pipeline)
826 {
827 assert(vk_format_is_color(format));
828
829 VkResult result = VK_SUCCESS;
830
831 /* If pass != NULL it means that we are emitting the clear as a draw call
832 * in the current pass bound by the application. In that case, we can't
833 * cache the pipeline, since it will be referencing that pass and the
834 * application could be destroying it at any point. Hopefully, the perf
835 * impact is not too big since we still have the device pipeline cache
836 * around and we won't end up re-compiling the clear shader.
837 *
838 * FIXME: alternatively, we could refcount (or maybe clone) the render pass
839 * provided by the application and include it in the pipeline key setup
840 * to make caching safe in this scenario, however, based on tests with
841 * vkQuake3, the fact that we are not caching here doesn't seem to have
842 * any significant impact in performance, so it might not be worth it.
843 */
844 const bool can_cache_pipeline = (pass == NULL);
845
846 uint64_t key;
847 if (can_cache_pipeline) {
848 key = get_color_clear_pipeline_cache_key(rt_idx, format, samples,
849 components, is_layered);
850 mtx_lock(&device->meta.mtx);
851 struct hash_entry *entry =
852 _mesa_hash_table_search(device->meta.color_clear.cache, &key);
853 if (entry) {
854 mtx_unlock(&device->meta.mtx);
855 *pipeline = entry->data;
856 return VK_SUCCESS;
857 }
858 }
859
860 *pipeline = vk_zalloc2(&device->vk.alloc, NULL, sizeof(**pipeline), 8,
861 VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
862
863 if (*pipeline == NULL) {
864 result = VK_ERROR_OUT_OF_HOST_MEMORY;
865 goto fail;
866 }
867
868 if (!pass) {
869 result = create_color_clear_render_pass(device,
870 rt_idx,
871 format,
872 samples,
873 &(*pipeline)->pass);
874 if (result != VK_SUCCESS)
875 goto fail;
876
877 pass = v3dv_render_pass_from_handle((*pipeline)->pass);
878 } else {
879 (*pipeline)->pass = v3dv_render_pass_to_handle(pass);
880 }
881
882 result = create_color_clear_pipeline(device,
883 pass,
884 subpass_idx,
885 rt_idx,
886 format,
887 samples,
888 components,
889 is_layered,
890 device->meta.color_clear.p_layout,
891 &(*pipeline)->pipeline);
892 if (result != VK_SUCCESS)
893 goto fail;
894
895 if (can_cache_pipeline) {
896 (*pipeline)->key = key;
897 (*pipeline)->cached = true;
898 _mesa_hash_table_insert(device->meta.color_clear.cache,
899 &(*pipeline)->key, *pipeline);
900
901 mtx_unlock(&device->meta.mtx);
902 }
903
904 return VK_SUCCESS;
905
906 fail:
907 if (can_cache_pipeline)
908 mtx_unlock(&device->meta.mtx);
909
910 VkDevice _device = v3dv_device_to_handle(device);
911 if (*pipeline) {
912 if ((*pipeline)->cached)
913 v3dv_DestroyRenderPass(_device, (*pipeline)->pass, &device->vk.alloc);
914 if ((*pipeline)->pipeline)
915 v3dv_DestroyPipeline(_device, (*pipeline)->pipeline, &device->vk.alloc);
916 vk_free(&device->vk.alloc, *pipeline);
917 *pipeline = NULL;
918 }
919
920 return result;
921 }
922
923 static VkResult
get_depth_clear_pipeline(struct v3dv_device * device,VkImageAspectFlags aspects,struct v3dv_render_pass * pass,uint32_t subpass_idx,uint32_t attachment_idx,bool is_layered,struct v3dv_meta_depth_clear_pipeline ** pipeline)924 get_depth_clear_pipeline(struct v3dv_device *device,
925 VkImageAspectFlags aspects,
926 struct v3dv_render_pass *pass,
927 uint32_t subpass_idx,
928 uint32_t attachment_idx,
929 bool is_layered,
930 struct v3dv_meta_depth_clear_pipeline **pipeline)
931 {
932 assert(subpass_idx < pass->subpass_count);
933 assert(attachment_idx != VK_ATTACHMENT_UNUSED);
934 assert(attachment_idx < pass->attachment_count);
935
936 VkResult result = VK_SUCCESS;
937
938 const uint32_t samples = pass->attachments[attachment_idx].desc.samples;
939 const VkFormat format = pass->attachments[attachment_idx].desc.format;
940 assert(vk_format_is_depth_or_stencil(format));
941
942 const uint64_t key =
943 get_depth_clear_pipeline_cache_key(aspects, format, samples, is_layered);
944 mtx_lock(&device->meta.mtx);
945 struct hash_entry *entry =
946 _mesa_hash_table_search(device->meta.depth_clear.cache, &key);
947 if (entry) {
948 mtx_unlock(&device->meta.mtx);
949 *pipeline = entry->data;
950 return VK_SUCCESS;
951 }
952
953 *pipeline = vk_zalloc2(&device->vk.alloc, NULL, sizeof(**pipeline), 8,
954 VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
955
956 if (*pipeline == NULL) {
957 result = VK_ERROR_OUT_OF_HOST_MEMORY;
958 goto fail;
959 }
960
961 result = create_depth_clear_pipeline(device,
962 aspects,
963 pass,
964 subpass_idx,
965 samples,
966 is_layered,
967 device->meta.depth_clear.p_layout,
968 &(*pipeline)->pipeline);
969 if (result != VK_SUCCESS)
970 goto fail;
971
972 (*pipeline)->key = key;
973 _mesa_hash_table_insert(device->meta.depth_clear.cache,
974 &(*pipeline)->key, *pipeline);
975
976 mtx_unlock(&device->meta.mtx);
977 return VK_SUCCESS;
978
979 fail:
980 mtx_unlock(&device->meta.mtx);
981
982 VkDevice _device = v3dv_device_to_handle(device);
983 if (*pipeline) {
984 if ((*pipeline)->pipeline)
985 v3dv_DestroyPipeline(_device, (*pipeline)->pipeline, &device->vk.alloc);
986 vk_free(&device->vk.alloc, *pipeline);
987 *pipeline = NULL;
988 }
989
990 return result;
991 }
992
993 /* Emits a scissored quad in the clear color */
994 static void
emit_subpass_color_clear_rects(struct v3dv_cmd_buffer * cmd_buffer,struct v3dv_render_pass * pass,struct v3dv_subpass * subpass,uint32_t rt_idx,const VkClearColorValue * clear_color,bool is_layered,bool all_rects_same_layers,uint32_t rect_count,const VkClearRect * rects)995 emit_subpass_color_clear_rects(struct v3dv_cmd_buffer *cmd_buffer,
996 struct v3dv_render_pass *pass,
997 struct v3dv_subpass *subpass,
998 uint32_t rt_idx,
999 const VkClearColorValue *clear_color,
1000 bool is_layered,
1001 bool all_rects_same_layers,
1002 uint32_t rect_count,
1003 const VkClearRect *rects)
1004 {
1005 /* Skip if attachment is unused in the current subpass */
1006 assert(rt_idx < subpass->color_count);
1007 const uint32_t attachment_idx = subpass->color_attachments[rt_idx].attachment;
1008 if (attachment_idx == VK_ATTACHMENT_UNUSED)
1009 return;
1010
1011 /* Obtain a pipeline for this clear */
1012 assert(attachment_idx < cmd_buffer->state.pass->attachment_count);
1013 const VkFormat format =
1014 cmd_buffer->state.pass->attachments[attachment_idx].desc.format;
1015 const VkFormat samples =
1016 cmd_buffer->state.pass->attachments[attachment_idx].desc.samples;
1017 const uint32_t components = VK_COLOR_COMPONENT_R_BIT |
1018 VK_COLOR_COMPONENT_G_BIT |
1019 VK_COLOR_COMPONENT_B_BIT |
1020 VK_COLOR_COMPONENT_A_BIT;
1021 struct v3dv_meta_color_clear_pipeline *pipeline = NULL;
1022 VkResult result = get_color_clear_pipeline(cmd_buffer->device,
1023 pass,
1024 cmd_buffer->state.subpass_idx,
1025 rt_idx,
1026 attachment_idx,
1027 format,
1028 samples,
1029 components,
1030 is_layered,
1031 &pipeline);
1032 if (result != VK_SUCCESS) {
1033 if (result == VK_ERROR_OUT_OF_HOST_MEMORY)
1034 v3dv_flag_oom(cmd_buffer, NULL);
1035 return;
1036 }
1037 assert(pipeline && pipeline->pipeline);
1038
1039 /* Emit clear rects */
1040 v3dv_cmd_buffer_meta_state_push(cmd_buffer, false);
1041
1042 VkCommandBuffer cmd_buffer_handle = v3dv_cmd_buffer_to_handle(cmd_buffer);
1043 v3dv_CmdPushConstants(cmd_buffer_handle,
1044 cmd_buffer->device->meta.depth_clear.p_layout,
1045 VK_SHADER_STAGE_FRAGMENT_BIT, 0, 16,
1046 clear_color->float32);
1047
1048 v3dv_CmdBindPipeline(cmd_buffer_handle,
1049 VK_PIPELINE_BIND_POINT_GRAPHICS,
1050 pipeline->pipeline);
1051
1052 uint32_t dynamic_states = V3DV_CMD_DIRTY_VIEWPORT | V3DV_CMD_DIRTY_SCISSOR;
1053
1054 for (uint32_t i = 0; i < rect_count; i++) {
1055 const VkViewport viewport = {
1056 .x = rects[i].rect.offset.x,
1057 .y = rects[i].rect.offset.y,
1058 .width = rects[i].rect.extent.width,
1059 .height = rects[i].rect.extent.height,
1060 .minDepth = 0.0f,
1061 .maxDepth = 1.0f
1062 };
1063 v3dv_CmdSetViewport(cmd_buffer_handle, 0, 1, &viewport);
1064 v3dv_CmdSetScissor(cmd_buffer_handle, 0, 1, &rects[i].rect);
1065
1066 if (is_layered) {
1067 for (uint32_t layer_offset = 0; layer_offset < rects[i].layerCount;
1068 layer_offset++) {
1069 uint32_t layer = rects[i].baseArrayLayer + layer_offset;
1070 v3dv_CmdPushConstants(cmd_buffer_handle,
1071 cmd_buffer->device->meta.depth_clear.p_layout,
1072 VK_SHADER_STAGE_GEOMETRY_BIT, 16, 4, &layer);
1073 v3dv_CmdDraw(cmd_buffer_handle, 4, 1, 0, 0);
1074 }
1075 } else {
1076 assert(rects[i].baseArrayLayer == 0 && rects[i].layerCount == 1);
1077 v3dv_CmdDraw(cmd_buffer_handle, 4, 1, 0, 0);
1078 }
1079 }
1080
1081 /* Subpass pipelines can't be cached because they include a reference to the
1082 * render pass currently bound by the application, which means that we need
1083 * to destroy them manually here.
1084 */
1085 assert(!pipeline->cached);
1086 v3dv_cmd_buffer_add_private_obj(
1087 cmd_buffer, (uintptr_t)pipeline,
1088 (v3dv_cmd_buffer_private_obj_destroy_cb) destroy_color_clear_pipeline);
1089
1090 v3dv_cmd_buffer_meta_state_pop(cmd_buffer, dynamic_states, false);
1091 }
1092
1093 /* Emits a scissored quad, clearing the depth aspect by writing to gl_FragDepth
1094 * and the stencil aspect by using stencil testing.
1095 */
1096 static void
emit_subpass_ds_clear_rects(struct v3dv_cmd_buffer * cmd_buffer,struct v3dv_render_pass * pass,struct v3dv_subpass * subpass,VkImageAspectFlags aspects,const VkClearDepthStencilValue * clear_ds,bool is_layered,bool all_rects_same_layers,uint32_t rect_count,const VkClearRect * rects)1097 emit_subpass_ds_clear_rects(struct v3dv_cmd_buffer *cmd_buffer,
1098 struct v3dv_render_pass *pass,
1099 struct v3dv_subpass *subpass,
1100 VkImageAspectFlags aspects,
1101 const VkClearDepthStencilValue *clear_ds,
1102 bool is_layered,
1103 bool all_rects_same_layers,
1104 uint32_t rect_count,
1105 const VkClearRect *rects)
1106 {
1107 /* Skip if attachment is unused in the current subpass */
1108 const uint32_t attachment_idx = subpass->ds_attachment.attachment;
1109 if (attachment_idx == VK_ATTACHMENT_UNUSED)
1110 return;
1111
1112 /* Obtain a pipeline for this clear */
1113 assert(attachment_idx < cmd_buffer->state.pass->attachment_count);
1114 struct v3dv_meta_depth_clear_pipeline *pipeline = NULL;
1115 VkResult result = get_depth_clear_pipeline(cmd_buffer->device,
1116 aspects,
1117 pass,
1118 cmd_buffer->state.subpass_idx,
1119 attachment_idx,
1120 is_layered,
1121 &pipeline);
1122 if (result != VK_SUCCESS) {
1123 if (result == VK_ERROR_OUT_OF_HOST_MEMORY)
1124 v3dv_flag_oom(cmd_buffer, NULL);
1125 return;
1126 }
1127 assert(pipeline && pipeline->pipeline);
1128
1129 /* Emit clear rects */
1130 v3dv_cmd_buffer_meta_state_push(cmd_buffer, false);
1131
1132 VkCommandBuffer cmd_buffer_handle = v3dv_cmd_buffer_to_handle(cmd_buffer);
1133 v3dv_CmdPushConstants(cmd_buffer_handle,
1134 cmd_buffer->device->meta.depth_clear.p_layout,
1135 VK_SHADER_STAGE_FRAGMENT_BIT, 0, 4,
1136 &clear_ds->depth);
1137
1138 v3dv_CmdBindPipeline(cmd_buffer_handle,
1139 VK_PIPELINE_BIND_POINT_GRAPHICS,
1140 pipeline->pipeline);
1141
1142 uint32_t dynamic_states = V3DV_CMD_DIRTY_VIEWPORT | V3DV_CMD_DIRTY_SCISSOR;
1143 if (aspects & VK_IMAGE_ASPECT_STENCIL_BIT) {
1144 v3dv_CmdSetStencilReference(cmd_buffer_handle,
1145 VK_STENCIL_FACE_FRONT_AND_BACK,
1146 clear_ds->stencil);
1147 v3dv_CmdSetStencilWriteMask(cmd_buffer_handle,
1148 VK_STENCIL_FACE_FRONT_AND_BACK, 0xff);
1149 v3dv_CmdSetStencilCompareMask(cmd_buffer_handle,
1150 VK_STENCIL_FACE_FRONT_AND_BACK, 0xff);
1151 dynamic_states |= VK_DYNAMIC_STATE_STENCIL_COMPARE_MASK |
1152 VK_DYNAMIC_STATE_STENCIL_WRITE_MASK |
1153 VK_DYNAMIC_STATE_STENCIL_REFERENCE;
1154 }
1155
1156 for (uint32_t i = 0; i < rect_count; i++) {
1157 const VkViewport viewport = {
1158 .x = rects[i].rect.offset.x,
1159 .y = rects[i].rect.offset.y,
1160 .width = rects[i].rect.extent.width,
1161 .height = rects[i].rect.extent.height,
1162 .minDepth = 0.0f,
1163 .maxDepth = 1.0f
1164 };
1165 v3dv_CmdSetViewport(cmd_buffer_handle, 0, 1, &viewport);
1166 v3dv_CmdSetScissor(cmd_buffer_handle, 0, 1, &rects[i].rect);
1167 if (is_layered) {
1168 for (uint32_t layer_offset = 0; layer_offset < rects[i].layerCount;
1169 layer_offset++) {
1170 uint32_t layer = rects[i].baseArrayLayer + layer_offset;
1171 v3dv_CmdPushConstants(cmd_buffer_handle,
1172 cmd_buffer->device->meta.depth_clear.p_layout,
1173 VK_SHADER_STAGE_GEOMETRY_BIT, 4, 4, &layer);
1174 v3dv_CmdDraw(cmd_buffer_handle, 4, 1, 0, 0);
1175 }
1176 } else {
1177 assert(rects[i].baseArrayLayer == 0 && rects[i].layerCount == 1);
1178 v3dv_CmdDraw(cmd_buffer_handle, 4, 1, 0, 0);
1179 }
1180 }
1181
1182 v3dv_cmd_buffer_meta_state_pop(cmd_buffer, dynamic_states, false);
1183 }
1184
1185 static void
gather_layering_info(uint32_t rect_count,const VkClearRect * rects,bool * is_layered,bool * all_rects_same_layers)1186 gather_layering_info(uint32_t rect_count, const VkClearRect *rects,
1187 bool *is_layered, bool *all_rects_same_layers)
1188 {
1189 *all_rects_same_layers = true;
1190
1191 uint32_t min_layer = rects[0].baseArrayLayer;
1192 uint32_t max_layer = rects[0].baseArrayLayer + rects[0].layerCount - 1;
1193 for (uint32_t i = 1; i < rect_count; i++) {
1194 if (rects[i].baseArrayLayer != rects[i - 1].baseArrayLayer ||
1195 rects[i].layerCount != rects[i - 1].layerCount) {
1196 *all_rects_same_layers = false;
1197 min_layer = MIN2(min_layer, rects[i].baseArrayLayer);
1198 max_layer = MAX2(max_layer, rects[i].baseArrayLayer +
1199 rects[i].layerCount - 1);
1200 }
1201 }
1202
1203 *is_layered = !(min_layer == 0 && max_layer == 0);
1204 }
1205
1206 VKAPI_ATTR void VKAPI_CALL
v3dv_CmdClearAttachments(VkCommandBuffer commandBuffer,uint32_t attachmentCount,const VkClearAttachment * pAttachments,uint32_t rectCount,const VkClearRect * pRects)1207 v3dv_CmdClearAttachments(VkCommandBuffer commandBuffer,
1208 uint32_t attachmentCount,
1209 const VkClearAttachment *pAttachments,
1210 uint32_t rectCount,
1211 const VkClearRect *pRects)
1212 {
1213 V3DV_FROM_HANDLE(v3dv_cmd_buffer, cmd_buffer, commandBuffer);
1214
1215 /* We can only clear attachments in the current subpass */
1216 assert(attachmentCount <= 5); /* 4 color + D/S */
1217
1218 struct v3dv_render_pass *pass = cmd_buffer->state.pass;
1219
1220 assert(cmd_buffer->state.subpass_idx < pass->subpass_count);
1221 struct v3dv_subpass *subpass =
1222 &cmd_buffer->state.pass->subpasses[cmd_buffer->state.subpass_idx];
1223
1224 /* Emit a clear rect inside the current job for this subpass. For layered
1225 * framebuffers, we use a geometry shader to redirect clears to the
1226 * appropriate layers.
1227 */
1228 bool is_layered, all_rects_same_layers;
1229 gather_layering_info(rectCount, pRects, &is_layered, &all_rects_same_layers);
1230 for (uint32_t i = 0; i < attachmentCount; i++) {
1231 if (pAttachments[i].aspectMask & VK_IMAGE_ASPECT_COLOR_BIT) {
1232 emit_subpass_color_clear_rects(cmd_buffer, pass, subpass,
1233 pAttachments[i].colorAttachment,
1234 &pAttachments[i].clearValue.color,
1235 is_layered, all_rects_same_layers,
1236 rectCount, pRects);
1237 } else {
1238 emit_subpass_ds_clear_rects(cmd_buffer, pass, subpass,
1239 pAttachments[i].aspectMask,
1240 &pAttachments[i].clearValue.depthStencil,
1241 is_layered, all_rects_same_layers,
1242 rectCount, pRects);
1243 }
1244 }
1245 }
1246