1 /*
2 * Copyright © 2015 Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 */
23
24 #include <assert.h>
25 #include <stdbool.h>
26 #include <string.h>
27 #include <unistd.h>
28 #include <fcntl.h>
29
30 #include "util/mesa-sha1.h"
31 #include "util/os_time.h"
32 #include "common/intel_l3_config.h"
33 #include "common/intel_disasm.h"
34 #include "common/intel_sample_positions.h"
35 #include "anv_private.h"
36 #include "compiler/brw_nir.h"
37 #include "compiler/brw_nir_rt.h"
38 #include "anv_nir.h"
39 #include "nir/nir_xfb_info.h"
40 #include "spirv/nir_spirv.h"
41 #include "vk_util.h"
42
43 /* Needed for SWIZZLE macros */
44 #include "program/prog_instruction.h"
45
46 // Shader functions
47 #define SPIR_V_MAGIC_NUMBER 0x07230203
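/* 0x07230203 is the magic number that starts every valid SPIR-V binary
 * (the first word of the module, per the SPIR-V specification). It is used
 * below purely as a sanity check on module->data before the words are
 * handed to spirv_to_nir().
 */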
48
49 struct anv_spirv_debug_data {
50 struct anv_device *device;
51 const struct vk_shader_module *module;
52 };
53
54 static void anv_spirv_nir_debug(void *private_data,
55 enum nir_spirv_debug_level level,
56 size_t spirv_offset,
57 const char *message)
58 {
59 struct anv_spirv_debug_data *debug_data = private_data;
60
61 switch (level) {
62 case NIR_SPIRV_DEBUG_LEVEL_INFO:
63 vk_logi(VK_LOG_OBJS(&debug_data->module->base),
64 "SPIR-V offset %lu: %s",
65 (unsigned long) spirv_offset, message);
66 break;
67 case NIR_SPIRV_DEBUG_LEVEL_WARNING:
68 vk_logw(VK_LOG_OBJS(&debug_data->module->base),
69 "SPIR-V offset %lu: %s",
70 (unsigned long) spirv_offset, message);
71 break;
72 case NIR_SPIRV_DEBUG_LEVEL_ERROR:
73 vk_loge(VK_LOG_OBJS(&debug_data->module->base),
74 "SPIR-V offset %lu: %s",
75 (unsigned long) spirv_offset, message);
76 break;
77 default:
78 break;
79 }
80 }
81
82 /* Eventually, this will become part of anv_CreateShader. Unfortunately,
83 * we can't do that yet because we don't have the ability to copy nir.
84 */
85 static nir_shader *
86 anv_shader_compile_to_nir(struct anv_device *device,
87 void *mem_ctx,
88 const struct vk_shader_module *module,
89 const char *entrypoint_name,
90 gl_shader_stage stage,
91 const VkSpecializationInfo *spec_info)
92 {
93 const struct anv_physical_device *pdevice = device->physical;
94 const struct brw_compiler *compiler = pdevice->compiler;
95 const nir_shader_compiler_options *nir_options =
96 compiler->glsl_compiler_options[stage].NirOptions;
97
98 uint32_t *spirv = (uint32_t *) module->data;
99 assert(spirv[0] == SPIR_V_MAGIC_NUMBER);
100 assert(module->size % 4 == 0);
101
102 uint32_t num_spec_entries = 0;
103 struct nir_spirv_specialization *spec_entries =
104 vk_spec_info_to_nir_spirv(spec_info, &num_spec_entries);
105
106 struct anv_spirv_debug_data spirv_debug_data = {
107 .device = device,
108 .module = module,
109 };
110 struct spirv_to_nir_options spirv_options = {
111 .caps = {
112 .demote_to_helper_invocation = true,
113 .derivative_group = true,
114 .descriptor_array_dynamic_indexing = true,
115 .descriptor_array_non_uniform_indexing = true,
116 .descriptor_indexing = true,
117 .device_group = true,
118 .draw_parameters = true,
119 .float16 = pdevice->info.ver >= 8,
120 .float32_atomic_add = pdevice->info.has_lsc,
121 .float32_atomic_min_max = pdevice->info.ver >= 9,
122 .float64 = pdevice->info.ver >= 8,
123 .float64_atomic_min_max = pdevice->info.has_lsc,
124 .fragment_shader_sample_interlock = pdevice->info.ver >= 9,
125 .fragment_shader_pixel_interlock = pdevice->info.ver >= 9,
126 .geometry_streams = true,
127 /* When KHR_format_feature_flags2 is enabled, support for reading/writing
128 * without a format is reported per format, so just report true here.
129 * It's up to the application to check.
130 */
131 .image_read_without_format = device->vk.enabled_extensions.KHR_format_feature_flags2,
132 .image_write_without_format = true,
133 .int8 = pdevice->info.ver >= 8,
134 .int16 = pdevice->info.ver >= 8,
135 .int64 = pdevice->info.ver >= 8,
136 .int64_atomics = pdevice->info.ver >= 9 && pdevice->use_softpin,
137 .integer_functions2 = pdevice->info.ver >= 8,
138 .min_lod = true,
139 .multiview = true,
140 .physical_storage_buffer_address = pdevice->has_a64_buffer_access,
141 .post_depth_coverage = pdevice->info.ver >= 9,
142 .runtime_descriptor_array = true,
143 .float_controls = pdevice->info.ver >= 8,
144 .ray_tracing = pdevice->info.has_ray_tracing,
145 .shader_clock = true,
146 .shader_viewport_index_layer = true,
147 .stencil_export = pdevice->info.ver >= 9,
148 .storage_8bit = pdevice->info.ver >= 8,
149 .storage_16bit = pdevice->info.ver >= 8,
150 .subgroup_arithmetic = true,
151 .subgroup_basic = true,
152 .subgroup_ballot = true,
153 .subgroup_dispatch = true,
154 .subgroup_quad = true,
155 .subgroup_uniform_control_flow = true,
156 .subgroup_shuffle = true,
157 .subgroup_vote = true,
158 .tessellation = true,
159 .transform_feedback = pdevice->info.ver >= 8,
160 .variable_pointers = true,
161 .vk_memory_model = true,
162 .vk_memory_model_device_scope = true,
163 .workgroup_memory_explicit_layout = true,
164 .fragment_shading_rate = pdevice->info.ver >= 11,
165 },
166 .ubo_addr_format =
167 anv_nir_ubo_addr_format(pdevice, device->robust_buffer_access),
168 .ssbo_addr_format =
169 anv_nir_ssbo_addr_format(pdevice, device->robust_buffer_access),
170 .phys_ssbo_addr_format = nir_address_format_64bit_global,
171 .push_const_addr_format = nir_address_format_logical,
172
173 /* TODO: Consider changing this to an address format in which the NULL
174 * pointer equals 0. That might be a better format to play nicely
175 * with certain code / code generators.
176 */
177 .shared_addr_format = nir_address_format_32bit_offset,
178 .debug = {
179 .func = anv_spirv_nir_debug,
180 .private_data = &spirv_debug_data,
181 },
182 };
183
184
185 nir_shader *nir =
186 spirv_to_nir(spirv, module->size / 4,
187 spec_entries, num_spec_entries,
188 stage, entrypoint_name, &spirv_options, nir_options);
189 if (!nir) {
190 free(spec_entries);
191 return NULL;
192 }
193
194 assert(nir->info.stage == stage);
195 nir_validate_shader(nir, "after spirv_to_nir");
196 nir_validate_ssa_dominance(nir, "after spirv_to_nir");
197 ralloc_steal(mem_ctx, nir);
198
199 free(spec_entries);
200
201 const struct nir_lower_sysvals_to_varyings_options sysvals_to_varyings = {
202 .point_coord = true,
203 };
204 NIR_PASS_V(nir, nir_lower_sysvals_to_varyings, &sysvals_to_varyings);
205
206 if (INTEL_DEBUG(intel_debug_flag_for_shader_stage(stage))) {
207 fprintf(stderr, "NIR (from SPIR-V) for %s shader:\n",
208 gl_shader_stage_name(stage));
209 nir_print_shader(nir, stderr);
210 }
211
212 /* We have to lower away local constant initializers right before we
213 * inline functions. That way they get properly initialized at the top
214 * of the function and not at the top of its caller.
215 */
216 NIR_PASS_V(nir, nir_lower_variable_initializers, nir_var_function_temp);
217 NIR_PASS_V(nir, nir_lower_returns);
218 NIR_PASS_V(nir, nir_inline_functions);
219 NIR_PASS_V(nir, nir_copy_prop);
220 NIR_PASS_V(nir, nir_opt_deref);
221
222 /* Pick off the single entrypoint that we want */
223 foreach_list_typed_safe(nir_function, func, node, &nir->functions) {
224 if (!func->is_entrypoint)
225 exec_node_remove(&func->node);
226 }
227 assert(exec_list_length(&nir->functions) == 1);
228
229 /* Now that we've deleted all but the main function, we can go ahead and
230 * lower the rest of the constant initializers. We do this here so that
231 * nir_remove_dead_variables and split_per_member_structs below see the
232 * corresponding stores.
233 */
234 NIR_PASS_V(nir, nir_lower_variable_initializers, ~0);
235
236 /* Split member structs. We do this before lower_io_to_temporaries so that
237 * it doesn't lower system values to temporaries by accident.
238 */
239 NIR_PASS_V(nir, nir_split_var_copies);
240 NIR_PASS_V(nir, nir_split_per_member_structs);
241
242 NIR_PASS_V(nir, nir_remove_dead_variables,
243 nir_var_shader_in | nir_var_shader_out | nir_var_system_value |
244 nir_var_shader_call_data | nir_var_ray_hit_attrib,
245 NULL);
246
247 NIR_PASS_V(nir, nir_propagate_invariant, false);
248 NIR_PASS_V(nir, nir_lower_io_to_temporaries,
249 nir_shader_get_entrypoint(nir), true, false);
250
251 NIR_PASS_V(nir, nir_lower_frexp);
252
253 /* Vulkan uses the separate-shader linking model */
254 nir->info.separate_shader = true;
255
256 brw_preprocess_nir(compiler, nir, NULL);
257
258 return nir;
259 }
260
261 VkResult
262 anv_pipeline_init(struct anv_pipeline *pipeline,
263 struct anv_device *device,
264 enum anv_pipeline_type type,
265 VkPipelineCreateFlags flags,
266 const VkAllocationCallbacks *pAllocator)
267 {
268 VkResult result;
269
270 memset(pipeline, 0, sizeof(*pipeline));
271
272 vk_object_base_init(&device->vk, &pipeline->base,
273 VK_OBJECT_TYPE_PIPELINE);
274 pipeline->device = device;
275
276 /* It's the job of the child class to provide actual backing storage for
277 * the batch by setting batch.start, batch.next, and batch.end.
278 */
279 pipeline->batch.alloc = pAllocator ? pAllocator : &device->vk.alloc;
280 pipeline->batch.relocs = &pipeline->batch_relocs;
281 pipeline->batch.status = VK_SUCCESS;
282
283 result = anv_reloc_list_init(&pipeline->batch_relocs,
284 pipeline->batch.alloc);
285 if (result != VK_SUCCESS)
286 return result;
287
288 pipeline->mem_ctx = ralloc_context(NULL);
289
290 pipeline->type = type;
291 pipeline->flags = flags;
292
293 util_dynarray_init(&pipeline->executables, pipeline->mem_ctx);
294
295 return VK_SUCCESS;
296 }
297
298 void
299 anv_pipeline_finish(struct anv_pipeline *pipeline,
300 struct anv_device *device,
301 const VkAllocationCallbacks *pAllocator)
302 {
303 anv_reloc_list_finish(&pipeline->batch_relocs,
304 pAllocator ? pAllocator : &device->vk.alloc);
305 ralloc_free(pipeline->mem_ctx);
306 vk_object_base_finish(&pipeline->base);
307 }
308
309 void anv_DestroyPipeline(
310 VkDevice _device,
311 VkPipeline _pipeline,
312 const VkAllocationCallbacks* pAllocator)
313 {
314 ANV_FROM_HANDLE(anv_device, device, _device);
315 ANV_FROM_HANDLE(anv_pipeline, pipeline, _pipeline);
316
317 if (!pipeline)
318 return;
319
320 switch (pipeline->type) {
321 case ANV_PIPELINE_GRAPHICS: {
322 struct anv_graphics_pipeline *gfx_pipeline =
323 anv_pipeline_to_graphics(pipeline);
324
325 if (gfx_pipeline->blend_state.map)
326 anv_state_pool_free(&device->dynamic_state_pool, gfx_pipeline->blend_state);
327 if (gfx_pipeline->cps_state.map)
328 anv_state_pool_free(&device->dynamic_state_pool, gfx_pipeline->cps_state);
329
330 for (unsigned s = 0; s < ARRAY_SIZE(gfx_pipeline->shaders); s++) {
331 if (gfx_pipeline->shaders[s])
332 anv_shader_bin_unref(device, gfx_pipeline->shaders[s]);
333 }
334 break;
335 }
336
337 case ANV_PIPELINE_COMPUTE: {
338 struct anv_compute_pipeline *compute_pipeline =
339 anv_pipeline_to_compute(pipeline);
340
341 if (compute_pipeline->cs)
342 anv_shader_bin_unref(device, compute_pipeline->cs);
343
344 break;
345 }
346
347 case ANV_PIPELINE_RAY_TRACING: {
348 struct anv_ray_tracing_pipeline *rt_pipeline =
349 anv_pipeline_to_ray_tracing(pipeline);
350
351 util_dynarray_foreach(&rt_pipeline->shaders,
352 struct anv_shader_bin *, shader) {
353 anv_shader_bin_unref(device, *shader);
354 }
355 break;
356 }
357
358 default:
359 unreachable("invalid pipeline type");
360 }
361
362 anv_pipeline_finish(pipeline, device, pAllocator);
363 vk_free2(&device->vk.alloc, pAllocator, pipeline);
364 }
365
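/* Maps VkPrimitiveTopology to the hardware 3DPRIM_* topology values.
 * VK_PRIMITIVE_TOPOLOGY_PATCH_LIST is intentionally absent: patch
 * topologies depend on the patch control point count and are handled
 * elsewhere, outside this table.
 */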
366 static const uint32_t vk_to_intel_primitive_type[] = {
367 [VK_PRIMITIVE_TOPOLOGY_POINT_LIST] = _3DPRIM_POINTLIST,
368 [VK_PRIMITIVE_TOPOLOGY_LINE_LIST] = _3DPRIM_LINELIST,
369 [VK_PRIMITIVE_TOPOLOGY_LINE_STRIP] = _3DPRIM_LINESTRIP,
370 [VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST] = _3DPRIM_TRILIST,
371 [VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP] = _3DPRIM_TRISTRIP,
372 [VK_PRIMITIVE_TOPOLOGY_TRIANGLE_FAN] = _3DPRIM_TRIFAN,
373 [VK_PRIMITIVE_TOPOLOGY_LINE_LIST_WITH_ADJACENCY] = _3DPRIM_LINELIST_ADJ,
374 [VK_PRIMITIVE_TOPOLOGY_LINE_STRIP_WITH_ADJACENCY] = _3DPRIM_LINESTRIP_ADJ,
375 [VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST_WITH_ADJACENCY] = _3DPRIM_TRILIST_ADJ,
376 [VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP_WITH_ADJACENCY] = _3DPRIM_TRISTRIP_ADJ,
377 };
378
379 static void
380 populate_sampler_prog_key(const struct intel_device_info *devinfo,
381 struct brw_sampler_prog_key_data *key)
382 {
383 /* Almost all multisampled textures are compressed. The only time when we
384 * don't compress a multisampled texture is for 16x MSAA with a surface
385 * width greater than 8k which is a bit of an edge case. Since the sampler
386 * just ignores the MCS parameter to ld2ms when MCS is disabled, it's safe
387 * to tell the compiler to always assume compression.
388 */
389 key->compressed_multisample_layout_mask = ~0;
390
391 /* SkyLake added support for 16x MSAA. With this came a new message for
392 * reading from a 16x MSAA surface with compression. The new message was
393 * needed because now the MCS data is 64 bits instead of 32 or lower as is
394 * the case for 8x, 4x, and 2x. The key->msaa_16 bit-field controls which
395 * message we use. Fortunately, the 16x message works for 8x, 4x, and 2x
396 * so we can just use it unconditionally. This may not be quite as
397 * efficient but it saves us from recompiling.
398 */
399 if (devinfo->ver >= 9)
400 key->msaa_16 = ~0;
401
402 /* XXX: Handle texture swizzle on HSW- */
403 for (int i = 0; i < MAX_SAMPLERS; i++) {
404 /* Assume color sampler, no swizzling. (Works for BDW+) */
405 key->swizzles[i] = SWIZZLE_XYZW;
406 }
407 }
408
409 static void
410 populate_base_prog_key(const struct intel_device_info *devinfo,
411 enum brw_subgroup_size_type subgroup_size_type,
412 bool robust_buffer_acccess,
413 struct brw_base_prog_key *key)
414 {
415 key->subgroup_size_type = subgroup_size_type;
416 key->robust_buffer_access = robust_buffer_acccess;
417
418 populate_sampler_prog_key(devinfo, &key->tex);
419 }
420
421 static void
422 populate_vs_prog_key(const struct intel_device_info *devinfo,
423 enum brw_subgroup_size_type subgroup_size_type,
424 bool robust_buffer_acccess,
425 struct brw_vs_prog_key *key)
426 {
427 memset(key, 0, sizeof(*key));
428
429 populate_base_prog_key(devinfo, subgroup_size_type,
430 robust_buffer_acccess, &key->base);
431
432 /* XXX: Handle vertex input work-arounds */
433
434 /* XXX: Handle sampler_prog_key */
435 }
436
437 static void
438 populate_tcs_prog_key(const struct intel_device_info *devinfo,
439 enum brw_subgroup_size_type subgroup_size_type,
440 bool robust_buffer_acccess,
441 unsigned input_vertices,
442 struct brw_tcs_prog_key *key)
443 {
444 memset(key, 0, sizeof(*key));
445
446 populate_base_prog_key(devinfo, subgroup_size_type,
447 robust_buffer_acccess, &key->base);
448
449 key->input_vertices = input_vertices;
450 }
451
452 static void
453 populate_tes_prog_key(const struct intel_device_info *devinfo,
454 enum brw_subgroup_size_type subgroup_size_type,
455 bool robust_buffer_acccess,
456 struct brw_tes_prog_key *key)
457 {
458 memset(key, 0, sizeof(*key));
459
460 populate_base_prog_key(devinfo, subgroup_size_type,
461 robust_buffer_acccess, &key->base);
462 }
463
464 static void
465 populate_gs_prog_key(const struct intel_device_info *devinfo,
466 enum brw_subgroup_size_type subgroup_size_type,
467 bool robust_buffer_acccess,
468 struct brw_gs_prog_key *key)
469 {
470 memset(key, 0, sizeof(*key));
471
472 populate_base_prog_key(devinfo, subgroup_size_type,
473 robust_buffer_acccess, &key->base);
474 }
475
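/* Returns whether the fragment shader key needs coarse pixel (fragment
 * shading rate) support. Per-sample shading and coarse pixel shading are
 * mutually exclusive, and a statically known 1x1 rate with KEEP combiners
 * can never produce coarse pixels either.
 */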
476 static bool
477 pipeline_has_coarse_pixel(const struct anv_graphics_pipeline *pipeline,
478 const VkPipelineFragmentShadingRateStateCreateInfoKHR *fsr_info)
479 {
480 if (pipeline->sample_shading_enable)
481 return false;
482
483 /* Not dynamic & not specified for the pipeline. */
484 if ((pipeline->dynamic_states & ANV_CMD_DIRTY_DYNAMIC_SHADING_RATE) == 0 && !fsr_info)
485 return false;
486
487 /* Not dynamic & the pipeline has a 1x1 fragment shading rate with no
488 * possibility for any element of the pipeline to change the value.
489 */
490 if ((pipeline->dynamic_states & ANV_CMD_DIRTY_DYNAMIC_SHADING_RATE) == 0 &&
491 fsr_info->fragmentSize.width <= 1 &&
492 fsr_info->fragmentSize.height <= 1 &&
493 fsr_info->combinerOps[0] == VK_FRAGMENT_SHADING_RATE_COMBINER_OP_KEEP_KHR &&
494 fsr_info->combinerOps[1] == VK_FRAGMENT_SHADING_RATE_COMBINER_OP_KEEP_KHR)
495 return false;
496
497 return true;
498 }
499
500 static void
501 populate_wm_prog_key(const struct anv_graphics_pipeline *pipeline,
502 VkPipelineShaderStageCreateFlags flags,
503 bool robust_buffer_acccess,
504 const struct anv_subpass *subpass,
505 const VkPipelineMultisampleStateCreateInfo *ms_info,
506 const VkPipelineFragmentShadingRateStateCreateInfoKHR *fsr_info,
507 struct brw_wm_prog_key *key)
508 {
509 const struct anv_device *device = pipeline->base.device;
510 const struct intel_device_info *devinfo = &device->info;
511
512 memset(key, 0, sizeof(*key));
513
514 populate_base_prog_key(devinfo, flags, robust_buffer_acccess, &key->base);
515
516 /* We set this to 0 here and set to the actual value before we call
517 * brw_compile_fs.
518 */
519 key->input_slots_valid = 0;
520
521 /* Vulkan doesn't specify a default */
522 key->high_quality_derivatives = false;
523
524 /* XXX Vulkan doesn't appear to specify */
525 key->clamp_fragment_color = false;
526
527 key->ignore_sample_mask_out = false;
528
529 assert(subpass->color_count <= MAX_RTS);
530 for (uint32_t i = 0; i < subpass->color_count; i++) {
531 if (subpass->color_attachments[i].attachment != VK_ATTACHMENT_UNUSED)
532 key->color_outputs_valid |= (1 << i);
533 }
534
535 key->nr_color_regions = subpass->color_count;
536
537 /* To reduce possible shader recompilations we would need to know
538 * whether there is a SampleMask output variable, in order to decide if
539 * we should emit code to work around the issue that the hardware
540 * disables alpha to coverage when there is a SampleMask output.
541 */
542 key->alpha_to_coverage = ms_info && ms_info->alphaToCoverageEnable;
543
544 /* Vulkan doesn't support fixed-function alpha test */
545 key->alpha_test_replicate_alpha = false;
546
547 if (ms_info) {
548 /* We should probably pull this out of the shader, but it's fairly
549 * harmless to compute it and then let dead-code elimination take care of it.
550 */
551 if (ms_info->rasterizationSamples > 1) {
552 key->persample_interp = ms_info->sampleShadingEnable &&
553 (ms_info->minSampleShading * ms_info->rasterizationSamples) > 1;
554 key->multisample_fbo = true;
555 }
556
557 key->frag_coord_adds_sample_pos = key->persample_interp;
558 }
559
560 key->coarse_pixel =
561 device->vk.enabled_extensions.KHR_fragment_shading_rate &&
562 pipeline_has_coarse_pixel(pipeline, fsr_info);
563 }
564
565 static void
566 populate_cs_prog_key(const struct intel_device_info *devinfo,
567 enum brw_subgroup_size_type subgroup_size_type,
568 bool robust_buffer_acccess,
569 struct brw_cs_prog_key *key)
570 {
571 memset(key, 0, sizeof(*key));
572
573 populate_base_prog_key(devinfo, subgroup_size_type,
574 robust_buffer_acccess, &key->base);
575 }
576
577 static void
578 populate_bs_prog_key(const struct intel_device_info *devinfo,
579 VkPipelineShaderStageCreateFlags flags,
580 bool robust_buffer_access,
581 struct brw_bs_prog_key *key)
582 {
583 memset(key, 0, sizeof(*key));
584
585 populate_base_prog_key(devinfo, flags, robust_buffer_access, &key->base);
586 }
587
588 struct anv_pipeline_stage {
589 gl_shader_stage stage;
590
591 const struct vk_shader_module *module;
592 const char *entrypoint;
593 const VkSpecializationInfo *spec_info;
594
595 unsigned char shader_sha1[20];
596
597 union brw_any_prog_key key;
598
599 struct {
600 gl_shader_stage stage;
601 unsigned char sha1[20];
602 } cache_key;
603
604 nir_shader *nir;
605
606 struct anv_pipeline_binding surface_to_descriptor[256];
607 struct anv_pipeline_binding sampler_to_descriptor[256];
608 struct anv_pipeline_bind_map bind_map;
609
610 union brw_any_prog_data prog_data;
611
612 uint32_t num_stats;
613 struct brw_compile_stats stats[3];
614 char *disasm[3];
615
616 VkPipelineCreationFeedbackEXT feedback;
617
618 const unsigned *code;
619
620 struct anv_shader_bin *bin;
621 };
622
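/* Hash everything that can affect the NIR generated for a single shader
 * stage: the module's SHA1, the entrypoint name, the stage, and any
 * specialization constants. The result (shader_sha1) keys the NIR-level
 * cache lookup in anv_pipeline_stage_get_nir() and also feeds the
 * per-pipeline hashes below.
 */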
623 static void
624 anv_pipeline_hash_shader(const struct vk_shader_module *module,
625 const char *entrypoint,
626 gl_shader_stage stage,
627 const VkSpecializationInfo *spec_info,
628 unsigned char *sha1_out)
629 {
630 struct mesa_sha1 ctx;
631 _mesa_sha1_init(&ctx);
632
633 _mesa_sha1_update(&ctx, module->sha1, sizeof(module->sha1));
634 _mesa_sha1_update(&ctx, entrypoint, strlen(entrypoint));
635 _mesa_sha1_update(&ctx, &stage, sizeof(stage));
636 if (spec_info) {
637 _mesa_sha1_update(&ctx, spec_info->pMapEntries,
638 spec_info->mapEntryCount *
639 sizeof(*spec_info->pMapEntries));
640 _mesa_sha1_update(&ctx, spec_info->pData,
641 spec_info->dataSize);
642 }
643
644 _mesa_sha1_final(&ctx, sha1_out);
645 }
646
647 static void
648 anv_pipeline_hash_graphics(struct anv_graphics_pipeline *pipeline,
649 struct anv_pipeline_layout *layout,
650 struct anv_pipeline_stage *stages,
651 unsigned char *sha1_out)
652 {
653 struct mesa_sha1 ctx;
654 _mesa_sha1_init(&ctx);
655
656 _mesa_sha1_update(&ctx, &pipeline->subpass->view_mask,
657 sizeof(pipeline->subpass->view_mask));
658
659 if (layout)
660 _mesa_sha1_update(&ctx, layout->sha1, sizeof(layout->sha1));
661
662 const bool rba = pipeline->base.device->robust_buffer_access;
663 _mesa_sha1_update(&ctx, &rba, sizeof(rba));
664
665 for (unsigned s = 0; s < ARRAY_SIZE(pipeline->shaders); s++) {
666 if (stages[s].entrypoint) {
667 _mesa_sha1_update(&ctx, stages[s].shader_sha1,
668 sizeof(stages[s].shader_sha1));
669 _mesa_sha1_update(&ctx, &stages[s].key, brw_prog_key_size(s));
670 }
671 }
672
673 _mesa_sha1_final(&ctx, sha1_out);
674 }
675
676 static void
677 anv_pipeline_hash_compute(struct anv_compute_pipeline *pipeline,
678 struct anv_pipeline_layout *layout,
679 struct anv_pipeline_stage *stage,
680 unsigned char *sha1_out)
681 {
682 struct mesa_sha1 ctx;
683 _mesa_sha1_init(&ctx);
684
685 if (layout)
686 _mesa_sha1_update(&ctx, layout->sha1, sizeof(layout->sha1));
687
688 const bool rba = pipeline->base.device->robust_buffer_access;
689 _mesa_sha1_update(&ctx, &rba, sizeof(rba));
690
691 _mesa_sha1_update(&ctx, stage->shader_sha1,
692 sizeof(stage->shader_sha1));
693 _mesa_sha1_update(&ctx, &stage->key.cs, sizeof(stage->key.cs));
694
695 _mesa_sha1_final(&ctx, sha1_out);
696 }
697
698 static void
699 anv_pipeline_hash_ray_tracing_shader(struct anv_ray_tracing_pipeline *pipeline,
700 struct anv_pipeline_layout *layout,
701 struct anv_pipeline_stage *stage,
702 unsigned char *sha1_out)
703 {
704 struct mesa_sha1 ctx;
705 _mesa_sha1_init(&ctx);
706
707 if (layout != NULL)
708 _mesa_sha1_update(&ctx, layout->sha1, sizeof(layout->sha1));
709
710 const bool rba = pipeline->base.device->robust_buffer_access;
711 _mesa_sha1_update(&ctx, &rba, sizeof(rba));
712
713 _mesa_sha1_update(&ctx, stage->shader_sha1, sizeof(stage->shader_sha1));
714 _mesa_sha1_update(&ctx, &stage->key, sizeof(stage->key.bs));
715
716 _mesa_sha1_final(&ctx, sha1_out);
717 }
718
719 static void
720 anv_pipeline_hash_ray_tracing_combined_shader(struct anv_ray_tracing_pipeline *pipeline,
721 struct anv_pipeline_layout *layout,
722 struct anv_pipeline_stage *intersection,
723 struct anv_pipeline_stage *any_hit,
724 unsigned char *sha1_out)
725 {
726 struct mesa_sha1 ctx;
727 _mesa_sha1_init(&ctx);
728
729 if (layout != NULL)
730 _mesa_sha1_update(&ctx, layout->sha1, sizeof(layout->sha1));
731
732 const bool rba = pipeline->base.device->robust_buffer_access;
733 _mesa_sha1_update(&ctx, &rba, sizeof(rba));
734
735 _mesa_sha1_update(&ctx, intersection->shader_sha1, sizeof(intersection->shader_sha1));
736 _mesa_sha1_update(&ctx, &intersection->key, sizeof(intersection->key.bs));
737 _mesa_sha1_update(&ctx, any_hit->shader_sha1, sizeof(any_hit->shader_sha1));
738 _mesa_sha1_update(&ctx, &any_hit->key, sizeof(any_hit->key.bs));
739
740 _mesa_sha1_final(&ctx, sha1_out);
741 }
742
743 static nir_shader *
744 anv_pipeline_stage_get_nir(struct anv_pipeline *pipeline,
745 struct anv_pipeline_cache *cache,
746 void *mem_ctx,
747 struct anv_pipeline_stage *stage)
748 {
749 const struct brw_compiler *compiler =
750 pipeline->device->physical->compiler;
751 const nir_shader_compiler_options *nir_options =
752 compiler->glsl_compiler_options[stage->stage].NirOptions;
753 nir_shader *nir;
754
755 nir = anv_device_search_for_nir(pipeline->device, cache,
756 nir_options,
757 stage->shader_sha1,
758 mem_ctx);
759 if (nir) {
760 assert(nir->info.stage == stage->stage);
761 return nir;
762 }
763
764 nir = anv_shader_compile_to_nir(pipeline->device,
765 mem_ctx,
766 stage->module,
767 stage->entrypoint,
768 stage->stage,
769 stage->spec_info);
770 if (nir) {
771 anv_device_upload_nir(pipeline->device, cache, nir, stage->shader_sha1);
772 return nir;
773 }
774
775 return NULL;
776 }
777
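/* Size/alignment callback used with nir_lower_vars_to_explicit_types() for
 * shared memory. Booleans take 4 bytes and a 3-component vector is aligned
 * like a 4-component one, e.g. a vec3 of 32-bit floats gets size 12 and
 * alignment 16.
 */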
778 static void
779 shared_type_info(const struct glsl_type *type, unsigned *size, unsigned *align)
780 {
781 assert(glsl_type_is_vector_or_scalar(type));
782
783 uint32_t comp_size = glsl_type_is_boolean(type)
784 ? 4 : glsl_get_bit_size(type) / 8;
785 unsigned length = glsl_get_vector_elements(type);
786 *size = comp_size * length,
787 *align = comp_size * (length == 3 ? 4 : length);
788 }
789
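/* Runs the anv-specific NIR lowering that happens after the generic
 * brw_preprocess_nir() but before back-end compilation: fragment-shader
 * fixups, Y'CbCr and multiview lowering, storage-image and explicit-I/O
 * lowering, applying the pipeline layout to the bind map, push-constant
 * layout computation, and shared-memory layout/zero-initialization.
 */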
790 static void
791 anv_pipeline_lower_nir(struct anv_pipeline *pipeline,
792 void *mem_ctx,
793 struct anv_pipeline_stage *stage,
794 struct anv_pipeline_layout *layout)
795 {
796 const struct anv_physical_device *pdevice = pipeline->device->physical;
797 const struct brw_compiler *compiler = pdevice->compiler;
798
799 struct brw_stage_prog_data *prog_data = &stage->prog_data.base;
800 nir_shader *nir = stage->nir;
801
802 if (nir->info.stage == MESA_SHADER_FRAGMENT) {
803 /* Check if sample shading is enabled in the shader and toggle
804 * it on for the pipeline independently of whether sampleShadingEnable is set.
805 */
806 nir_shader_gather_info(nir, nir_shader_get_entrypoint(nir));
807 if (nir->info.fs.uses_sample_shading)
808 anv_pipeline_to_graphics(pipeline)->sample_shading_enable = true;
809
810 NIR_PASS_V(nir, nir_lower_wpos_center,
811 anv_pipeline_to_graphics(pipeline)->sample_shading_enable);
812 NIR_PASS_V(nir, nir_lower_input_attachments,
813 &(nir_input_attachment_options) {
814 .use_fragcoord_sysval = true,
815 .use_layer_id_sysval = true,
816 });
817 }
818
819 NIR_PASS_V(nir, anv_nir_lower_ycbcr_textures, layout);
820
821 if (pipeline->type == ANV_PIPELINE_GRAPHICS) {
822 NIR_PASS_V(nir, anv_nir_lower_multiview,
823 anv_pipeline_to_graphics(pipeline));
824 }
825
826 nir_shader_gather_info(nir, nir_shader_get_entrypoint(nir));
827
828 NIR_PASS_V(nir, brw_nir_lower_storage_image, compiler->devinfo);
829
830 NIR_PASS_V(nir, nir_lower_explicit_io, nir_var_mem_global,
831 nir_address_format_64bit_global);
832 NIR_PASS_V(nir, nir_lower_explicit_io, nir_var_mem_push_const,
833 nir_address_format_32bit_offset);
834
835 /* Apply the actual pipeline layout to UBOs, SSBOs, and textures */
836 anv_nir_apply_pipeline_layout(pdevice,
837 pipeline->device->robust_buffer_access,
838 layout, nir, &stage->bind_map);
839
840 NIR_PASS_V(nir, nir_lower_explicit_io, nir_var_mem_ubo,
841 anv_nir_ubo_addr_format(pdevice,
842 pipeline->device->robust_buffer_access));
843 NIR_PASS_V(nir, nir_lower_explicit_io, nir_var_mem_ssbo,
844 anv_nir_ssbo_addr_format(pdevice,
845 pipeline->device->robust_buffer_access));
846
847 /* First run copy-prop to get rid of all of the vec() that address
848 * calculations often create and then constant-fold so that, when we
849 * get to anv_nir_lower_ubo_loads, we can detect constant offsets.
850 */
851 NIR_PASS_V(nir, nir_copy_prop);
852 NIR_PASS_V(nir, nir_opt_constant_folding);
853
854 NIR_PASS_V(nir, anv_nir_lower_ubo_loads);
855
856 /* We don't support non-uniform UBOs and non-uniform SSBO access is
857 * handled naturally by falling back to A64 messages.
858 */
859 NIR_PASS_V(nir, nir_lower_non_uniform_access,
860 &(nir_lower_non_uniform_access_options) {
861 .types = nir_lower_non_uniform_texture_access |
862 nir_lower_non_uniform_image_access,
863 .callback = NULL,
864 });
865
866 anv_nir_compute_push_layout(pdevice, pipeline->device->robust_buffer_access,
867 nir, prog_data, &stage->bind_map, mem_ctx);
868
869 if (gl_shader_stage_uses_workgroup(nir->info.stage)) {
870 if (!nir->info.shared_memory_explicit_layout) {
871 NIR_PASS_V(nir, nir_lower_vars_to_explicit_types,
872 nir_var_mem_shared, shared_type_info);
873 }
874
875 NIR_PASS_V(nir, nir_lower_explicit_io,
876 nir_var_mem_shared, nir_address_format_32bit_offset);
877
878 if (nir->info.zero_initialize_shared_memory &&
879 nir->info.shared_size > 0) {
880 /* The effective Shared Local Memory size is at least 1024 bytes and
881 * is always rounded to a power of two, so it is OK to align the size
882 * used by the shader to chunk_size -- which does simplify the logic.
883 */
884 const unsigned chunk_size = 16;
885 const unsigned shared_size = ALIGN(nir->info.shared_size, chunk_size);
886 assert(shared_size <=
887 intel_calculate_slm_size(compiler->devinfo->ver, nir->info.shared_size));
888
889 NIR_PASS_V(nir, nir_zero_initialize_shared_memory,
890 shared_size, chunk_size);
891 }
892 }
893
894 stage->nir = nir;
895 }
896
897 static void
898 anv_pipeline_link_vs(const struct brw_compiler *compiler,
899 struct anv_pipeline_stage *vs_stage,
900 struct anv_pipeline_stage *next_stage)
901 {
902 if (next_stage)
903 brw_nir_link_shaders(compiler, vs_stage->nir, next_stage->nir);
904 }
905
906 static void
907 anv_pipeline_compile_vs(const struct brw_compiler *compiler,
908 void *mem_ctx,
909 struct anv_graphics_pipeline *pipeline,
910 struct anv_pipeline_stage *vs_stage)
911 {
912 /* When using Primitive Replication for multiview, each view gets its own
913 * position slot.
914 */
915 uint32_t pos_slots = pipeline->use_primitive_replication ?
916 anv_subpass_view_count(pipeline->subpass) : 1;
917
918 brw_compute_vue_map(compiler->devinfo,
919 &vs_stage->prog_data.vs.base.vue_map,
920 vs_stage->nir->info.outputs_written,
921 vs_stage->nir->info.separate_shader,
922 pos_slots);
923
924 vs_stage->num_stats = 1;
925
926 struct brw_compile_vs_params params = {
927 .nir = vs_stage->nir,
928 .key = &vs_stage->key.vs,
929 .prog_data = &vs_stage->prog_data.vs,
930 .stats = vs_stage->stats,
931 .log_data = pipeline->base.device,
932 };
933
934 vs_stage->code = brw_compile_vs(compiler, mem_ctx, &params);
935 }
936
937 static void
938 merge_tess_info(struct shader_info *tes_info,
939 const struct shader_info *tcs_info)
940 {
941 /* The Vulkan 1.0.38 spec, section 21.1 Tessellator says:
942 *
943 * "PointMode. Controls generation of points rather than triangles
944 * or lines. This functionality defaults to disabled, and is
945 * enabled if either shader stage includes the execution mode.
946 *
947 * and about Triangles, Quads, IsoLines, VertexOrderCw, VertexOrderCcw,
948 * PointMode, SpacingEqual, SpacingFractionalEven, SpacingFractionalOdd,
949 * and OutputVertices, it says:
950 *
951 * "One mode must be set in at least one of the tessellation
952 * shader stages."
953 *
954 * So, the fields can be set in either the TCS or TES, but they must
955 * agree if set in both. Our backend looks at TES, so bitwise-or in
956 * the values from the TCS.
957 */
958 assert(tcs_info->tess.tcs_vertices_out == 0 ||
959 tes_info->tess.tcs_vertices_out == 0 ||
960 tcs_info->tess.tcs_vertices_out == tes_info->tess.tcs_vertices_out);
961 tes_info->tess.tcs_vertices_out |= tcs_info->tess.tcs_vertices_out;
962
963 assert(tcs_info->tess.spacing == TESS_SPACING_UNSPECIFIED ||
964 tes_info->tess.spacing == TESS_SPACING_UNSPECIFIED ||
965 tcs_info->tess.spacing == tes_info->tess.spacing);
966 tes_info->tess.spacing |= tcs_info->tess.spacing;
967
968 assert(tcs_info->tess.primitive_mode == 0 ||
969 tes_info->tess.primitive_mode == 0 ||
970 tcs_info->tess.primitive_mode == tes_info->tess.primitive_mode);
971 tes_info->tess.primitive_mode |= tcs_info->tess.primitive_mode;
972 tes_info->tess.ccw |= tcs_info->tess.ccw;
973 tes_info->tess.point_mode |= tcs_info->tess.point_mode;
974 }
975
976 static void
977 anv_pipeline_link_tcs(const struct brw_compiler *compiler,
978 struct anv_pipeline_stage *tcs_stage,
979 struct anv_pipeline_stage *tes_stage)
980 {
981 assert(tes_stage && tes_stage->stage == MESA_SHADER_TESS_EVAL);
982
983 brw_nir_link_shaders(compiler, tcs_stage->nir, tes_stage->nir);
984
985 nir_lower_patch_vertices(tes_stage->nir,
986 tcs_stage->nir->info.tess.tcs_vertices_out,
987 NULL);
988
989 /* Copy TCS info into the TES info */
990 merge_tess_info(&tes_stage->nir->info, &tcs_stage->nir->info);
991
992 /* Whacking the key after cache lookup is a bit sketchy, but all of
993 * this comes from the SPIR-V, which is part of the hash used for the
994 * pipeline cache. So it should be safe.
995 */
996 tcs_stage->key.tcs.tes_primitive_mode =
997 tes_stage->nir->info.tess.primitive_mode;
998 tcs_stage->key.tcs.quads_workaround =
999 compiler->devinfo->ver < 9 &&
1000 tes_stage->nir->info.tess.primitive_mode == 7 /* GL_QUADS */ &&
1001 tes_stage->nir->info.tess.spacing == TESS_SPACING_EQUAL;
1002 }
1003
1004 static void
1005 anv_pipeline_compile_tcs(const struct brw_compiler *compiler,
1006 void *mem_ctx,
1007 struct anv_device *device,
1008 struct anv_pipeline_stage *tcs_stage,
1009 struct anv_pipeline_stage *prev_stage)
1010 {
1011 tcs_stage->key.tcs.outputs_written =
1012 tcs_stage->nir->info.outputs_written;
1013 tcs_stage->key.tcs.patch_outputs_written =
1014 tcs_stage->nir->info.patch_outputs_written;
1015
1016 tcs_stage->num_stats = 1;
1017 tcs_stage->code = brw_compile_tcs(compiler, device, mem_ctx,
1018 &tcs_stage->key.tcs,
1019 &tcs_stage->prog_data.tcs,
1020 tcs_stage->nir, -1,
1021 tcs_stage->stats, NULL);
1022 }
1023
1024 static void
1025 anv_pipeline_link_tes(const struct brw_compiler *compiler,
1026 struct anv_pipeline_stage *tes_stage,
1027 struct anv_pipeline_stage *next_stage)
1028 {
1029 if (next_stage)
1030 brw_nir_link_shaders(compiler, tes_stage->nir, next_stage->nir);
1031 }
1032
1033 static void
1034 anv_pipeline_compile_tes(const struct brw_compiler *compiler,
1035 void *mem_ctx,
1036 struct anv_device *device,
1037 struct anv_pipeline_stage *tes_stage,
1038 struct anv_pipeline_stage *tcs_stage)
1039 {
1040 tes_stage->key.tes.inputs_read =
1041 tcs_stage->nir->info.outputs_written;
1042 tes_stage->key.tes.patch_inputs_read =
1043 tcs_stage->nir->info.patch_outputs_written;
1044
1045 tes_stage->num_stats = 1;
1046 tes_stage->code = brw_compile_tes(compiler, device, mem_ctx,
1047 &tes_stage->key.tes,
1048 &tcs_stage->prog_data.tcs.base.vue_map,
1049 &tes_stage->prog_data.tes,
1050 tes_stage->nir, -1,
1051 tes_stage->stats, NULL);
1052 }
1053
1054 static void
1055 anv_pipeline_link_gs(const struct brw_compiler *compiler,
1056 struct anv_pipeline_stage *gs_stage,
1057 struct anv_pipeline_stage *next_stage)
1058 {
1059 if (next_stage)
1060 brw_nir_link_shaders(compiler, gs_stage->nir, next_stage->nir);
1061 }
1062
1063 static void
1064 anv_pipeline_compile_gs(const struct brw_compiler *compiler,
1065 void *mem_ctx,
1066 struct anv_device *device,
1067 struct anv_pipeline_stage *gs_stage,
1068 struct anv_pipeline_stage *prev_stage)
1069 {
1070 brw_compute_vue_map(compiler->devinfo,
1071 &gs_stage->prog_data.gs.base.vue_map,
1072 gs_stage->nir->info.outputs_written,
1073 gs_stage->nir->info.separate_shader, 1);
1074
1075 gs_stage->num_stats = 1;
1076 gs_stage->code = brw_compile_gs(compiler, device, mem_ctx,
1077 &gs_stage->key.gs,
1078 &gs_stage->prog_data.gs,
1079 gs_stage->nir, -1,
1080 gs_stage->stats, NULL);
1081 }
1082
1083 static void
1084 anv_pipeline_link_fs(const struct brw_compiler *compiler,
1085 struct anv_pipeline_stage *stage)
1086 {
1087 unsigned num_rt_bindings;
1088 struct anv_pipeline_binding rt_bindings[MAX_RTS];
1089 if (stage->key.wm.nr_color_regions > 0) {
1090 assert(stage->key.wm.nr_color_regions <= MAX_RTS);
1091 for (unsigned rt = 0; rt < stage->key.wm.nr_color_regions; rt++) {
1092 if (stage->key.wm.color_outputs_valid & BITFIELD_BIT(rt)) {
1093 rt_bindings[rt] = (struct anv_pipeline_binding) {
1094 .set = ANV_DESCRIPTOR_SET_COLOR_ATTACHMENTS,
1095 .index = rt,
1096 };
1097 } else {
1098 /* Setup a null render target */
1099 rt_bindings[rt] = (struct anv_pipeline_binding) {
1100 .set = ANV_DESCRIPTOR_SET_COLOR_ATTACHMENTS,
1101 .index = UINT32_MAX,
1102 };
1103 }
1104 }
1105 num_rt_bindings = stage->key.wm.nr_color_regions;
1106 } else {
1107 /* Setup a null render target */
1108 rt_bindings[0] = (struct anv_pipeline_binding) {
1109 .set = ANV_DESCRIPTOR_SET_COLOR_ATTACHMENTS,
1110 .index = UINT32_MAX,
1111 };
1112 num_rt_bindings = 1;
1113 }
1114
1115 assert(num_rt_bindings <= MAX_RTS);
1116 assert(stage->bind_map.surface_count == 0);
1117 typed_memcpy(stage->bind_map.surface_to_descriptor,
1118 rt_bindings, num_rt_bindings);
1119 stage->bind_map.surface_count += num_rt_bindings;
1120
1121 /* Now that we've set up the color attachments, we can go through and
1122 * eliminate any shader outputs that map to VK_ATTACHMENT_UNUSED in the
1123 * hopes that dead code can clean them up in this and any earlier shader
1124 * stages.
1125 */
1126 nir_function_impl *impl = nir_shader_get_entrypoint(stage->nir);
1127 bool deleted_output = false;
1128 nir_foreach_shader_out_variable_safe(var, stage->nir) {
1129 /* TODO: We don't delete depth/stencil writes. We probably could if the
1130 * subpass doesn't have a depth/stencil attachment.
1131 */
1132 if (var->data.location < FRAG_RESULT_DATA0)
1133 continue;
1134
1135 const unsigned rt = var->data.location - FRAG_RESULT_DATA0;
1136
1137 /* If this is the RT at location 0 and we have alpha to coverage
1138 * enabled we still need that write because it will affect the coverage
1139 * mask even if it's never written to a color target.
1140 */
1141 if (rt == 0 && stage->key.wm.alpha_to_coverage)
1142 continue;
1143
1144 const unsigned array_len =
1145 glsl_type_is_array(var->type) ? glsl_get_length(var->type) : 1;
1146 assert(rt + array_len <= MAX_RTS);
1147
1148 if (rt >= MAX_RTS || !(stage->key.wm.color_outputs_valid &
1149 BITFIELD_RANGE(rt, array_len))) {
1150 deleted_output = true;
1151 var->data.mode = nir_var_function_temp;
1152 exec_node_remove(&var->node);
1153 exec_list_push_tail(&impl->locals, &var->node);
1154 }
1155 }
1156
1157 if (deleted_output)
1158 nir_fixup_deref_modes(stage->nir);
1159
1160 /* Initially the valid outputs value is based on the renderpass color
1161 * attachments (see populate_wm_prog_key()). Now that we've potentially
1162 * deleted variables that map to unused attachments, we need to update the
1163 * valid outputs for the backend compiler based on what output variables
1164 * are actually used. */
1165 stage->key.wm.color_outputs_valid = 0;
1166 nir_foreach_shader_out_variable_safe(var, stage->nir) {
1167 if (var->data.location < FRAG_RESULT_DATA0)
1168 continue;
1169
1170 const unsigned rt = var->data.location - FRAG_RESULT_DATA0;
1171 const unsigned array_len =
1172 glsl_type_is_array(var->type) ? glsl_get_length(var->type) : 1;
1173 assert(rt + array_len <= MAX_RTS);
1174
1175 stage->key.wm.color_outputs_valid |= BITFIELD_RANGE(rt, array_len);
1176 }
1177
1178 /* We stored the number of subpass color attachments in nr_color_regions
1179 * when calculating the key for caching. Now that we've computed the bind
1180 * map, we can reduce this to the actual max before we go into the back-end
1181 * compiler.
1182 */
1183 stage->key.wm.nr_color_regions =
1184 util_last_bit(stage->key.wm.color_outputs_valid);
1185 }
1186
1187 static void
1188 anv_pipeline_compile_fs(const struct brw_compiler *compiler,
1189 void *mem_ctx,
1190 struct anv_device *device,
1191 struct anv_pipeline_stage *fs_stage,
1192 struct anv_pipeline_stage *prev_stage)
1193 {
1194 /* TODO: we could set this to 0 based on the information in nir_shader, but
1195 * we need this before we call spirv_to_nir.
1196 */
1197 assert(prev_stage);
1198 fs_stage->key.wm.input_slots_valid =
1199 prev_stage->prog_data.vue.vue_map.slots_valid;
1200
1201 struct brw_compile_fs_params params = {
1202 .nir = fs_stage->nir,
1203 .key = &fs_stage->key.wm,
1204 .prog_data = &fs_stage->prog_data.wm,
1205
1206 .allow_spilling = true,
1207 .stats = fs_stage->stats,
1208 .log_data = device,
1209 };
1210
1211 fs_stage->code = brw_compile_fs(compiler, mem_ctx, &params);
1212
1213 fs_stage->num_stats = (uint32_t)fs_stage->prog_data.wm.dispatch_8 +
1214 (uint32_t)fs_stage->prog_data.wm.dispatch_16 +
1215 (uint32_t)fs_stage->prog_data.wm.dispatch_32;
1216
1217 if (fs_stage->key.wm.color_outputs_valid == 0 &&
1218 !fs_stage->prog_data.wm.has_side_effects &&
1219 !fs_stage->prog_data.wm.uses_omask &&
1220 !fs_stage->key.wm.alpha_to_coverage &&
1221 !fs_stage->prog_data.wm.uses_kill &&
1222 fs_stage->prog_data.wm.computed_depth_mode == BRW_PSCDEPTH_OFF &&
1223 !fs_stage->prog_data.wm.computed_stencil) {
1224 /* This fragment shader has no outputs and no side effects. Go ahead
1225 * and return the code pointer so we don't accidentally think the
1226 * compile failed but zero out prog_data which will set program_size to
1227 * zero and disable the stage.
1228 */
1229 memset(&fs_stage->prog_data, 0, sizeof(fs_stage->prog_data));
1230 }
1231 }
1232
1233 static void
1234 anv_pipeline_add_executable(struct anv_pipeline *pipeline,
1235 struct anv_pipeline_stage *stage,
1236 struct brw_compile_stats *stats,
1237 uint32_t code_offset)
1238 {
1239 char *nir = NULL;
1240 if (stage->nir &&
1241 (pipeline->flags &
1242 VK_PIPELINE_CREATE_CAPTURE_INTERNAL_REPRESENTATIONS_BIT_KHR)) {
1243 nir = nir_shader_as_str(stage->nir, pipeline->mem_ctx);
1244 }
1245
1246 char *disasm = NULL;
1247 if (stage->code &&
1248 (pipeline->flags &
1249 VK_PIPELINE_CREATE_CAPTURE_INTERNAL_REPRESENTATIONS_BIT_KHR)) {
1250 char *stream_data = NULL;
1251 size_t stream_size = 0;
1252 FILE *stream = open_memstream(&stream_data, &stream_size);
1253
1254 uint32_t push_size = 0;
1255 for (unsigned i = 0; i < 4; i++)
1256 push_size += stage->bind_map.push_ranges[i].length;
1257 if (push_size > 0) {
1258 fprintf(stream, "Push constant ranges:\n");
1259 for (unsigned i = 0; i < 4; i++) {
1260 if (stage->bind_map.push_ranges[i].length == 0)
1261 continue;
1262
1263 fprintf(stream, " RANGE%d (%dB): ", i,
1264 stage->bind_map.push_ranges[i].length * 32);
1265
1266 switch (stage->bind_map.push_ranges[i].set) {
1267 case ANV_DESCRIPTOR_SET_NULL:
1268 fprintf(stream, "NULL");
1269 break;
1270
1271 case ANV_DESCRIPTOR_SET_PUSH_CONSTANTS:
1272 fprintf(stream, "Vulkan push constants and API params");
1273 break;
1274
1275 case ANV_DESCRIPTOR_SET_DESCRIPTORS:
1276 fprintf(stream, "Descriptor buffer for set %d (start=%dB)",
1277 stage->bind_map.push_ranges[i].index,
1278 stage->bind_map.push_ranges[i].start * 32);
1279 break;
1280
1281 case ANV_DESCRIPTOR_SET_NUM_WORK_GROUPS:
1282 unreachable("gl_NumWorkgroups is never pushed");
1283
1284 case ANV_DESCRIPTOR_SET_SHADER_CONSTANTS:
1285 fprintf(stream, "Inline shader constant data (start=%dB)",
1286 stage->bind_map.push_ranges[i].start * 32);
1287 break;
1288
1289 case ANV_DESCRIPTOR_SET_COLOR_ATTACHMENTS:
1290 unreachable("Color attachments can't be pushed");
1291
1292 default:
1293 fprintf(stream, "UBO (set=%d binding=%d start=%dB)",
1294 stage->bind_map.push_ranges[i].set,
1295 stage->bind_map.push_ranges[i].index,
1296 stage->bind_map.push_ranges[i].start * 32);
1297 break;
1298 }
1299 fprintf(stream, "\n");
1300 }
1301 fprintf(stream, "\n");
1302 }
1303
1304 /* Creating this is far cheaper than it looks. It's perfectly fine to
1305 * do it for every binary.
1306 */
1307 intel_disassemble(&pipeline->device->info,
1308 stage->code, code_offset, stream);
1309
1310 fclose(stream);
1311
1312 /* Copy it to a ralloc'd thing */
1313 disasm = ralloc_size(pipeline->mem_ctx, stream_size + 1);
1314 memcpy(disasm, stream_data, stream_size);
1315 disasm[stream_size] = 0;
1316
1317 free(stream_data);
1318 }
1319
1320 const struct anv_pipeline_executable exe = {
1321 .stage = stage->stage,
1322 .stats = *stats,
1323 .nir = nir,
1324 .disasm = disasm,
1325 };
1326 util_dynarray_append(&pipeline->executables,
1327 struct anv_pipeline_executable, exe);
1328 }
1329
1330 static void
1331 anv_pipeline_add_executables(struct anv_pipeline *pipeline,
1332 struct anv_pipeline_stage *stage,
1333 struct anv_shader_bin *bin)
1334 {
1335 if (stage->stage == MESA_SHADER_FRAGMENT) {
1336 /* We pull the prog data and stats out of the anv_shader_bin because
1337 * the anv_pipeline_stage may not be fully populated if we successfully
1338 * looked up the shader in a cache.
1339 */
1340 const struct brw_wm_prog_data *wm_prog_data =
1341 (const struct brw_wm_prog_data *)bin->prog_data;
1342 struct brw_compile_stats *stats = bin->stats;
1343
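/* A fragment shader can have up to three compiled variants (SIMD8, SIMD16
 * and SIMD32); report one executable per enabled dispatch width, each with
 * its own stats and code offset for disassembly.
 */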
1344 if (wm_prog_data->dispatch_8) {
1345 anv_pipeline_add_executable(pipeline, stage, stats++, 0);
1346 }
1347
1348 if (wm_prog_data->dispatch_16) {
1349 anv_pipeline_add_executable(pipeline, stage, stats++,
1350 wm_prog_data->prog_offset_16);
1351 }
1352
1353 if (wm_prog_data->dispatch_32) {
1354 anv_pipeline_add_executable(pipeline, stage, stats++,
1355 wm_prog_data->prog_offset_32);
1356 }
1357 } else {
1358 anv_pipeline_add_executable(pipeline, stage, bin->stats, 0);
1359 }
1360 }
1361
1362 static enum brw_subgroup_size_type
1363 anv_subgroup_size_type(gl_shader_stage stage,
1364 VkPipelineShaderStageCreateFlags flags,
1365 const VkPipelineShaderStageRequiredSubgroupSizeCreateInfoEXT *rss_info)
1366 {
1367 enum brw_subgroup_size_type subgroup_size_type;
1368
1369 if (rss_info) {
1370 assert(stage == MESA_SHADER_COMPUTE);
1371 /* These enum values are expressly chosen to be equal to the subgroup
1372 * size that they require.
1373 */
1374 assert(rss_info->requiredSubgroupSize == 8 ||
1375 rss_info->requiredSubgroupSize == 16 ||
1376 rss_info->requiredSubgroupSize == 32);
1377 subgroup_size_type = rss_info->requiredSubgroupSize;
1378 } else if (flags & VK_PIPELINE_SHADER_STAGE_CREATE_ALLOW_VARYING_SUBGROUP_SIZE_BIT_EXT) {
1379 subgroup_size_type = BRW_SUBGROUP_SIZE_VARYING;
1380 } else if (flags & VK_PIPELINE_SHADER_STAGE_CREATE_REQUIRE_FULL_SUBGROUPS_BIT_EXT) {
1381 assert(stage == MESA_SHADER_COMPUTE);
1382 /* If the client expressly requests full subgroups but neither
1383 * specifies a subgroup size nor allows varying subgroups, we need to
1384 * pick one. So we specify the API value of 32. Performance will
1385 * likely be terrible in this case but there's nothing we can do about
1386 * that. The client should have chosen a size.
1387 */
1388 subgroup_size_type = BRW_SUBGROUP_SIZE_REQUIRE_32;
1389 } else {
1390 subgroup_size_type = BRW_SUBGROUP_SIZE_API_CONSTANT;
1391 }
1392
1393 return subgroup_size_type;
1394 }
1395
1396 static void
1397 anv_pipeline_init_from_cached_graphics(struct anv_graphics_pipeline *pipeline)
1398 {
1399 /* TODO: Cache this pipeline-wide information. */
1400
1401 if (anv_pipeline_is_primitive(pipeline)) {
1402 /* Primitive replication depends on information from all the shaders.
1403 * Recover this bit from the fact that we have more than one position slot
1404 * in the vertex shader when using it.
1405 */
1406 assert(pipeline->active_stages & VK_SHADER_STAGE_VERTEX_BIT);
1407 int pos_slots = 0;
1408 const struct brw_vue_prog_data *vue_prog_data =
1409 (const void *) pipeline->shaders[MESA_SHADER_VERTEX]->prog_data;
1410 const struct brw_vue_map *vue_map = &vue_prog_data->vue_map;
1411 for (int i = 0; i < vue_map->num_slots; i++) {
1412 if (vue_map->slot_to_varying[i] == VARYING_SLOT_POS)
1413 pos_slots++;
1414 }
1415 pipeline->use_primitive_replication = pos_slots > 1;
1416 }
1417 }
1418
1419 static VkResult
1420 anv_pipeline_compile_graphics(struct anv_graphics_pipeline *pipeline,
1421 struct anv_pipeline_cache *cache,
1422 const VkGraphicsPipelineCreateInfo *info)
1423 {
1424 VkPipelineCreationFeedbackEXT pipeline_feedback = {
1425 .flags = VK_PIPELINE_CREATION_FEEDBACK_VALID_BIT_EXT,
1426 };
1427 int64_t pipeline_start = os_time_get_nano();
1428
1429 const struct brw_compiler *compiler = pipeline->base.device->physical->compiler;
1430 struct anv_pipeline_stage stages[MESA_SHADER_STAGES] = {};
1431
1432 /* Information on which states are considered dynamic. */
1433 const VkPipelineDynamicStateCreateInfo *dyn_info =
1434 info->pDynamicState;
1435 uint32_t dynamic_states = 0;
1436 if (dyn_info) {
1437 for (unsigned i = 0; i < dyn_info->dynamicStateCount; i++)
1438 dynamic_states |=
1439 anv_cmd_dirty_bit_for_vk_dynamic_state(dyn_info->pDynamicStates[i]);
1440 }
1441
1442 VkResult result;
1443 for (uint32_t i = 0; i < info->stageCount; i++) {
1444 const VkPipelineShaderStageCreateInfo *sinfo = &info->pStages[i];
1445 gl_shader_stage stage = vk_to_mesa_shader_stage(sinfo->stage);
1446
1447 int64_t stage_start = os_time_get_nano();
1448
1449 stages[stage].stage = stage;
1450 stages[stage].module = vk_shader_module_from_handle(sinfo->module);
1451 stages[stage].entrypoint = sinfo->pName;
1452 stages[stage].spec_info = sinfo->pSpecializationInfo;
1453 anv_pipeline_hash_shader(stages[stage].module,
1454 stages[stage].entrypoint,
1455 stage,
1456 stages[stage].spec_info,
1457 stages[stage].shader_sha1);
1458
1459 enum brw_subgroup_size_type subgroup_size_type =
1460 anv_subgroup_size_type(stage, sinfo->flags, NULL);
1461
1462 const struct intel_device_info *devinfo = &pipeline->base.device->info;
1463 switch (stage) {
1464 case MESA_SHADER_VERTEX:
1465 populate_vs_prog_key(devinfo, subgroup_size_type,
1466 pipeline->base.device->robust_buffer_access,
1467 &stages[stage].key.vs);
1468 break;
1469 case MESA_SHADER_TESS_CTRL:
1470 populate_tcs_prog_key(devinfo, subgroup_size_type,
1471 pipeline->base.device->robust_buffer_access,
1472 info->pTessellationState->patchControlPoints,
1473 &stages[stage].key.tcs);
1474 break;
1475 case MESA_SHADER_TESS_EVAL:
1476 populate_tes_prog_key(devinfo, subgroup_size_type,
1477 pipeline->base.device->robust_buffer_access,
1478 &stages[stage].key.tes);
1479 break;
1480 case MESA_SHADER_GEOMETRY:
1481 populate_gs_prog_key(devinfo, subgroup_size_type,
1482 pipeline->base.device->robust_buffer_access,
1483 &stages[stage].key.gs);
1484 break;
1485 case MESA_SHADER_FRAGMENT: {
1486 const bool raster_enabled =
1487 !info->pRasterizationState->rasterizerDiscardEnable ||
1488 dynamic_states & ANV_CMD_DIRTY_DYNAMIC_RASTERIZER_DISCARD_ENABLE;
1489 populate_wm_prog_key(pipeline, subgroup_size_type,
1490 pipeline->base.device->robust_buffer_access,
1491 pipeline->subpass,
1492 raster_enabled ? info->pMultisampleState : NULL,
1493 vk_find_struct_const(info->pNext,
1494 PIPELINE_FRAGMENT_SHADING_RATE_STATE_CREATE_INFO_KHR),
1495 &stages[stage].key.wm);
1496 break;
1497 }
1498 default:
1499 unreachable("Invalid graphics shader stage");
1500 }
1501
1502 stages[stage].feedback.duration += os_time_get_nano() - stage_start;
1503 stages[stage].feedback.flags |= VK_PIPELINE_CREATION_FEEDBACK_VALID_BIT_EXT;
1504 }
1505
1506 assert(pipeline->active_stages & VK_SHADER_STAGE_VERTEX_BIT);
1507
1508 ANV_FROM_HANDLE(anv_pipeline_layout, layout, info->layout);
1509
1510 unsigned char sha1[20];
1511 anv_pipeline_hash_graphics(pipeline, layout, stages, sha1);
1512
1513 for (unsigned s = 0; s < ARRAY_SIZE(pipeline->shaders); s++) {
1514 if (!stages[s].entrypoint)
1515 continue;
1516
1517 stages[s].cache_key.stage = s;
1518 memcpy(stages[s].cache_key.sha1, sha1, sizeof(sha1));
1519 }
1520
1521 const bool skip_cache_lookup =
1522 (pipeline->base.flags & VK_PIPELINE_CREATE_CAPTURE_INTERNAL_REPRESENTATIONS_BIT_KHR);
1523
1524 if (!skip_cache_lookup) {
1525 unsigned found = 0;
1526 unsigned cache_hits = 0;
1527 for (unsigned s = 0; s < ARRAY_SIZE(pipeline->shaders); s++) {
1528 if (!stages[s].entrypoint)
1529 continue;
1530
1531 int64_t stage_start = os_time_get_nano();
1532
1533 bool cache_hit;
1534 struct anv_shader_bin *bin =
1535 anv_device_search_for_kernel(pipeline->base.device, cache,
1536 &stages[s].cache_key,
1537 sizeof(stages[s].cache_key), &cache_hit);
1538 if (bin) {
1539 found++;
1540 pipeline->shaders[s] = bin;
1541 }
1542
1543 if (cache_hit) {
1544 cache_hits++;
1545 stages[s].feedback.flags |=
1546 VK_PIPELINE_CREATION_FEEDBACK_APPLICATION_PIPELINE_CACHE_HIT_BIT_EXT;
1547 }
1548 stages[s].feedback.duration += os_time_get_nano() - stage_start;
1549 }
1550
1551 if (found == __builtin_popcount(pipeline->active_stages)) {
1552 if (cache_hits == found) {
1553 pipeline_feedback.flags |=
1554 VK_PIPELINE_CREATION_FEEDBACK_APPLICATION_PIPELINE_CACHE_HIT_BIT_EXT;
1555 }
1556 /* We found all our shaders in the cache. We're done. */
1557 for (unsigned s = 0; s < ARRAY_SIZE(pipeline->shaders); s++) {
1558 if (!stages[s].entrypoint)
1559 continue;
1560
1561 anv_pipeline_add_executables(&pipeline->base, &stages[s],
1562 pipeline->shaders[s]);
1563 }
1564 anv_pipeline_init_from_cached_graphics(pipeline);
1565 goto done;
1566 } else if (found > 0) {
1567 /* We found some but not all of our shaders. This shouldn't happen
1568 * most of the time but it can if we have a partially populated
1569 * pipeline cache.
1570 */
1571 assert(found < __builtin_popcount(pipeline->active_stages));
1572
1573 vk_perf(VK_LOG_OBJS(&cache->base),
1574 "Found a partial pipeline in the cache. This is "
1575 "most likely caused by an incomplete pipeline cache "
1576 "import or export");
1577
1578 /* We're going to have to recompile anyway, so just throw away our
1579 * references to the shaders in the cache. We'll get them out of the
1580 * cache again as part of the compilation process.
1581 */
1582 for (unsigned s = 0; s < ARRAY_SIZE(pipeline->shaders); s++) {
1583 stages[s].feedback.flags = 0;
1584 if (pipeline->shaders[s]) {
1585 anv_shader_bin_unref(pipeline->base.device, pipeline->shaders[s]);
1586 pipeline->shaders[s] = NULL;
1587 }
1588 }
1589 }
1590 }
1591
1592 if (info->flags & VK_PIPELINE_CREATE_FAIL_ON_PIPELINE_COMPILE_REQUIRED_BIT_EXT)
1593 return VK_PIPELINE_COMPILE_REQUIRED_EXT;
1594
1595 void *pipeline_ctx = ralloc_context(NULL);
1596
1597 for (unsigned s = 0; s < ARRAY_SIZE(pipeline->shaders); s++) {
1598 if (!stages[s].entrypoint)
1599 continue;
1600
1601 int64_t stage_start = os_time_get_nano();
1602
1603 assert(stages[s].stage == s);
1604 assert(pipeline->shaders[s] == NULL);
1605
1606 stages[s].bind_map = (struct anv_pipeline_bind_map) {
1607 .surface_to_descriptor = stages[s].surface_to_descriptor,
1608 .sampler_to_descriptor = stages[s].sampler_to_descriptor
1609 };
1610
1611 stages[s].nir = anv_pipeline_stage_get_nir(&pipeline->base, cache,
1612 pipeline_ctx,
1613 &stages[s]);
1614 if (stages[s].nir == NULL) {
1615 result = vk_error(pipeline, VK_ERROR_UNKNOWN);
1616 goto fail;
1617 }
1618
1619 /* This is rather ugly.
1620 *
1621 * Any variable annotated as interpolated by sample essentially disables
1622 * coarse pixel shading. Unfortunately the CTS tests exercising this set
1623 * the varying value in the previous stage using a constant. Our NIR
1624 * infrastructure is clever enough to look up variables across stages and
1625 * constant fold, removing the variable. So in order to comply with the CTS
1626 * we have to check the variables here.
1627 */
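/* Illustrative GLSL (assumed, not taken from the CTS): a fragment input such as
 *
 *    layout(location = 0) sample in vec4 v;
 *
 * must still force coarse_pixel off even when the previous stage wrote a
 * compile-time constant to it and NIR folded the read away.
 */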
1628 if (s == MESA_SHADER_FRAGMENT) {
1629 nir_foreach_variable_in_list(var, &stages[s].nir->variables) {
1630 if (var->data.sample) {
1631 stages[s].key.wm.coarse_pixel = false;
1632 break;
1633 }
1634 }
1635 }
1636
1637 stages[s].feedback.duration += os_time_get_nano() - stage_start;
1638 }
1639
1640 /* Walk backwards to link */
1641 struct anv_pipeline_stage *next_stage = NULL;
1642 for (int s = ARRAY_SIZE(pipeline->shaders) - 1; s >= 0; s--) {
1643 if (!stages[s].entrypoint)
1644 continue;
1645
1646 switch (s) {
1647 case MESA_SHADER_VERTEX:
1648 anv_pipeline_link_vs(compiler, &stages[s], next_stage);
1649 break;
1650 case MESA_SHADER_TESS_CTRL:
1651 anv_pipeline_link_tcs(compiler, &stages[s], next_stage);
1652 break;
1653 case MESA_SHADER_TESS_EVAL:
1654 anv_pipeline_link_tes(compiler, &stages[s], next_stage);
1655 break;
1656 case MESA_SHADER_GEOMETRY:
1657 anv_pipeline_link_gs(compiler, &stages[s], next_stage);
1658 break;
1659 case MESA_SHADER_FRAGMENT:
1660 anv_pipeline_link_fs(compiler, &stages[s]);
1661 break;
1662 default:
1663 unreachable("Invalid graphics shader stage");
1664 }
1665
1666 next_stage = &stages[s];
1667 }
1668
1669 if (pipeline->base.device->info.ver >= 12 &&
1670 pipeline->subpass->view_mask != 0) {
1671 /* For some pipelines HW Primitive Replication can be used instead of
1672 * instancing to implement Multiview. This depends on how viewIndex is
1673 * used in all the active shaders, so this check can't be done per
1674 * individual shader.
1675 */
1676 nir_shader *shaders[MESA_SHADER_STAGES] = {};
1677 for (unsigned s = 0; s < MESA_SHADER_STAGES; s++)
1678 shaders[s] = stages[s].nir;
1679
1680 pipeline->use_primitive_replication =
1681 anv_check_for_primitive_replication(shaders, pipeline);
1682 } else {
1683 pipeline->use_primitive_replication = false;
1684 }
1685
1686 struct anv_pipeline_stage *prev_stage = NULL;
1687 for (unsigned s = 0; s < ARRAY_SIZE(pipeline->shaders); s++) {
1688 if (!stages[s].entrypoint)
1689 continue;
1690
1691 int64_t stage_start = os_time_get_nano();
1692
1693 void *stage_ctx = ralloc_context(NULL);
1694
1695 anv_pipeline_lower_nir(&pipeline->base, stage_ctx, &stages[s], layout);
1696
1697 if (prev_stage && compiler->glsl_compiler_options[s].NirOptions->unify_interfaces) {
1698 prev_stage->nir->info.outputs_written |= stages[s].nir->info.inputs_read &
1699 ~(VARYING_BIT_TESS_LEVEL_INNER | VARYING_BIT_TESS_LEVEL_OUTER);
1700 stages[s].nir->info.inputs_read |= prev_stage->nir->info.outputs_written &
1701 ~(VARYING_BIT_TESS_LEVEL_INNER | VARYING_BIT_TESS_LEVEL_OUTER);
1702 prev_stage->nir->info.patch_outputs_written |= stages[s].nir->info.patch_inputs_read;
1703 stages[s].nir->info.patch_inputs_read |= prev_stage->nir->info.patch_outputs_written;
1704 }
1705
1706 ralloc_free(stage_ctx);
1707
1708 stages[s].feedback.duration += os_time_get_nano() - stage_start;
1709
1710 prev_stage = &stages[s];
1711 }
1712
1713 prev_stage = NULL;
1714 for (unsigned s = 0; s < MESA_SHADER_STAGES; s++) {
1715 if (!stages[s].entrypoint)
1716 continue;
1717
1718 int64_t stage_start = os_time_get_nano();
1719
1720 void *stage_ctx = ralloc_context(NULL);
1721
1722 nir_xfb_info *xfb_info = NULL;
1723 if (s == MESA_SHADER_VERTEX ||
1724 s == MESA_SHADER_TESS_EVAL ||
1725 s == MESA_SHADER_GEOMETRY)
1726 xfb_info = nir_gather_xfb_info(stages[s].nir, stage_ctx);
1727
1728 switch (s) {
1729 case MESA_SHADER_VERTEX:
1730 anv_pipeline_compile_vs(compiler, stage_ctx, pipeline,
1731 &stages[s]);
1732 break;
1733 case MESA_SHADER_TESS_CTRL:
1734 anv_pipeline_compile_tcs(compiler, stage_ctx, pipeline->base.device,
1735 &stages[s], prev_stage);
1736 break;
1737 case MESA_SHADER_TESS_EVAL:
1738 anv_pipeline_compile_tes(compiler, stage_ctx, pipeline->base.device,
1739 &stages[s], prev_stage);
1740 break;
1741 case MESA_SHADER_GEOMETRY:
1742 anv_pipeline_compile_gs(compiler, stage_ctx, pipeline->base.device,
1743 &stages[s], prev_stage);
1744 break;
1745 case MESA_SHADER_FRAGMENT:
1746 anv_pipeline_compile_fs(compiler, stage_ctx, pipeline->base.device,
1747 &stages[s], prev_stage);
1748 break;
1749 default:
1750 unreachable("Invalid graphics shader stage");
1751 }
1752 if (stages[s].code == NULL) {
1753 ralloc_free(stage_ctx);
1754 result = vk_error(pipeline->base.device, VK_ERROR_OUT_OF_HOST_MEMORY);
1755 goto fail;
1756 }
1757
1758 anv_nir_validate_push_layout(&stages[s].prog_data.base,
1759 &stages[s].bind_map);
1760
1761 struct anv_shader_bin *bin =
1762 anv_device_upload_kernel(pipeline->base.device, cache, s,
1763 &stages[s].cache_key,
1764 sizeof(stages[s].cache_key),
1765 stages[s].code,
1766 stages[s].prog_data.base.program_size,
1767 &stages[s].prog_data.base,
1768 brw_prog_data_size(s),
1769 stages[s].stats, stages[s].num_stats,
1770 xfb_info, &stages[s].bind_map);
1771 if (!bin) {
1772 ralloc_free(stage_ctx);
1773 result = vk_error(pipeline, VK_ERROR_OUT_OF_HOST_MEMORY);
1774 goto fail;
1775 }
1776
1777 anv_pipeline_add_executables(&pipeline->base, &stages[s], bin);
1778
1779 pipeline->shaders[s] = bin;
1780 ralloc_free(stage_ctx);
1781
1782 stages[s].feedback.duration += os_time_get_nano() - stage_start;
1783
1784 prev_stage = &stages[s];
1785 }
1786
1787 ralloc_free(pipeline_ctx);
1788
1789 done:
1790
1791 if (pipeline->shaders[MESA_SHADER_FRAGMENT] &&
1792 pipeline->shaders[MESA_SHADER_FRAGMENT]->prog_data->program_size == 0) {
1793 /* This can happen if we decided to implicitly disable the fragment
1794 * shader. See anv_pipeline_compile_fs().
1795 */
1796 anv_shader_bin_unref(pipeline->base.device,
1797 pipeline->shaders[MESA_SHADER_FRAGMENT]);
1798 pipeline->shaders[MESA_SHADER_FRAGMENT] = NULL;
1799 pipeline->active_stages &= ~VK_SHADER_STAGE_FRAGMENT_BIT;
1800 }
1801
1802 pipeline_feedback.duration = os_time_get_nano() - pipeline_start;
1803
1804 const VkPipelineCreationFeedbackCreateInfoEXT *create_feedback =
1805 vk_find_struct_const(info->pNext, PIPELINE_CREATION_FEEDBACK_CREATE_INFO_EXT);
1806 if (create_feedback) {
1807 *create_feedback->pPipelineCreationFeedback = pipeline_feedback;
1808
1809 assert(info->stageCount == create_feedback->pipelineStageCreationFeedbackCount);
1810 for (uint32_t i = 0; i < info->stageCount; i++) {
1811 gl_shader_stage s = vk_to_mesa_shader_stage(info->pStages[i].stage);
1812 create_feedback->pPipelineStageCreationFeedbacks[i] = stages[s].feedback;
1813 }
1814 }
1815
1816 return VK_SUCCESS;
1817
1818 fail:
1819 ralloc_free(pipeline_ctx);
1820
1821 for (unsigned s = 0; s < ARRAY_SIZE(pipeline->shaders); s++) {
1822 if (pipeline->shaders[s])
1823 anv_shader_bin_unref(pipeline->base.device, pipeline->shaders[s]);
1824 }
1825
1826 return result;
1827 }
1828
1829 VkResult
1830 anv_pipeline_compile_cs(struct anv_compute_pipeline *pipeline,
1831 struct anv_pipeline_cache *cache,
1832 const VkComputePipelineCreateInfo *info,
1833 const struct vk_shader_module *module,
1834 const char *entrypoint,
1835 const VkSpecializationInfo *spec_info)
1836 {
1837 VkPipelineCreationFeedbackEXT pipeline_feedback = {
1838 .flags = VK_PIPELINE_CREATION_FEEDBACK_VALID_BIT_EXT,
1839 };
1840 int64_t pipeline_start = os_time_get_nano();
1841
1842 const struct brw_compiler *compiler = pipeline->base.device->physical->compiler;
1843
1844 struct anv_pipeline_stage stage = {
1845 .stage = MESA_SHADER_COMPUTE,
1846 .module = module,
1847 .entrypoint = entrypoint,
1848 .spec_info = spec_info,
1849 .cache_key = {
1850 .stage = MESA_SHADER_COMPUTE,
1851 },
1852 .feedback = {
1853 .flags = VK_PIPELINE_CREATION_FEEDBACK_VALID_BIT_EXT,
1854 },
1855 };
1856 anv_pipeline_hash_shader(stage.module,
1857 stage.entrypoint,
1858 MESA_SHADER_COMPUTE,
1859 stage.spec_info,
1860 stage.shader_sha1);
1861
1862 struct anv_shader_bin *bin = NULL;
1863
1864 const VkPipelineShaderStageRequiredSubgroupSizeCreateInfoEXT *rss_info =
1865 vk_find_struct_const(info->stage.pNext,
1866 PIPELINE_SHADER_STAGE_REQUIRED_SUBGROUP_SIZE_CREATE_INFO_EXT);
1867
1868 const enum brw_subgroup_size_type subgroup_size_type =
1869 anv_subgroup_size_type(MESA_SHADER_COMPUTE, info->stage.flags, rss_info);
1870
1871 populate_cs_prog_key(&pipeline->base.device->info, subgroup_size_type,
1872 pipeline->base.device->robust_buffer_access,
1873 &stage.key.cs);
1874
1875 ANV_FROM_HANDLE(anv_pipeline_layout, layout, info->layout);
1876
1877 const bool skip_cache_lookup =
1878 (pipeline->base.flags & VK_PIPELINE_CREATE_CAPTURE_INTERNAL_REPRESENTATIONS_BIT_KHR);
1879
1880 anv_pipeline_hash_compute(pipeline, layout, &stage, stage.cache_key.sha1);
1881
1882 bool cache_hit = false;
1883 if (!skip_cache_lookup) {
1884 bin = anv_device_search_for_kernel(pipeline->base.device, cache,
1885 &stage.cache_key,
1886 sizeof(stage.cache_key),
1887 &cache_hit);
1888 }
1889
1890 if (bin == NULL &&
1891 (info->flags & VK_PIPELINE_CREATE_FAIL_ON_PIPELINE_COMPILE_REQUIRED_BIT_EXT))
1892 return VK_PIPELINE_COMPILE_REQUIRED_EXT;
1893
1894 void *mem_ctx = ralloc_context(NULL);
1895 if (bin == NULL) {
1896 int64_t stage_start = os_time_get_nano();
1897
1898 stage.bind_map = (struct anv_pipeline_bind_map) {
1899 .surface_to_descriptor = stage.surface_to_descriptor,
1900 .sampler_to_descriptor = stage.sampler_to_descriptor
1901 };
1902
1903 /* Set up a binding for the gl_NumWorkGroups */
1904 stage.bind_map.surface_count = 1;
1905 stage.bind_map.surface_to_descriptor[0] = (struct anv_pipeline_binding) {
1906 .set = ANV_DESCRIPTOR_SET_NUM_WORK_GROUPS,
1907 };
1908
1909 stage.nir = anv_pipeline_stage_get_nir(&pipeline->base, cache, mem_ctx, &stage);
1910 if (stage.nir == NULL) {
1911 ralloc_free(mem_ctx);
1912 return vk_error(pipeline, VK_ERROR_UNKNOWN);
1913 }
1914
1915 NIR_PASS_V(stage.nir, anv_nir_add_base_work_group_id);
1916
1917 anv_pipeline_lower_nir(&pipeline->base, mem_ctx, &stage, layout);
1918
1919 NIR_PASS_V(stage.nir, brw_nir_lower_cs_intrinsics);
1920
1921 stage.num_stats = 1;
1922
1923 struct brw_compile_cs_params params = {
1924 .nir = stage.nir,
1925 .key = &stage.key.cs,
1926 .prog_data = &stage.prog_data.cs,
1927 .stats = stage.stats,
1928 .log_data = pipeline->base.device,
1929 };
1930
1931 stage.code = brw_compile_cs(compiler, mem_ctx, &params);
1932 if (stage.code == NULL) {
1933 ralloc_free(mem_ctx);
1934 return vk_error(pipeline, VK_ERROR_OUT_OF_HOST_MEMORY);
1935 }
1936
1937 anv_nir_validate_push_layout(&stage.prog_data.base, &stage.bind_map);
1938
1939 if (!stage.prog_data.cs.uses_num_work_groups) {
1940 assert(stage.bind_map.surface_to_descriptor[0].set ==
1941 ANV_DESCRIPTOR_SET_NUM_WORK_GROUPS);
1942 stage.bind_map.surface_to_descriptor[0].set = ANV_DESCRIPTOR_SET_NULL;
1943 }
1944
1945 const unsigned code_size = stage.prog_data.base.program_size;
1946 bin = anv_device_upload_kernel(pipeline->base.device, cache,
1947 MESA_SHADER_COMPUTE,
1948 &stage.cache_key, sizeof(stage.cache_key),
1949 stage.code, code_size,
1950 &stage.prog_data.base,
1951 sizeof(stage.prog_data.cs),
1952 stage.stats, stage.num_stats,
1953 NULL, &stage.bind_map);
1954 if (!bin) {
1955 ralloc_free(mem_ctx);
1956 return vk_error(pipeline, VK_ERROR_OUT_OF_HOST_MEMORY);
1957 }
1958
1959 stage.feedback.duration = os_time_get_nano() - stage_start;
1960 }
1961
1962 anv_pipeline_add_executables(&pipeline->base, &stage, bin);
1963
1964 ralloc_free(mem_ctx);
1965
1966 if (cache_hit) {
1967 stage.feedback.flags |=
1968 VK_PIPELINE_CREATION_FEEDBACK_APPLICATION_PIPELINE_CACHE_HIT_BIT_EXT;
1969 pipeline_feedback.flags |=
1970 VK_PIPELINE_CREATION_FEEDBACK_APPLICATION_PIPELINE_CACHE_HIT_BIT_EXT;
1971 }
1972 pipeline_feedback.duration = os_time_get_nano() - pipeline_start;
1973
1974 const VkPipelineCreationFeedbackCreateInfoEXT *create_feedback =
1975 vk_find_struct_const(info->pNext, PIPELINE_CREATION_FEEDBACK_CREATE_INFO_EXT);
1976 if (create_feedback) {
1977 *create_feedback->pPipelineCreationFeedback = pipeline_feedback;
1978
1979 assert(create_feedback->pipelineStageCreationFeedbackCount == 1);
1980 create_feedback->pPipelineStageCreationFeedbacks[0] = stage.feedback;
1981 }
1982
1983 pipeline->cs = bin;
1984
1985 return VK_SUCCESS;
1986 }
1987
1988 /**
1989 * Copy pipeline state not marked as dynamic.
1990 * Dynamic state is pipeline state which hasn't been provided at pipeline
1991 * creation time, but is dynamically provided afterwards using various
1992 * vkCmdSet* functions.
1993 *
1994 * The set of state considered "non_dynamic" is determined by the pieces of
1995 * state that have their corresponding VkDynamicState enums omitted from
1996 * VkPipelineDynamicStateCreateInfo::pDynamicStates.
1997 *
1998 * @param[out] pipeline Destination non_dynamic state.
1999 * @param[in] pCreateInfo Source of non_dynamic state to be copied.
2000 */
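/* Illustrative example: if VK_DYNAMIC_STATE_LINE_WIDTH appears in
 * pDynamicStates, the lineWidth value in pRasterizationState is not copied
 * here; the app is expected to supply it later via vkCmdSetLineWidth().
 */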
2001 static void
2002 copy_non_dynamic_state(struct anv_graphics_pipeline *pipeline,
2003 const VkGraphicsPipelineCreateInfo *pCreateInfo)
2004 {
2005 anv_cmd_dirty_mask_t states = ANV_CMD_DIRTY_DYNAMIC_ALL;
2006 struct anv_subpass *subpass = pipeline->subpass;
2007
2008 pipeline->dynamic_state = default_dynamic_state;
2009
2010 states &= ~pipeline->dynamic_states;
2011
2012 struct anv_dynamic_state *dynamic = &pipeline->dynamic_state;
2013
2014 bool raster_discard =
2015 pCreateInfo->pRasterizationState->rasterizerDiscardEnable &&
2016 !(pipeline->dynamic_states & ANV_CMD_DIRTY_DYNAMIC_RASTERIZER_DISCARD_ENABLE);
2017
2018 /* Section 9.2 of the Vulkan 1.0.15 spec says:
2019 *
2020 * pViewportState is [...] NULL if the pipeline
2021 * has rasterization disabled.
2022 */
2023 if (!raster_discard) {
2024 assert(pCreateInfo->pViewportState);
2025
2026 dynamic->viewport.count = pCreateInfo->pViewportState->viewportCount;
2027 if (states & ANV_CMD_DIRTY_DYNAMIC_VIEWPORT) {
2028 typed_memcpy(dynamic->viewport.viewports,
2029 pCreateInfo->pViewportState->pViewports,
2030 pCreateInfo->pViewportState->viewportCount);
2031 }
2032
2033 dynamic->scissor.count = pCreateInfo->pViewportState->scissorCount;
2034 if (states & ANV_CMD_DIRTY_DYNAMIC_SCISSOR) {
2035 typed_memcpy(dynamic->scissor.scissors,
2036 pCreateInfo->pViewportState->pScissors,
2037 pCreateInfo->pViewportState->scissorCount);
2038 }
2039 }
2040
2041 if (states & ANV_CMD_DIRTY_DYNAMIC_LINE_WIDTH) {
2042 assert(pCreateInfo->pRasterizationState);
2043 dynamic->line_width = pCreateInfo->pRasterizationState->lineWidth;
2044 }
2045
2046 if (states & ANV_CMD_DIRTY_DYNAMIC_DEPTH_BIAS) {
2047 assert(pCreateInfo->pRasterizationState);
2048 dynamic->depth_bias.bias =
2049 pCreateInfo->pRasterizationState->depthBiasConstantFactor;
2050 dynamic->depth_bias.clamp =
2051 pCreateInfo->pRasterizationState->depthBiasClamp;
2052 dynamic->depth_bias.slope =
2053 pCreateInfo->pRasterizationState->depthBiasSlopeFactor;
2054 }
2055
2056 if (states & ANV_CMD_DIRTY_DYNAMIC_CULL_MODE) {
2057 assert(pCreateInfo->pRasterizationState);
2058 dynamic->cull_mode =
2059 pCreateInfo->pRasterizationState->cullMode;
2060 }
2061
2062 if (states & ANV_CMD_DIRTY_DYNAMIC_FRONT_FACE) {
2063 assert(pCreateInfo->pRasterizationState);
2064 dynamic->front_face =
2065 pCreateInfo->pRasterizationState->frontFace;
2066 }
2067
2068 if ((states & ANV_CMD_DIRTY_DYNAMIC_PRIMITIVE_TOPOLOGY) &&
2069 (pipeline->active_stages & VK_SHADER_STAGE_VERTEX_BIT)) {
2070 assert(pCreateInfo->pInputAssemblyState);
2071 dynamic->primitive_topology = pCreateInfo->pInputAssemblyState->topology;
2072 }
2073
2074 if (states & ANV_CMD_DIRTY_DYNAMIC_RASTERIZER_DISCARD_ENABLE) {
2075 assert(pCreateInfo->pRasterizationState);
2076 dynamic->raster_discard =
2077 pCreateInfo->pRasterizationState->rasterizerDiscardEnable;
2078 }
2079
2080 if (states & ANV_CMD_DIRTY_DYNAMIC_DEPTH_BIAS_ENABLE) {
2081 assert(pCreateInfo->pRasterizationState);
2082 dynamic->depth_bias_enable =
2083 pCreateInfo->pRasterizationState->depthBiasEnable;
2084 }
2085
2086 if ((states & ANV_CMD_DIRTY_DYNAMIC_PRIMITIVE_RESTART_ENABLE) &&
2087 (pipeline->active_stages & VK_SHADER_STAGE_VERTEX_BIT)) {
2088 assert(pCreateInfo->pInputAssemblyState);
2089 dynamic->primitive_restart_enable =
2090 pCreateInfo->pInputAssemblyState->primitiveRestartEnable;
2091 }
2092
2093 /* Section 9.2 of the Vulkan 1.0.15 spec says:
2094 *
2095 * pColorBlendState is [...] NULL if the pipeline has rasterization
2096 * disabled or if the subpass of the render pass the pipeline is
2097 * created against does not use any color attachments.
2098 */
2099 bool uses_color_att = false;
2100 for (unsigned i = 0; i < subpass->color_count; ++i) {
2101 if (subpass->color_attachments[i].attachment != VK_ATTACHMENT_UNUSED) {
2102 uses_color_att = true;
2103 break;
2104 }
2105 }
2106
2107 if (uses_color_att && !raster_discard) {
2108 assert(pCreateInfo->pColorBlendState);
2109
2110 if (states & ANV_CMD_DIRTY_DYNAMIC_BLEND_CONSTANTS)
2111 typed_memcpy(dynamic->blend_constants,
2112 pCreateInfo->pColorBlendState->blendConstants, 4);
2113 }
2114
2115 /* If there is no depthstencil attachment, then don't read
2116 * pDepthStencilState. The Vulkan spec states that pDepthStencilState may
2117 * be NULL in this case. Even if pDepthStencilState is non-NULL, there is
2118 * no need to override the depthstencil defaults in
2119 * anv_pipeline::dynamic_state when there is no depthstencil attachment.
2120 *
2121 * Section 9.2 of the Vulkan 1.0.15 spec says:
2122 *
2123 * pDepthStencilState is [...] NULL if the pipeline has rasterization
2124 * disabled or if the subpass of the render pass the pipeline is created
2125 * against does not use a depth/stencil attachment.
2126 */
2127 if (!raster_discard && subpass->depth_stencil_attachment) {
2128 assert(pCreateInfo->pDepthStencilState);
2129
2130 if (states & ANV_CMD_DIRTY_DYNAMIC_DEPTH_BOUNDS) {
2131 dynamic->depth_bounds.min =
2132 pCreateInfo->pDepthStencilState->minDepthBounds;
2133 dynamic->depth_bounds.max =
2134 pCreateInfo->pDepthStencilState->maxDepthBounds;
2135 }
2136
2137 if (states & ANV_CMD_DIRTY_DYNAMIC_STENCIL_COMPARE_MASK) {
2138 dynamic->stencil_compare_mask.front =
2139 pCreateInfo->pDepthStencilState->front.compareMask;
2140 dynamic->stencil_compare_mask.back =
2141 pCreateInfo->pDepthStencilState->back.compareMask;
2142 }
2143
2144 if (states & ANV_CMD_DIRTY_DYNAMIC_STENCIL_WRITE_MASK) {
2145 dynamic->stencil_write_mask.front =
2146 pCreateInfo->pDepthStencilState->front.writeMask;
2147 dynamic->stencil_write_mask.back =
2148 pCreateInfo->pDepthStencilState->back.writeMask;
2149 }
2150
2151 if (states & ANV_CMD_DIRTY_DYNAMIC_STENCIL_REFERENCE) {
2152 dynamic->stencil_reference.front =
2153 pCreateInfo->pDepthStencilState->front.reference;
2154 dynamic->stencil_reference.back =
2155 pCreateInfo->pDepthStencilState->back.reference;
2156 }
2157
2158 if (states & ANV_CMD_DIRTY_DYNAMIC_DEPTH_TEST_ENABLE) {
2159 dynamic->depth_test_enable =
2160 pCreateInfo->pDepthStencilState->depthTestEnable;
2161 }
2162
2163 if (states & ANV_CMD_DIRTY_DYNAMIC_DEPTH_WRITE_ENABLE) {
2164 dynamic->depth_write_enable =
2165 pCreateInfo->pDepthStencilState->depthWriteEnable;
2166 }
2167
2168 if (states & ANV_CMD_DIRTY_DYNAMIC_DEPTH_COMPARE_OP) {
2169 dynamic->depth_compare_op =
2170 pCreateInfo->pDepthStencilState->depthCompareOp;
2171 }
2172
2173 if (states & ANV_CMD_DIRTY_DYNAMIC_DEPTH_BOUNDS_TEST_ENABLE) {
2174 dynamic->depth_bounds_test_enable =
2175 pCreateInfo->pDepthStencilState->depthBoundsTestEnable;
2176 }
2177
2178 if (states & ANV_CMD_DIRTY_DYNAMIC_STENCIL_TEST_ENABLE) {
2179 dynamic->stencil_test_enable =
2180 pCreateInfo->pDepthStencilState->stencilTestEnable;
2181 }
2182
2183 if (states & ANV_CMD_DIRTY_DYNAMIC_STENCIL_OP) {
2184 const VkPipelineDepthStencilStateCreateInfo *info =
2185 pCreateInfo->pDepthStencilState;
2186 memcpy(&dynamic->stencil_op.front, &info->front,
2187 sizeof(dynamic->stencil_op.front));
2188 memcpy(&dynamic->stencil_op.back, &info->back,
2189 sizeof(dynamic->stencil_op.back));
2190 }
2191 }
2192
2193 const VkPipelineRasterizationLineStateCreateInfoEXT *line_state =
2194 vk_find_struct_const(pCreateInfo->pRasterizationState->pNext,
2195 PIPELINE_RASTERIZATION_LINE_STATE_CREATE_INFO_EXT);
2196 if (!raster_discard && line_state && line_state->stippledLineEnable) {
2197 if (states & ANV_CMD_DIRTY_DYNAMIC_LINE_STIPPLE) {
2198 dynamic->line_stipple.factor = line_state->lineStippleFactor;
2199 dynamic->line_stipple.pattern = line_state->lineStipplePattern;
2200 }
2201 }
2202
2203 const VkPipelineMultisampleStateCreateInfo *ms_info =
2204 pCreateInfo->pRasterizationState->rasterizerDiscardEnable ? NULL :
2205 pCreateInfo->pMultisampleState;
2206 if (states & ANV_CMD_DIRTY_DYNAMIC_SAMPLE_LOCATIONS) {
2207 const VkPipelineSampleLocationsStateCreateInfoEXT *sl_info = ms_info ?
2208 vk_find_struct_const(ms_info, PIPELINE_SAMPLE_LOCATIONS_STATE_CREATE_INFO_EXT) : NULL;
2209
2210 if (sl_info) {
2211 dynamic->sample_locations.samples =
2212 sl_info->sampleLocationsInfo.sampleLocationsCount;
2213 const VkSampleLocationEXT *positions =
2214 sl_info->sampleLocationsInfo.pSampleLocations;
2215 for (uint32_t i = 0; i < dynamic->sample_locations.samples; i++) {
2216 dynamic->sample_locations.locations[i].x = positions[i].x;
2217 dynamic->sample_locations.locations[i].y = positions[i].y;
2218 }
2219 }
2220 }
2221 /* Ensure we always have valid values for sample_locations. */
2222 if (pipeline->base.device->vk.enabled_extensions.EXT_sample_locations &&
2223 dynamic->sample_locations.samples == 0) {
2224 dynamic->sample_locations.samples =
2225 ms_info ? ms_info->rasterizationSamples : 1;
2226 const struct intel_sample_position *positions =
2227 intel_get_sample_positions(dynamic->sample_locations.samples);
2228 for (uint32_t i = 0; i < dynamic->sample_locations.samples; i++) {
2229 dynamic->sample_locations.locations[i].x = positions[i].x;
2230 dynamic->sample_locations.locations[i].y = positions[i].y;
2231 }
2232 }
2233
2234 if (states & ANV_CMD_DIRTY_DYNAMIC_COLOR_BLEND_STATE) {
2235 if (!pCreateInfo->pRasterizationState->rasterizerDiscardEnable &&
2236 uses_color_att) {
2237 assert(pCreateInfo->pColorBlendState);
2238 const VkPipelineColorWriteCreateInfoEXT *color_write_info =
2239 vk_find_struct_const(pCreateInfo->pColorBlendState->pNext,
2240 PIPELINE_COLOR_WRITE_CREATE_INFO_EXT);
2241
2242 if (color_write_info) {
2243 dynamic->color_writes = 0;
2244 for (uint32_t i = 0; i < color_write_info->attachmentCount; i++) {
2245 dynamic->color_writes |=
2246 color_write_info->pColorWriteEnables[i] ? (1u << i) : 0;
2247 }
2248 }
2249 }
2250 }
2251
2252 const VkPipelineFragmentShadingRateStateCreateInfoKHR *fsr_state =
2253 vk_find_struct_const(pCreateInfo->pNext,
2254 PIPELINE_FRAGMENT_SHADING_RATE_STATE_CREATE_INFO_KHR);
2255 if (fsr_state) {
2256 if (states & ANV_CMD_DIRTY_DYNAMIC_SHADING_RATE)
2257 dynamic->fragment_shading_rate = fsr_state->fragmentSize;
2258 }
2259
2260 pipeline->dynamic_state_mask = states;
2261
2262 /* Mark states that can either be dynamic or fully baked into the pipeline.
2263 */
2264 pipeline->static_state_mask = states &
2265 (ANV_CMD_DIRTY_DYNAMIC_SAMPLE_LOCATIONS |
2266 ANV_CMD_DIRTY_DYNAMIC_COLOR_BLEND_STATE |
2267 ANV_CMD_DIRTY_DYNAMIC_SHADING_RATE |
2268 ANV_CMD_DIRTY_DYNAMIC_RASTERIZER_DISCARD_ENABLE |
2269 ANV_CMD_DIRTY_DYNAMIC_LOGIC_OP |
2270 ANV_CMD_DIRTY_DYNAMIC_PRIMITIVE_TOPOLOGY);
2271 }
2272
2273 static void
2274 anv_pipeline_validate_create_info(const VkGraphicsPipelineCreateInfo *info)
2275 {
2276 #ifdef DEBUG
2277 struct anv_render_pass *renderpass = NULL;
2278 struct anv_subpass *subpass = NULL;
2279
2280 /* Assert that all required members of VkGraphicsPipelineCreateInfo are
2281 * present. See the Vulkan 1.0.28 spec, Section 9.2 Graphics Pipelines.
2282 */
2283 assert(info->sType == VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO);
2284
2285 renderpass = anv_render_pass_from_handle(info->renderPass);
2286 assert(renderpass);
2287
2288 assert(info->subpass < renderpass->subpass_count);
2289 subpass = &renderpass->subpasses[info->subpass];
2290
2291 assert(info->stageCount >= 1);
2292 assert(info->pRasterizationState);
2293 if (!info->pRasterizationState->rasterizerDiscardEnable) {
2294 assert(info->pViewportState);
2295 assert(info->pMultisampleState);
2296
2297 if (subpass && subpass->depth_stencil_attachment)
2298 assert(info->pDepthStencilState);
2299
2300 if (subpass && subpass->color_count > 0) {
2301 bool all_color_unused = true;
2302 for (int i = 0; i < subpass->color_count; i++) {
2303 if (subpass->color_attachments[i].attachment != VK_ATTACHMENT_UNUSED)
2304 all_color_unused = false;
2305 }
2306 /* pColorBlendState is ignored if the pipeline has rasterization
2307 * disabled or if the subpass of the render pass the pipeline is
2308 * created against does not use any color attachments.
2309 */
2310 assert(info->pColorBlendState || all_color_unused);
2311 }
2312 }
2313
2314 for (uint32_t i = 0; i < info->stageCount; ++i) {
2315 switch (info->pStages[i].stage) {
2316 case VK_SHADER_STAGE_VERTEX_BIT:
2317 assert(info->pVertexInputState);
2318 assert(info->pInputAssemblyState);
2319 break;
2320 case VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT:
2321 case VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT:
2322 assert(info->pTessellationState);
2323 break;
2324 default:
2325 break;
2326 }
2327 }
2328 #endif
2329 }
2330
2331 /**
2332 * Calculate the desired L3 partitioning based on the current state of the
2333 * pipeline. For now this simply returns the conservative defaults calculated
2334 * by get_default_l3_weights(), but we could probably do better by gathering
2335 * more statistics from the pipeline state (e.g. guess of expected URB usage
2336 * and bound surfaces), or by using feedback from performance counters.
2337 */
2338 void
2339 anv_pipeline_setup_l3_config(struct anv_pipeline *pipeline, bool needs_slm)
2340 {
2341 const struct intel_device_info *devinfo = &pipeline->device->info;
2342
2343 const struct intel_l3_weights w =
2344 intel_get_default_l3_weights(devinfo, true, needs_slm);
2345
2346 pipeline->l3_config = intel_get_l3_config(devinfo, w);
2347 }
2348
2349 static VkLineRasterizationModeEXT
2350 vk_line_rasterization_mode(const VkPipelineRasterizationLineStateCreateInfoEXT *line_info,
2351 const VkPipelineMultisampleStateCreateInfo *ms_info)
2352 {
2353 VkLineRasterizationModeEXT line_mode =
2354 line_info ? line_info->lineRasterizationMode :
2355 VK_LINE_RASTERIZATION_MODE_DEFAULT_EXT;
2356
2357 if (line_mode == VK_LINE_RASTERIZATION_MODE_DEFAULT_EXT) {
2358 if (ms_info && ms_info->rasterizationSamples > 1) {
2359 return VK_LINE_RASTERIZATION_MODE_RECTANGULAR_EXT;
2360 } else {
2361 return VK_LINE_RASTERIZATION_MODE_BRESENHAM_EXT;
2362 }
2363 }
2364
2365 return line_mode;
2366 }
2367
2368 VkResult
2369 anv_graphics_pipeline_init(struct anv_graphics_pipeline *pipeline,
2370 struct anv_device *device,
2371 struct anv_pipeline_cache *cache,
2372 const VkGraphicsPipelineCreateInfo *pCreateInfo,
2373 const VkAllocationCallbacks *alloc)
2374 {
2375 VkResult result;
2376
2377 anv_pipeline_validate_create_info(pCreateInfo);
2378
2379 result = anv_pipeline_init(&pipeline->base, device,
2380 ANV_PIPELINE_GRAPHICS, pCreateInfo->flags,
2381 alloc);
2382 if (result != VK_SUCCESS)
2383 return result;
2384
2385 anv_batch_set_storage(&pipeline->base.batch, ANV_NULL_ADDRESS,
2386 pipeline->batch_data, sizeof(pipeline->batch_data));
2387
2388 ANV_FROM_HANDLE(anv_render_pass, render_pass, pCreateInfo->renderPass);
2389 assert(pCreateInfo->subpass < render_pass->subpass_count);
2390 pipeline->subpass = &render_pass->subpasses[pCreateInfo->subpass];
2391
2392 assert(pCreateInfo->pRasterizationState);
2393
2394 if (pCreateInfo->pDynamicState) {
2395 /* Remove all of the states that are marked as dynamic */
2396 uint32_t count = pCreateInfo->pDynamicState->dynamicStateCount;
2397 for (uint32_t s = 0; s < count; s++) {
2398 pipeline->dynamic_states |= anv_cmd_dirty_bit_for_vk_dynamic_state(
2399 pCreateInfo->pDynamicState->pDynamicStates[s]);
2400 }
2401 }
2402
2403 pipeline->active_stages = 0;
2404 for (uint32_t i = 0; i < pCreateInfo->stageCount; i++)
2405 pipeline->active_stages |= pCreateInfo->pStages[i].stage;
2406
2407 if (pipeline->active_stages & VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT)
2408 pipeline->active_stages |= VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT;
2409
2410 copy_non_dynamic_state(pipeline, pCreateInfo);
2411
2412 pipeline->depth_clamp_enable = pCreateInfo->pRasterizationState->depthClampEnable;
2413
2414 /* Previously we enabled depth clipping when !depthClampEnable.
2415 * DepthClipStateCreateInfo now makes depth clipping explicit so if the
2416 * clipping info is available, use its enable value to determine clipping,
2417 * otherwise fall back to the previous !depthClampEnable logic.
2418 */
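/* Illustrative example: chaining VkPipelineRasterizationDepthClipStateCreateInfoEXT
 * with depthClipEnable = VK_FALSE while depthClampEnable is also VK_FALSE leaves
 * clipping disabled below, whereas the legacy !depthClampEnable rule alone would
 * have enabled it.
 */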
2419 const VkPipelineRasterizationDepthClipStateCreateInfoEXT *clip_info =
2420 vk_find_struct_const(pCreateInfo->pRasterizationState->pNext,
2421 PIPELINE_RASTERIZATION_DEPTH_CLIP_STATE_CREATE_INFO_EXT);
2422 pipeline->depth_clip_enable = clip_info ? clip_info->depthClipEnable : !pipeline->depth_clamp_enable;
2423
2424 pipeline->sample_shading_enable =
2425 !pCreateInfo->pRasterizationState->rasterizerDiscardEnable &&
2426 pCreateInfo->pMultisampleState &&
2427 pCreateInfo->pMultisampleState->sampleShadingEnable;
2428
2429 result = anv_pipeline_compile_graphics(pipeline, cache, pCreateInfo);
2430 if (result != VK_SUCCESS) {
2431 anv_pipeline_finish(&pipeline->base, device, alloc);
2432 return result;
2433 }
2434
2435 anv_pipeline_setup_l3_config(&pipeline->base, false);
2436
2437 if (anv_pipeline_is_primitive(pipeline)) {
2438 const VkPipelineVertexInputStateCreateInfo *vi_info =
2439 pCreateInfo->pVertexInputState;
2440
2441 const uint64_t inputs_read = get_vs_prog_data(pipeline)->inputs_read;
2442
2443 for (uint32_t i = 0; i < vi_info->vertexAttributeDescriptionCount; i++) {
2444 const VkVertexInputAttributeDescription *desc =
2445 &vi_info->pVertexAttributeDescriptions[i];
2446
2447 if (inputs_read & (1ull << (VERT_ATTRIB_GENERIC0 + desc->location)))
2448 pipeline->vb_used |= 1 << desc->binding;
2449 }
2450
2451 for (uint32_t i = 0; i < vi_info->vertexBindingDescriptionCount; i++) {
2452 const VkVertexInputBindingDescription *desc =
2453 &vi_info->pVertexBindingDescriptions[i];
2454
2455 pipeline->vb[desc->binding].stride = desc->stride;
2456
2457 /* Step rate is programmed per vertex element (attribute), not
2458 * binding. Set up a map of which bindings step per instance, for
2459 * reference by vertex element setup. */
2460 switch (desc->inputRate) {
2461 default:
2462 case VK_VERTEX_INPUT_RATE_VERTEX:
2463 pipeline->vb[desc->binding].instanced = false;
2464 break;
2465 case VK_VERTEX_INPUT_RATE_INSTANCE:
2466 pipeline->vb[desc->binding].instanced = true;
2467 break;
2468 }
2469
2470 pipeline->vb[desc->binding].instance_divisor = 1;
2471 }
2472
2473 const VkPipelineVertexInputDivisorStateCreateInfoEXT *vi_div_state =
2474 vk_find_struct_const(vi_info->pNext,
2475 PIPELINE_VERTEX_INPUT_DIVISOR_STATE_CREATE_INFO_EXT);
2476 if (vi_div_state) {
2477 for (uint32_t i = 0; i < vi_div_state->vertexBindingDivisorCount; i++) {
2478 const VkVertexInputBindingDivisorDescriptionEXT *desc =
2479 &vi_div_state->pVertexBindingDivisors[i];
2480
2481 pipeline->vb[desc->binding].instance_divisor = desc->divisor;
2482 }
2483 }
2484
2485 /* Our implementation of VK_KHR_multiview uses instancing to draw the
2486 * different views. If the client asks for instancing, we need to multiply
2487 * the instance divisor by the number of views to ensure that we repeat the
2488 * client's per-instance data once for each view.
2489 */
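/* Worked example (illustrative numbers): a view_mask of 0b0101 means two
 * views, so a per-instance binding the app configured with divisor 3 ends up
 * with an effective divisor of 6 below.
 */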
2490 if (pipeline->subpass->view_mask && !pipeline->use_primitive_replication) {
2491 const uint32_t view_count = anv_subpass_view_count(pipeline->subpass);
2492 for (uint32_t vb = 0; vb < MAX_VBS; vb++) {
2493 if (pipeline->vb[vb].instanced)
2494 pipeline->vb[vb].instance_divisor *= view_count;
2495 }
2496 }
2497
2498 const VkPipelineInputAssemblyStateCreateInfo *ia_info =
2499 pCreateInfo->pInputAssemblyState;
2500 const VkPipelineTessellationStateCreateInfo *tess_info =
2501 pCreateInfo->pTessellationState;
2502
2503 if (anv_pipeline_has_stage(pipeline, MESA_SHADER_TESS_EVAL))
2504 pipeline->topology = _3DPRIM_PATCHLIST(tess_info->patchControlPoints);
2505 else
2506 pipeline->topology = vk_to_intel_primitive_type[ia_info->topology];
2507 }
2508
2509 /* If rasterization is not enabled, ms_info must be ignored. */
2510 const bool raster_enabled =
2511 !pCreateInfo->pRasterizationState->rasterizerDiscardEnable ||
2512 (pipeline->dynamic_states &
2513 ANV_CMD_DIRTY_DYNAMIC_RASTERIZER_DISCARD_ENABLE);
2514
2515 const VkPipelineMultisampleStateCreateInfo *ms_info =
2516 raster_enabled ? pCreateInfo->pMultisampleState : NULL;
2517
2518 const VkPipelineRasterizationLineStateCreateInfoEXT *line_info =
2519 vk_find_struct_const(pCreateInfo->pRasterizationState->pNext,
2520 PIPELINE_RASTERIZATION_LINE_STATE_CREATE_INFO_EXT);
2521
2522 /* Store line mode, polygon mode and rasterization samples; these are used
2523 * for dynamic primitive topology.
2524 */
2525 pipeline->line_mode = vk_line_rasterization_mode(line_info, ms_info);
2526 pipeline->polygon_mode = pCreateInfo->pRasterizationState->polygonMode;
2527 pipeline->rasterization_samples =
2528 ms_info ? ms_info->rasterizationSamples : 1;
2529
2530 return VK_SUCCESS;
2531 }
2532
2533 static VkResult
2534 compile_upload_rt_shader(struct anv_ray_tracing_pipeline *pipeline,
2535 struct anv_pipeline_cache *cache,
2536 nir_shader *nir,
2537 struct anv_pipeline_stage *stage,
2538 struct anv_shader_bin **shader_out,
2539 void *mem_ctx)
2540 {
2541 const struct brw_compiler *compiler =
2542 pipeline->base.device->physical->compiler;
2543 const struct intel_device_info *devinfo = compiler->devinfo;
2544
2545 nir_shader **resume_shaders = NULL;
2546 uint32_t num_resume_shaders = 0;
2547 if (nir->info.stage != MESA_SHADER_COMPUTE) {
2548 NIR_PASS_V(nir, nir_lower_shader_calls,
2549 nir_address_format_64bit_global,
2550 BRW_BTD_STACK_ALIGN,
2551 &resume_shaders, &num_resume_shaders, mem_ctx);
2552 NIR_PASS_V(nir, brw_nir_lower_shader_calls);
2553 NIR_PASS_V(nir, brw_nir_lower_rt_intrinsics, devinfo);
2554 }
2555
2556 for (unsigned i = 0; i < num_resume_shaders; i++) {
2557 NIR_PASS_V(resume_shaders[i], brw_nir_lower_shader_calls);
2558 NIR_PASS_V(resume_shaders[i], brw_nir_lower_rt_intrinsics, devinfo);
2559 }
2560
2561 stage->code =
2562 brw_compile_bs(compiler, pipeline->base.device, mem_ctx,
2563 &stage->key.bs, &stage->prog_data.bs, nir,
2564 num_resume_shaders, resume_shaders, stage->stats, NULL);
2565 if (stage->code == NULL)
2566 return vk_error(pipeline, VK_ERROR_OUT_OF_HOST_MEMORY);
2567
2568 /* Ray-tracing shaders don't have a "real" bind map */
2569 struct anv_pipeline_bind_map empty_bind_map = {};
2570
2571 const unsigned code_size = stage->prog_data.base.program_size;
2572 struct anv_shader_bin *bin =
2573 anv_device_upload_kernel(pipeline->base.device,
2574 cache,
2575 stage->stage,
2576 &stage->cache_key, sizeof(stage->cache_key),
2577 stage->code, code_size,
2578 &stage->prog_data.base,
2579 sizeof(stage->prog_data.bs),
2580 stage->stats, 1,
2581 NULL, &empty_bind_map);
2582 if (bin == NULL)
2583 return vk_error(pipeline, VK_ERROR_OUT_OF_HOST_MEMORY);
2584
2585 /* TODO: Figure out executables for resume shaders */
2586 anv_pipeline_add_executables(&pipeline->base, stage, bin);
2587 util_dynarray_append(&pipeline->shaders, struct anv_shader_bin *, bin);
2588
2589 *shader_out = bin;
2590
2591 return VK_SUCCESS;
2592 }
2593
2594 static bool
2595 is_rt_stack_size_dynamic(const VkRayTracingPipelineCreateInfoKHR *info)
2596 {
2597 if (info->pDynamicState == NULL)
2598 return false;
2599
2600 for (unsigned i = 0; i < info->pDynamicState->dynamicStateCount; i++) {
2601 if (info->pDynamicState->pDynamicStates[i] ==
2602 VK_DYNAMIC_STATE_RAY_TRACING_PIPELINE_STACK_SIZE_KHR)
2603 return true;
2604 }
2605
2606 return false;
2607 }
2608
2609 static void
2610 anv_pipeline_compute_ray_tracing_stacks(struct anv_ray_tracing_pipeline *pipeline,
2611 const VkRayTracingPipelineCreateInfoKHR *info,
2612 uint32_t *stack_max)
2613 {
2614 if (is_rt_stack_size_dynamic(info)) {
2615 pipeline->stack_size = 0; /* 0 means dynamic */
2616 } else {
2617 /* From the Vulkan spec:
2618 *
2619 * "If the stack size is not set explicitly, the stack size for a
2620 * pipeline is:
2621 *
2622 * rayGenStackMax +
2623 * min(1, maxPipelineRayRecursionDepth) ×
2624 * max(closestHitStackMax, missStackMax,
2625 * intersectionStackMax + anyHitStackMax) +
2626 * max(0, maxPipelineRayRecursionDepth-1) ×
2627 * max(closestHitStackMax, missStackMax) +
2628 * 2 × callableStackMax"
2629 */
2630 pipeline->stack_size =
2631 stack_max[MESA_SHADER_RAYGEN] +
2632 MIN2(1, info->maxPipelineRayRecursionDepth) *
2633 MAX4(stack_max[MESA_SHADER_CLOSEST_HIT],
2634 stack_max[MESA_SHADER_MISS],
2635 stack_max[MESA_SHADER_INTERSECTION],
2636 stack_max[MESA_SHADER_ANY_HIT]) +
2637 MAX2(0, (int)info->maxPipelineRayRecursionDepth - 1) *
2638 MAX2(stack_max[MESA_SHADER_CLOSEST_HIT],
2639 stack_max[MESA_SHADER_MISS]) +
2640 2 * stack_max[MESA_SHADER_CALLABLE];
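/* Worked example (illustrative numbers only): with
 * maxPipelineRayRecursionDepth = 2, rayGenStackMax = 64,
 * closestHitStackMax = 128, missStackMax = 96, intersectionStackMax = 32,
 * anyHitStackMax = 16 and callableStackMax = 0, the expression above yields
 * 64 + 1 * 128 + 1 * 128 + 2 * 0 = 320.
 */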
2641
2642 /* This is an extremely unlikely case but we need to set it to some
2643 * non-zero value so that we don't accidentally think it's dynamic.
2644 * Our minimum stack size is 2KB anyway so we could set it to any small
2645 * value we like.
2646 */
2647 if (pipeline->stack_size == 0)
2648 pipeline->stack_size = 1;
2649 }
2650 }
2651
2652 static struct anv_pipeline_stage *
2653 anv_pipeline_init_ray_tracing_stages(struct anv_ray_tracing_pipeline *pipeline,
2654 const VkRayTracingPipelineCreateInfoKHR *info,
2655 void *pipeline_ctx)
2656 {
2657 ANV_FROM_HANDLE(anv_pipeline_layout, layout, info->layout);
2658
2659 /* Create enough stage entries for all shader modules plus potential
2660 * combinations in the groups.
2661 */
2662 struct anv_pipeline_stage *stages =
2663 rzalloc_array(pipeline_ctx, struct anv_pipeline_stage, info->stageCount);
2664
2665 for (uint32_t i = 0; i < info->stageCount; i++) {
2666 const VkPipelineShaderStageCreateInfo *sinfo = &info->pStages[i];
2667 if (sinfo->module == VK_NULL_HANDLE)
2668 continue;
2669
2670 int64_t stage_start = os_time_get_nano();
2671
2672 stages[i] = (struct anv_pipeline_stage) {
2673 .stage = vk_to_mesa_shader_stage(sinfo->stage),
2674 .module = vk_shader_module_from_handle(sinfo->module),
2675 .entrypoint = sinfo->pName,
2676 .spec_info = sinfo->pSpecializationInfo,
2677 .cache_key = {
2678 .stage = vk_to_mesa_shader_stage(sinfo->stage),
2679 },
2680 .feedback = {
2681 .flags = VK_PIPELINE_CREATION_FEEDBACK_VALID_BIT_EXT,
2682 },
2683 };
2684
2685 populate_bs_prog_key(&pipeline->base.device->info, sinfo->flags,
2686 pipeline->base.device->robust_buffer_access,
2687 &stages[i].key.bs);
2688
2689 anv_pipeline_hash_shader(stages[i].module,
2690 stages[i].entrypoint,
2691 stages[i].stage,
2692 stages[i].spec_info,
2693 stages[i].shader_sha1);
2694
2695 if (stages[i].stage != MESA_SHADER_INTERSECTION) {
2696 anv_pipeline_hash_ray_tracing_shader(pipeline, layout, &stages[i],
2697 stages[i].cache_key.sha1);
2698 }
2699
2700 stages[i].feedback.duration += os_time_get_nano() - stage_start;
2701 }
2702
2703 for (uint32_t i = 0; i < info->groupCount; i++) {
2704 const VkRayTracingShaderGroupCreateInfoKHR *ginfo = &info->pGroups[i];
2705
2706 if (ginfo->type != VK_RAY_TRACING_SHADER_GROUP_TYPE_PROCEDURAL_HIT_GROUP_KHR)
2707 continue;
2708
2709 int64_t stage_start = os_time_get_nano();
2710
2711 uint32_t intersection_idx = ginfo->intersectionShader;
2712 assert(intersection_idx < info->stageCount);
2713
2714 uint32_t any_hit_idx = ginfo->anyHitShader;
2715 if (any_hit_idx != VK_SHADER_UNUSED_KHR) {
2716 assert(any_hit_idx < info->stageCount);
2717 anv_pipeline_hash_ray_tracing_combined_shader(pipeline,
2718 layout,
2719 &stages[intersection_idx],
2720 &stages[any_hit_idx],
2721 stages[intersection_idx].cache_key.sha1);
2722 } else {
2723 anv_pipeline_hash_ray_tracing_shader(pipeline, layout,
2724 &stages[intersection_idx],
2725 stages[intersection_idx].cache_key.sha1);
2726 }
2727
2728 stages[intersection_idx].feedback.duration += os_time_get_nano() - stage_start;
2729 }
2730
2731 return stages;
2732 }
2733
2734 static bool
2735 anv_pipeline_load_cached_shaders(struct anv_ray_tracing_pipeline *pipeline,
2736 struct anv_pipeline_cache *cache,
2737 const VkRayTracingPipelineCreateInfoKHR *info,
2738 struct anv_pipeline_stage *stages,
2739 uint32_t *stack_max)
2740 {
2741 uint32_t shaders = 0, cache_hits = 0;
2742 for (uint32_t i = 0; i < info->stageCount; i++) {
2743 if (stages[i].entrypoint == NULL)
2744 continue;
2745
2746 shaders++;
2747
2748 int64_t stage_start = os_time_get_nano();
2749
2750 bool cache_hit;
2751 stages[i].bin = anv_device_search_for_kernel(pipeline->base.device, cache,
2752 &stages[i].cache_key,
2753 sizeof(stages[i].cache_key),
2754 &cache_hit);
2755 if (cache_hit) {
2756 cache_hits++;
2757 stages[i].feedback.flags |=
2758 VK_PIPELINE_CREATION_FEEDBACK_APPLICATION_PIPELINE_CACHE_HIT_BIT_EXT;
2759 }
2760
2761 if (stages[i].bin != NULL) {
2762 anv_pipeline_add_executables(&pipeline->base, &stages[i], stages[i].bin);
2763 util_dynarray_append(&pipeline->shaders, struct anv_shader_bin *, stages[i].bin);
2764
2765 uint32_t stack_size =
2766 brw_bs_prog_data_const(stages[i].bin->prog_data)->max_stack_size;
2767 stack_max[stages[i].stage] =
2768 MAX2(stack_max[stages[i].stage], stack_size);
2769 }
2770
2771 stages[i].feedback.duration += os_time_get_nano() - stage_start;
2772 }
2773
2774 return cache_hits == shaders;
2775 }
2776
2777 static VkResult
2778 anv_pipeline_compile_ray_tracing(struct anv_ray_tracing_pipeline *pipeline,
2779 struct anv_pipeline_cache *cache,
2780 const VkRayTracingPipelineCreateInfoKHR *info)
2781 {
2782 const struct intel_device_info *devinfo = &pipeline->base.device->info;
2783 VkResult result;
2784
2785 VkPipelineCreationFeedbackEXT pipeline_feedback = {
2786 .flags = VK_PIPELINE_CREATION_FEEDBACK_VALID_BIT_EXT,
2787 };
2788 int64_t pipeline_start = os_time_get_nano();
2789
2790 void *pipeline_ctx = ralloc_context(NULL);
2791
2792 struct anv_pipeline_stage *stages =
2793 anv_pipeline_init_ray_tracing_stages(pipeline, info, pipeline_ctx);
2794
2795 ANV_FROM_HANDLE(anv_pipeline_layout, layout, info->layout);
2796
2797 const bool skip_cache_lookup =
2798 (pipeline->base.flags & VK_PIPELINE_CREATE_CAPTURE_INTERNAL_REPRESENTATIONS_BIT_KHR);
2799
2800 uint32_t stack_max[MESA_VULKAN_SHADER_STAGES] = {};
2801
2802 if (!skip_cache_lookup &&
2803 anv_pipeline_load_cached_shaders(pipeline, cache, info, stages, stack_max)) {
2804 pipeline_feedback.flags |=
2805 VK_PIPELINE_CREATION_FEEDBACK_APPLICATION_PIPELINE_CACHE_HIT_BIT_EXT;
2806 goto done;
2807 }
2808
2809 if (info->flags & VK_PIPELINE_CREATE_FAIL_ON_PIPELINE_COMPILE_REQUIRED_BIT_EXT) {
2810 ralloc_free(pipeline_ctx);
2811 return VK_PIPELINE_COMPILE_REQUIRED_EXT;
2812 }
2813
2814 for (uint32_t i = 0; i < info->stageCount; i++) {
2815 if (stages[i].entrypoint == NULL)
2816 continue;
2817
2818 int64_t stage_start = os_time_get_nano();
2819
2820 stages[i].nir = anv_pipeline_stage_get_nir(&pipeline->base, cache,
2821 pipeline_ctx, &stages[i]);
2822 if (stages[i].nir == NULL) {
2823 ralloc_free(pipeline_ctx);
2824 return vk_error(pipeline, VK_ERROR_OUT_OF_HOST_MEMORY);
2825 }
2826
2827 anv_pipeline_lower_nir(&pipeline->base, pipeline_ctx, &stages[i], layout);
2828
2829 stages[i].feedback.duration += os_time_get_nano() - stage_start;
2830 }
2831
2832 for (uint32_t i = 0; i < info->stageCount; i++) {
2833 if (stages[i].entrypoint == NULL)
2834 continue;
2835
2836 /* Shader found in cache already. */
2837 if (stages[i].bin != NULL)
2838 continue;
2839
2840 /* We handle intersection shaders as part of the group */
2841 if (stages[i].stage == MESA_SHADER_INTERSECTION)
2842 continue;
2843
2844 int64_t stage_start = os_time_get_nano();
2845
2846 void *stage_ctx = ralloc_context(pipeline_ctx);
2847
2848 nir_shader *nir = nir_shader_clone(stage_ctx, stages[i].nir);
2849 switch (stages[i].stage) {
2850 case MESA_SHADER_RAYGEN:
2851 brw_nir_lower_raygen(nir);
2852 break;
2853
2854 case MESA_SHADER_ANY_HIT:
2855 brw_nir_lower_any_hit(nir, devinfo);
2856 break;
2857
2858 case MESA_SHADER_CLOSEST_HIT:
2859 brw_nir_lower_closest_hit(nir);
2860 break;
2861
2862 case MESA_SHADER_MISS:
2863 brw_nir_lower_miss(nir);
2864 break;
2865
2866 case MESA_SHADER_INTERSECTION:
2867 unreachable("These are handled later");
2868
2869 case MESA_SHADER_CALLABLE:
2870 brw_nir_lower_callable(nir);
2871 break;
2872
2873 default:
2874 unreachable("Invalid ray-tracing shader stage");
2875 }
2876
2877 result = compile_upload_rt_shader(pipeline, cache, nir, &stages[i],
2878 &stages[i].bin, stage_ctx);
2879 if (result != VK_SUCCESS) {
2880 ralloc_free(pipeline_ctx);
2881 return result;
2882 }
2883
2884 uint32_t stack_size =
2885 brw_bs_prog_data_const(stages[i].bin->prog_data)->max_stack_size;
2886 stack_max[stages[i].stage] = MAX2(stack_max[stages[i].stage], stack_size);
2887
2888 ralloc_free(stage_ctx);
2889
2890 stages[i].feedback.duration += os_time_get_nano() - stage_start;
2891 }
2892
2893 for (uint32_t i = 0; i < info->groupCount; i++) {
2894 const VkRayTracingShaderGroupCreateInfoKHR *ginfo = &info->pGroups[i];
2895 struct anv_rt_shader_group *group = &pipeline->groups[i];
2896 group->type = ginfo->type;
2897 switch (ginfo->type) {
2898 case VK_RAY_TRACING_SHADER_GROUP_TYPE_GENERAL_KHR:
2899 assert(ginfo->generalShader < info->stageCount);
2900 group->general = stages[ginfo->generalShader].bin;
2901 break;
2902
2903 case VK_RAY_TRACING_SHADER_GROUP_TYPE_TRIANGLES_HIT_GROUP_KHR:
2904 if (ginfo->anyHitShader < info->stageCount)
2905 group->any_hit = stages[ginfo->anyHitShader].bin;
2906
2907 if (ginfo->closestHitShader < info->stageCount)
2908 group->closest_hit = stages[ginfo->closestHitShader].bin;
2909 break;
2910
2911 case VK_RAY_TRACING_SHADER_GROUP_TYPE_PROCEDURAL_HIT_GROUP_KHR: {
2912 if (ginfo->closestHitShader < info->stageCount)
2913 group->closest_hit = stages[ginfo->closestHitShader].bin;
2914
2915 uint32_t intersection_idx = info->pGroups[i].intersectionShader;
2916 assert(intersection_idx < info->stageCount);
2917
2918 /* Only compile this stage if not already found in the cache. */
2919 if (stages[intersection_idx].bin == NULL) {
2920 /* The any-hit and intersection shaders have to be combined */
2921 uint32_t any_hit_idx = info->pGroups[i].anyHitShader;
2922 const nir_shader *any_hit = NULL;
2923 if (any_hit_idx < info->stageCount)
2924 any_hit = stages[any_hit_idx].nir;
2925
2926 void *group_ctx = ralloc_context(pipeline_ctx);
2927 nir_shader *intersection =
2928 nir_shader_clone(group_ctx, stages[intersection_idx].nir);
2929
2930 brw_nir_lower_combined_intersection_any_hit(intersection, any_hit,
2931 devinfo);
2932
2933 result = compile_upload_rt_shader(pipeline, cache,
2934 intersection,
2935 &stages[intersection_idx],
2936 &group->intersection,
2937 group_ctx);
2938 ralloc_free(group_ctx);
2939 if (result != VK_SUCCESS)
2940 return result;
2941 } else {
2942 group->intersection = stages[intersection_idx].bin;
2943 }
2944
2945 uint32_t stack_size =
2946 brw_bs_prog_data_const(group->intersection->prog_data)->max_stack_size;
2947 stack_max[MESA_SHADER_INTERSECTION] =
2948 MAX2(stack_max[MESA_SHADER_INTERSECTION], stack_size);
2949
2950 break;
2951 }
2952
2953 default:
2954 unreachable("Invalid ray tracing shader group type");
2955 }
2956 }
2957
2958 done:
2959 ralloc_free(pipeline_ctx);
2960
2961 anv_pipeline_compute_ray_tracing_stacks(pipeline, info, stack_max);
2962
2963 pipeline_feedback.duration = os_time_get_nano() - pipeline_start;
2964
2965 const VkPipelineCreationFeedbackCreateInfoEXT *create_feedback =
2966 vk_find_struct_const(info->pNext, PIPELINE_CREATION_FEEDBACK_CREATE_INFO_EXT);
2967 if (create_feedback) {
2968 *create_feedback->pPipelineCreationFeedback = pipeline_feedback;
2969
2970 assert(info->stageCount == create_feedback->pipelineStageCreationFeedbackCount);
2971 for (uint32_t i = 0; i < info->stageCount; i++) {
2972 /* The stages array here is indexed by create-info stage order, not by gl_shader_stage. */
2973 create_feedback->pPipelineStageCreationFeedbacks[i] = stages[i].feedback;
2974 }
2975 }
2976
2977 return VK_SUCCESS;
2978 }
2979
2980 VkResult
2981 anv_device_init_rt_shaders(struct anv_device *device)
2982 {
2983 if (!device->vk.enabled_extensions.KHR_ray_tracing_pipeline)
2984 return VK_SUCCESS;
2985
2986 bool cache_hit;
2987
2988 struct brw_rt_trampoline {
2989 char name[16];
2990 struct brw_cs_prog_key key;
2991 } trampoline_key = {
2992 .name = "rt-trampoline",
2993 .key = {
2994 /* TODO: Other subgroup sizes? */
2995 .base.subgroup_size_type = BRW_SUBGROUP_SIZE_REQUIRE_8,
2996 },
2997 };
2998 device->rt_trampoline =
2999 anv_device_search_for_kernel(device, &device->default_pipeline_cache,
3000 &trampoline_key, sizeof(trampoline_key),
3001 &cache_hit);
3002 if (device->rt_trampoline == NULL) {
3003
3004 void *tmp_ctx = ralloc_context(NULL);
3005 nir_shader *trampoline_nir =
3006 brw_nir_create_raygen_trampoline(device->physical->compiler, tmp_ctx);
3007
3008 struct anv_pipeline_bind_map bind_map = {
3009 .surface_count = 0,
3010 .sampler_count = 0,
3011 };
3012 uint32_t dummy_params[4] = { 0, };
3013 struct brw_cs_prog_data trampoline_prog_data = {
3014 .base.nr_params = 4,
3015 .base.param = dummy_params,
3016 .uses_inline_data = true,
3017 .uses_btd_stack_ids = true,
3018 };
3019 struct brw_compile_cs_params params = {
3020 .nir = trampoline_nir,
3021 .key = &trampoline_key.key,
3022 .prog_data = &trampoline_prog_data,
3023 .log_data = device,
3024 };
3025 const unsigned *tramp_data =
3026 brw_compile_cs(device->physical->compiler, tmp_ctx, &params);
3027
3028 device->rt_trampoline =
3029 anv_device_upload_kernel(device, &device->default_pipeline_cache,
3030 MESA_SHADER_COMPUTE,
3031 &trampoline_key, sizeof(trampoline_key),
3032 tramp_data,
3033 trampoline_prog_data.base.program_size,
3034 &trampoline_prog_data.base,
3035 sizeof(trampoline_prog_data),
3036 NULL, 0, NULL, &bind_map);
3037
3038 ralloc_free(tmp_ctx);
3039
3040 if (device->rt_trampoline == NULL)
3041 return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
3042 }
3043
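   /* The second internal kernel is a trivial bindless shader that does
    * nothing but return; it is uploaded as MESA_SHADER_CALLABLE and keyed
    * the same way as the trampoline.
    */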
   struct brw_rt_trivial_return {
      char name[16];
      struct brw_bs_prog_key key;
   } return_key = {
      .name = "rt-trivial-ret",
   };
   device->rt_trivial_return =
      anv_device_search_for_kernel(device, &device->default_pipeline_cache,
                                   &return_key, sizeof(return_key),
                                   &cache_hit);
   if (device->rt_trivial_return == NULL) {
      void *tmp_ctx = ralloc_context(NULL);
      nir_shader *trivial_return_nir =
         brw_nir_create_trivial_return_shader(device->physical->compiler, tmp_ctx);

      NIR_PASS_V(trivial_return_nir, brw_nir_lower_rt_intrinsics, &device->info);

      struct anv_pipeline_bind_map bind_map = {
         .surface_count = 0,
         .sampler_count = 0,
      };
      struct brw_bs_prog_data return_prog_data = { 0, };
      const unsigned *return_data =
         brw_compile_bs(device->physical->compiler, device, tmp_ctx,
                        &return_key.key, &return_prog_data, trivial_return_nir,
                        0, 0, NULL, NULL);

      device->rt_trivial_return =
         anv_device_upload_kernel(device, &device->default_pipeline_cache,
                                  MESA_SHADER_CALLABLE,
                                  &return_key, sizeof(return_key),
                                  return_data, return_prog_data.base.program_size,
                                  &return_prog_data.base, sizeof(return_prog_data),
                                  NULL, 0, NULL, &bind_map);

      ralloc_free(tmp_ctx);

      if (device->rt_trivial_return == NULL) {
         anv_shader_bin_unref(device, device->rt_trampoline);
         return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
      }
   }

   return VK_SUCCESS;
}

void
anv_device_finish_rt_shaders(struct anv_device *device)
{
   if (!device->vk.enabled_extensions.KHR_ray_tracing_pipeline)
      return;

   anv_shader_bin_unref(device, device->rt_trampoline);
}

VkResult
anv_ray_tracing_pipeline_init(struct anv_ray_tracing_pipeline *pipeline,
                              struct anv_device *device,
                              struct anv_pipeline_cache *cache,
                              const VkRayTracingPipelineCreateInfoKHR *pCreateInfo,
                              const VkAllocationCallbacks *alloc)
{
   VkResult result;

   util_dynarray_init(&pipeline->shaders, pipeline->base.mem_ctx);

   result = anv_pipeline_compile_ray_tracing(pipeline, cache, pCreateInfo);
   if (result != VK_SUCCESS)
      goto fail;

   anv_pipeline_setup_l3_config(&pipeline->base, /* needs_slm */ false);

   return VK_SUCCESS;

 fail:
   util_dynarray_foreach(&pipeline->shaders,
                         struct anv_shader_bin *, shader) {
      anv_shader_bin_unref(device, *shader);
   }
   return result;
}

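/* snprintf into a fixed-size char-array field, zeroing it first and
 * asserting that the formatted string was not truncated.
 */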
#define WRITE_STR(field, ...) ({                               \
   memset(field, 0, sizeof(field));                            \
   UNUSED int i = snprintf(field, sizeof(field), __VA_ARGS__); \
   assert(i > 0 && i < sizeof(field));                         \
})

VkResult anv_GetPipelineExecutablePropertiesKHR(
    VkDevice                                    device,
    const VkPipelineInfoKHR*                    pPipelineInfo,
    uint32_t*                                   pExecutableCount,
    VkPipelineExecutablePropertiesKHR*          pProperties)
{
   ANV_FROM_HANDLE(anv_pipeline, pipeline, pPipelineInfo->pipeline);
   VK_OUTARRAY_MAKE(out, pProperties, pExecutableCount);

   util_dynarray_foreach (&pipeline->executables, struct anv_pipeline_executable, exe) {
      vk_outarray_append(&out, props) {
         gl_shader_stage stage = exe->stage;
         props->stages = mesa_to_vk_shader_stage(stage);

         unsigned simd_width = exe->stats.dispatch_width;
         if (stage == MESA_SHADER_FRAGMENT) {
            WRITE_STR(props->name, "%s%d %s",
                      simd_width ? "SIMD" : "vec",
                      simd_width ? simd_width : 4,
                      _mesa_shader_stage_to_string(stage));
         } else {
            WRITE_STR(props->name, "%s", _mesa_shader_stage_to_string(stage));
         }
         WRITE_STR(props->description, "%s%d %s shader",
                   simd_width ? "SIMD" : "vec",
                   simd_width ? simd_width : 4,
                   _mesa_shader_stage_to_string(stage));

         /* The compiler gives us a dispatch width of 0 for vec4 but Vulkan
          * wants a subgroup size of 1.
          */
         props->subgroupSize = MAX2(simd_width, 1);
      }
   }

   return vk_outarray_status(&out);
}

static const struct anv_pipeline_executable *
anv_pipeline_get_executable(struct anv_pipeline *pipeline, uint32_t index)
{
   assert(index < util_dynarray_num_elements(&pipeline->executables,
                                             struct anv_pipeline_executable));
   return util_dynarray_element(
      &pipeline->executables, struct anv_pipeline_executable, index);
}

VkResult anv_GetPipelineExecutableStatisticsKHR(
    VkDevice                                    device,
    const VkPipelineExecutableInfoKHR*          pExecutableInfo,
    uint32_t*                                   pStatisticCount,
    VkPipelineExecutableStatisticKHR*           pStatistics)
{
   ANV_FROM_HANDLE(anv_pipeline, pipeline, pExecutableInfo->pipeline);
   VK_OUTARRAY_MAKE(out, pStatistics, pStatisticCount);

   const struct anv_pipeline_executable *exe =
      anv_pipeline_get_executable(pipeline, pExecutableInfo->executableIndex);

   const struct brw_stage_prog_data *prog_data;
   switch (pipeline->type) {
   case ANV_PIPELINE_GRAPHICS: {
      prog_data = anv_pipeline_to_graphics(pipeline)->shaders[exe->stage]->prog_data;
      break;
   }
   case ANV_PIPELINE_COMPUTE: {
      prog_data = anv_pipeline_to_compute(pipeline)->cs->prog_data;
      break;
   }
   default:
      unreachable("invalid pipeline type");
   }

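   /* Emit one statistic per counter gathered by the back-end compiler for
    * this executable, plus the scratch/shared memory sizes from prog_data.
    */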
   vk_outarray_append(&out, stat) {
      WRITE_STR(stat->name, "Instruction Count");
      WRITE_STR(stat->description,
                "Number of GEN instructions in the final generated "
                "shader executable.");
      stat->format = VK_PIPELINE_EXECUTABLE_STATISTIC_FORMAT_UINT64_KHR;
      stat->value.u64 = exe->stats.instructions;
   }

   vk_outarray_append(&out, stat) {
      WRITE_STR(stat->name, "SEND Count");
      WRITE_STR(stat->description,
                "Number of instructions in the final generated shader "
                "executable which access external units such as the "
                "constant cache or the sampler.");
      stat->format = VK_PIPELINE_EXECUTABLE_STATISTIC_FORMAT_UINT64_KHR;
      stat->value.u64 = exe->stats.sends;
   }

   vk_outarray_append(&out, stat) {
      WRITE_STR(stat->name, "Loop Count");
      WRITE_STR(stat->description,
                "Number of loops (not unrolled) in the final generated "
                "shader executable.");
      stat->format = VK_PIPELINE_EXECUTABLE_STATISTIC_FORMAT_UINT64_KHR;
      stat->value.u64 = exe->stats.loops;
   }

   vk_outarray_append(&out, stat) {
      WRITE_STR(stat->name, "Cycle Count");
      WRITE_STR(stat->description,
                "Estimate of the number of EU cycles required to execute "
                "the final generated executable. This is an estimate only "
                "and may vary greatly from actual run-time performance.");
      stat->format = VK_PIPELINE_EXECUTABLE_STATISTIC_FORMAT_UINT64_KHR;
      stat->value.u64 = exe->stats.cycles;
   }

   vk_outarray_append(&out, stat) {
      WRITE_STR(stat->name, "Spill Count");
      WRITE_STR(stat->description,
                "Number of scratch spill operations. This gives a rough "
                "estimate of the cost incurred due to spilling temporary "
                "values to memory. If this is non-zero, you may want to "
                "adjust your shader to reduce register pressure.");
      stat->format = VK_PIPELINE_EXECUTABLE_STATISTIC_FORMAT_UINT64_KHR;
      stat->value.u64 = exe->stats.spills;
   }

   vk_outarray_append(&out, stat) {
      WRITE_STR(stat->name, "Fill Count");
      WRITE_STR(stat->description,
                "Number of scratch fill operations. This gives a rough "
                "estimate of the cost incurred due to spilling temporary "
                "values to memory. If this is non-zero, you may want to "
                "adjust your shader to reduce register pressure.");
      stat->format = VK_PIPELINE_EXECUTABLE_STATISTIC_FORMAT_UINT64_KHR;
      stat->value.u64 = exe->stats.fills;
   }

   vk_outarray_append(&out, stat) {
      WRITE_STR(stat->name, "Scratch Memory Size");
      WRITE_STR(stat->description,
                "Number of bytes of scratch memory required by the "
                "generated shader executable. If this is non-zero, you "
                "may want to adjust your shader to reduce register "
                "pressure.");
      stat->format = VK_PIPELINE_EXECUTABLE_STATISTIC_FORMAT_UINT64_KHR;
      stat->value.u64 = prog_data->total_scratch;
   }

   if (gl_shader_stage_uses_workgroup(exe->stage)) {
      vk_outarray_append(&out, stat) {
         WRITE_STR(stat->name, "Workgroup Memory Size");
         WRITE_STR(stat->description,
                   "Number of bytes of workgroup shared memory used by this "
                   "shader including any padding.");
         stat->format = VK_PIPELINE_EXECUTABLE_STATISTIC_FORMAT_UINT64_KHR;
         stat->value.u64 = prog_data->total_shared;
      }
   }

   return vk_outarray_status(&out);
}

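/* Helper implementing the usual Vulkan two-call idiom for strings: when
 * pData is NULL only the required size is reported; otherwise the text is
 * copied and false is returned if the provided buffer was too small.
 */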
static bool
write_ir_text(VkPipelineExecutableInternalRepresentationKHR* ir,
              const char *data)
{
   ir->isText = VK_TRUE;

   size_t data_len = strlen(data) + 1;

   if (ir->pData == NULL) {
      ir->dataSize = data_len;
      return true;
   }

   strncpy(ir->pData, data, ir->dataSize);
   if (ir->dataSize < data_len)
      return false;

   ir->dataSize = data_len;
   return true;
}

VkResult anv_GetPipelineExecutableInternalRepresentationsKHR(
    VkDevice                                    device,
    const VkPipelineExecutableInfoKHR*          pExecutableInfo,
    uint32_t*                                   pInternalRepresentationCount,
    VkPipelineExecutableInternalRepresentationKHR* pInternalRepresentations)
{
   ANV_FROM_HANDLE(anv_pipeline, pipeline, pExecutableInfo->pipeline);
   VK_OUTARRAY_MAKE(out, pInternalRepresentations,
                    pInternalRepresentationCount);
   bool incomplete_text = false;

   const struct anv_pipeline_executable *exe =
      anv_pipeline_get_executable(pipeline, pExecutableInfo->executableIndex);

   if (exe->nir) {
      vk_outarray_append(&out, ir) {
         WRITE_STR(ir->name, "Final NIR");
         WRITE_STR(ir->description,
                   "Final NIR before going into the back-end compiler");

         if (!write_ir_text(ir, exe->nir))
            incomplete_text = true;
      }
   }

   if (exe->disasm) {
      vk_outarray_append(&out, ir) {
         WRITE_STR(ir->name, "GEN Assembly");
         WRITE_STR(ir->description,
                   "Final GEN assembly for the generated shader binary");

         if (!write_ir_text(ir, exe->disasm))
            incomplete_text = true;
      }
   }

   return incomplete_text ? VK_INCOMPLETE : vk_outarray_status(&out);
}

VkResult
anv_GetRayTracingShaderGroupHandlesKHR(
    VkDevice                                    _device,
    VkPipeline                                  _pipeline,
    uint32_t                                    firstGroup,
    uint32_t                                    groupCount,
    size_t                                      dataSize,
    void*                                       pData)
{
   ANV_FROM_HANDLE(anv_device, device, _device);
   ANV_FROM_HANDLE(anv_pipeline, pipeline, _pipeline);

   if (pipeline->type != ANV_PIPELINE_RAY_TRACING)
      return vk_error(device, VK_ERROR_FEATURE_NOT_PRESENT);

   struct anv_ray_tracing_pipeline *rt_pipeline =
      anv_pipeline_to_ray_tracing(pipeline);

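   /* Copy the opaque shader group handles back-to-back into the
    * application-provided buffer.
    */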
   for (uint32_t i = 0; i < groupCount; i++) {
      struct anv_rt_shader_group *group = &rt_pipeline->groups[firstGroup + i];
      memcpy(pData, group->handle, sizeof(group->handle));
      pData += sizeof(group->handle);
   }

   return VK_SUCCESS;
}

VkResult
anv_GetRayTracingCaptureReplayShaderGroupHandlesKHR(
    VkDevice                                    _device,
    VkPipeline                                  pipeline,
    uint32_t                                    firstGroup,
    uint32_t                                    groupCount,
    size_t                                      dataSize,
    void*                                       pData)
{
   ANV_FROM_HANDLE(anv_device, device, _device);
   unreachable("Unimplemented");
   return vk_error(device, VK_ERROR_FEATURE_NOT_PRESENT);
}

VkDeviceSize
anv_GetRayTracingShaderGroupStackSizeKHR(
    VkDevice                                    device,
    VkPipeline                                  _pipeline,
    uint32_t                                    group,
    VkShaderGroupShaderKHR                      groupShader)
{
   ANV_FROM_HANDLE(anv_pipeline, pipeline, _pipeline);
   assert(pipeline->type == ANV_PIPELINE_RAY_TRACING);

   struct anv_ray_tracing_pipeline *rt_pipeline =
      anv_pipeline_to_ray_tracing(pipeline);

   assert(group < rt_pipeline->group_count);

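   /* Pick the shader bin for the requested slot within the group.  Groups
    * may leave slots NULL (e.g. no any-hit shader), in which case they
    * contribute no stack.
    */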
   struct anv_shader_bin *bin;
   switch (groupShader) {
   case VK_SHADER_GROUP_SHADER_GENERAL_KHR:
      bin = rt_pipeline->groups[group].general;
      break;

   case VK_SHADER_GROUP_SHADER_CLOSEST_HIT_KHR:
      bin = rt_pipeline->groups[group].closest_hit;
      break;

   case VK_SHADER_GROUP_SHADER_ANY_HIT_KHR:
      bin = rt_pipeline->groups[group].any_hit;
      break;

   case VK_SHADER_GROUP_SHADER_INTERSECTION_KHR:
      bin = rt_pipeline->groups[group].intersection;
      break;

   default:
      unreachable("Invalid VkShaderGroupShader enum");
   }

   if (bin == NULL)
      return 0;

   return brw_bs_prog_data_const(bin->prog_data)->max_stack_size;
}