/*
 * Copyright © 2015 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include <assert.h>
#include <stdbool.h>
#include <string.h>
#include <unistd.h>
#include <fcntl.h>

#include "util/mesa-sha1.h"
#include "util/os_time.h"
#include "common/intel_l3_config.h"
#include "common/intel_disasm.h"
#include "common/intel_sample_positions.h"
#include "anv_private.h"
#include "compiler/brw_nir.h"
#include "compiler/brw_nir_rt.h"
#include "anv_nir.h"
#include "nir/nir_xfb_info.h"
#include "spirv/nir_spirv.h"
#include "vk_util.h"

/* Needed for SWIZZLE macros */
#include "program/prog_instruction.h"

/* Shader functions */
#define SPIR_V_MAGIC_NUMBER 0x07230203

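/* Context for the spirv_to_nir debug callback, so messages can be logged
 * against the shader module they came from.
 */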
struct anv_spirv_debug_data {
   struct anv_device *device;
   const struct vk_shader_module *module;
};

static void anv_spirv_nir_debug(void *private_data,
                                enum nir_spirv_debug_level level,
                                size_t spirv_offset,
                                const char *message)
{
   struct anv_spirv_debug_data *debug_data = private_data;

   switch (level) {
   case NIR_SPIRV_DEBUG_LEVEL_INFO:
      vk_logi(VK_LOG_OBJS(&debug_data->module->base),
              "SPIR-V offset %lu: %s",
              (unsigned long) spirv_offset, message);
      break;
   case NIR_SPIRV_DEBUG_LEVEL_WARNING:
      vk_logw(VK_LOG_OBJS(&debug_data->module->base),
              "SPIR-V offset %lu: %s",
              (unsigned long) spirv_offset, message);
      break;
   case NIR_SPIRV_DEBUG_LEVEL_ERROR:
      vk_loge(VK_LOG_OBJS(&debug_data->module->base),
              "SPIR-V offset %lu: %s",
              (unsigned long) spirv_offset, message);
      break;
   default:
      break;
   }
}

/* Eventually, this will become part of anv_CreateShader.  Unfortunately,
 * we can't do that yet because we don't have the ability to copy nir.
 */
static nir_shader *
anv_shader_compile_to_nir(struct anv_device *device,
                          void *mem_ctx,
                          const struct vk_shader_module *module,
                          const char *entrypoint_name,
                          gl_shader_stage stage,
                          const VkSpecializationInfo *spec_info)
{
   const struct anv_physical_device *pdevice = device->physical;
   const struct brw_compiler *compiler = pdevice->compiler;
   const nir_shader_compiler_options *nir_options =
      compiler->glsl_compiler_options[stage].NirOptions;

   uint32_t *spirv = (uint32_t *) module->data;
   assert(spirv[0] == SPIR_V_MAGIC_NUMBER);
   assert(module->size % 4 == 0);

   uint32_t num_spec_entries = 0;
   struct nir_spirv_specialization *spec_entries =
      vk_spec_info_to_nir_spirv(spec_info, &num_spec_entries);

   struct anv_spirv_debug_data spirv_debug_data = {
      .device = device,
      .module = module,
   };
   struct spirv_to_nir_options spirv_options = {
      .caps = {
         .demote_to_helper_invocation = true,
         .derivative_group = true,
         .descriptor_array_dynamic_indexing = true,
         .descriptor_array_non_uniform_indexing = true,
         .descriptor_indexing = true,
         .device_group = true,
         .draw_parameters = true,
         .float16 = pdevice->info.ver >= 8,
         .float32_atomic_add = pdevice->info.has_lsc,
         .float32_atomic_min_max = pdevice->info.ver >= 9,
         .float64 = pdevice->info.ver >= 8,
         .float64_atomic_min_max = pdevice->info.has_lsc,
         .fragment_shader_sample_interlock = pdevice->info.ver >= 9,
         .fragment_shader_pixel_interlock = pdevice->info.ver >= 9,
         .geometry_streams = true,
         /* When KHR_format_feature_flags2 is enabled, the read/write without
          * format is per format, so just report true. It's up to the
          * application to check.
          */
         .image_read_without_format = device->vk.enabled_extensions.KHR_format_feature_flags2,
         .image_write_without_format = true,
         .int8 = pdevice->info.ver >= 8,
         .int16 = pdevice->info.ver >= 8,
         .int64 = pdevice->info.ver >= 8,
         .int64_atomics = pdevice->info.ver >= 9 && pdevice->use_softpin,
         .integer_functions2 = pdevice->info.ver >= 8,
         .min_lod = true,
         .multiview = true,
         .physical_storage_buffer_address = pdevice->has_a64_buffer_access,
         .post_depth_coverage = pdevice->info.ver >= 9,
         .runtime_descriptor_array = true,
         .float_controls = pdevice->info.ver >= 8,
         .ray_tracing = pdevice->info.has_ray_tracing,
         .shader_clock = true,
         .shader_viewport_index_layer = true,
         .stencil_export = pdevice->info.ver >= 9,
         .storage_8bit = pdevice->info.ver >= 8,
         .storage_16bit = pdevice->info.ver >= 8,
         .subgroup_arithmetic = true,
         .subgroup_basic = true,
         .subgroup_ballot = true,
         .subgroup_dispatch = true,
         .subgroup_quad = true,
         .subgroup_uniform_control_flow = true,
         .subgroup_shuffle = true,
         .subgroup_vote = true,
         .tessellation = true,
         .transform_feedback = pdevice->info.ver >= 8,
         .variable_pointers = true,
         .vk_memory_model = true,
         .vk_memory_model_device_scope = true,
         .workgroup_memory_explicit_layout = true,
         .fragment_shading_rate = pdevice->info.ver >= 11,
      },
      .ubo_addr_format =
         anv_nir_ubo_addr_format(pdevice, device->robust_buffer_access),
      .ssbo_addr_format =
          anv_nir_ssbo_addr_format(pdevice, device->robust_buffer_access),
      .phys_ssbo_addr_format = nir_address_format_64bit_global,
      .push_const_addr_format = nir_address_format_logical,

      /* TODO: Consider changing this to an address format where the NULL
       * pointer is 0.  That might be a better format to play nice
       * with certain code / code generators.
       */
      .shared_addr_format = nir_address_format_32bit_offset,
      .debug = {
         .func = anv_spirv_nir_debug,
         .private_data = &spirv_debug_data,
      },
   };


   nir_shader *nir =
      spirv_to_nir(spirv, module->size / 4,
                   spec_entries, num_spec_entries,
                   stage, entrypoint_name, &spirv_options, nir_options);
   if (!nir) {
      free(spec_entries);
      return NULL;
   }

   assert(nir->info.stage == stage);
   nir_validate_shader(nir, "after spirv_to_nir");
   nir_validate_ssa_dominance(nir, "after spirv_to_nir");
   ralloc_steal(mem_ctx, nir);

   free(spec_entries);

   const struct nir_lower_sysvals_to_varyings_options sysvals_to_varyings = {
      .point_coord = true,
   };
   NIR_PASS_V(nir, nir_lower_sysvals_to_varyings, &sysvals_to_varyings);

   if (INTEL_DEBUG(intel_debug_flag_for_shader_stage(stage))) {
      fprintf(stderr, "NIR (from SPIR-V) for %s shader:\n",
              gl_shader_stage_name(stage));
      nir_print_shader(nir, stderr);
   }

   /* We have to lower away local constant initializers right before we
    * inline functions.  That way they get properly initialized at the top
    * of the function and not at the top of its caller.
    */
   NIR_PASS_V(nir, nir_lower_variable_initializers, nir_var_function_temp);
   NIR_PASS_V(nir, nir_lower_returns);
   NIR_PASS_V(nir, nir_inline_functions);
   NIR_PASS_V(nir, nir_copy_prop);
   NIR_PASS_V(nir, nir_opt_deref);

   /* Pick off the single entrypoint that we want */
   foreach_list_typed_safe(nir_function, func, node, &nir->functions) {
      if (!func->is_entrypoint)
         exec_node_remove(&func->node);
   }
   assert(exec_list_length(&nir->functions) == 1);

   /* Now that we've deleted all but the main function, we can go ahead and
    * lower the rest of the constant initializers.  We do this here so that
    * nir_remove_dead_variables and split_per_member_structs below see the
    * corresponding stores.
    */
   NIR_PASS_V(nir, nir_lower_variable_initializers, ~0);

   /* Split member structs.  We do this before lower_io_to_temporaries so that
    * it doesn't lower system values to temporaries by accident.
    */
   NIR_PASS_V(nir, nir_split_var_copies);
   NIR_PASS_V(nir, nir_split_per_member_structs);

   NIR_PASS_V(nir, nir_remove_dead_variables,
              nir_var_shader_in | nir_var_shader_out | nir_var_system_value |
              nir_var_shader_call_data | nir_var_ray_hit_attrib,
              NULL);

   NIR_PASS_V(nir, nir_propagate_invariant, false);
   NIR_PASS_V(nir, nir_lower_io_to_temporaries,
              nir_shader_get_entrypoint(nir), true, false);

   NIR_PASS_V(nir, nir_lower_frexp);

   /* Vulkan uses the separate-shader linking model */
   nir->info.separate_shader = true;

   brw_preprocess_nir(compiler, nir, NULL);

   return nir;
}

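/* Common initialization shared by graphics, compute and ray-tracing
 * pipelines: the base object, the relocation list backing the batch, the
 * pipeline's ralloc context and the executables array.
 */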
VkResult
anv_pipeline_init(struct anv_pipeline *pipeline,
                  struct anv_device *device,
                  enum anv_pipeline_type type,
                  VkPipelineCreateFlags flags,
                  const VkAllocationCallbacks *pAllocator)
{
   VkResult result;

   memset(pipeline, 0, sizeof(*pipeline));

   vk_object_base_init(&device->vk, &pipeline->base,
                       VK_OBJECT_TYPE_PIPELINE);
   pipeline->device = device;

   /* It's the job of the child class to provide actual backing storage for
    * the batch by setting batch.start, batch.next, and batch.end.
    */
   pipeline->batch.alloc = pAllocator ? pAllocator : &device->vk.alloc;
   pipeline->batch.relocs = &pipeline->batch_relocs;
   pipeline->batch.status = VK_SUCCESS;

   result = anv_reloc_list_init(&pipeline->batch_relocs,
                                pipeline->batch.alloc);
   if (result != VK_SUCCESS)
      return result;

   pipeline->mem_ctx = ralloc_context(NULL);

   pipeline->type = type;
   pipeline->flags = flags;

   util_dynarray_init(&pipeline->executables, pipeline->mem_ctx);

   return VK_SUCCESS;
}

void
anv_pipeline_finish(struct anv_pipeline *pipeline,
                    struct anv_device *device,
                    const VkAllocationCallbacks *pAllocator)
{
   anv_reloc_list_finish(&pipeline->batch_relocs,
                         pAllocator ? pAllocator : &device->vk.alloc);
   ralloc_free(pipeline->mem_ctx);
   vk_object_base_finish(&pipeline->base);
}

void anv_DestroyPipeline(
    VkDevice                                    _device,
    VkPipeline                                  _pipeline,
    const VkAllocationCallbacks*                pAllocator)
{
   ANV_FROM_HANDLE(anv_device, device, _device);
   ANV_FROM_HANDLE(anv_pipeline, pipeline, _pipeline);

   if (!pipeline)
      return;

   switch (pipeline->type) {
   case ANV_PIPELINE_GRAPHICS: {
      struct anv_graphics_pipeline *gfx_pipeline =
         anv_pipeline_to_graphics(pipeline);

      if (gfx_pipeline->blend_state.map)
         anv_state_pool_free(&device->dynamic_state_pool, gfx_pipeline->blend_state);
      if (gfx_pipeline->cps_state.map)
         anv_state_pool_free(&device->dynamic_state_pool, gfx_pipeline->cps_state);

      for (unsigned s = 0; s < ARRAY_SIZE(gfx_pipeline->shaders); s++) {
         if (gfx_pipeline->shaders[s])
            anv_shader_bin_unref(device, gfx_pipeline->shaders[s]);
      }
      break;
   }

   case ANV_PIPELINE_COMPUTE: {
      struct anv_compute_pipeline *compute_pipeline =
         anv_pipeline_to_compute(pipeline);

      if (compute_pipeline->cs)
         anv_shader_bin_unref(device, compute_pipeline->cs);

      break;
   }

   case ANV_PIPELINE_RAY_TRACING: {
      struct anv_ray_tracing_pipeline *rt_pipeline =
         anv_pipeline_to_ray_tracing(pipeline);

      util_dynarray_foreach(&rt_pipeline->shaders,
                            struct anv_shader_bin *, shader) {
         anv_shader_bin_unref(device, *shader);
      }
      break;
   }

   default:
      unreachable("invalid pipeline type");
   }

   anv_pipeline_finish(pipeline, device, pAllocator);
   vk_free2(&device->vk.alloc, pAllocator, pipeline);
}

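/* Translation table from Vulkan primitive topologies to the hardware
 * 3DPRIM_* values.
 */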
static const uint32_t vk_to_intel_primitive_type[] = {
   [VK_PRIMITIVE_TOPOLOGY_POINT_LIST]                    = _3DPRIM_POINTLIST,
   [VK_PRIMITIVE_TOPOLOGY_LINE_LIST]                     = _3DPRIM_LINELIST,
   [VK_PRIMITIVE_TOPOLOGY_LINE_STRIP]                    = _3DPRIM_LINESTRIP,
   [VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST]                 = _3DPRIM_TRILIST,
   [VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP]                = _3DPRIM_TRISTRIP,
   [VK_PRIMITIVE_TOPOLOGY_TRIANGLE_FAN]                  = _3DPRIM_TRIFAN,
   [VK_PRIMITIVE_TOPOLOGY_LINE_LIST_WITH_ADJACENCY]      = _3DPRIM_LINELIST_ADJ,
   [VK_PRIMITIVE_TOPOLOGY_LINE_STRIP_WITH_ADJACENCY]     = _3DPRIM_LINESTRIP_ADJ,
   [VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST_WITH_ADJACENCY]  = _3DPRIM_TRILIST_ADJ,
   [VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP_WITH_ADJACENCY] = _3DPRIM_TRISTRIP_ADJ,
};

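/* Fill out the parts of the sampler program key that are the same for every
 * pipeline on a given device.
 */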
static void
populate_sampler_prog_key(const struct intel_device_info *devinfo,
                          struct brw_sampler_prog_key_data *key)
{
   /* Almost all multisampled textures are compressed.  The only time when we
    * don't compress a multisampled texture is for 16x MSAA with a surface
    * width greater than 8k which is a bit of an edge case.  Since the sampler
    * just ignores the MCS parameter to ld2ms when MCS is disabled, it's safe
    * to tell the compiler to always assume compression.
    */
   key->compressed_multisample_layout_mask = ~0;

   /* SkyLake added support for 16x MSAA.  With this came a new message for
    * reading from a 16x MSAA surface with compression.  The new message was
    * needed because now the MCS data is 64 bits instead of 32 or lower as is
    * the case for 8x, 4x, and 2x.  The key->msaa_16 bit-field controls which
    * message we use.  Fortunately, the 16x message works for 8x, 4x, and 2x
    * so we can just use it unconditionally.  This may not be quite as
    * efficient but it saves us from recompiling.
    */
   if (devinfo->ver >= 9)
      key->msaa_16 = ~0;

   /* XXX: Handle texture swizzle on HSW- */
   for (int i = 0; i < MAX_SAMPLERS; i++) {
      /* Assume color sampler, no swizzling. (Works for BDW+) */
      key->swizzles[i] = SWIZZLE_XYZW;
   }
}

static void
populate_base_prog_key(const struct intel_device_info *devinfo,
                       enum brw_subgroup_size_type subgroup_size_type,
                       bool robust_buffer_acccess,
                       struct brw_base_prog_key *key)
{
   key->subgroup_size_type = subgroup_size_type;
   key->robust_buffer_access = robust_buffer_acccess;

   populate_sampler_prog_key(devinfo, &key->tex);
}

static void
populate_vs_prog_key(const struct intel_device_info *devinfo,
                     enum brw_subgroup_size_type subgroup_size_type,
                     bool robust_buffer_acccess,
                     struct brw_vs_prog_key *key)
{
   memset(key, 0, sizeof(*key));

   populate_base_prog_key(devinfo, subgroup_size_type,
                          robust_buffer_acccess, &key->base);

   /* XXX: Handle vertex input work-arounds */

   /* XXX: Handle sampler_prog_key */
}

static void
populate_tcs_prog_key(const struct intel_device_info *devinfo,
                      enum brw_subgroup_size_type subgroup_size_type,
                      bool robust_buffer_acccess,
                      unsigned input_vertices,
                      struct brw_tcs_prog_key *key)
{
   memset(key, 0, sizeof(*key));

   populate_base_prog_key(devinfo, subgroup_size_type,
                          robust_buffer_acccess, &key->base);

   key->input_vertices = input_vertices;
}

static void
populate_tes_prog_key(const struct intel_device_info *devinfo,
                      enum brw_subgroup_size_type subgroup_size_type,
                      bool robust_buffer_acccess,
                      struct brw_tes_prog_key *key)
{
   memset(key, 0, sizeof(*key));

   populate_base_prog_key(devinfo, subgroup_size_type,
                          robust_buffer_acccess, &key->base);
}

static void
populate_gs_prog_key(const struct intel_device_info *devinfo,
                     enum brw_subgroup_size_type subgroup_size_type,
                     bool robust_buffer_acccess,
                     struct brw_gs_prog_key *key)
{
   memset(key, 0, sizeof(*key));

   populate_base_prog_key(devinfo, subgroup_size_type,
                          robust_buffer_acccess, &key->base);
}

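/* Returns true if the fragment shader in this pipeline may run at coarse
 * pixel (fragment shading rate) granularity.  This is false when sample
 * shading is enabled or when the rate is statically 1x1 with KEEP combiners.
 */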
static bool
pipeline_has_coarse_pixel(const struct anv_graphics_pipeline *pipeline,
                          const VkPipelineFragmentShadingRateStateCreateInfoKHR *fsr_info)
{
   if (pipeline->sample_shading_enable)
      return false;

   /* Not dynamic & not specified for the pipeline. */
   if ((pipeline->dynamic_states & ANV_CMD_DIRTY_DYNAMIC_SHADING_RATE) == 0 && !fsr_info)
      return false;

   /* Not dynamic & the pipeline has a 1x1 fragment shading rate with no
    * possibility for any element of the pipeline to change the value.
    */
   if ((pipeline->dynamic_states & ANV_CMD_DIRTY_DYNAMIC_SHADING_RATE) == 0 &&
       fsr_info->fragmentSize.width <= 1 &&
       fsr_info->fragmentSize.height <= 1 &&
       fsr_info->combinerOps[0] == VK_FRAGMENT_SHADING_RATE_COMBINER_OP_KEEP_KHR &&
       fsr_info->combinerOps[1] == VK_FRAGMENT_SHADING_RATE_COMBINER_OP_KEEP_KHR)
      return false;

   return true;
}

static void
populate_wm_prog_key(const struct anv_graphics_pipeline *pipeline,
                     VkPipelineShaderStageCreateFlags flags,
                     bool robust_buffer_acccess,
                     const struct anv_subpass *subpass,
                     const VkPipelineMultisampleStateCreateInfo *ms_info,
                     const VkPipelineFragmentShadingRateStateCreateInfoKHR *fsr_info,
                     struct brw_wm_prog_key *key)
{
   const struct anv_device *device = pipeline->base.device;
   const struct intel_device_info *devinfo = &device->info;

   memset(key, 0, sizeof(*key));

   populate_base_prog_key(devinfo, flags, robust_buffer_acccess, &key->base);

   /* We set this to 0 here and set it to the actual value before we call
    * brw_compile_fs.
    */
   key->input_slots_valid = 0;

   /* Vulkan doesn't specify a default */
   key->high_quality_derivatives = false;

   /* XXX Vulkan doesn't appear to specify */
   key->clamp_fragment_color = false;

   key->ignore_sample_mask_out = false;

   assert(subpass->color_count <= MAX_RTS);
   for (uint32_t i = 0; i < subpass->color_count; i++) {
      if (subpass->color_attachments[i].attachment != VK_ATTACHMENT_UNUSED)
         key->color_outputs_valid |= (1 << i);
   }

   key->nr_color_regions = subpass->color_count;

   /* The hardware disables alpha-to-coverage when there is a SampleMask
    * output.  To reduce possible shader recompilations we would need to
    * know whether the shader has a SampleMask output before deciding
    * whether to emit the workaround code here.
    */
   key->alpha_to_coverage = ms_info && ms_info->alphaToCoverageEnable;

   /* Vulkan doesn't support fixed-function alpha test */
   key->alpha_test_replicate_alpha = false;

   if (ms_info) {
      /* We should probably pull this out of the shader, but it's fairly
       * harmless to compute it and then let dead-code take care of it.
       */
      if (ms_info->rasterizationSamples > 1) {
         key->persample_interp = ms_info->sampleShadingEnable &&
            (ms_info->minSampleShading * ms_info->rasterizationSamples) > 1;
         key->multisample_fbo = true;
      }

      key->frag_coord_adds_sample_pos = key->persample_interp;
   }

   key->coarse_pixel =
      device->vk.enabled_extensions.KHR_fragment_shading_rate &&
      pipeline_has_coarse_pixel(pipeline, fsr_info);
}

static void
populate_cs_prog_key(const struct intel_device_info *devinfo,
                     enum brw_subgroup_size_type subgroup_size_type,
                     bool robust_buffer_acccess,
                     struct brw_cs_prog_key *key)
{
   memset(key, 0, sizeof(*key));

   populate_base_prog_key(devinfo, subgroup_size_type,
                          robust_buffer_acccess, &key->base);
}

static void
populate_bs_prog_key(const struct intel_device_info *devinfo,
                     VkPipelineShaderStageCreateFlags flags,
                     bool robust_buffer_access,
                     struct brw_bs_prog_key *key)
{
   memset(key, 0, sizeof(*key));

   populate_base_prog_key(devinfo, flags, robust_buffer_access, &key->base);
}

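/* Per-stage scratch state used while building a pipeline: the source module
 * and entrypoint, the hashed keys used for cache lookups, the NIR, the bind
 * map, and finally the compiled prog_data, statistics and shader binary.
 */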
struct anv_pipeline_stage {
   gl_shader_stage stage;

   const struct vk_shader_module *module;
   const char *entrypoint;
   const VkSpecializationInfo *spec_info;

   unsigned char shader_sha1[20];

   union brw_any_prog_key key;

   struct {
      gl_shader_stage stage;
      unsigned char sha1[20];
   } cache_key;

   nir_shader *nir;

   struct anv_pipeline_binding surface_to_descriptor[256];
   struct anv_pipeline_binding sampler_to_descriptor[256];
   struct anv_pipeline_bind_map bind_map;

   union brw_any_prog_data prog_data;

   uint32_t num_stats;
   struct brw_compile_stats stats[3];
   char *disasm[3];

   VkPipelineCreationFeedbackEXT feedback;

   const unsigned *code;

   struct anv_shader_bin *bin;
};

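/* Hash everything that identifies a single shader stage's source: the
 * module SHA1, the entrypoint name, the stage, and any specialization
 * constants.
 */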
static void
anv_pipeline_hash_shader(const struct vk_shader_module *module,
                         const char *entrypoint,
                         gl_shader_stage stage,
                         const VkSpecializationInfo *spec_info,
                         unsigned char *sha1_out)
{
   struct mesa_sha1 ctx;
   _mesa_sha1_init(&ctx);

   _mesa_sha1_update(&ctx, module->sha1, sizeof(module->sha1));
   _mesa_sha1_update(&ctx, entrypoint, strlen(entrypoint));
   _mesa_sha1_update(&ctx, &stage, sizeof(stage));
   if (spec_info) {
      _mesa_sha1_update(&ctx, spec_info->pMapEntries,
                        spec_info->mapEntryCount *
                        sizeof(*spec_info->pMapEntries));
      _mesa_sha1_update(&ctx, spec_info->pData,
                        spec_info->dataSize);
   }

   _mesa_sha1_final(&ctx, sha1_out);
}

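/* Compute the pipeline-cache key for a whole graphics pipeline from the view
 * mask, the pipeline layout, the robustness setting and every active stage's
 * shader hash and program key.
 */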
static void
anv_pipeline_hash_graphics(struct anv_graphics_pipeline *pipeline,
                           struct anv_pipeline_layout *layout,
                           struct anv_pipeline_stage *stages,
                           unsigned char *sha1_out)
{
   struct mesa_sha1 ctx;
   _mesa_sha1_init(&ctx);

   _mesa_sha1_update(&ctx, &pipeline->subpass->view_mask,
                     sizeof(pipeline->subpass->view_mask));

   if (layout)
      _mesa_sha1_update(&ctx, layout->sha1, sizeof(layout->sha1));

   const bool rba = pipeline->base.device->robust_buffer_access;
   _mesa_sha1_update(&ctx, &rba, sizeof(rba));

   for (unsigned s = 0; s < ARRAY_SIZE(pipeline->shaders); s++) {
      if (stages[s].entrypoint) {
         _mesa_sha1_update(&ctx, stages[s].shader_sha1,
                           sizeof(stages[s].shader_sha1));
         _mesa_sha1_update(&ctx, &stages[s].key, brw_prog_key_size(s));
      }
   }

   _mesa_sha1_final(&ctx, sha1_out);
}

static void
anv_pipeline_hash_compute(struct anv_compute_pipeline *pipeline,
                          struct anv_pipeline_layout *layout,
                          struct anv_pipeline_stage *stage,
                          unsigned char *sha1_out)
{
   struct mesa_sha1 ctx;
   _mesa_sha1_init(&ctx);

   if (layout)
      _mesa_sha1_update(&ctx, layout->sha1, sizeof(layout->sha1));

   const bool rba = pipeline->base.device->robust_buffer_access;
   _mesa_sha1_update(&ctx, &rba, sizeof(rba));

   _mesa_sha1_update(&ctx, stage->shader_sha1,
                     sizeof(stage->shader_sha1));
   _mesa_sha1_update(&ctx, &stage->key.cs, sizeof(stage->key.cs));

   _mesa_sha1_final(&ctx, sha1_out);
}

static void
anv_pipeline_hash_ray_tracing_shader(struct anv_ray_tracing_pipeline *pipeline,
                                     struct anv_pipeline_layout *layout,
                                     struct anv_pipeline_stage *stage,
                                     unsigned char *sha1_out)
{
   struct mesa_sha1 ctx;
   _mesa_sha1_init(&ctx);

   if (layout != NULL)
      _mesa_sha1_update(&ctx, layout->sha1, sizeof(layout->sha1));

   const bool rba = pipeline->base.device->robust_buffer_access;
   _mesa_sha1_update(&ctx, &rba, sizeof(rba));

   _mesa_sha1_update(&ctx, stage->shader_sha1, sizeof(stage->shader_sha1));
   _mesa_sha1_update(&ctx, &stage->key, sizeof(stage->key.bs));

   _mesa_sha1_final(&ctx, sha1_out);
}

static void
anv_pipeline_hash_ray_tracing_combined_shader(struct anv_ray_tracing_pipeline *pipeline,
                                              struct anv_pipeline_layout *layout,
                                              struct anv_pipeline_stage *intersection,
                                              struct anv_pipeline_stage *any_hit,
                                              unsigned char *sha1_out)
{
   struct mesa_sha1 ctx;
   _mesa_sha1_init(&ctx);

   if (layout != NULL)
      _mesa_sha1_update(&ctx, layout->sha1, sizeof(layout->sha1));

   const bool rba = pipeline->base.device->robust_buffer_access;
   _mesa_sha1_update(&ctx, &rba, sizeof(rba));

   _mesa_sha1_update(&ctx, intersection->shader_sha1, sizeof(intersection->shader_sha1));
   _mesa_sha1_update(&ctx, &intersection->key, sizeof(intersection->key.bs));
   _mesa_sha1_update(&ctx, any_hit->shader_sha1, sizeof(any_hit->shader_sha1));
   _mesa_sha1_update(&ctx, &any_hit->key, sizeof(any_hit->key.bs));

   _mesa_sha1_final(&ctx, sha1_out);
}

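/* Return the stage's NIR, either from the pipeline cache or by compiling the
 * SPIR-V.  Freshly compiled NIR is uploaded to the cache for later reuse.
 */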
static nir_shader *
anv_pipeline_stage_get_nir(struct anv_pipeline *pipeline,
                           struct anv_pipeline_cache *cache,
                           void *mem_ctx,
                           struct anv_pipeline_stage *stage)
{
   const struct brw_compiler *compiler =
      pipeline->device->physical->compiler;
   const nir_shader_compiler_options *nir_options =
      compiler->glsl_compiler_options[stage->stage].NirOptions;
   nir_shader *nir;

   nir = anv_device_search_for_nir(pipeline->device, cache,
                                   nir_options,
                                   stage->shader_sha1,
                                   mem_ctx);
   if (nir) {
      assert(nir->info.stage == stage->stage);
      return nir;
   }

   nir = anv_shader_compile_to_nir(pipeline->device,
                                   mem_ctx,
                                   stage->module,
                                   stage->entrypoint,
                                   stage->stage,
                                   stage->spec_info);
   if (nir) {
      anv_device_upload_nir(pipeline->device, cache, nir, stage->shader_sha1);
      return nir;
   }

   return NULL;
}

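/* glsl_type size/align callback for shared memory: booleans take four bytes
 * and a vec3 is aligned like a vec4.
 */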
static void
shared_type_info(const struct glsl_type *type, unsigned *size, unsigned *align)
{
   assert(glsl_type_is_vector_or_scalar(type));

   uint32_t comp_size = glsl_type_is_boolean(type)
      ? 4 : glsl_get_bit_size(type) / 8;
   unsigned length = glsl_get_vector_elements(type);
   *size = comp_size * length,
   *align = comp_size * (length == 3 ? 4 : length);
}

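/* Apply the anv- and Intel-specific NIR lowering that depends on the
 * pipeline layout: input attachments, multiview, Y'CbCr textures, the
 * descriptor bind map, explicit I/O address formats, push constants and
 * shared memory.
 */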
static void
anv_pipeline_lower_nir(struct anv_pipeline *pipeline,
                       void *mem_ctx,
                       struct anv_pipeline_stage *stage,
                       struct anv_pipeline_layout *layout)
{
   const struct anv_physical_device *pdevice = pipeline->device->physical;
   const struct brw_compiler *compiler = pdevice->compiler;

   struct brw_stage_prog_data *prog_data = &stage->prog_data.base;
   nir_shader *nir = stage->nir;

   if (nir->info.stage == MESA_SHADER_FRAGMENT) {
      /* Check if sample shading is enabled in the shader and turn it on for
       * the pipeline regardless of whether sampleShadingEnable is set.
       */
      nir_shader_gather_info(nir, nir_shader_get_entrypoint(nir));
      if (nir->info.fs.uses_sample_shading)
         anv_pipeline_to_graphics(pipeline)->sample_shading_enable = true;

      NIR_PASS_V(nir, nir_lower_wpos_center,
                 anv_pipeline_to_graphics(pipeline)->sample_shading_enable);
      NIR_PASS_V(nir, nir_lower_input_attachments,
                 &(nir_input_attachment_options) {
                     .use_fragcoord_sysval = true,
                     .use_layer_id_sysval = true,
                 });
   }

   NIR_PASS_V(nir, anv_nir_lower_ycbcr_textures, layout);

   if (pipeline->type == ANV_PIPELINE_GRAPHICS) {
      NIR_PASS_V(nir, anv_nir_lower_multiview,
                 anv_pipeline_to_graphics(pipeline));
   }

   nir_shader_gather_info(nir, nir_shader_get_entrypoint(nir));

   NIR_PASS_V(nir, brw_nir_lower_storage_image, compiler->devinfo);

   NIR_PASS_V(nir, nir_lower_explicit_io, nir_var_mem_global,
              nir_address_format_64bit_global);
   NIR_PASS_V(nir, nir_lower_explicit_io, nir_var_mem_push_const,
              nir_address_format_32bit_offset);

   /* Apply the actual pipeline layout to UBOs, SSBOs, and textures */
   anv_nir_apply_pipeline_layout(pdevice,
                                 pipeline->device->robust_buffer_access,
                                 layout, nir, &stage->bind_map);

   NIR_PASS_V(nir, nir_lower_explicit_io, nir_var_mem_ubo,
              anv_nir_ubo_addr_format(pdevice,
                 pipeline->device->robust_buffer_access));
   NIR_PASS_V(nir, nir_lower_explicit_io, nir_var_mem_ssbo,
              anv_nir_ssbo_addr_format(pdevice,
                 pipeline->device->robust_buffer_access));

   /* First run copy-prop to get rid of all of the vec() that address
    * calculations often create and then constant-fold so that, when we
    * get to anv_nir_lower_ubo_loads, we can detect constant offsets.
    */
   NIR_PASS_V(nir, nir_copy_prop);
   NIR_PASS_V(nir, nir_opt_constant_folding);

   NIR_PASS_V(nir, anv_nir_lower_ubo_loads);

   /* We don't support non-uniform UBOs and non-uniform SSBO access is
    * handled naturally by falling back to A64 messages.
    */
   NIR_PASS_V(nir, nir_lower_non_uniform_access,
              &(nir_lower_non_uniform_access_options) {
                  .types = nir_lower_non_uniform_texture_access |
                           nir_lower_non_uniform_image_access,
                  .callback = NULL,
              });

   anv_nir_compute_push_layout(pdevice, pipeline->device->robust_buffer_access,
                               nir, prog_data, &stage->bind_map, mem_ctx);

   if (gl_shader_stage_uses_workgroup(nir->info.stage)) {
      if (!nir->info.shared_memory_explicit_layout) {
         NIR_PASS_V(nir, nir_lower_vars_to_explicit_types,
                    nir_var_mem_shared, shared_type_info);
      }

      NIR_PASS_V(nir, nir_lower_explicit_io,
                 nir_var_mem_shared, nir_address_format_32bit_offset);

      if (nir->info.zero_initialize_shared_memory &&
          nir->info.shared_size > 0) {
         /* The effective Shared Local Memory size is at least 1024 bytes and
          * is always rounded to a power of two, so it is OK to align the size
          * used by the shader to chunk_size -- which does simplify the logic.
          */
         const unsigned chunk_size = 16;
         const unsigned shared_size = ALIGN(nir->info.shared_size, chunk_size);
         assert(shared_size <=
                intel_calculate_slm_size(compiler->devinfo->ver, nir->info.shared_size));

         NIR_PASS_V(nir, nir_zero_initialize_shared_memory,
                    shared_size, chunk_size);
      }
   }

   stage->nir = nir;
}

static void
anv_pipeline_link_vs(const struct brw_compiler *compiler,
                     struct anv_pipeline_stage *vs_stage,
                     struct anv_pipeline_stage *next_stage)
{
   if (next_stage)
      brw_nir_link_shaders(compiler, vs_stage->nir, next_stage->nir);
}

static void
anv_pipeline_compile_vs(const struct brw_compiler *compiler,
                        void *mem_ctx,
                        struct anv_graphics_pipeline *pipeline,
                        struct anv_pipeline_stage *vs_stage)
{
   /* When using Primitive Replication for multiview, each view gets its own
    * position slot.
    */
   uint32_t pos_slots = pipeline->use_primitive_replication ?
      anv_subpass_view_count(pipeline->subpass) : 1;

   brw_compute_vue_map(compiler->devinfo,
                       &vs_stage->prog_data.vs.base.vue_map,
                       vs_stage->nir->info.outputs_written,
                       vs_stage->nir->info.separate_shader,
                       pos_slots);

   vs_stage->num_stats = 1;

   struct brw_compile_vs_params params = {
      .nir = vs_stage->nir,
      .key = &vs_stage->key.vs,
      .prog_data = &vs_stage->prog_data.vs,
      .stats = vs_stage->stats,
      .log_data = pipeline->base.device,
   };

   vs_stage->code = brw_compile_vs(compiler, mem_ctx, &params);
}

static void
merge_tess_info(struct shader_info *tes_info,
                const struct shader_info *tcs_info)
{
   /* The Vulkan 1.0.38 spec, section 21.1 Tessellator says:
    *
    *    "PointMode. Controls generation of points rather than triangles
    *     or lines. This functionality defaults to disabled, and is
    *     enabled if either shader stage includes the execution mode.
    *
    * and about Triangles, Quads, IsoLines, VertexOrderCw, VertexOrderCcw,
    * PointMode, SpacingEqual, SpacingFractionalEven, SpacingFractionalOdd,
    * and OutputVertices, it says:
    *
    *    "One mode must be set in at least one of the tessellation
    *     shader stages."
    *
    * So, the fields can be set in either the TCS or TES, but they must
    * agree if set in both.  Our backend looks at TES, so bitwise-or in
    * the values from the TCS.
    */
   assert(tcs_info->tess.tcs_vertices_out == 0 ||
          tes_info->tess.tcs_vertices_out == 0 ||
          tcs_info->tess.tcs_vertices_out == tes_info->tess.tcs_vertices_out);
   tes_info->tess.tcs_vertices_out |= tcs_info->tess.tcs_vertices_out;

   assert(tcs_info->tess.spacing == TESS_SPACING_UNSPECIFIED ||
          tes_info->tess.spacing == TESS_SPACING_UNSPECIFIED ||
          tcs_info->tess.spacing == tes_info->tess.spacing);
   tes_info->tess.spacing |= tcs_info->tess.spacing;

   assert(tcs_info->tess.primitive_mode == 0 ||
          tes_info->tess.primitive_mode == 0 ||
          tcs_info->tess.primitive_mode == tes_info->tess.primitive_mode);
   tes_info->tess.primitive_mode |= tcs_info->tess.primitive_mode;
   tes_info->tess.ccw |= tcs_info->tess.ccw;
   tes_info->tess.point_mode |= tcs_info->tess.point_mode;
}

static void
anv_pipeline_link_tcs(const struct brw_compiler *compiler,
                      struct anv_pipeline_stage *tcs_stage,
                      struct anv_pipeline_stage *tes_stage)
{
   assert(tes_stage && tes_stage->stage == MESA_SHADER_TESS_EVAL);

   brw_nir_link_shaders(compiler, tcs_stage->nir, tes_stage->nir);

   nir_lower_patch_vertices(tes_stage->nir,
                            tcs_stage->nir->info.tess.tcs_vertices_out,
                            NULL);

   /* Copy TCS info into the TES info */
   merge_tess_info(&tes_stage->nir->info, &tcs_stage->nir->info);

   /* Whacking the key after cache lookup is a bit sketchy, but all of
    * this comes from the SPIR-V, which is part of the hash used for the
    * pipeline cache.  So it should be safe.
    */
   tcs_stage->key.tcs.tes_primitive_mode =
      tes_stage->nir->info.tess.primitive_mode;
   tcs_stage->key.tcs.quads_workaround =
      compiler->devinfo->ver < 9 &&
      tes_stage->nir->info.tess.primitive_mode == 7 /* GL_QUADS */ &&
      tes_stage->nir->info.tess.spacing == TESS_SPACING_EQUAL;
}

static void
anv_pipeline_compile_tcs(const struct brw_compiler *compiler,
                         void *mem_ctx,
                         struct anv_device *device,
                         struct anv_pipeline_stage *tcs_stage,
                         struct anv_pipeline_stage *prev_stage)
{
   tcs_stage->key.tcs.outputs_written =
      tcs_stage->nir->info.outputs_written;
   tcs_stage->key.tcs.patch_outputs_written =
      tcs_stage->nir->info.patch_outputs_written;

   tcs_stage->num_stats = 1;
   tcs_stage->code = brw_compile_tcs(compiler, device, mem_ctx,
                                     &tcs_stage->key.tcs,
                                     &tcs_stage->prog_data.tcs,
                                     tcs_stage->nir, -1,
                                     tcs_stage->stats, NULL);
}

static void
anv_pipeline_link_tes(const struct brw_compiler *compiler,
                      struct anv_pipeline_stage *tes_stage,
                      struct anv_pipeline_stage *next_stage)
{
   if (next_stage)
      brw_nir_link_shaders(compiler, tes_stage->nir, next_stage->nir);
}

static void
anv_pipeline_compile_tes(const struct brw_compiler *compiler,
                         void *mem_ctx,
                         struct anv_device *device,
                         struct anv_pipeline_stage *tes_stage,
                         struct anv_pipeline_stage *tcs_stage)
{
   tes_stage->key.tes.inputs_read =
      tcs_stage->nir->info.outputs_written;
   tes_stage->key.tes.patch_inputs_read =
      tcs_stage->nir->info.patch_outputs_written;

   tes_stage->num_stats = 1;
   tes_stage->code = brw_compile_tes(compiler, device, mem_ctx,
                                     &tes_stage->key.tes,
                                     &tcs_stage->prog_data.tcs.base.vue_map,
                                     &tes_stage->prog_data.tes,
                                     tes_stage->nir, -1,
                                     tes_stage->stats, NULL);
}

static void
anv_pipeline_link_gs(const struct brw_compiler *compiler,
                     struct anv_pipeline_stage *gs_stage,
                     struct anv_pipeline_stage *next_stage)
{
   if (next_stage)
      brw_nir_link_shaders(compiler, gs_stage->nir, next_stage->nir);
}

static void
anv_pipeline_compile_gs(const struct brw_compiler *compiler,
                        void *mem_ctx,
                        struct anv_device *device,
                        struct anv_pipeline_stage *gs_stage,
                        struct anv_pipeline_stage *prev_stage)
{
   brw_compute_vue_map(compiler->devinfo,
                       &gs_stage->prog_data.gs.base.vue_map,
                       gs_stage->nir->info.outputs_written,
                       gs_stage->nir->info.separate_shader, 1);

   gs_stage->num_stats = 1;
   gs_stage->code = brw_compile_gs(compiler, device, mem_ctx,
                                   &gs_stage->key.gs,
                                   &gs_stage->prog_data.gs,
                                   gs_stage->nir, -1,
                                   gs_stage->stats, NULL);
}

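/* Set up the render-target bindings for the fragment stage and prune shader
 * outputs that map to unused attachments so dead code can clean them up,
 * then recompute the valid color outputs for the backend compiler.
 */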
static void
anv_pipeline_link_fs(const struct brw_compiler *compiler,
                     struct anv_pipeline_stage *stage)
{
   unsigned num_rt_bindings;
   struct anv_pipeline_binding rt_bindings[MAX_RTS];
   if (stage->key.wm.nr_color_regions > 0) {
      assert(stage->key.wm.nr_color_regions <= MAX_RTS);
      for (unsigned rt = 0; rt < stage->key.wm.nr_color_regions; rt++) {
         if (stage->key.wm.color_outputs_valid & BITFIELD_BIT(rt)) {
            rt_bindings[rt] = (struct anv_pipeline_binding) {
               .set = ANV_DESCRIPTOR_SET_COLOR_ATTACHMENTS,
               .index = rt,
            };
         } else {
            /* Setup a null render target */
            rt_bindings[rt] = (struct anv_pipeline_binding) {
               .set = ANV_DESCRIPTOR_SET_COLOR_ATTACHMENTS,
               .index = UINT32_MAX,
            };
         }
      }
      num_rt_bindings = stage->key.wm.nr_color_regions;
   } else {
      /* Setup a null render target */
      rt_bindings[0] = (struct anv_pipeline_binding) {
         .set = ANV_DESCRIPTOR_SET_COLOR_ATTACHMENTS,
         .index = UINT32_MAX,
      };
      num_rt_bindings = 1;
   }

   assert(num_rt_bindings <= MAX_RTS);
   assert(stage->bind_map.surface_count == 0);
   typed_memcpy(stage->bind_map.surface_to_descriptor,
                rt_bindings, num_rt_bindings);
   stage->bind_map.surface_count += num_rt_bindings;

   /* Now that we've set up the color attachments, we can go through and
    * eliminate any shader outputs that map to VK_ATTACHMENT_UNUSED in the
    * hopes that dead code can clean them up in this and any earlier shader
    * stages.
    */
   nir_function_impl *impl = nir_shader_get_entrypoint(stage->nir);
   bool deleted_output = false;
   nir_foreach_shader_out_variable_safe(var, stage->nir) {
      /* TODO: We don't delete depth/stencil writes.  We probably could if the
       * subpass doesn't have a depth/stencil attachment.
       */
      if (var->data.location < FRAG_RESULT_DATA0)
         continue;

      const unsigned rt = var->data.location - FRAG_RESULT_DATA0;

      /* If this is the RT at location 0 and we have alpha to coverage
       * enabled we still need that write because it will affect the coverage
       * mask even if it's never written to a color target.
       */
      if (rt == 0 && stage->key.wm.alpha_to_coverage)
         continue;

      const unsigned array_len =
         glsl_type_is_array(var->type) ? glsl_get_length(var->type) : 1;
      assert(rt + array_len <= MAX_RTS);

      if (rt >= MAX_RTS || !(stage->key.wm.color_outputs_valid &
                             BITFIELD_RANGE(rt, array_len))) {
         deleted_output = true;
         var->data.mode = nir_var_function_temp;
         exec_node_remove(&var->node);
         exec_list_push_tail(&impl->locals, &var->node);
      }
   }

   if (deleted_output)
      nir_fixup_deref_modes(stage->nir);

   /* Initially the valid outputs value is based off the renderpass color
    * attachments (see populate_wm_prog_key()), now that we've potentially
    * deleted variables that map to unused attachments, we need to update the
    * valid outputs for the backend compiler based on what output variables
    * are actually used. */
   stage->key.wm.color_outputs_valid = 0;
   nir_foreach_shader_out_variable_safe(var, stage->nir) {
      if (var->data.location < FRAG_RESULT_DATA0)
         continue;

      const unsigned rt = var->data.location - FRAG_RESULT_DATA0;
      const unsigned array_len =
         glsl_type_is_array(var->type) ? glsl_get_length(var->type) : 1;
      assert(rt + array_len <= MAX_RTS);

      stage->key.wm.color_outputs_valid |= BITFIELD_RANGE(rt, array_len);
   }

   /* We stored the number of subpass color attachments in nr_color_regions
    * when calculating the key for caching.  Now that we've computed the bind
    * map, we can reduce this to the actual max before we go into the back-end
    * compiler.
    */
   stage->key.wm.nr_color_regions =
      util_last_bit(stage->key.wm.color_outputs_valid);
}

static void
anv_pipeline_compile_fs(const struct brw_compiler *compiler,
                        void *mem_ctx,
                        struct anv_device *device,
                        struct anv_pipeline_stage *fs_stage,
                        struct anv_pipeline_stage *prev_stage)
{
   /* TODO: we could set this to 0 based on the information in nir_shader, but
    * we need this before we call spirv_to_nir.
    */
   assert(prev_stage);
   fs_stage->key.wm.input_slots_valid =
      prev_stage->prog_data.vue.vue_map.slots_valid;

   struct brw_compile_fs_params params = {
      .nir = fs_stage->nir,
      .key = &fs_stage->key.wm,
      .prog_data = &fs_stage->prog_data.wm,

      .allow_spilling = true,
      .stats = fs_stage->stats,
      .log_data = device,
   };

   fs_stage->code = brw_compile_fs(compiler, mem_ctx, &params);

   fs_stage->num_stats = (uint32_t)fs_stage->prog_data.wm.dispatch_8 +
                         (uint32_t)fs_stage->prog_data.wm.dispatch_16 +
                         (uint32_t)fs_stage->prog_data.wm.dispatch_32;

   if (fs_stage->key.wm.color_outputs_valid == 0 &&
       !fs_stage->prog_data.wm.has_side_effects &&
       !fs_stage->prog_data.wm.uses_omask &&
       !fs_stage->key.wm.alpha_to_coverage &&
       !fs_stage->prog_data.wm.uses_kill &&
       fs_stage->prog_data.wm.computed_depth_mode == BRW_PSCDEPTH_OFF &&
       !fs_stage->prog_data.wm.computed_stencil) {
      /* This fragment shader has no outputs and no side effects.  Go ahead
       * and return the code pointer so we don't accidentally think the
       * compile failed, but zero out prog_data, which will set program_size
       * to zero and disable the stage.
       */
      memset(&fs_stage->prog_data, 0, sizeof(fs_stage->prog_data));
   }
}

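/* Record a pipeline executable for VK_KHR_pipeline_executable_properties:
 * the compile statistics plus, when requested at pipeline creation, the NIR
 * and the shader disassembly.
 */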
1233 static void
anv_pipeline_add_executable(struct anv_pipeline * pipeline,struct anv_pipeline_stage * stage,struct brw_compile_stats * stats,uint32_t code_offset)1234 anv_pipeline_add_executable(struct anv_pipeline *pipeline,
1235                             struct anv_pipeline_stage *stage,
1236                             struct brw_compile_stats *stats,
1237                             uint32_t code_offset)
1238 {
1239    char *nir = NULL;
1240    if (stage->nir &&
1241        (pipeline->flags &
1242         VK_PIPELINE_CREATE_CAPTURE_INTERNAL_REPRESENTATIONS_BIT_KHR)) {
1243       nir = nir_shader_as_str(stage->nir, pipeline->mem_ctx);
1244    }
1245 
1246    char *disasm = NULL;
1247    if (stage->code &&
1248        (pipeline->flags &
1249         VK_PIPELINE_CREATE_CAPTURE_INTERNAL_REPRESENTATIONS_BIT_KHR)) {
1250       char *stream_data = NULL;
1251       size_t stream_size = 0;
1252       FILE *stream = open_memstream(&stream_data, &stream_size);
1253 
1254       uint32_t push_size = 0;
1255       for (unsigned i = 0; i < 4; i++)
1256          push_size += stage->bind_map.push_ranges[i].length;
1257       if (push_size > 0) {
1258          fprintf(stream, "Push constant ranges:\n");
1259          for (unsigned i = 0; i < 4; i++) {
1260             if (stage->bind_map.push_ranges[i].length == 0)
1261                continue;
1262 
1263             fprintf(stream, "    RANGE%d (%dB): ", i,
1264                     stage->bind_map.push_ranges[i].length * 32);
1265 
1266             switch (stage->bind_map.push_ranges[i].set) {
1267             case ANV_DESCRIPTOR_SET_NULL:
1268                fprintf(stream, "NULL");
1269                break;
1270 
1271             case ANV_DESCRIPTOR_SET_PUSH_CONSTANTS:
1272                fprintf(stream, "Vulkan push constants and API params");
1273                break;
1274 
1275             case ANV_DESCRIPTOR_SET_DESCRIPTORS:
1276                fprintf(stream, "Descriptor buffer for set %d (start=%dB)",
1277                        stage->bind_map.push_ranges[i].index,
1278                        stage->bind_map.push_ranges[i].start * 32);
1279                break;
1280 
1281             case ANV_DESCRIPTOR_SET_NUM_WORK_GROUPS:
1282                unreachable("gl_NumWorkgroups is never pushed");
1283 
1284             case ANV_DESCRIPTOR_SET_SHADER_CONSTANTS:
1285                fprintf(stream, "Inline shader constant data (start=%dB)",
1286                        stage->bind_map.push_ranges[i].start * 32);
1287                break;
1288 
1289             case ANV_DESCRIPTOR_SET_COLOR_ATTACHMENTS:
1290                unreachable("Color attachments can't be pushed");
1291 
1292             default:
1293                fprintf(stream, "UBO (set=%d binding=%d start=%dB)",
1294                        stage->bind_map.push_ranges[i].set,
1295                        stage->bind_map.push_ranges[i].index,
1296                        stage->bind_map.push_ranges[i].start * 32);
1297                break;
1298             }
1299             fprintf(stream, "\n");
1300          }
1301          fprintf(stream, "\n");
1302       }
1303 
1304       /* Creating this is far cheaper than it looks.  It's perfectly fine to
1305        * do it for every binary.
1306        */
1307       intel_disassemble(&pipeline->device->info,
1308                         stage->code, code_offset, stream);
1309 
1310       fclose(stream);
1311 
1312       /* Copy it to a ralloc'd thing */
1313       disasm = ralloc_size(pipeline->mem_ctx, stream_size + 1);
1314       memcpy(disasm, stream_data, stream_size);
1315       disasm[stream_size] = 0;
1316 
1317       free(stream_data);
1318    }
1319 
1320    const struct anv_pipeline_executable exe = {
1321       .stage = stage->stage,
1322       .stats = *stats,
1323       .nir = nir,
1324       .disasm = disasm,
1325    };
1326    util_dynarray_append(&pipeline->executables,
1327                         struct anv_pipeline_executable, exe);
1328 }
1329 
1330 static void
1331 anv_pipeline_add_executables(struct anv_pipeline *pipeline,
1332                              struct anv_pipeline_stage *stage,
1333                              struct anv_shader_bin *bin)
1334 {
1335    if (stage->stage == MESA_SHADER_FRAGMENT) {
1336       /* We pull the prog data and stats out of the anv_shader_bin because
1337        * the anv_pipeline_stage may not be fully populated if we successfully
1338        * looked up the shader in a cache.
1339        */
1340       const struct brw_wm_prog_data *wm_prog_data =
1341          (const struct brw_wm_prog_data *)bin->prog_data;
1342       struct brw_compile_stats *stats = bin->stats;
1343 
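      /* Expose one executable per SIMD width the fragment shader was compiled
       * for, each with its own stats entry and code offset into the uploaded
       * binary.
       */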
1344       if (wm_prog_data->dispatch_8) {
1345          anv_pipeline_add_executable(pipeline, stage, stats++, 0);
1346       }
1347 
1348       if (wm_prog_data->dispatch_16) {
1349          anv_pipeline_add_executable(pipeline, stage, stats++,
1350                                      wm_prog_data->prog_offset_16);
1351       }
1352 
1353       if (wm_prog_data->dispatch_32) {
1354          anv_pipeline_add_executable(pipeline, stage, stats++,
1355                                      wm_prog_data->prog_offset_32);
1356       }
1357    } else {
1358       anv_pipeline_add_executable(pipeline, stage, bin->stats, 0);
1359    }
1360 }
1361 
1362 static enum brw_subgroup_size_type
1363 anv_subgroup_size_type(gl_shader_stage stage,
1364                        VkPipelineShaderStageCreateFlags flags,
1365                        const VkPipelineShaderStageRequiredSubgroupSizeCreateInfoEXT *rss_info)
1366 {
1367    enum brw_subgroup_size_type subgroup_size_type;
1368 
1369    if (rss_info) {
1370       assert(stage == MESA_SHADER_COMPUTE);
1371       /* These enum values are expressly chosen to be equal to the subgroup
1372        * size that they require.
1373        */
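      /* For example, a requiredSubgroupSize of 32 maps directly to
       * BRW_SUBGROUP_SIZE_REQUIRE_32, whose numeric value is 32.
       */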
1374       assert(rss_info->requiredSubgroupSize == 8 ||
1375              rss_info->requiredSubgroupSize == 16 ||
1376              rss_info->requiredSubgroupSize == 32);
1377       subgroup_size_type = rss_info->requiredSubgroupSize;
1378    } else if (flags & VK_PIPELINE_SHADER_STAGE_CREATE_ALLOW_VARYING_SUBGROUP_SIZE_BIT_EXT) {
1379       subgroup_size_type = BRW_SUBGROUP_SIZE_VARYING;
1380    } else if (flags & VK_PIPELINE_SHADER_STAGE_CREATE_REQUIRE_FULL_SUBGROUPS_BIT_EXT) {
1381       assert(stage == MESA_SHADER_COMPUTE);
1382       /* If the client expressly requests full subgroups but doesn't specify
1383        * a subgroup size and doesn't allow varying subgroups either, we need
1384        * to pick one.  So we specify the API value of 32.  Performance will
1385        * likely be terrible in this case but there's nothing we can do about
1386        * that.  The client should have chosen a size.
1387        */
1388       subgroup_size_type = BRW_SUBGROUP_SIZE_REQUIRE_32;
1389    } else {
1390       subgroup_size_type = BRW_SUBGROUP_SIZE_API_CONSTANT;
1391    }
1392 
1393    return subgroup_size_type;
1394 }
1395 
1396 static void
1397 anv_pipeline_init_from_cached_graphics(struct anv_graphics_pipeline *pipeline)
1398 {
1399    /* TODO: Cache this pipeline-wide information. */
1400 
1401    if (anv_pipeline_is_primitive(pipeline)) {
1402       /* Primitive replication depends on information from all the shaders.
1403        * Recover this bit from the fact that the vertex shader ends up with
1404        * more than one position slot when primitive replication is in use.
1405        */
1406       assert(pipeline->active_stages & VK_SHADER_STAGE_VERTEX_BIT);
1407       int pos_slots = 0;
1408       const struct brw_vue_prog_data *vue_prog_data =
1409          (const void *) pipeline->shaders[MESA_SHADER_VERTEX]->prog_data;
1410       const struct brw_vue_map *vue_map = &vue_prog_data->vue_map;
1411       for (int i = 0; i < vue_map->num_slots; i++) {
1412          if (vue_map->slot_to_varying[i] == VARYING_SLOT_POS)
1413             pos_slots++;
1414       }
1415       pipeline->use_primitive_replication = pos_slots > 1;
1416    }
1417 }
1418 
1419 static VkResult
1420 anv_pipeline_compile_graphics(struct anv_graphics_pipeline *pipeline,
1421                               struct anv_pipeline_cache *cache,
1422                               const VkGraphicsPipelineCreateInfo *info)
1423 {
1424    VkPipelineCreationFeedbackEXT pipeline_feedback = {
1425       .flags = VK_PIPELINE_CREATION_FEEDBACK_VALID_BIT_EXT,
1426    };
1427    int64_t pipeline_start = os_time_get_nano();
1428 
1429    const struct brw_compiler *compiler = pipeline->base.device->physical->compiler;
1430    struct anv_pipeline_stage stages[MESA_SHADER_STAGES] = {};
1431 
1432    /* Information on which states are considered dynamic. */
1433    const VkPipelineDynamicStateCreateInfo *dyn_info =
1434       info->pDynamicState;
1435    uint32_t dynamic_states = 0;
1436    if (dyn_info) {
1437       for (unsigned i = 0; i < dyn_info->dynamicStateCount; i++)
1438          dynamic_states |=
1439             anv_cmd_dirty_bit_for_vk_dynamic_state(dyn_info->pDynamicStates[i]);
1440    }
1441 
1442    VkResult result;
1443    for (uint32_t i = 0; i < info->stageCount; i++) {
1444       const VkPipelineShaderStageCreateInfo *sinfo = &info->pStages[i];
1445       gl_shader_stage stage = vk_to_mesa_shader_stage(sinfo->stage);
1446 
1447       int64_t stage_start = os_time_get_nano();
1448 
1449       stages[stage].stage = stage;
1450       stages[stage].module = vk_shader_module_from_handle(sinfo->module);
1451       stages[stage].entrypoint = sinfo->pName;
1452       stages[stage].spec_info = sinfo->pSpecializationInfo;
1453       anv_pipeline_hash_shader(stages[stage].module,
1454                                stages[stage].entrypoint,
1455                                stage,
1456                                stages[stage].spec_info,
1457                                stages[stage].shader_sha1);
1458 
1459       enum brw_subgroup_size_type subgroup_size_type =
1460          anv_subgroup_size_type(stage, sinfo->flags, NULL);
1461 
1462       const struct intel_device_info *devinfo = &pipeline->base.device->info;
1463       switch (stage) {
1464       case MESA_SHADER_VERTEX:
1465          populate_vs_prog_key(devinfo, subgroup_size_type,
1466                               pipeline->base.device->robust_buffer_access,
1467                               &stages[stage].key.vs);
1468          break;
1469       case MESA_SHADER_TESS_CTRL:
1470          populate_tcs_prog_key(devinfo, subgroup_size_type,
1471                                pipeline->base.device->robust_buffer_access,
1472                                info->pTessellationState->patchControlPoints,
1473                                &stages[stage].key.tcs);
1474          break;
1475       case MESA_SHADER_TESS_EVAL:
1476          populate_tes_prog_key(devinfo, subgroup_size_type,
1477                                pipeline->base.device->robust_buffer_access,
1478                                &stages[stage].key.tes);
1479          break;
1480       case MESA_SHADER_GEOMETRY:
1481          populate_gs_prog_key(devinfo, subgroup_size_type,
1482                               pipeline->base.device->robust_buffer_access,
1483                               &stages[stage].key.gs);
1484          break;
1485       case MESA_SHADER_FRAGMENT: {
1486          const bool raster_enabled =
1487             !info->pRasterizationState->rasterizerDiscardEnable ||
1488             dynamic_states & ANV_CMD_DIRTY_DYNAMIC_RASTERIZER_DISCARD_ENABLE;
1489          populate_wm_prog_key(pipeline, subgroup_size_type,
1490                               pipeline->base.device->robust_buffer_access,
1491                               pipeline->subpass,
1492                               raster_enabled ? info->pMultisampleState : NULL,
1493                               vk_find_struct_const(info->pNext,
1494                                                    PIPELINE_FRAGMENT_SHADING_RATE_STATE_CREATE_INFO_KHR),
1495                               &stages[stage].key.wm);
1496          break;
1497       }
1498       default:
1499          unreachable("Invalid graphics shader stage");
1500       }
1501 
1502       stages[stage].feedback.duration += os_time_get_nano() - stage_start;
1503       stages[stage].feedback.flags |= VK_PIPELINE_CREATION_FEEDBACK_VALID_BIT_EXT;
1504    }
1505 
1506    assert(pipeline->active_stages & VK_SHADER_STAGE_VERTEX_BIT);
1507 
1508    ANV_FROM_HANDLE(anv_pipeline_layout, layout, info->layout);
1509 
1510    unsigned char sha1[20];
1511    anv_pipeline_hash_graphics(pipeline, layout, stages, sha1);
1512 
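   /* Each stage's cache key is the pipeline-wide hash plus the stage enum, so
    * two stages of the same pipeline never collide in the cache.
    */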
1513    for (unsigned s = 0; s < ARRAY_SIZE(pipeline->shaders); s++) {
1514       if (!stages[s].entrypoint)
1515          continue;
1516 
1517       stages[s].cache_key.stage = s;
1518       memcpy(stages[s].cache_key.sha1, sha1, sizeof(sha1));
1519    }
1520 
1521    const bool skip_cache_lookup =
1522       (pipeline->base.flags & VK_PIPELINE_CREATE_CAPTURE_INTERNAL_REPRESENTATIONS_BIT_KHR);
1523 
1524    if (!skip_cache_lookup) {
1525       unsigned found = 0;
1526       unsigned cache_hits = 0;
1527       for (unsigned s = 0; s < ARRAY_SIZE(pipeline->shaders); s++) {
1528          if (!stages[s].entrypoint)
1529             continue;
1530 
1531          int64_t stage_start = os_time_get_nano();
1532 
1533          bool cache_hit;
1534          struct anv_shader_bin *bin =
1535             anv_device_search_for_kernel(pipeline->base.device, cache,
1536                                          &stages[s].cache_key,
1537                                          sizeof(stages[s].cache_key), &cache_hit);
1538          if (bin) {
1539             found++;
1540             pipeline->shaders[s] = bin;
1541          }
1542 
1543          if (cache_hit) {
1544             cache_hits++;
1545             stages[s].feedback.flags |=
1546                VK_PIPELINE_CREATION_FEEDBACK_APPLICATION_PIPELINE_CACHE_HIT_BIT_EXT;
1547          }
1548          stages[s].feedback.duration += os_time_get_nano() - stage_start;
1549       }
1550 
1551       if (found == __builtin_popcount(pipeline->active_stages)) {
1552          if (cache_hits == found) {
1553             pipeline_feedback.flags |=
1554                VK_PIPELINE_CREATION_FEEDBACK_APPLICATION_PIPELINE_CACHE_HIT_BIT_EXT;
1555          }
1556          /* We found all our shaders in the cache.  We're done. */
1557          for (unsigned s = 0; s < ARRAY_SIZE(pipeline->shaders); s++) {
1558             if (!stages[s].entrypoint)
1559                continue;
1560 
1561             anv_pipeline_add_executables(&pipeline->base, &stages[s],
1562                                          pipeline->shaders[s]);
1563          }
1564          anv_pipeline_init_from_cached_graphics(pipeline);
1565          goto done;
1566       } else if (found > 0) {
1567          /* We found some but not all of our shaders.  This shouldn't happen
1568           * most of the time but it can if we have a partially populated
1569           * pipeline cache.
1570           */
1571          assert(found < __builtin_popcount(pipeline->active_stages));
1572 
1573          vk_perf(VK_LOG_OBJS(&cache->base),
1574                  "Found a partial pipeline in the cache.  This is "
1575                  "most likely caused by an incomplete pipeline cache "
1576                  "import or export");
1577 
1578          /* We're going to have to recompile anyway, so just throw away our
1579           * references to the shaders in the cache.  We'll get them out of the
1580           * cache again as part of the compilation process.
1581           */
1582          for (unsigned s = 0; s < ARRAY_SIZE(pipeline->shaders); s++) {
1583             stages[s].feedback.flags = 0;
1584             if (pipeline->shaders[s]) {
1585                anv_shader_bin_unref(pipeline->base.device, pipeline->shaders[s]);
1586                pipeline->shaders[s] = NULL;
1587             }
1588          }
1589       }
1590    }
1591 
1592    if (info->flags & VK_PIPELINE_CREATE_FAIL_ON_PIPELINE_COMPILE_REQUIRED_BIT_EXT)
1593       return VK_PIPELINE_COMPILE_REQUIRED_EXT;
1594 
1595    void *pipeline_ctx = ralloc_context(NULL);
1596 
1597    for (unsigned s = 0; s < ARRAY_SIZE(pipeline->shaders); s++) {
1598       if (!stages[s].entrypoint)
1599          continue;
1600 
1601       int64_t stage_start = os_time_get_nano();
1602 
1603       assert(stages[s].stage == s);
1604       assert(pipeline->shaders[s] == NULL);
1605 
1606       stages[s].bind_map = (struct anv_pipeline_bind_map) {
1607          .surface_to_descriptor = stages[s].surface_to_descriptor,
1608          .sampler_to_descriptor = stages[s].sampler_to_descriptor
1609       };
1610 
1611       stages[s].nir = anv_pipeline_stage_get_nir(&pipeline->base, cache,
1612                                                  pipeline_ctx,
1613                                                  &stages[s]);
1614       if (stages[s].nir == NULL) {
1615          result = vk_error(pipeline, VK_ERROR_UNKNOWN);
1616          goto fail;
1617       }
1618 
1619       /* This is rather ugly.
1620        *
1621        * Any variable annotated as interpolated by sample essentially disables
1622        * coarse pixel shading. Unfortunately the CTS tests exercising this set
1623        * the varying value in the previous stage using a constant. Our NIR
1624        * infrastructure is clever enough to look up variables across stages and
1625        * constant fold, removing the variable. So in order to comply with the
1626        * CTS we have to check the variables here.
1627        */
1628       if (s == MESA_SHADER_FRAGMENT) {
1629          nir_foreach_variable_in_list(var, &stages[s].nir->variables) {
1630             if (var->data.sample) {
1631                stages[s].key.wm.coarse_pixel = false;
1632                break;
1633             }
1634          }
1635       }
1636 
1637       stages[s].feedback.duration += os_time_get_nano() - stage_start;
1638    }
1639 
1640    /* Walk backwards to link, so each stage sees its consumer (next_stage) */
1641    struct anv_pipeline_stage *next_stage = NULL;
1642    for (int s = ARRAY_SIZE(pipeline->shaders) - 1; s >= 0; s--) {
1643       if (!stages[s].entrypoint)
1644          continue;
1645 
1646       switch (s) {
1647       case MESA_SHADER_VERTEX:
1648          anv_pipeline_link_vs(compiler, &stages[s], next_stage);
1649          break;
1650       case MESA_SHADER_TESS_CTRL:
1651          anv_pipeline_link_tcs(compiler, &stages[s], next_stage);
1652          break;
1653       case MESA_SHADER_TESS_EVAL:
1654          anv_pipeline_link_tes(compiler, &stages[s], next_stage);
1655          break;
1656       case MESA_SHADER_GEOMETRY:
1657          anv_pipeline_link_gs(compiler, &stages[s], next_stage);
1658          break;
1659       case MESA_SHADER_FRAGMENT:
1660          anv_pipeline_link_fs(compiler, &stages[s]);
1661          break;
1662       default:
1663          unreachable("Invalid graphics shader stage");
1664       }
1665 
1666       next_stage = &stages[s];
1667    }
1668 
1669    if (pipeline->base.device->info.ver >= 12 &&
1670        pipeline->subpass->view_mask != 0) {
1671       /* For some pipelines HW Primitive Replication can be used instead of
1672        * instancing to implement Multiview.  This depends on how viewIndex is
1673        * used in all the active shaders, so this check can't be done per
1674        * individual shader.
1675        */
1676       nir_shader *shaders[MESA_SHADER_STAGES] = {};
1677       for (unsigned s = 0; s < MESA_SHADER_STAGES; s++)
1678          shaders[s] = stages[s].nir;
1679 
1680       pipeline->use_primitive_replication =
1681          anv_check_for_primitive_replication(shaders, pipeline);
1682    } else {
1683       pipeline->use_primitive_replication = false;
1684    }
1685 
1686    struct anv_pipeline_stage *prev_stage = NULL;
1687    for (unsigned s = 0; s < ARRAY_SIZE(pipeline->shaders); s++) {
1688       if (!stages[s].entrypoint)
1689          continue;
1690 
1691       int64_t stage_start = os_time_get_nano();
1692 
1693       void *stage_ctx = ralloc_context(NULL);
1694 
1695       anv_pipeline_lower_nir(&pipeline->base, stage_ctx, &stages[s], layout);
1696 
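      /* If the backend asks for unified interfaces between stages, make the
       * producer's written outputs and the consumer's read inputs mirror each
       * other; the tessellation level varyings are deliberately left out of
       * this masking.
       */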
1697       if (prev_stage && compiler->glsl_compiler_options[s].NirOptions->unify_interfaces) {
1698          prev_stage->nir->info.outputs_written |= stages[s].nir->info.inputs_read &
1699                   ~(VARYING_BIT_TESS_LEVEL_INNER | VARYING_BIT_TESS_LEVEL_OUTER);
1700          stages[s].nir->info.inputs_read |= prev_stage->nir->info.outputs_written &
1701                   ~(VARYING_BIT_TESS_LEVEL_INNER | VARYING_BIT_TESS_LEVEL_OUTER);
1702          prev_stage->nir->info.patch_outputs_written |= stages[s].nir->info.patch_inputs_read;
1703          stages[s].nir->info.patch_inputs_read |= prev_stage->nir->info.patch_outputs_written;
1704       }
1705 
1706       ralloc_free(stage_ctx);
1707 
1708       stages[s].feedback.duration += os_time_get_nano() - stage_start;
1709 
1710       prev_stage = &stages[s];
1711    }
1712 
1713    prev_stage = NULL;
1714    for (unsigned s = 0; s < MESA_SHADER_STAGES; s++) {
1715       if (!stages[s].entrypoint)
1716          continue;
1717 
1718       int64_t stage_start = os_time_get_nano();
1719 
1720       void *stage_ctx = ralloc_context(NULL);
1721 
1722       nir_xfb_info *xfb_info = NULL;
1723       if (s == MESA_SHADER_VERTEX ||
1724           s == MESA_SHADER_TESS_EVAL ||
1725           s == MESA_SHADER_GEOMETRY)
1726          xfb_info = nir_gather_xfb_info(stages[s].nir, stage_ctx);
1727 
1728       switch (s) {
1729       case MESA_SHADER_VERTEX:
1730          anv_pipeline_compile_vs(compiler, stage_ctx, pipeline,
1731                                  &stages[s]);
1732          break;
1733       case MESA_SHADER_TESS_CTRL:
1734          anv_pipeline_compile_tcs(compiler, stage_ctx, pipeline->base.device,
1735                                   &stages[s], prev_stage);
1736          break;
1737       case MESA_SHADER_TESS_EVAL:
1738          anv_pipeline_compile_tes(compiler, stage_ctx, pipeline->base.device,
1739                                   &stages[s], prev_stage);
1740          break;
1741       case MESA_SHADER_GEOMETRY:
1742          anv_pipeline_compile_gs(compiler, stage_ctx, pipeline->base.device,
1743                                  &stages[s], prev_stage);
1744          break;
1745       case MESA_SHADER_FRAGMENT:
1746          anv_pipeline_compile_fs(compiler, stage_ctx, pipeline->base.device,
1747                                  &stages[s], prev_stage);
1748          break;
1749       default:
1750          unreachable("Invalid graphics shader stage");
1751       }
1752       if (stages[s].code == NULL) {
1753          ralloc_free(stage_ctx);
1754          result = vk_error(pipeline->base.device, VK_ERROR_OUT_OF_HOST_MEMORY);
1755          goto fail;
1756       }
1757 
1758       anv_nir_validate_push_layout(&stages[s].prog_data.base,
1759                                    &stages[s].bind_map);
1760 
1761       struct anv_shader_bin *bin =
1762          anv_device_upload_kernel(pipeline->base.device, cache, s,
1763                                   &stages[s].cache_key,
1764                                   sizeof(stages[s].cache_key),
1765                                   stages[s].code,
1766                                   stages[s].prog_data.base.program_size,
1767                                   &stages[s].prog_data.base,
1768                                   brw_prog_data_size(s),
1769                                   stages[s].stats, stages[s].num_stats,
1770                                   xfb_info, &stages[s].bind_map);
1771       if (!bin) {
1772          ralloc_free(stage_ctx);
1773          result = vk_error(pipeline, VK_ERROR_OUT_OF_HOST_MEMORY);
1774          goto fail;
1775       }
1776 
1777       anv_pipeline_add_executables(&pipeline->base, &stages[s], bin);
1778 
1779       pipeline->shaders[s] = bin;
1780       ralloc_free(stage_ctx);
1781 
1782       stages[s].feedback.duration += os_time_get_nano() - stage_start;
1783 
1784       prev_stage = &stages[s];
1785    }
1786 
1787    ralloc_free(pipeline_ctx);
1788 
1789 done:
1790 
1791    if (pipeline->shaders[MESA_SHADER_FRAGMENT] &&
1792        pipeline->shaders[MESA_SHADER_FRAGMENT]->prog_data->program_size == 0) {
1793       /* This can happen if we decided to implicitly disable the fragment
1794        * shader.  See anv_pipeline_compile_fs().
1795        */
1796       anv_shader_bin_unref(pipeline->base.device,
1797                            pipeline->shaders[MESA_SHADER_FRAGMENT]);
1798       pipeline->shaders[MESA_SHADER_FRAGMENT] = NULL;
1799       pipeline->active_stages &= ~VK_SHADER_STAGE_FRAGMENT_BIT;
1800    }
1801 
1802    pipeline_feedback.duration = os_time_get_nano() - pipeline_start;
1803 
1804    const VkPipelineCreationFeedbackCreateInfoEXT *create_feedback =
1805       vk_find_struct_const(info->pNext, PIPELINE_CREATION_FEEDBACK_CREATE_INFO_EXT);
1806    if (create_feedback) {
1807       *create_feedback->pPipelineCreationFeedback = pipeline_feedback;
1808 
1809       assert(info->stageCount == create_feedback->pipelineStageCreationFeedbackCount);
1810       for (uint32_t i = 0; i < info->stageCount; i++) {
1811          gl_shader_stage s = vk_to_mesa_shader_stage(info->pStages[i].stage);
1812          create_feedback->pPipelineStageCreationFeedbacks[i] = stages[s].feedback;
1813       }
1814    }
1815 
1816    return VK_SUCCESS;
1817 
1818 fail:
1819    ralloc_free(pipeline_ctx);
1820 
1821    for (unsigned s = 0; s < ARRAY_SIZE(pipeline->shaders); s++) {
1822       if (pipeline->shaders[s])
1823          anv_shader_bin_unref(pipeline->base.device, pipeline->shaders[s]);
1824    }
1825 
1826    return result;
1827 }
1828 
1829 VkResult
1830 anv_pipeline_compile_cs(struct anv_compute_pipeline *pipeline,
1831                         struct anv_pipeline_cache *cache,
1832                         const VkComputePipelineCreateInfo *info,
1833                         const struct vk_shader_module *module,
1834                         const char *entrypoint,
1835                         const VkSpecializationInfo *spec_info)
1836 {
1837    VkPipelineCreationFeedbackEXT pipeline_feedback = {
1838       .flags = VK_PIPELINE_CREATION_FEEDBACK_VALID_BIT_EXT,
1839    };
1840    int64_t pipeline_start = os_time_get_nano();
1841 
1842    const struct brw_compiler *compiler = pipeline->base.device->physical->compiler;
1843 
1844    struct anv_pipeline_stage stage = {
1845       .stage = MESA_SHADER_COMPUTE,
1846       .module = module,
1847       .entrypoint = entrypoint,
1848       .spec_info = spec_info,
1849       .cache_key = {
1850          .stage = MESA_SHADER_COMPUTE,
1851       },
1852       .feedback = {
1853          .flags = VK_PIPELINE_CREATION_FEEDBACK_VALID_BIT_EXT,
1854       },
1855    };
1856    anv_pipeline_hash_shader(stage.module,
1857                             stage.entrypoint,
1858                             MESA_SHADER_COMPUTE,
1859                             stage.spec_info,
1860                             stage.shader_sha1);
1861 
1862    struct anv_shader_bin *bin = NULL;
1863 
1864    const VkPipelineShaderStageRequiredSubgroupSizeCreateInfoEXT *rss_info =
1865       vk_find_struct_const(info->stage.pNext,
1866                            PIPELINE_SHADER_STAGE_REQUIRED_SUBGROUP_SIZE_CREATE_INFO_EXT);
1867 
1868    const enum brw_subgroup_size_type subgroup_size_type =
1869       anv_subgroup_size_type(MESA_SHADER_COMPUTE, info->stage.flags, rss_info);
1870 
1871    populate_cs_prog_key(&pipeline->base.device->info, subgroup_size_type,
1872                         pipeline->base.device->robust_buffer_access,
1873                         &stage.key.cs);
1874 
1875    ANV_FROM_HANDLE(anv_pipeline_layout, layout, info->layout);
1876 
1877    const bool skip_cache_lookup =
1878       (pipeline->base.flags & VK_PIPELINE_CREATE_CAPTURE_INTERNAL_REPRESENTATIONS_BIT_KHR);
1879 
1880    anv_pipeline_hash_compute(pipeline, layout, &stage, stage.cache_key.sha1);
1881 
1882    bool cache_hit = false;
1883    if (!skip_cache_lookup) {
1884       bin = anv_device_search_for_kernel(pipeline->base.device, cache,
1885                                          &stage.cache_key,
1886                                          sizeof(stage.cache_key),
1887                                          &cache_hit);
1888    }
1889 
1890    if (bin == NULL &&
1891        (info->flags & VK_PIPELINE_CREATE_FAIL_ON_PIPELINE_COMPILE_REQUIRED_BIT_EXT))
1892       return VK_PIPELINE_COMPILE_REQUIRED_EXT;
1893 
1894    void *mem_ctx = ralloc_context(NULL);
1895    if (bin == NULL) {
1896       int64_t stage_start = os_time_get_nano();
1897 
1898       stage.bind_map = (struct anv_pipeline_bind_map) {
1899          .surface_to_descriptor = stage.surface_to_descriptor,
1900          .sampler_to_descriptor = stage.sampler_to_descriptor
1901       };
1902 
1903       /* Set up a binding for the gl_NumWorkGroups */
1904       stage.bind_map.surface_count = 1;
1905       stage.bind_map.surface_to_descriptor[0] = (struct anv_pipeline_binding) {
1906          .set = ANV_DESCRIPTOR_SET_NUM_WORK_GROUPS,
1907       };
1908 
1909       stage.nir = anv_pipeline_stage_get_nir(&pipeline->base, cache, mem_ctx, &stage);
1910       if (stage.nir == NULL) {
1911          ralloc_free(mem_ctx);
1912          return vk_error(pipeline, VK_ERROR_UNKNOWN);
1913       }
1914 
1915       NIR_PASS_V(stage.nir, anv_nir_add_base_work_group_id);
1916 
1917       anv_pipeline_lower_nir(&pipeline->base, mem_ctx, &stage, layout);
1918 
1919       NIR_PASS_V(stage.nir, brw_nir_lower_cs_intrinsics);
1920 
1921       stage.num_stats = 1;
1922 
1923       struct brw_compile_cs_params params = {
1924          .nir = stage.nir,
1925          .key = &stage.key.cs,
1926          .prog_data = &stage.prog_data.cs,
1927          .stats = stage.stats,
1928          .log_data = pipeline->base.device,
1929       };
1930 
1931       stage.code = brw_compile_cs(compiler, mem_ctx, &params);
1932       if (stage.code == NULL) {
1933          ralloc_free(mem_ctx);
1934          return vk_error(pipeline, VK_ERROR_OUT_OF_HOST_MEMORY);
1935       }
1936 
1937       anv_nir_validate_push_layout(&stage.prog_data.base, &stage.bind_map);
1938 
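      /* If the compiled shader never reads gl_NumWorkGroups, drop the surface
       * binding we reserved for it above by turning it into a null descriptor.
       */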
1939       if (!stage.prog_data.cs.uses_num_work_groups) {
1940          assert(stage.bind_map.surface_to_descriptor[0].set ==
1941                 ANV_DESCRIPTOR_SET_NUM_WORK_GROUPS);
1942          stage.bind_map.surface_to_descriptor[0].set = ANV_DESCRIPTOR_SET_NULL;
1943       }
1944 
1945       const unsigned code_size = stage.prog_data.base.program_size;
1946       bin = anv_device_upload_kernel(pipeline->base.device, cache,
1947                                      MESA_SHADER_COMPUTE,
1948                                      &stage.cache_key, sizeof(stage.cache_key),
1949                                      stage.code, code_size,
1950                                      &stage.prog_data.base,
1951                                      sizeof(stage.prog_data.cs),
1952                                      stage.stats, stage.num_stats,
1953                                      NULL, &stage.bind_map);
1954       if (!bin) {
1955          ralloc_free(mem_ctx);
1956          return vk_error(pipeline, VK_ERROR_OUT_OF_HOST_MEMORY);
1957       }
1958 
1959       stage.feedback.duration = os_time_get_nano() - stage_start;
1960    }
1961 
1962    anv_pipeline_add_executables(&pipeline->base, &stage, bin);
1963 
1964    ralloc_free(mem_ctx);
1965 
1966    if (cache_hit) {
1967       stage.feedback.flags |=
1968          VK_PIPELINE_CREATION_FEEDBACK_APPLICATION_PIPELINE_CACHE_HIT_BIT_EXT;
1969       pipeline_feedback.flags |=
1970          VK_PIPELINE_CREATION_FEEDBACK_APPLICATION_PIPELINE_CACHE_HIT_BIT_EXT;
1971    }
1972    pipeline_feedback.duration = os_time_get_nano() - pipeline_start;
1973 
1974    const VkPipelineCreationFeedbackCreateInfoEXT *create_feedback =
1975       vk_find_struct_const(info->pNext, PIPELINE_CREATION_FEEDBACK_CREATE_INFO_EXT);
1976    if (create_feedback) {
1977       *create_feedback->pPipelineCreationFeedback = pipeline_feedback;
1978 
1979       assert(create_feedback->pipelineStageCreationFeedbackCount == 1);
1980       create_feedback->pPipelineStageCreationFeedbacks[0] = stage.feedback;
1981    }
1982 
1983    pipeline->cs = bin;
1984 
1985    return VK_SUCCESS;
1986 }
1987 
1988 /**
1989  * Copy pipeline state not marked as dynamic.
1990  * Dynamic state is pipeline state which hasn't been provided at pipeline
1991  * creation time, but is dynamically provided afterwards using various
1992  * vkCmdSet* functions.
1993  *
1994  * The set of state considered "non_dynamic" is determined by the pieces of
1995  * state that have their corresponding VkDynamicState enums omitted from
1996  * VkPipelineDynamicStateCreateInfo::pDynamicStates.
1997  *
1998  * @param[out] pipeline    Destination non_dynamic state.
1999  * @param[in]  pCreateInfo Source of non_dynamic state to be copied.
2000  */
2001 static void
2002 copy_non_dynamic_state(struct anv_graphics_pipeline *pipeline,
2003                        const VkGraphicsPipelineCreateInfo *pCreateInfo)
2004 {
2005    anv_cmd_dirty_mask_t states = ANV_CMD_DIRTY_DYNAMIC_ALL;
2006    struct anv_subpass *subpass = pipeline->subpass;
2007 
2008    pipeline->dynamic_state = default_dynamic_state;
2009 
2010    states &= ~pipeline->dynamic_states;
2011 
2012    struct anv_dynamic_state *dynamic = &pipeline->dynamic_state;
2013 
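   /* Rasterizer discard only counts as statically enabled here when it is not
    * listed as a dynamic state; otherwise the real value is provided later
    * through the corresponding vkCmdSet* call.
    */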
2014    bool raster_discard =
2015       pCreateInfo->pRasterizationState->rasterizerDiscardEnable &&
2016       !(pipeline->dynamic_states & ANV_CMD_DIRTY_DYNAMIC_RASTERIZER_DISCARD_ENABLE);
2017 
2018    /* Section 9.2 of the Vulkan 1.0.15 spec says:
2019     *
2020     *    pViewportState is [...] NULL if the pipeline
2021     *    has rasterization disabled.
2022     */
2023    if (!raster_discard) {
2024       assert(pCreateInfo->pViewportState);
2025 
2026       dynamic->viewport.count = pCreateInfo->pViewportState->viewportCount;
2027       if (states & ANV_CMD_DIRTY_DYNAMIC_VIEWPORT) {
2028          typed_memcpy(dynamic->viewport.viewports,
2029                      pCreateInfo->pViewportState->pViewports,
2030                      pCreateInfo->pViewportState->viewportCount);
2031       }
2032 
2033       dynamic->scissor.count = pCreateInfo->pViewportState->scissorCount;
2034       if (states & ANV_CMD_DIRTY_DYNAMIC_SCISSOR) {
2035          typed_memcpy(dynamic->scissor.scissors,
2036                      pCreateInfo->pViewportState->pScissors,
2037                      pCreateInfo->pViewportState->scissorCount);
2038       }
2039    }
2040 
2041    if (states & ANV_CMD_DIRTY_DYNAMIC_LINE_WIDTH) {
2042       assert(pCreateInfo->pRasterizationState);
2043       dynamic->line_width = pCreateInfo->pRasterizationState->lineWidth;
2044    }
2045 
2046    if (states & ANV_CMD_DIRTY_DYNAMIC_DEPTH_BIAS) {
2047       assert(pCreateInfo->pRasterizationState);
2048       dynamic->depth_bias.bias =
2049          pCreateInfo->pRasterizationState->depthBiasConstantFactor;
2050       dynamic->depth_bias.clamp =
2051          pCreateInfo->pRasterizationState->depthBiasClamp;
2052       dynamic->depth_bias.slope =
2053          pCreateInfo->pRasterizationState->depthBiasSlopeFactor;
2054    }
2055 
2056    if (states & ANV_CMD_DIRTY_DYNAMIC_CULL_MODE) {
2057       assert(pCreateInfo->pRasterizationState);
2058       dynamic->cull_mode =
2059          pCreateInfo->pRasterizationState->cullMode;
2060    }
2061 
2062    if (states & ANV_CMD_DIRTY_DYNAMIC_FRONT_FACE) {
2063       assert(pCreateInfo->pRasterizationState);
2064       dynamic->front_face =
2065          pCreateInfo->pRasterizationState->frontFace;
2066    }
2067 
2068    if ((states & ANV_CMD_DIRTY_DYNAMIC_PRIMITIVE_TOPOLOGY) &&
2069          (pipeline->active_stages & VK_SHADER_STAGE_VERTEX_BIT)) {
2070       assert(pCreateInfo->pInputAssemblyState);
2071       dynamic->primitive_topology = pCreateInfo->pInputAssemblyState->topology;
2072    }
2073 
2074    if (states & ANV_CMD_DIRTY_DYNAMIC_RASTERIZER_DISCARD_ENABLE) {
2075       assert(pCreateInfo->pRasterizationState);
2076       dynamic->raster_discard =
2077          pCreateInfo->pRasterizationState->rasterizerDiscardEnable;
2078    }
2079 
2080    if (states & ANV_CMD_DIRTY_DYNAMIC_DEPTH_BIAS_ENABLE) {
2081       assert(pCreateInfo->pRasterizationState);
2082       dynamic->depth_bias_enable =
2083          pCreateInfo->pRasterizationState->depthBiasEnable;
2084    }
2085 
2086    if ((states & ANV_CMD_DIRTY_DYNAMIC_PRIMITIVE_RESTART_ENABLE) &&
2087          (pipeline->active_stages & VK_SHADER_STAGE_VERTEX_BIT)) {
2088       assert(pCreateInfo->pInputAssemblyState);
2089       dynamic->primitive_restart_enable =
2090          pCreateInfo->pInputAssemblyState->primitiveRestartEnable;
2091    }
2092 
2093    /* Section 9.2 of the Vulkan 1.0.15 spec says:
2094     *
2095     *    pColorBlendState is [...] NULL if the pipeline has rasterization
2096     *    disabled or if the subpass of the render pass the pipeline is
2097     *    created against does not use any color attachments.
2098     */
2099    bool uses_color_att = false;
2100    for (unsigned i = 0; i < subpass->color_count; ++i) {
2101       if (subpass->color_attachments[i].attachment != VK_ATTACHMENT_UNUSED) {
2102          uses_color_att = true;
2103          break;
2104       }
2105    }
2106 
2107    if (uses_color_att && !raster_discard) {
2108       assert(pCreateInfo->pColorBlendState);
2109 
2110       if (states & ANV_CMD_DIRTY_DYNAMIC_BLEND_CONSTANTS)
2111          typed_memcpy(dynamic->blend_constants,
2112                      pCreateInfo->pColorBlendState->blendConstants, 4);
2113    }
2114 
2115    /* If there is no depthstencil attachment, then don't read
2116     * pDepthStencilState. The Vulkan spec states that pDepthStencilState may
2117     * be NULL in this case. Even if pDepthStencilState is non-NULL, there is
2118     * no need to override the depthstencil defaults in
2119     * anv_pipeline::dynamic_state when there is no depthstencil attachment.
2120     *
2121     * Section 9.2 of the Vulkan 1.0.15 spec says:
2122     *
2123     *    pDepthStencilState is [...] NULL if the pipeline has rasterization
2124     *    disabled or if the subpass of the render pass the pipeline is created
2125     *    against does not use a depth/stencil attachment.
2126     */
2127    if (!raster_discard && subpass->depth_stencil_attachment) {
2128       assert(pCreateInfo->pDepthStencilState);
2129 
2130       if (states & ANV_CMD_DIRTY_DYNAMIC_DEPTH_BOUNDS) {
2131          dynamic->depth_bounds.min =
2132             pCreateInfo->pDepthStencilState->minDepthBounds;
2133          dynamic->depth_bounds.max =
2134             pCreateInfo->pDepthStencilState->maxDepthBounds;
2135       }
2136 
2137       if (states & ANV_CMD_DIRTY_DYNAMIC_STENCIL_COMPARE_MASK) {
2138          dynamic->stencil_compare_mask.front =
2139             pCreateInfo->pDepthStencilState->front.compareMask;
2140          dynamic->stencil_compare_mask.back =
2141             pCreateInfo->pDepthStencilState->back.compareMask;
2142       }
2143 
2144       if (states & ANV_CMD_DIRTY_DYNAMIC_STENCIL_WRITE_MASK) {
2145          dynamic->stencil_write_mask.front =
2146             pCreateInfo->pDepthStencilState->front.writeMask;
2147          dynamic->stencil_write_mask.back =
2148             pCreateInfo->pDepthStencilState->back.writeMask;
2149       }
2150 
2151       if (states & ANV_CMD_DIRTY_DYNAMIC_STENCIL_REFERENCE) {
2152          dynamic->stencil_reference.front =
2153             pCreateInfo->pDepthStencilState->front.reference;
2154          dynamic->stencil_reference.back =
2155             pCreateInfo->pDepthStencilState->back.reference;
2156       }
2157 
2158       if (states & ANV_CMD_DIRTY_DYNAMIC_DEPTH_TEST_ENABLE) {
2159          dynamic->depth_test_enable =
2160             pCreateInfo->pDepthStencilState->depthTestEnable;
2161       }
2162 
2163       if (states & ANV_CMD_DIRTY_DYNAMIC_DEPTH_WRITE_ENABLE) {
2164          dynamic->depth_write_enable =
2165             pCreateInfo->pDepthStencilState->depthWriteEnable;
2166       }
2167 
2168       if (states & ANV_CMD_DIRTY_DYNAMIC_DEPTH_COMPARE_OP) {
2169          dynamic->depth_compare_op =
2170             pCreateInfo->pDepthStencilState->depthCompareOp;
2171       }
2172 
2173       if (states & ANV_CMD_DIRTY_DYNAMIC_DEPTH_BOUNDS_TEST_ENABLE) {
2174          dynamic->depth_bounds_test_enable =
2175             pCreateInfo->pDepthStencilState->depthBoundsTestEnable;
2176       }
2177 
2178       if (states & ANV_CMD_DIRTY_DYNAMIC_STENCIL_TEST_ENABLE) {
2179          dynamic->stencil_test_enable =
2180             pCreateInfo->pDepthStencilState->stencilTestEnable;
2181       }
2182 
2183       if (states & ANV_CMD_DIRTY_DYNAMIC_STENCIL_OP) {
2184          const VkPipelineDepthStencilStateCreateInfo *info =
2185             pCreateInfo->pDepthStencilState;
2186          memcpy(&dynamic->stencil_op.front, &info->front,
2187                 sizeof(dynamic->stencil_op.front));
2188          memcpy(&dynamic->stencil_op.back, &info->back,
2189                 sizeof(dynamic->stencil_op.back));
2190       }
2191    }
2192 
2193    const VkPipelineRasterizationLineStateCreateInfoEXT *line_state =
2194       vk_find_struct_const(pCreateInfo->pRasterizationState->pNext,
2195                            PIPELINE_RASTERIZATION_LINE_STATE_CREATE_INFO_EXT);
2196    if (!raster_discard && line_state && line_state->stippledLineEnable) {
2197       if (states & ANV_CMD_DIRTY_DYNAMIC_LINE_STIPPLE) {
2198          dynamic->line_stipple.factor = line_state->lineStippleFactor;
2199          dynamic->line_stipple.pattern = line_state->lineStipplePattern;
2200       }
2201    }
2202 
2203    const VkPipelineMultisampleStateCreateInfo *ms_info =
2204       pCreateInfo->pRasterizationState->rasterizerDiscardEnable ? NULL :
2205       pCreateInfo->pMultisampleState;
2206    if (states & ANV_CMD_DIRTY_DYNAMIC_SAMPLE_LOCATIONS) {
2207       const VkPipelineSampleLocationsStateCreateInfoEXT *sl_info = ms_info ?
2208          vk_find_struct_const(ms_info, PIPELINE_SAMPLE_LOCATIONS_STATE_CREATE_INFO_EXT) : NULL;
2209 
2210       if (sl_info) {
2211          dynamic->sample_locations.samples =
2212             sl_info->sampleLocationsInfo.sampleLocationsCount;
2213          const VkSampleLocationEXT *positions =
2214             sl_info->sampleLocationsInfo.pSampleLocations;
2215          for (uint32_t i = 0; i < dynamic->sample_locations.samples; i++) {
2216             dynamic->sample_locations.locations[i].x = positions[i].x;
2217             dynamic->sample_locations.locations[i].y = positions[i].y;
2218          }
2219       }
2220    }
2221    /* Ensure we always have valid values for sample_locations. */
2222    if (pipeline->base.device->vk.enabled_extensions.EXT_sample_locations &&
2223        dynamic->sample_locations.samples == 0) {
2224       dynamic->sample_locations.samples =
2225          ms_info ? ms_info->rasterizationSamples : 1;
2226       const struct intel_sample_position *positions =
2227          intel_get_sample_positions(dynamic->sample_locations.samples);
2228       for (uint32_t i = 0; i < dynamic->sample_locations.samples; i++) {
2229          dynamic->sample_locations.locations[i].x = positions[i].x;
2230          dynamic->sample_locations.locations[i].y = positions[i].y;
2231       }
2232    }
2233 
2234    if (states & ANV_CMD_DIRTY_DYNAMIC_COLOR_BLEND_STATE) {
2235       if (!pCreateInfo->pRasterizationState->rasterizerDiscardEnable &&
2236           uses_color_att) {
2237          assert(pCreateInfo->pColorBlendState);
2238          const VkPipelineColorWriteCreateInfoEXT *color_write_info =
2239             vk_find_struct_const(pCreateInfo->pColorBlendState->pNext,
2240                                  PIPELINE_COLOR_WRITE_CREATE_INFO_EXT);
2241 
2242          if (color_write_info) {
2243             dynamic->color_writes = 0;
2244             for (uint32_t i = 0; i < color_write_info->attachmentCount; i++) {
2245                dynamic->color_writes |=
2246                   color_write_info->pColorWriteEnables[i] ? (1u << i) : 0;
2247             }
2248          }
2249       }
2250    }
2251 
2252    const VkPipelineFragmentShadingRateStateCreateInfoKHR *fsr_state =
2253       vk_find_struct_const(pCreateInfo->pNext,
2254                            PIPELINE_FRAGMENT_SHADING_RATE_STATE_CREATE_INFO_KHR);
2255    if (fsr_state) {
2256       if (states & ANV_CMD_DIRTY_DYNAMIC_SHADING_RATE)
2257          dynamic->fragment_shading_rate = fsr_state->fragmentSize;
2258    }
2259 
2260    pipeline->dynamic_state_mask = states;
2261 
2262    /* Mark states that can either be dynamic or fully baked into the pipeline.
2263     */
2264    pipeline->static_state_mask = states &
2265       (ANV_CMD_DIRTY_DYNAMIC_SAMPLE_LOCATIONS |
2266        ANV_CMD_DIRTY_DYNAMIC_COLOR_BLEND_STATE |
2267        ANV_CMD_DIRTY_DYNAMIC_SHADING_RATE |
2268        ANV_CMD_DIRTY_DYNAMIC_RASTERIZER_DISCARD_ENABLE |
2269        ANV_CMD_DIRTY_DYNAMIC_LOGIC_OP |
2270        ANV_CMD_DIRTY_DYNAMIC_PRIMITIVE_TOPOLOGY);
2271 }
2272 
2273 static void
2274 anv_pipeline_validate_create_info(const VkGraphicsPipelineCreateInfo *info)
2275 {
2276 #ifdef DEBUG
2277    struct anv_render_pass *renderpass = NULL;
2278    struct anv_subpass *subpass = NULL;
2279 
2280    /* Assert that all required members of VkGraphicsPipelineCreateInfo are
2281     * present.  See the Vulkan 1.0.28 spec, Section 9.2 Graphics Pipelines.
2282     */
2283    assert(info->sType == VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO);
2284 
2285    renderpass = anv_render_pass_from_handle(info->renderPass);
2286    assert(renderpass);
2287 
2288    assert(info->subpass < renderpass->subpass_count);
2289    subpass = &renderpass->subpasses[info->subpass];
2290 
2291    assert(info->stageCount >= 1);
2292    assert(info->pRasterizationState);
2293    if (!info->pRasterizationState->rasterizerDiscardEnable) {
2294       assert(info->pViewportState);
2295       assert(info->pMultisampleState);
2296 
2297       if (subpass && subpass->depth_stencil_attachment)
2298          assert(info->pDepthStencilState);
2299 
2300       if (subpass && subpass->color_count > 0) {
2301          bool all_color_unused = true;
2302          for (int i = 0; i < subpass->color_count; i++) {
2303             if (subpass->color_attachments[i].attachment != VK_ATTACHMENT_UNUSED)
2304                all_color_unused = false;
2305          }
2306          /* pColorBlendState is ignored if the pipeline has rasterization
2307           * disabled or if the subpass of the render pass the pipeline is
2308           * created against does not use any color attachments.
2309           */
2310          assert(info->pColorBlendState || all_color_unused);
2311       }
2312    }
2313 
2314    for (uint32_t i = 0; i < info->stageCount; ++i) {
2315       switch (info->pStages[i].stage) {
2316       case VK_SHADER_STAGE_VERTEX_BIT:
2317          assert(info->pVertexInputState);
2318          assert(info->pInputAssemblyState);
2319          break;
2320       case VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT:
2321       case VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT:
2322          assert(info->pTessellationState);
2323          break;
2324       default:
2325          break;
2326       }
2327    }
2328 #endif
2329 }
2330 
2331 /**
2332  * Calculate the desired L3 partitioning based on the current state of the
2333  * pipeline.  For now this simply returns the conservative defaults calculated
2334  * by get_default_l3_weights(), but we could probably do better by gathering
2335  * more statistics from the pipeline state (e.g. guess of expected URB usage
2336  * and bound surfaces), or by using feed-back from performance counters.
2337  */
2338 void
2339 anv_pipeline_setup_l3_config(struct anv_pipeline *pipeline, bool needs_slm)
2340 {
2341    const struct intel_device_info *devinfo = &pipeline->device->info;
2342 
2343    const struct intel_l3_weights w =
2344       intel_get_default_l3_weights(devinfo, true, needs_slm);
2345 
2346    pipeline->l3_config = intel_get_l3_config(devinfo, w);
2347 }
2348 
2349 static VkLineRasterizationModeEXT
2350 vk_line_rasterization_mode(const VkPipelineRasterizationLineStateCreateInfoEXT *line_info,
2351                            const VkPipelineMultisampleStateCreateInfo *ms_info)
2352 {
2353    VkLineRasterizationModeEXT line_mode =
2354       line_info ? line_info->lineRasterizationMode :
2355                   VK_LINE_RASTERIZATION_MODE_DEFAULT_EXT;
2356 
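   /* Resolve DEFAULT to a concrete mode: rectangular lines when multisample
    * rasterization is in use, Bresenham lines otherwise.
    */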
2357    if (line_mode == VK_LINE_RASTERIZATION_MODE_DEFAULT_EXT) {
2358       if (ms_info && ms_info->rasterizationSamples > 1) {
2359          return VK_LINE_RASTERIZATION_MODE_RECTANGULAR_EXT;
2360       } else {
2361          return VK_LINE_RASTERIZATION_MODE_BRESENHAM_EXT;
2362       }
2363    }
2364 
2365    return line_mode;
2366 }
2367 
2368 VkResult
2369 anv_graphics_pipeline_init(struct anv_graphics_pipeline *pipeline,
2370                            struct anv_device *device,
2371                            struct anv_pipeline_cache *cache,
2372                            const VkGraphicsPipelineCreateInfo *pCreateInfo,
2373                            const VkAllocationCallbacks *alloc)
2374 {
2375    VkResult result;
2376 
2377    anv_pipeline_validate_create_info(pCreateInfo);
2378 
2379    result = anv_pipeline_init(&pipeline->base, device,
2380                               ANV_PIPELINE_GRAPHICS, pCreateInfo->flags,
2381                               alloc);
2382    if (result != VK_SUCCESS)
2383       return result;
2384 
2385    anv_batch_set_storage(&pipeline->base.batch, ANV_NULL_ADDRESS,
2386                          pipeline->batch_data, sizeof(pipeline->batch_data));
2387 
2388    ANV_FROM_HANDLE(anv_render_pass, render_pass, pCreateInfo->renderPass);
2389    assert(pCreateInfo->subpass < render_pass->subpass_count);
2390    pipeline->subpass = &render_pass->subpasses[pCreateInfo->subpass];
2391 
2392    assert(pCreateInfo->pRasterizationState);
2393 
2394    if (pCreateInfo->pDynamicState) {
2395       /* Remove all of the states that are marked as dynamic */
2396       uint32_t count = pCreateInfo->pDynamicState->dynamicStateCount;
2397       for (uint32_t s = 0; s < count; s++) {
2398          pipeline->dynamic_states |= anv_cmd_dirty_bit_for_vk_dynamic_state(
2399             pCreateInfo->pDynamicState->pDynamicStates[s]);
2400       }
2401    }
2402 
2403    pipeline->active_stages = 0;
2404    for (uint32_t i = 0; i < pCreateInfo->stageCount; i++)
2405       pipeline->active_stages |= pCreateInfo->pStages[i].stage;
2406 
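   /* A tessellation evaluation shader can only appear together with a
    * tessellation control shader, so treat the TCS as active too.
    */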
2407    if (pipeline->active_stages & VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT)
2408       pipeline->active_stages |= VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT;
2409 
2410    copy_non_dynamic_state(pipeline, pCreateInfo);
2411 
2412    pipeline->depth_clamp_enable = pCreateInfo->pRasterizationState->depthClampEnable;
2413 
2414    /* Previously we enabled depth clipping when !depthClampEnable.
2415     * DepthClipStateCreateInfo now makes depth clipping explicit, so if the
2416     * clipping info is available, use its enable value to determine clipping;
2417     * otherwise fall back to the previous !depthClampEnable logic.
2418     */
2419    const VkPipelineRasterizationDepthClipStateCreateInfoEXT *clip_info =
2420       vk_find_struct_const(pCreateInfo->pRasterizationState->pNext,
2421                            PIPELINE_RASTERIZATION_DEPTH_CLIP_STATE_CREATE_INFO_EXT);
2422    pipeline->depth_clip_enable = clip_info ? clip_info->depthClipEnable : !pipeline->depth_clamp_enable;
2423 
2424    pipeline->sample_shading_enable =
2425       !pCreateInfo->pRasterizationState->rasterizerDiscardEnable &&
2426       pCreateInfo->pMultisampleState &&
2427       pCreateInfo->pMultisampleState->sampleShadingEnable;
2428 
2429    result = anv_pipeline_compile_graphics(pipeline, cache, pCreateInfo);
2430    if (result != VK_SUCCESS) {
2431       anv_pipeline_finish(&pipeline->base, device, alloc);
2432       return result;
2433    }
2434 
2435    anv_pipeline_setup_l3_config(&pipeline->base, false);
2436 
2437    if (anv_pipeline_is_primitive(pipeline)) {
2438       const VkPipelineVertexInputStateCreateInfo *vi_info =
2439          pCreateInfo->pVertexInputState;
2440 
2441       const uint64_t inputs_read = get_vs_prog_data(pipeline)->inputs_read;
2442 
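      /* Only consider a vertex buffer binding used if the VS actually reads at
       * least one attribute sourced from that binding.
       */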
2443       for (uint32_t i = 0; i < vi_info->vertexAttributeDescriptionCount; i++) {
2444          const VkVertexInputAttributeDescription *desc =
2445             &vi_info->pVertexAttributeDescriptions[i];
2446 
2447          if (inputs_read & (1ull << (VERT_ATTRIB_GENERIC0 + desc->location)))
2448             pipeline->vb_used |= 1 << desc->binding;
2449       }
2450 
2451       for (uint32_t i = 0; i < vi_info->vertexBindingDescriptionCount; i++) {
2452          const VkVertexInputBindingDescription *desc =
2453             &vi_info->pVertexBindingDescriptions[i];
2454 
2455          pipeline->vb[desc->binding].stride = desc->stride;
2456 
2457          /* Step rate is programmed per vertex element (attribute), not
2458           * binding. Set up a map of which bindings step per instance, for
2459           * reference by vertex element setup. */
2460          switch (desc->inputRate) {
2461          default:
2462          case VK_VERTEX_INPUT_RATE_VERTEX:
2463             pipeline->vb[desc->binding].instanced = false;
2464             break;
2465          case VK_VERTEX_INPUT_RATE_INSTANCE:
2466             pipeline->vb[desc->binding].instanced = true;
2467             break;
2468          }
2469 
2470          pipeline->vb[desc->binding].instance_divisor = 1;
2471       }
2472 
2473       const VkPipelineVertexInputDivisorStateCreateInfoEXT *vi_div_state =
2474          vk_find_struct_const(vi_info->pNext,
2475                               PIPELINE_VERTEX_INPUT_DIVISOR_STATE_CREATE_INFO_EXT);
2476       if (vi_div_state) {
2477          for (uint32_t i = 0; i < vi_div_state->vertexBindingDivisorCount; i++) {
2478             const VkVertexInputBindingDivisorDescriptionEXT *desc =
2479                &vi_div_state->pVertexBindingDivisors[i];
2480 
2481             pipeline->vb[desc->binding].instance_divisor = desc->divisor;
2482          }
2483       }
2484 
2485       /* Our implementation of VK_KHR_multiview uses instancing to draw the
2486        * different views.  If the client asks for instancing, we need to multiply
2487        * the instance divisor by the number of views to ensure that we repeat the
2488        * client's per-instance data once for each view.
2489        */
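      /* For example, with two views a client-specified divisor of 3 becomes 6,
       * so the attribute still advances once per 3 application instances.
       */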
2490       if (pipeline->subpass->view_mask && !pipeline->use_primitive_replication) {
2491          const uint32_t view_count = anv_subpass_view_count(pipeline->subpass);
2492          for (uint32_t vb = 0; vb < MAX_VBS; vb++) {
2493             if (pipeline->vb[vb].instanced)
2494                pipeline->vb[vb].instance_divisor *= view_count;
2495          }
2496       }
2497 
2498       const VkPipelineInputAssemblyStateCreateInfo *ia_info =
2499          pCreateInfo->pInputAssemblyState;
2500       const VkPipelineTessellationStateCreateInfo *tess_info =
2501          pCreateInfo->pTessellationState;
2502 
2503       if (anv_pipeline_has_stage(pipeline, MESA_SHADER_TESS_EVAL))
2504          pipeline->topology = _3DPRIM_PATCHLIST(tess_info->patchControlPoints);
2505       else
2506          pipeline->topology = vk_to_intel_primitive_type[ia_info->topology];
2507    }
2508 
2509    /* If rasterization is not enabled, ms_info must be ignored. */
2510    const bool raster_enabled =
2511       !pCreateInfo->pRasterizationState->rasterizerDiscardEnable ||
2512       (pipeline->dynamic_states &
2513        ANV_CMD_DIRTY_DYNAMIC_RASTERIZER_DISCARD_ENABLE);
2514 
2515    const VkPipelineMultisampleStateCreateInfo *ms_info =
2516       raster_enabled ? pCreateInfo->pMultisampleState : NULL;
2517 
2518    const VkPipelineRasterizationLineStateCreateInfoEXT *line_info =
2519       vk_find_struct_const(pCreateInfo->pRasterizationState->pNext,
2520                            PIPELINE_RASTERIZATION_LINE_STATE_CREATE_INFO_EXT);
2521 
2522    /* Store line mode, polygon mode and rasterization samples; these are used
2523     * for dynamic primitive topology.
2524     */
2525    pipeline->line_mode = vk_line_rasterization_mode(line_info, ms_info);
2526    pipeline->polygon_mode = pCreateInfo->pRasterizationState->polygonMode;
2527    pipeline->rasterization_samples =
2528       ms_info ? ms_info->rasterizationSamples : 1;
2529 
2530    return VK_SUCCESS;
2531 }
2532 
2533 static VkResult
2534 compile_upload_rt_shader(struct anv_ray_tracing_pipeline *pipeline,
2535                          struct anv_pipeline_cache *cache,
2536                          nir_shader *nir,
2537                          struct anv_pipeline_stage *stage,
2538                          struct anv_shader_bin **shader_out,
2539                          void *mem_ctx)
2540 {
2541    const struct brw_compiler *compiler =
2542       pipeline->base.device->physical->compiler;
2543    const struct intel_device_info *devinfo = compiler->devinfo;
2544 
2545    nir_shader **resume_shaders = NULL;
2546    uint32_t num_resume_shaders = 0;
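   /* nir_lower_shader_calls splits the shader at each shader call (e.g.
    * traceRay or executeCallable) so that every continuation point becomes
    * its own "resume" shader; the ray-tracing intrinsics are then lowered
    * for the backend in both the main and the resume shaders.
    */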
2547    if (nir->info.stage != MESA_SHADER_COMPUTE) {
2548       NIR_PASS_V(nir, nir_lower_shader_calls,
2549                  nir_address_format_64bit_global,
2550                  BRW_BTD_STACK_ALIGN,
2551                  &resume_shaders, &num_resume_shaders, mem_ctx);
2552       NIR_PASS_V(nir, brw_nir_lower_shader_calls);
2553       NIR_PASS_V(nir, brw_nir_lower_rt_intrinsics, devinfo);
2554    }
2555 
2556    for (unsigned i = 0; i < num_resume_shaders; i++) {
2557       NIR_PASS_V(resume_shaders[i], brw_nir_lower_shader_calls);
2558       NIR_PASS_V(resume_shaders[i], brw_nir_lower_rt_intrinsics, devinfo);
2559    }
2560 
2561    stage->code =
2562       brw_compile_bs(compiler, pipeline->base.device, mem_ctx,
2563                      &stage->key.bs, &stage->prog_data.bs, nir,
2564                      num_resume_shaders, resume_shaders, stage->stats, NULL);
2565    if (stage->code == NULL)
2566       return vk_error(pipeline, VK_ERROR_OUT_OF_HOST_MEMORY);
2567 
2568    /* Ray-tracing shaders don't have a "real" bind map */
2569    struct anv_pipeline_bind_map empty_bind_map = {};
2570 
2571    const unsigned code_size = stage->prog_data.base.program_size;
2572    struct anv_shader_bin *bin =
2573       anv_device_upload_kernel(pipeline->base.device,
2574                                cache,
2575                                stage->stage,
2576                                &stage->cache_key, sizeof(stage->cache_key),
2577                                stage->code, code_size,
2578                                &stage->prog_data.base,
2579                                sizeof(stage->prog_data.bs),
2580                                stage->stats, 1,
2581                                NULL, &empty_bind_map);
2582    if (bin == NULL)
2583       return vk_error(pipeline, VK_ERROR_OUT_OF_HOST_MEMORY);
2584 
2585    /* TODO: Figure out executables for resume shaders */
2586    anv_pipeline_add_executables(&pipeline->base, stage, bin);
2587    util_dynarray_append(&pipeline->shaders, struct anv_shader_bin *, bin);
2588 
2589    *shader_out = bin;
2590 
2591    return VK_SUCCESS;
2592 }
2593 
2594 static bool
2595 is_rt_stack_size_dynamic(const VkRayTracingPipelineCreateInfoKHR *info)
2596 {
2597    if (info->pDynamicState == NULL)
2598       return false;
2599 
2600    for (unsigned i = 0; i < info->pDynamicState->dynamicStateCount; i++) {
2601       if (info->pDynamicState->pDynamicStates[i] ==
2602           VK_DYNAMIC_STATE_RAY_TRACING_PIPELINE_STACK_SIZE_KHR)
2603          return true;
2604    }
2605 
2606    return false;
2607 }
2608 
2609 static void
2610 anv_pipeline_compute_ray_tracing_stacks(struct anv_ray_tracing_pipeline *pipeline,
2611                                         const VkRayTracingPipelineCreateInfoKHR *info,
2612                                         uint32_t *stack_max)
2613 {
2614    if (is_rt_stack_size_dynamic(info)) {
2615       pipeline->stack_size = 0; /* 0 means dynamic */
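      /* The application must then provide the stack size at record time
       * with vkCmdSetRayTracingPipelineStackSizeKHR().
       */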
2616    } else {
2617       /* From the Vulkan spec:
2618        *
2619        *    "If the stack size is not set explicitly, the stack size for a
2620        *    pipeline is:
2621        *
2622        *       rayGenStackMax +
2623        *       min(1, maxPipelineRayRecursionDepth) ×
2624        *       max(closestHitStackMax, missStackMax,
2625        *           intersectionStackMax + anyHitStackMax) +
2626        *       max(0, maxPipelineRayRecursionDepth-1) ×
2627        *       max(closestHitStackMax, missStackMax) +
2628        *       2 × callableStackMax"
2629        */
2630       pipeline->stack_size =
2631          stack_max[MESA_SHADER_RAYGEN] +
2632          MIN2(1, info->maxPipelineRayRecursionDepth) *
2633          MAX4(stack_max[MESA_SHADER_CLOSEST_HIT],
2634               stack_max[MESA_SHADER_MISS],
2635               stack_max[MESA_SHADER_INTERSECTION],
2636               stack_max[MESA_SHADER_ANY_HIT]) +
2637          MAX2(0, (int)info->maxPipelineRayRecursionDepth - 1) *
2638          MAX2(stack_max[MESA_SHADER_CLOSEST_HIT],
2639               stack_max[MESA_SHADER_MISS]) +
2640          2 * stack_max[MESA_SHADER_CALLABLE];
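      /* Worked example (illustrative numbers only): with a recursion depth
       * of 2 and stack maxima of raygen=64, closest-hit=128, miss=96,
       * intersection=32, any-hit=16 and callable=0, this evaluates to
       * 64 + 1*128 + 1*128 + 0 = 320 bytes.
       */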
2641 
2642       /* This is an extremely unlikely case but we need to set it to some
2643        * non-zero value so that we don't accidentally think it's dynamic.
2644        * Our minimum stack size is 2KB anyway so we could set it to any small
2645        * value we like.
2646        */
2647       if (pipeline->stack_size == 0)
2648          pipeline->stack_size = 1;
2649    }
2650 }
2651 
2652 static struct anv_pipeline_stage *
2653 anv_pipeline_init_ray_tracing_stages(struct anv_ray_tracing_pipeline *pipeline,
2654                                      const VkRayTracingPipelineCreateInfoKHR *info,
2655                                      void *pipeline_ctx)
2656 {
2657    ANV_FROM_HANDLE(anv_pipeline_layout, layout, info->layout);
2658 
2659    /* Create enough stage entries for all shader modules plus potential
2660     * combinations in the groups.
2661     */
2662    struct anv_pipeline_stage *stages =
2663       rzalloc_array(pipeline_ctx, struct anv_pipeline_stage, info->stageCount);
2664 
2665    for (uint32_t i = 0; i < info->stageCount; i++) {
2666       const VkPipelineShaderStageCreateInfo *sinfo = &info->pStages[i];
2667       if (sinfo->module == VK_NULL_HANDLE)
2668          continue;
2669 
2670       int64_t stage_start = os_time_get_nano();
2671 
2672       stages[i] = (struct anv_pipeline_stage) {
2673          .stage = vk_to_mesa_shader_stage(sinfo->stage),
2674          .module = vk_shader_module_from_handle(sinfo->module),
2675          .entrypoint = sinfo->pName,
2676          .spec_info = sinfo->pSpecializationInfo,
2677          .cache_key = {
2678             .stage = vk_to_mesa_shader_stage(sinfo->stage),
2679          },
2680          .feedback = {
2681             .flags = VK_PIPELINE_CREATION_FEEDBACK_VALID_BIT_EXT,
2682          },
2683       };
2684 
2685       populate_bs_prog_key(&pipeline->base.device->info, sinfo->flags,
2686                            pipeline->base.device->robust_buffer_access,
2687                            &stages[i].key.bs);
2688 
2689       anv_pipeline_hash_shader(stages[i].module,
2690                                stages[i].entrypoint,
2691                                stages[i].stage,
2692                                stages[i].spec_info,
2693                                stages[i].shader_sha1);
2694 
2695       if (stages[i].stage != MESA_SHADER_INTERSECTION) {
2696          anv_pipeline_hash_ray_tracing_shader(pipeline, layout, &stages[i],
2697                                               stages[i].cache_key.sha1);
2698       }
2699 
2700       stages[i].feedback.duration += os_time_get_nano() - stage_start;
2701    }
2702 
2703    for (uint32_t i = 0; i < info->groupCount; i++) {
2704       const VkRayTracingShaderGroupCreateInfoKHR *ginfo = &info->pGroups[i];
2705 
2706       if (ginfo->type != VK_RAY_TRACING_SHADER_GROUP_TYPE_PROCEDURAL_HIT_GROUP_KHR)
2707          continue;
2708 
2709       int64_t stage_start = os_time_get_nano();
2710 
2711       uint32_t intersection_idx = ginfo->intersectionShader;
2712       assert(intersection_idx < info->stageCount);
2713 
2714       uint32_t any_hit_idx = ginfo->anyHitShader;
2715       if (any_hit_idx != VK_SHADER_UNUSED_KHR) {
2716          assert(any_hit_idx < info->stageCount);
2717          anv_pipeline_hash_ray_tracing_combined_shader(pipeline,
2718                                                        layout,
2719                                                        &stages[intersection_idx],
2720                                                        &stages[any_hit_idx],
2721                                                        stages[intersection_idx].cache_key.sha1);
2722       } else {
2723          anv_pipeline_hash_ray_tracing_shader(pipeline, layout,
2724                                               &stages[intersection_idx],
2725                                               stages[intersection_idx].cache_key.sha1);
2726       }
2727 
2728       stages[intersection_idx].feedback.duration += os_time_get_nano() - stage_start;
2729    }
2730 
2731    return stages;
2732 }
2733 
2734 static bool
2735 anv_pipeline_load_cached_shaders(struct anv_ray_tracing_pipeline *pipeline,
2736                                  struct anv_pipeline_cache *cache,
2737                                  const VkRayTracingPipelineCreateInfoKHR *info,
2738                                  struct anv_pipeline_stage *stages,
2739                                  uint32_t *stack_max)
2740 {
2741    uint32_t shaders = 0, cache_hits = 0;
2742    for (uint32_t i = 0; i < info->stageCount; i++) {
2743       if (stages[i].entrypoint == NULL)
2744          continue;
2745 
2746       shaders++;
2747 
2748       int64_t stage_start = os_time_get_nano();
2749 
2750       bool cache_hit;
2751       stages[i].bin = anv_device_search_for_kernel(pipeline->base.device, cache,
2752                                                    &stages[i].cache_key,
2753                                                    sizeof(stages[i].cache_key),
2754                                                    &cache_hit);
2755       if (cache_hit) {
2756          cache_hits++;
2757          stages[i].feedback.flags |=
2758             VK_PIPELINE_CREATION_FEEDBACK_APPLICATION_PIPELINE_CACHE_HIT_BIT_EXT;
2759       }
2760 
2761       if (stages[i].bin != NULL) {
2762          anv_pipeline_add_executables(&pipeline->base, &stages[i], stages[i].bin);
2763          util_dynarray_append(&pipeline->shaders, struct anv_shader_bin *, stages[i].bin);
2764 
2765          uint32_t stack_size =
2766             brw_bs_prog_data_const(stages[i].bin->prog_data)->max_stack_size;
2767          stack_max[stages[i].stage] =
2768             MAX2(stack_max[stages[i].stage], stack_size);
2769       }
2770 
2771       stages[i].feedback.duration += os_time_get_nano() - stage_start;
2772    }
2773 
2774    return cache_hits == shaders;
2775 }
2776 
2777 static VkResult
2778 anv_pipeline_compile_ray_tracing(struct anv_ray_tracing_pipeline *pipeline,
2779                                  struct anv_pipeline_cache *cache,
2780                                  const VkRayTracingPipelineCreateInfoKHR *info)
2781 {
2782    const struct intel_device_info *devinfo = &pipeline->base.device->info;
2783    VkResult result;
2784 
2785    VkPipelineCreationFeedbackEXT pipeline_feedback = {
2786       .flags = VK_PIPELINE_CREATION_FEEDBACK_VALID_BIT_EXT,
2787    };
2788    int64_t pipeline_start = os_time_get_nano();
2789 
2790    void *pipeline_ctx = ralloc_context(NULL);
2791 
2792    struct anv_pipeline_stage *stages =
2793       anv_pipeline_init_ray_tracing_stages(pipeline, info, pipeline_ctx);
2794 
2795    ANV_FROM_HANDLE(anv_pipeline_layout, layout, info->layout);
2796 
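   /* Capturing internal representations requires the NIR and assembly that
    * only a fresh compile produces, so skip the cache lookup in that case.
    */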
2797    const bool skip_cache_lookup =
2798       (pipeline->base.flags & VK_PIPELINE_CREATE_CAPTURE_INTERNAL_REPRESENTATIONS_BIT_KHR);
2799 
2800    uint32_t stack_max[MESA_VULKAN_SHADER_STAGES] = {};
2801 
2802    if (!skip_cache_lookup &&
2803        anv_pipeline_load_cached_shaders(pipeline, cache, info, stages, stack_max)) {
2804       pipeline_feedback.flags |=
2805          VK_PIPELINE_CREATION_FEEDBACK_APPLICATION_PIPELINE_CACHE_HIT_BIT_EXT;
2806       goto done;
2807    }
2808 
2809    if (info->flags & VK_PIPELINE_CREATE_FAIL_ON_PIPELINE_COMPILE_REQUIRED_BIT_EXT) {
2810       ralloc_free(pipeline_ctx);
2811       return VK_PIPELINE_COMPILE_REQUIRED_EXT;
2812    }
2813 
2814    for (uint32_t i = 0; i < info->stageCount; i++) {
2815       if (stages[i].entrypoint == NULL)
2816          continue;
2817 
2818       int64_t stage_start = os_time_get_nano();
2819 
2820       stages[i].nir = anv_pipeline_stage_get_nir(&pipeline->base, cache,
2821                                                  pipeline_ctx, &stages[i]);
2822       if (stages[i].nir == NULL) {
2823          ralloc_free(pipeline_ctx);
2824          return vk_error(pipeline, VK_ERROR_OUT_OF_HOST_MEMORY);
2825       }
2826 
2827       anv_pipeline_lower_nir(&pipeline->base, pipeline_ctx, &stages[i], layout);
2828 
2829       stages[i].feedback.duration += os_time_get_nano() - stage_start;
2830    }
2831 
2832    for (uint32_t i = 0; i < info->stageCount; i++) {
2833       if (stages[i].entrypoint == NULL)
2834          continue;
2835 
2836       /* Shader found in cache already. */
2837       if (stages[i].bin != NULL)
2838          continue;
2839 
2840       /* We handle intersection shaders as part of the group */
2841       if (stages[i].stage == MESA_SHADER_INTERSECTION)
2842          continue;
2843 
2844       int64_t stage_start = os_time_get_nano();
2845 
2846       void *stage_ctx = ralloc_context(pipeline_ctx);
2847 
2848       nir_shader *nir = nir_shader_clone(stage_ctx, stages[i].nir);
2849       switch (stages[i].stage) {
2850       case MESA_SHADER_RAYGEN:
2851          brw_nir_lower_raygen(nir);
2852          break;
2853 
2854       case MESA_SHADER_ANY_HIT:
2855          brw_nir_lower_any_hit(nir, devinfo);
2856          break;
2857 
2858       case MESA_SHADER_CLOSEST_HIT:
2859          brw_nir_lower_closest_hit(nir);
2860          break;
2861 
2862       case MESA_SHADER_MISS:
2863          brw_nir_lower_miss(nir);
2864          break;
2865 
2866       case MESA_SHADER_INTERSECTION:
2867          unreachable("These are handled later");
2868 
2869       case MESA_SHADER_CALLABLE:
2870          brw_nir_lower_callable(nir);
2871          break;
2872 
2873       default:
2874          unreachable("Invalid ray-tracing shader stage");
2875       }
2876 
2877       result = compile_upload_rt_shader(pipeline, cache, nir, &stages[i],
2878                                         &stages[i].bin, stage_ctx);
2879       if (result != VK_SUCCESS) {
2880          ralloc_free(pipeline_ctx);
2881          return result;
2882       }
2883 
2884       uint32_t stack_size =
2885          brw_bs_prog_data_const(stages[i].bin->prog_data)->max_stack_size;
2886       stack_max[stages[i].stage] = MAX2(stack_max[stages[i].stage], stack_size);
2887 
2888       ralloc_free(stage_ctx);
2889 
2890       stages[i].feedback.duration += os_time_get_nano() - stage_start;
2891    }
2892 
2893    for (uint32_t i = 0; i < info->groupCount; i++) {
2894       const VkRayTracingShaderGroupCreateInfoKHR *ginfo = &info->pGroups[i];
2895       struct anv_rt_shader_group *group = &pipeline->groups[i];
2896       group->type = ginfo->type;
2897       switch (ginfo->type) {
2898       case VK_RAY_TRACING_SHADER_GROUP_TYPE_GENERAL_KHR:
2899          assert(ginfo->generalShader < info->stageCount);
2900          group->general = stages[ginfo->generalShader].bin;
2901          break;
2902 
2903       case VK_RAY_TRACING_SHADER_GROUP_TYPE_TRIANGLES_HIT_GROUP_KHR:
2904          if (ginfo->anyHitShader < info->stageCount)
2905             group->any_hit = stages[ginfo->anyHitShader].bin;
2906 
2907          if (ginfo->closestHitShader < info->stageCount)
2908             group->closest_hit = stages[ginfo->closestHitShader].bin;
2909          break;
2910 
2911       case VK_RAY_TRACING_SHADER_GROUP_TYPE_PROCEDURAL_HIT_GROUP_KHR: {
2912          if (ginfo->closestHitShader < info->stageCount)
2913             group->closest_hit = stages[ginfo->closestHitShader].bin;
2914 
2915          uint32_t intersection_idx = info->pGroups[i].intersectionShader;
2916          assert(intersection_idx < info->stageCount);
2917 
2918          /* Only compile this stage if not already found in the cache. */
2919          if (stages[intersection_idx].bin == NULL) {
2920             /* The any-hit and intersection shaders have to be combined */
2921             uint32_t any_hit_idx = info->pGroups[i].anyHitShader;
2922             const nir_shader *any_hit = NULL;
2923             if (any_hit_idx < info->stageCount)
2924                any_hit = stages[any_hit_idx].nir;
2925 
2926             void *group_ctx = ralloc_context(pipeline_ctx);
2927             nir_shader *intersection =
2928                nir_shader_clone(group_ctx, stages[intersection_idx].nir);
2929 
2930             brw_nir_lower_combined_intersection_any_hit(intersection, any_hit,
2931                                                         devinfo);
2932 
2933             result = compile_upload_rt_shader(pipeline, cache,
2934                                               intersection,
2935                                               &stages[intersection_idx],
2936                                               &group->intersection,
2937                                               group_ctx);
2938             ralloc_free(group_ctx);
2939             if (result != VK_SUCCESS)
2940                return result;
2941          } else {
2942             group->intersection = stages[intersection_idx].bin;
2943          }
2944 
2945          uint32_t stack_size =
2946             brw_bs_prog_data_const(group->intersection->prog_data)->max_stack_size;
2947          stack_max[MESA_SHADER_INTERSECTION] =
2948             MAX2(stack_max[MESA_SHADER_INTERSECTION], stack_size);
2949 
2950          break;
2951       }
2952 
2953       default:
2954          unreachable("Invalid ray tracing shader group type");
2955       }
2956    }
2957 
2958  done:
2959    ralloc_free(pipeline_ctx);
2960 
2961    anv_pipeline_compute_ray_tracing_stacks(pipeline, info, stack_max);
2962 
2963    pipeline_feedback.duration = os_time_get_nano() - pipeline_start;
2964 
2965    const VkPipelineCreationFeedbackCreateInfoEXT *create_feedback =
2966       vk_find_struct_const(info->pNext, PIPELINE_CREATION_FEEDBACK_CREATE_INFO_EXT);
2967    if (create_feedback) {
2968       *create_feedback->pPipelineCreationFeedback = pipeline_feedback;
2969 
2970       assert(info->stageCount == create_feedback->pipelineStageCreationFeedbackCount);
2971       for (uint32_t i = 0; i < info->stageCount; i++) {
2972          /* stages[] is indexed by the pStages array index, not gl_shader_stage. */
2973          create_feedback->pPipelineStageCreationFeedbacks[i] = stages[i].feedback;
2974       }
2975    }
2976 
2977    return VK_SUCCESS;
2978 }
2979 
2980 VkResult
2981 anv_device_init_rt_shaders(struct anv_device *device)
2982 {
2983    if (!device->vk.enabled_extensions.KHR_ray_tracing_pipeline)
2984       return VK_SUCCESS;
2985 
2986    bool cache_hit;
2987 
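   /* The raygen trampoline is a small compute shader that launches the
    * application's ray-generation shader through the bindless thread
    * dispatcher.  It is shared by all ray-tracing pipelines, so it is built
    * once per device and stored in the default pipeline cache.
    */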
2988    struct brw_rt_trampoline {
2989       char name[16];
2990       struct brw_cs_prog_key key;
2991    } trampoline_key = {
2992       .name = "rt-trampoline",
2993       .key = {
2994          /* TODO: Other subgroup sizes? */
2995          .base.subgroup_size_type = BRW_SUBGROUP_SIZE_REQUIRE_8,
2996       },
2997    };
2998    device->rt_trampoline =
2999       anv_device_search_for_kernel(device, &device->default_pipeline_cache,
3000                                    &trampoline_key, sizeof(trampoline_key),
3001                                    &cache_hit);
3002    if (device->rt_trampoline == NULL) {
3004       void *tmp_ctx = ralloc_context(NULL);
3005       nir_shader *trampoline_nir =
3006          brw_nir_create_raygen_trampoline(device->physical->compiler, tmp_ctx);
3007 
3008       struct anv_pipeline_bind_map bind_map = {
3009          .surface_count = 0,
3010          .sampler_count = 0,
3011       };
3012       uint32_t dummy_params[4] = { 0, };
3013       struct brw_cs_prog_data trampoline_prog_data = {
3014          .base.nr_params = 4,
3015          .base.param = dummy_params,
3016          .uses_inline_data = true,
3017          .uses_btd_stack_ids = true,
3018       };
3019       struct brw_compile_cs_params params = {
3020          .nir = trampoline_nir,
3021          .key = &trampoline_key.key,
3022          .prog_data = &trampoline_prog_data,
3023          .log_data = device,
3024       };
3025       const unsigned *tramp_data =
3026          brw_compile_cs(device->physical->compiler, tmp_ctx, &params);
3027 
3028       device->rt_trampoline =
3029          anv_device_upload_kernel(device, &device->default_pipeline_cache,
3030                                   MESA_SHADER_COMPUTE,
3031                                   &trampoline_key, sizeof(trampoline_key),
3032                                   tramp_data,
3033                                   trampoline_prog_data.base.program_size,
3034                                   &trampoline_prog_data.base,
3035                                   sizeof(trampoline_prog_data),
3036                                   NULL, 0, NULL, &bind_map);
3037 
3038       ralloc_free(tmp_ctx);
3039 
3040       if (device->rt_trampoline == NULL)
3041          return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
3042    }
3043 
3044    struct brw_rt_trivial_return {
3045       char name[16];
3046       struct brw_bs_prog_key key;
3047    } return_key = {
3048       .name = "rt-trivial-ret",
3049    };
3050    device->rt_trivial_return =
3051       anv_device_search_for_kernel(device, &device->default_pipeline_cache,
3052                                    &return_key, sizeof(return_key),
3053                                    &cache_hit);
3054    if (device->rt_trivial_return == NULL) {
3055       void *tmp_ctx = ralloc_context(NULL);
3056       nir_shader *trivial_return_nir =
3057          brw_nir_create_trivial_return_shader(device->physical->compiler, tmp_ctx);
3058 
3059       NIR_PASS_V(trivial_return_nir, brw_nir_lower_rt_intrinsics, &device->info);
3060 
3061       struct anv_pipeline_bind_map bind_map = {
3062          .surface_count = 0,
3063          .sampler_count = 0,
3064       };
3065       struct brw_bs_prog_data return_prog_data = { 0, };
3066       const unsigned *return_data =
3067          brw_compile_bs(device->physical->compiler, device, tmp_ctx,
3068                         &return_key.key, &return_prog_data, trivial_return_nir,
3069                         0, 0, NULL, NULL);
3070 
3071       device->rt_trivial_return =
3072          anv_device_upload_kernel(device, &device->default_pipeline_cache,
3073                                   MESA_SHADER_CALLABLE,
3074                                   &return_key, sizeof(return_key),
3075                                   return_data, return_prog_data.base.program_size,
3076                                   &return_prog_data.base, sizeof(return_prog_data),
3077                                   NULL, 0, NULL, &bind_map);
3078 
3079       ralloc_free(tmp_ctx);
3080 
3081       if (device->rt_trivial_return == NULL) {
3082          anv_shader_bin_unref(device, device->rt_trampoline);
3083          return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
3084       }
3085    }
3086 
3087    return VK_SUCCESS;
3088 }
3089 
3090 void
3091 anv_device_finish_rt_shaders(struct anv_device *device)
3092 {
3093    if (!device->vk.enabled_extensions.KHR_ray_tracing_pipeline)
3094       return;
3095 
3096    anv_shader_bin_unref(device, device->rt_trampoline);
3097 }
3098 
3099 VkResult
3100 anv_ray_tracing_pipeline_init(struct anv_ray_tracing_pipeline *pipeline,
3101                               struct anv_device *device,
3102                               struct anv_pipeline_cache *cache,
3103                               const VkRayTracingPipelineCreateInfoKHR *pCreateInfo,
3104                               const VkAllocationCallbacks *alloc)
3105 {
3106    VkResult result;
3107 
3108    util_dynarray_init(&pipeline->shaders, pipeline->base.mem_ctx);
3109 
3110    result = anv_pipeline_compile_ray_tracing(pipeline, cache, pCreateInfo);
3111    if (result != VK_SUCCESS)
3112       goto fail;
3113 
3114    anv_pipeline_setup_l3_config(&pipeline->base, /* needs_slm */ false);
3115 
3116    return VK_SUCCESS;
3117 
3118 fail:
3119    util_dynarray_foreach(&pipeline->shaders,
3120                          struct anv_shader_bin *, shader) {
3121       anv_shader_bin_unref(device, *shader);
3122    }
3123    return result;
3124 }
3125 
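/* Format a string into one of the fixed-size name/description fields used by
 * VK_KHR_pipeline_executable_properties, asserting that it fits.
 */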
3126 #define WRITE_STR(field, ...) ({                               \
3127    memset(field, 0, sizeof(field));                            \
3128    UNUSED int i = snprintf(field, sizeof(field), __VA_ARGS__); \
3129    assert(i > 0 && i < sizeof(field));                         \
3130 })
3131 
3132 VkResult anv_GetPipelineExecutablePropertiesKHR(
3133     VkDevice                                    device,
3134     const VkPipelineInfoKHR*                    pPipelineInfo,
3135     uint32_t*                                   pExecutableCount,
3136     VkPipelineExecutablePropertiesKHR*          pProperties)
3137 {
3138    ANV_FROM_HANDLE(anv_pipeline, pipeline, pPipelineInfo->pipeline);
3139    VK_OUTARRAY_MAKE(out, pProperties, pExecutableCount);
3140 
3141    util_dynarray_foreach (&pipeline->executables, struct anv_pipeline_executable, exe) {
3142       vk_outarray_append(&out, props) {
3143          gl_shader_stage stage = exe->stage;
3144          props->stages = mesa_to_vk_shader_stage(stage);
3145 
3146          unsigned simd_width = exe->stats.dispatch_width;
3147          if (stage == MESA_SHADER_FRAGMENT) {
3148             WRITE_STR(props->name, "%s%d %s",
3149                       simd_width ? "SIMD" : "vec",
3150                       simd_width ? simd_width : 4,
3151                       _mesa_shader_stage_to_string(stage));
3152          } else {
3153             WRITE_STR(props->name, "%s", _mesa_shader_stage_to_string(stage));
3154          }
3155          WRITE_STR(props->description, "%s%d %s shader",
3156                    simd_width ? "SIMD" : "vec",
3157                    simd_width ? simd_width : 4,
3158                    _mesa_shader_stage_to_string(stage));
3159 
3160          /* The compiler gives us a dispatch width of 0 for vec4 but Vulkan
3161           * wants a subgroup size of 1.
3162           */
3163          props->subgroupSize = MAX2(simd_width, 1);
3164       }
3165    }
3166 
3167    return vk_outarray_status(&out);
3168 }
3169 
3170 static const struct anv_pipeline_executable *
3171 anv_pipeline_get_executable(struct anv_pipeline *pipeline, uint32_t index)
3172 {
3173    assert(index < util_dynarray_num_elements(&pipeline->executables,
3174                                              struct anv_pipeline_executable));
3175    return util_dynarray_element(
3176       &pipeline->executables, struct anv_pipeline_executable, index);
3177 }
3178 
3179 VkResult anv_GetPipelineExecutableStatisticsKHR(
3180     VkDevice                                    device,
3181     const VkPipelineExecutableInfoKHR*          pExecutableInfo,
3182     uint32_t*                                   pStatisticCount,
3183     VkPipelineExecutableStatisticKHR*           pStatistics)
3184 {
3185    ANV_FROM_HANDLE(anv_pipeline, pipeline, pExecutableInfo->pipeline);
3186    VK_OUTARRAY_MAKE(out, pStatistics, pStatisticCount);
3187 
3188    const struct anv_pipeline_executable *exe =
3189       anv_pipeline_get_executable(pipeline, pExecutableInfo->executableIndex);
3190 
3191    const struct brw_stage_prog_data *prog_data;
3192    switch (pipeline->type) {
3193    case ANV_PIPELINE_GRAPHICS: {
3194       prog_data = anv_pipeline_to_graphics(pipeline)->shaders[exe->stage]->prog_data;
3195       break;
3196    }
3197    case ANV_PIPELINE_COMPUTE: {
3198       prog_data = anv_pipeline_to_compute(pipeline)->cs->prog_data;
3199       break;
3200    }
3201    default:
3202       unreachable("invalid pipeline type");
3203    }
3204 
3205    vk_outarray_append(&out, stat) {
3206       WRITE_STR(stat->name, "Instruction Count");
3207       WRITE_STR(stat->description,
3208                 "Number of GEN instructions in the final generated "
3209                 "shader executable.");
3210       stat->format = VK_PIPELINE_EXECUTABLE_STATISTIC_FORMAT_UINT64_KHR;
3211       stat->value.u64 = exe->stats.instructions;
3212    }
3213 
3214    vk_outarray_append(&out, stat) {
3215       WRITE_STR(stat->name, "SEND Count");
3216       WRITE_STR(stat->description,
3217                 "Number of instructions in the final generated shader "
3218                 "executable which access external units such as the "
3219                 "constant cache or the sampler.");
3220       stat->format = VK_PIPELINE_EXECUTABLE_STATISTIC_FORMAT_UINT64_KHR;
3221       stat->value.u64 = exe->stats.sends;
3222    }
3223 
3224    vk_outarray_append(&out, stat) {
3225       WRITE_STR(stat->name, "Loop Count");
3226       WRITE_STR(stat->description,
3227                 "Number of loops (not unrolled) in the final generated "
3228                 "shader executable.");
3229       stat->format = VK_PIPELINE_EXECUTABLE_STATISTIC_FORMAT_UINT64_KHR;
3230       stat->value.u64 = exe->stats.loops;
3231    }
3232 
3233    vk_outarray_append(&out, stat) {
3234       WRITE_STR(stat->name, "Cycle Count");
3235       WRITE_STR(stat->description,
3236                 "Estimate of the number of EU cycles required to execute "
3237                 "the final generated executable.  This is an estimate only "
3238                 "and may vary greatly from actual run-time performance.");
3239       stat->format = VK_PIPELINE_EXECUTABLE_STATISTIC_FORMAT_UINT64_KHR;
3240       stat->value.u64 = exe->stats.cycles;
3241    }
3242 
3243    vk_outarray_append(&out, stat) {
3244       WRITE_STR(stat->name, "Spill Count");
3245       WRITE_STR(stat->description,
3246                 "Number of scratch spill operations.  This gives a rough "
3247                 "estimate of the cost incurred due to spilling temporary "
3248                 "values to memory.  If this is non-zero, you may want to "
3249                 "adjust your shader to reduce register pressure.");
3250       stat->format = VK_PIPELINE_EXECUTABLE_STATISTIC_FORMAT_UINT64_KHR;
3251       stat->value.u64 = exe->stats.spills;
3252    }
3253 
3254    vk_outarray_append(&out, stat) {
3255       WRITE_STR(stat->name, "Fill Count");
3256       WRITE_STR(stat->description,
3257                 "Number of scratch fill operations.  This gives a rough "
3258                 "estimate of the cost incurred due to spilling temporary "
3259                 "values to memory.  If this is non-zero, you may want to "
3260                 "adjust your shader to reduce register pressure.");
3261       stat->format = VK_PIPELINE_EXECUTABLE_STATISTIC_FORMAT_UINT64_KHR;
3262       stat->value.u64 = exe->stats.fills;
3263    }
3264 
3265    vk_outarray_append(&out, stat) {
3266       WRITE_STR(stat->name, "Scratch Memory Size");
3267       WRITE_STR(stat->description,
3268                 "Number of bytes of scratch memory required by the "
3269                 "generated shader executable.  If this is non-zero, you "
3270                 "may want to adjust your shader to reduce register "
3271                 "pressure.");
3272       stat->format = VK_PIPELINE_EXECUTABLE_STATISTIC_FORMAT_UINT64_KHR;
3273       stat->value.u64 = prog_data->total_scratch;
3274    }
3275 
3276    if (gl_shader_stage_uses_workgroup(exe->stage)) {
3277       vk_outarray_append(&out, stat) {
3278          WRITE_STR(stat->name, "Workgroup Memory Size");
3279          WRITE_STR(stat->description,
3280                    "Number of bytes of workgroup shared memory used by this "
3281                    "shader including any padding.");
3282          stat->format = VK_PIPELINE_EXECUTABLE_STATISTIC_FORMAT_UINT64_KHR;
3283          stat->value.u64 = prog_data->total_shared;
3284       }
3285    }
3286 
3287    return vk_outarray_status(&out);
3288 }
3289 
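/* Implements the usual Vulkan two-call idiom for returning text: when pData
 * is NULL only dataSize is written; otherwise the string is copied (possibly
 * truncated) and false is returned if it did not fit.
 */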
3290 static bool
write_ir_text(VkPipelineExecutableInternalRepresentationKHR * ir,const char * data)3291 write_ir_text(VkPipelineExecutableInternalRepresentationKHR* ir,
3292               const char *data)
3293 {
3294    ir->isText = VK_TRUE;
3295 
3296    size_t data_len = strlen(data) + 1;
3297 
3298    if (ir->pData == NULL) {
3299       ir->dataSize = data_len;
3300       return true;
3301    }
3302 
3303    strncpy(ir->pData, data, ir->dataSize);
3304    if (ir->dataSize < data_len)
3305       return false;
3306 
3307    ir->dataSize = data_len;
3308    return true;
3309 }
3310 
3311 VkResult anv_GetPipelineExecutableInternalRepresentationsKHR(
3312     VkDevice                                    device,
3313     const VkPipelineExecutableInfoKHR*          pExecutableInfo,
3314     uint32_t*                                   pInternalRepresentationCount,
3315     VkPipelineExecutableInternalRepresentationKHR* pInternalRepresentations)
3316 {
3317    ANV_FROM_HANDLE(anv_pipeline, pipeline, pExecutableInfo->pipeline);
3318    VK_OUTARRAY_MAKE(out, pInternalRepresentations,
3319                     pInternalRepresentationCount);
3320    bool incomplete_text = false;
3321 
3322    const struct anv_pipeline_executable *exe =
3323       anv_pipeline_get_executable(pipeline, pExecutableInfo->executableIndex);
3324 
3325    if (exe->nir) {
3326       vk_outarray_append(&out, ir) {
3327          WRITE_STR(ir->name, "Final NIR");
3328          WRITE_STR(ir->description,
3329                    "Final NIR before going into the back-end compiler");
3330 
3331          if (!write_ir_text(ir, exe->nir))
3332             incomplete_text = true;
3333       }
3334    }
3335 
3336    if (exe->disasm) {
3337       vk_outarray_append(&out, ir) {
3338          WRITE_STR(ir->name, "GEN Assembly");
3339          WRITE_STR(ir->description,
3340                    "Final GEN assembly for the generated shader binary");
3341 
3342          if (!write_ir_text(ir, exe->disasm))
3343             incomplete_text = true;
3344       }
3345    }
3346 
3347    return incomplete_text ? VK_INCOMPLETE : vk_outarray_status(&out);
3348 }
3349 
3350 VkResult
3351 anv_GetRayTracingShaderGroupHandlesKHR(
3352     VkDevice                                    _device,
3353     VkPipeline                                  _pipeline,
3354     uint32_t                                    firstGroup,
3355     uint32_t                                    groupCount,
3356     size_t                                      dataSize,
3357     void*                                       pData)
3358 {
3359    ANV_FROM_HANDLE(anv_device, device, _device);
3360    ANV_FROM_HANDLE(anv_pipeline, pipeline, _pipeline);
3361 
3362    if (pipeline->type != ANV_PIPELINE_RAY_TRACING)
3363       return vk_error(device, VK_ERROR_FEATURE_NOT_PRESENT);
3364 
3365    struct anv_ray_tracing_pipeline *rt_pipeline =
3366       anv_pipeline_to_ray_tracing(pipeline);
3367 
3368    for (uint32_t i = 0; i < groupCount; i++) {
3369       struct anv_rt_shader_group *group = &rt_pipeline->groups[firstGroup + i];
3370       memcpy(pData, group->handle, sizeof(group->handle));
3371       pData += sizeof(group->handle);
3372    }
3373 
3374    return VK_SUCCESS;
3375 }
3376 
3377 VkResult
3378 anv_GetRayTracingCaptureReplayShaderGroupHandlesKHR(
3379     VkDevice                                    _device,
3380     VkPipeline                                  pipeline,
3381     uint32_t                                    firstGroup,
3382     uint32_t                                    groupCount,
3383     size_t                                      dataSize,
3384     void*                                       pData)
3385 {
3386    ANV_FROM_HANDLE(anv_device, device, _device);
3387    unreachable("Unimplemented");
3388    return vk_error(device, VK_ERROR_FEATURE_NOT_PRESENT);
3389 }
3390 
3391 VkDeviceSize
3392 anv_GetRayTracingShaderGroupStackSizeKHR(
3393     VkDevice                                    device,
3394     VkPipeline                                  _pipeline,
3395     uint32_t                                    group,
3396     VkShaderGroupShaderKHR                      groupShader)
3397 {
3398    ANV_FROM_HANDLE(anv_pipeline, pipeline, _pipeline);
3399    assert(pipeline->type == ANV_PIPELINE_RAY_TRACING);
3400 
3401    struct anv_ray_tracing_pipeline *rt_pipeline =
3402       anv_pipeline_to_ray_tracing(pipeline);
3403 
3404    assert(group < rt_pipeline->group_count);
3405 
3406    struct anv_shader_bin *bin;
3407    switch (groupShader) {
3408    case VK_SHADER_GROUP_SHADER_GENERAL_KHR:
3409       bin = rt_pipeline->groups[group].general;
3410       break;
3411 
3412    case VK_SHADER_GROUP_SHADER_CLOSEST_HIT_KHR:
3413       bin = rt_pipeline->groups[group].closest_hit;
3414       break;
3415 
3416    case VK_SHADER_GROUP_SHADER_ANY_HIT_KHR:
3417       bin = rt_pipeline->groups[group].any_hit;
3418       break;
3419 
3420    case VK_SHADER_GROUP_SHADER_INTERSECTION_KHR:
3421       bin = rt_pipeline->groups[group].intersection;
3422       break;
3423 
3424    default:
3425       unreachable("Invalid VkShaderGroupShader enum");
3426    }
3427 
3428    if (bin == NULL)
3429       return 0;
3430 
3431    return brw_bs_prog_data_const(bin->prog_data)->max_stack_size;
3432 }
3433