1 /*
2  * Copyright © 2017 Intel Corporation
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice shall be included
12  * in all copies or substantial portions of the Software.
13  *
14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
15  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
17  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
19  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
20  * DEALINGS IN THE SOFTWARE.
21  */
22 
23 /**
24  * @file iris_program.c
25  *
26  * This file contains the driver interface for compiling shaders.
27  *
28  * See iris_program_cache.c for the in-memory program cache where the
29  * compiled shaders are stored.
30  */
31 
32 #include <stdio.h>
33 #include <errno.h>
34 #include "pipe/p_defines.h"
35 #include "pipe/p_state.h"
36 #include "pipe/p_context.h"
37 #include "pipe/p_screen.h"
38 #include "util/u_atomic.h"
39 #include "util/u_upload_mgr.h"
40 #include "util/debug.h"
41 #include "util/u_async_debug.h"
42 #include "compiler/nir/nir.h"
43 #include "compiler/nir/nir_builder.h"
44 #include "compiler/nir/nir_serialize.h"
45 #include "intel/compiler/brw_compiler.h"
46 #include "intel/compiler/brw_nir.h"
47 #include "iris_context.h"
48 #include "nir/tgsi_to_nir.h"
49 
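/* Key initialization helpers.  KEY_ID() seeds an iris prog key with the
 * uncompiled shader's program_string_id.  BRW_KEY_INIT fills the common
 * brw_base_prog_key fields with our defaults: a uniform subgroup size,
 * identity texture swizzles (0x688 is SWIZZLE_XYZW), and multisample
 * layout masks (16x MSAA only exists on Gfx9+).
 */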
50 #define KEY_ID(prefix) .prefix.program_string_id = ish->program_id
51 #define BRW_KEY_INIT(gen, prog_id)                       \
52    .base.program_string_id = prog_id,                    \
53    .base.subgroup_size_type = BRW_SUBGROUP_SIZE_UNIFORM, \
54    .base.tex.swizzles[0 ... MAX_SAMPLERS - 1] = 0x688,   \
55    .base.tex.compressed_multisample_layout_mask = ~0,    \
56    .base.tex.msaa_16 = (gen >= 9 ? ~0 : 0)
57 
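/* The state needed to compile one shader variant asynchronously on the
 * screen's shader compiler queue (see iris_schedule_compile below).
 */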
58 struct iris_threaded_compile_job {
59    struct iris_screen *screen;
60    struct u_upload_mgr *uploader;
61    struct pipe_debug_callback *dbg;
62    struct iris_uncompiled_shader *ish;
63    struct iris_compiled_shader *shader;
64 };
65 
66 static unsigned
67 get_new_program_id(struct iris_screen *screen)
68 {
69    return p_atomic_inc_return(&screen->program_id);
70 }
71 
72 void
73 iris_finalize_program(struct iris_compiled_shader *shader,
74                       struct brw_stage_prog_data *prog_data,
75                       uint32_t *streamout,
76                       enum brw_param_builtin *system_values,
77                       unsigned num_system_values,
78                       unsigned kernel_input_size,
79                       unsigned num_cbufs,
80                       const struct iris_binding_table *bt)
81 {
82    shader->prog_data = prog_data;
83    shader->streamout = streamout;
84    shader->system_values = system_values;
85    shader->num_system_values = num_system_values;
86    shader->kernel_input_size = kernel_input_size;
87    shader->num_cbufs = num_cbufs;
88    shader->bt = *bt;
89 
90    ralloc_steal(shader, shader->prog_data);
91    ralloc_steal(shader->prog_data, (void *)prog_data->relocs);
92    ralloc_steal(shader->prog_data, prog_data->param);
93    ralloc_steal(shader->prog_data, prog_data->pull_param);
94    ralloc_steal(shader, shader->streamout);
95    ralloc_steal(shader, shader->system_values);
96 }
97 
98 static struct brw_vs_prog_key
99 iris_to_brw_vs_key(const struct intel_device_info *devinfo,
100                    const struct iris_vs_prog_key *key)
101 {
102    return (struct brw_vs_prog_key) {
103       BRW_KEY_INIT(devinfo->ver, key->vue.base.program_string_id),
104 
105       /* Don't tell the backend about our clip plane constants, we've
106        * already lowered them in NIR and don't want it doing it again.
107        */
108       .nr_userclip_plane_consts = 0,
109    };
110 }
111 
112 static struct brw_tcs_prog_key
113 iris_to_brw_tcs_key(const struct intel_device_info *devinfo,
114                     const struct iris_tcs_prog_key *key)
115 {
116    return (struct brw_tcs_prog_key) {
117       BRW_KEY_INIT(devinfo->ver, key->vue.base.program_string_id),
118       .tes_primitive_mode = key->tes_primitive_mode,
119       .input_vertices = key->input_vertices,
120       .patch_outputs_written = key->patch_outputs_written,
121       .outputs_written = key->outputs_written,
122       .quads_workaround = key->quads_workaround,
123    };
124 }
125 
126 static struct brw_tes_prog_key
127 iris_to_brw_tes_key(const struct intel_device_info *devinfo,
128                     const struct iris_tes_prog_key *key)
129 {
130    return (struct brw_tes_prog_key) {
131       BRW_KEY_INIT(devinfo->ver, key->vue.base.program_string_id),
132       .patch_inputs_read = key->patch_inputs_read,
133       .inputs_read = key->inputs_read,
134    };
135 }
136 
137 static struct brw_gs_prog_key
138 iris_to_brw_gs_key(const struct intel_device_info *devinfo,
139                    const struct iris_gs_prog_key *key)
140 {
141    return (struct brw_gs_prog_key) {
142       BRW_KEY_INIT(devinfo->ver, key->vue.base.program_string_id),
143    };
144 }
145 
146 static struct brw_wm_prog_key
147 iris_to_brw_fs_key(const struct intel_device_info *devinfo,
148                    const struct iris_fs_prog_key *key)
149 {
150    return (struct brw_wm_prog_key) {
151       BRW_KEY_INIT(devinfo->ver, key->base.program_string_id),
152       .nr_color_regions = key->nr_color_regions,
153       .flat_shade = key->flat_shade,
154       .alpha_test_replicate_alpha = key->alpha_test_replicate_alpha,
155       .alpha_to_coverage = key->alpha_to_coverage,
156       .clamp_fragment_color = key->clamp_fragment_color,
157       .persample_interp = key->persample_interp,
158       .multisample_fbo = key->multisample_fbo,
159       .force_dual_color_blend = key->force_dual_color_blend,
160       .coherent_fb_fetch = key->coherent_fb_fetch,
161       .color_outputs_valid = key->color_outputs_valid,
162       .input_slots_valid = key->input_slots_valid,
163       .ignore_sample_mask_out = !key->multisample_fbo,
164    };
165 }
166 
167 static struct brw_cs_prog_key
168 iris_to_brw_cs_key(const struct intel_device_info *devinfo,
169                    const struct iris_cs_prog_key *key)
170 {
171    return (struct brw_cs_prog_key) {
172       BRW_KEY_INIT(devinfo->ver, key->base.program_string_id),
173    };
174 }
175 
176 static void *
177 upload_state(struct u_upload_mgr *uploader,
178              struct iris_state_ref *ref,
179              unsigned size,
180              unsigned alignment)
181 {
182    void *p = NULL;
183    u_upload_alloc(uploader, 0, size, alignment, &ref->offset, &ref->res, &p);
184    return p;
185 }
186 
187 void
188 iris_upload_ubo_ssbo_surf_state(struct iris_context *ice,
189                                 struct pipe_shader_buffer *buf,
190                                 struct iris_state_ref *surf_state,
191                                 isl_surf_usage_flags_t usage)
192 {
193    struct pipe_context *ctx = &ice->ctx;
194    struct iris_screen *screen = (struct iris_screen *) ctx->screen;
195    bool ssbo = usage & ISL_SURF_USAGE_STORAGE_BIT;
196 
197    void *map =
198       upload_state(ice->state.surface_uploader, surf_state,
199                    screen->isl_dev.ss.size, 64);
200    if (unlikely(!map)) {
201       surf_state->res = NULL;
202       return;
203    }
204 
205    struct iris_resource *res = (void *) buf->buffer;
206    struct iris_bo *surf_bo = iris_resource_bo(surf_state->res);
207    surf_state->offset += iris_bo_offset_from_base_address(surf_bo);
208 
209    const bool dataport = ssbo || !screen->compiler->indirect_ubos_use_sampler;
210 
211    isl_buffer_fill_state(&screen->isl_dev, map,
212                          .address = res->bo->address + res->offset +
213                                     buf->buffer_offset,
214                          .size_B = buf->buffer_size - res->offset,
215                          .format = dataport ? ISL_FORMAT_RAW
216                                             : ISL_FORMAT_R32G32B32A32_FLOAT,
217                          .swizzle = ISL_SWIZZLE_IDENTITY,
218                          .stride_B = 1,
219                          .mocs = iris_mocs(res->bo, &screen->isl_dev, usage));
220 }
221 
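/* Flatten an array-of-arrays deref chain into a single scalar offset,
 * in units of elem_size.
 *
 * As a sketch: for "image2D img[4][3]" accessed as img[i][j] with
 * elem_size == 1, this produces min(i * 3 + j, 11).
 */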
222 static nir_ssa_def *
223 get_aoa_deref_offset(nir_builder *b,
224                      nir_deref_instr *deref,
225                      unsigned elem_size)
226 {
227    unsigned array_size = elem_size;
228    nir_ssa_def *offset = nir_imm_int(b, 0);
229 
230    while (deref->deref_type != nir_deref_type_var) {
231       assert(deref->deref_type == nir_deref_type_array);
232 
233       /* This level's element size is the previous level's array size */
234       nir_ssa_def *index = nir_ssa_for_src(b, deref->arr.index, 1);
235       assert(deref->arr.index.ssa);
236       offset = nir_iadd(b, offset,
237                            nir_imul(b, index, nir_imm_int(b, array_size)));
238 
239       deref = nir_deref_instr_parent(deref);
240       assert(glsl_type_is_array(deref->type));
241       array_size *= glsl_get_length(deref->type);
242    }
243 
244    /* Accessing an invalid surface index with the dataport can result in a
245     * hang.  According to the spec "if the index used to select an individual
246     * element is negative or greater than or equal to the size of the array,
247     * the results of the operation are undefined but may not lead to
248     * termination" -- which is one of the possible outcomes of the hang.
249     * Clamp the index to prevent access outside of the array bounds.
250     */
251    return nir_umin(b, offset, nir_imm_int(b, array_size - elem_size));
252 }
253 
254 static void
255 iris_lower_storage_image_derefs(nir_shader *nir)
256 {
257    nir_function_impl *impl = nir_shader_get_entrypoint(nir);
258 
259    nir_builder b;
260    nir_builder_init(&b, impl);
261 
262    nir_foreach_block(block, impl) {
263       nir_foreach_instr_safe(instr, block) {
264          if (instr->type != nir_instr_type_intrinsic)
265             continue;
266 
267          nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
268          switch (intrin->intrinsic) {
269          case nir_intrinsic_image_deref_load:
270          case nir_intrinsic_image_deref_store:
271          case nir_intrinsic_image_deref_atomic_add:
272          case nir_intrinsic_image_deref_atomic_imin:
273          case nir_intrinsic_image_deref_atomic_umin:
274          case nir_intrinsic_image_deref_atomic_imax:
275          case nir_intrinsic_image_deref_atomic_umax:
276          case nir_intrinsic_image_deref_atomic_and:
277          case nir_intrinsic_image_deref_atomic_or:
278          case nir_intrinsic_image_deref_atomic_xor:
279          case nir_intrinsic_image_deref_atomic_exchange:
280          case nir_intrinsic_image_deref_atomic_comp_swap:
281          case nir_intrinsic_image_deref_size:
282          case nir_intrinsic_image_deref_samples:
283          case nir_intrinsic_image_deref_load_raw_intel:
284          case nir_intrinsic_image_deref_store_raw_intel: {
285             nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]);
286             nir_variable *var = nir_deref_instr_get_variable(deref);
287 
288             b.cursor = nir_before_instr(&intrin->instr);
289             nir_ssa_def *index =
290                nir_iadd(&b, nir_imm_int(&b, var->data.driver_location),
291                             get_aoa_deref_offset(&b, deref, 1));
292             nir_rewrite_image_intrinsic(intrin, index, false);
293             break;
294          }
295 
296          default:
297             break;
298          }
299       }
300    }
301 }
302 
303 static bool
304 iris_uses_image_atomic(const nir_shader *shader)
305 {
306    nir_foreach_function(function, shader) {
307       if (function->impl == NULL)
308          continue;
309 
310       nir_foreach_block(block, function->impl) {
311          nir_foreach_instr(instr, block) {
312             if (instr->type != nir_instr_type_intrinsic)
313                continue;
314 
315             nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
316             switch (intrin->intrinsic) {
317             case nir_intrinsic_image_deref_atomic_add:
318             case nir_intrinsic_image_deref_atomic_imin:
319             case nir_intrinsic_image_deref_atomic_umin:
320             case nir_intrinsic_image_deref_atomic_imax:
321             case nir_intrinsic_image_deref_atomic_umax:
322             case nir_intrinsic_image_deref_atomic_and:
323             case nir_intrinsic_image_deref_atomic_or:
324             case nir_intrinsic_image_deref_atomic_xor:
325             case nir_intrinsic_image_deref_atomic_exchange:
326             case nir_intrinsic_image_deref_atomic_comp_swap:
327                unreachable("Should have been lowered in "
328                            "iris_lower_storage_image_derefs");
329 
330             case nir_intrinsic_image_atomic_add:
331             case nir_intrinsic_image_atomic_imin:
332             case nir_intrinsic_image_atomic_umin:
333             case nir_intrinsic_image_atomic_imax:
334             case nir_intrinsic_image_atomic_umax:
335             case nir_intrinsic_image_atomic_and:
336             case nir_intrinsic_image_atomic_or:
337             case nir_intrinsic_image_atomic_xor:
338             case nir_intrinsic_image_atomic_exchange:
339             case nir_intrinsic_image_atomic_comp_swap:
340                return true;
341 
342             default:
343                break;
344             }
345          }
346       }
347    }
348 
349    return false;
350 }
351 
352 /**
353  * Undo nir_lower_passthrough_edgeflags but keep the inputs_read flag.
354  */
355 static bool
356 iris_fix_edge_flags(nir_shader *nir)
357 {
358    if (nir->info.stage != MESA_SHADER_VERTEX) {
359       nir_shader_preserve_all_metadata(nir);
360       return false;
361    }
362 
363    nir_variable *var = nir_find_variable_with_location(nir, nir_var_shader_out,
364                                                        VARYING_SLOT_EDGE);
365    if (!var) {
366       nir_shader_preserve_all_metadata(nir);
367       return false;
368    }
369 
370    var->data.mode = nir_var_shader_temp;
371    nir->info.outputs_written &= ~VARYING_BIT_EDGE;
372    nir->info.inputs_read &= ~VERT_BIT_EDGEFLAG;
373    nir_fixup_deref_modes(nir);
374 
375    nir_foreach_function(f, nir) {
376       if (f->impl) {
377          nir_metadata_preserve(f->impl, nir_metadata_block_index |
378                                         nir_metadata_dominance |
379                                         nir_metadata_live_ssa_defs |
380                                         nir_metadata_loop_analysis);
381       } else {
382          nir_metadata_preserve(f->impl, nir_metadata_all);
383       }
384    }
385 
386    return true;
387 }
388 
389 /**
390  * Fix an uncompiled shader's stream output info.
391  *
392  * Core Gallium stores output->register_index as a "slot" number, where
393  * slots are assigned consecutively to all outputs in info->outputs_written.
394  * This naive packing of outputs doesn't work for us - we too have slots,
395  * but the layout is defined by the VUE map, which we won't have until we
396  * compile a specific shader variant.  So, we remap these and simply store
397  * VARYING_SLOT_* in our copy's output->register_index fields.
398  *
399  * We also fix up VARYING_SLOT_{LAYER,VIEWPORT,PSIZ} to select the Y/Z/W
400  * components of our VUE header.  See brw_vue_map.c for the layout.
401  */
402 static void
403 update_so_info(struct pipe_stream_output_info *so_info,
404                uint64_t outputs_written)
405 {
406    uint8_t reverse_map[64] = {};
407    unsigned slot = 0;
408    while (outputs_written) {
409       reverse_map[slot++] = u_bit_scan64(&outputs_written);
410    }
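   /* e.g. if outputs_written had only the POS, PSIZ, and VAR0 bits set,
    * reverse_map would now be { VARYING_SLOT_POS, VARYING_SLOT_PSIZ,
    * VARYING_SLOT_VAR0, ... }.
    */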
411 
412    for (unsigned i = 0; i < so_info->num_outputs; i++) {
413       struct pipe_stream_output *output = &so_info->output[i];
414 
415       /* Map Gallium's condensed "slots" back to real VARYING_SLOT_* enums */
416       output->register_index = reverse_map[output->register_index];
417 
418       /* The VUE header contains three scalar fields packed together:
419        * - gl_PointSize is stored in VARYING_SLOT_PSIZ.w
420        * - gl_Layer is stored in VARYING_SLOT_PSIZ.y
421        * - gl_ViewportIndex is stored in VARYING_SLOT_PSIZ.z
422        */
423       switch (output->register_index) {
424       case VARYING_SLOT_LAYER:
425          assert(output->num_components == 1);
426          output->register_index = VARYING_SLOT_PSIZ;
427          output->start_component = 1;
428          break;
429       case VARYING_SLOT_VIEWPORT:
430          assert(output->num_components == 1);
431          output->register_index = VARYING_SLOT_PSIZ;
432          output->start_component = 2;
433          break;
434       case VARYING_SLOT_PSIZ:
435          assert(output->num_components == 1);
436          output->start_component = 3;
437          break;
438       }
439 
440       //info->outputs_written |= 1ull << output->register_index;
441    }
442 }
443 
444 static void
445 setup_vec4_image_sysval(uint32_t *sysvals, uint32_t idx,
446                         unsigned offset, unsigned n)
447 {
448    assert(offset % sizeof(uint32_t) == 0);
449 
450    for (unsigned i = 0; i < n; ++i)
451       sysvals[i] = BRW_PARAM_IMAGE(idx, offset / sizeof(uint32_t) + i);
452 
453    for (unsigned i = n; i < 4; ++i)
454       sysvals[i] = BRW_PARAM_BUILTIN_ZERO;
455 }
456 
457 /**
458  * Associate NIR uniform variables with the prog_data->param[] mechanism
459  * used by the backend.  Also, decide which UBOs we'd like to push in an
460  * ideal situation (though the backend can reduce this).
461  */
462 static void
463 iris_setup_uniforms(const struct brw_compiler *compiler,
464                     void *mem_ctx,
465                     nir_shader *nir,
466                     struct brw_stage_prog_data *prog_data,
467                     unsigned kernel_input_size,
468                     enum brw_param_builtin **out_system_values,
469                     unsigned *out_num_system_values,
470                     unsigned *out_num_cbufs)
471 {
472    UNUSED const struct intel_device_info *devinfo = compiler->devinfo;
473 
474    unsigned system_values_start = ALIGN(kernel_input_size, sizeof(uint32_t));
475 
476    const unsigned IRIS_MAX_SYSTEM_VALUES =
477       PIPE_MAX_SHADER_IMAGES * BRW_IMAGE_PARAM_SIZE;
478    enum brw_param_builtin *system_values =
479       rzalloc_array(mem_ctx, enum brw_param_builtin, IRIS_MAX_SYSTEM_VALUES);
480    unsigned num_system_values = 0;
481 
482    unsigned patch_vert_idx = -1;
483    unsigned ucp_idx[IRIS_MAX_CLIP_PLANES];
484    unsigned img_idx[PIPE_MAX_SHADER_IMAGES];
485    unsigned variable_group_size_idx = -1;
486    unsigned work_dim_idx = -1;
487    memset(ucp_idx, -1, sizeof(ucp_idx));
488    memset(img_idx, -1, sizeof(img_idx));
489 
490    nir_function_impl *impl = nir_shader_get_entrypoint(nir);
491 
492    nir_builder b;
493    nir_builder_init(&b, impl);
494 
495    b.cursor = nir_before_block(nir_start_block(impl));
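   /* Placeholder cbuf index for loads of the new system values; it is
    * rewritten to the real system-value cbuf index further below, once we
    * know how many other cbufs the shader uses.
    */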
496    nir_ssa_def *temp_ubo_name = nir_ssa_undef(&b, 1, 32);
497 
498    /* Turn system value intrinsics into uniforms */
499    nir_foreach_block(block, impl) {
500       nir_foreach_instr_safe(instr, block) {
501          if (instr->type != nir_instr_type_intrinsic)
502             continue;
503 
504          nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
505          nir_ssa_def *offset;
506 
507          switch (intrin->intrinsic) {
508          case nir_intrinsic_load_constant: {
509             unsigned load_size = intrin->dest.ssa.num_components *
510                                  intrin->dest.ssa.bit_size / 8;
511             unsigned load_align = intrin->dest.ssa.bit_size / 8;
512 
513             /* This one is special because it reads from the shader constant
514              * data and not cbuf0 which gallium uploads for us.
515              */
516             b.cursor = nir_instr_remove(&intrin->instr);
517 
518             nir_ssa_def *offset =
519                nir_iadd_imm(&b, nir_ssa_for_src(&b, intrin->src[0], 1),
520                                 nir_intrinsic_base(intrin));
521 
522             assert(load_size < b.shader->constant_data_size);
523             unsigned max_offset = b.shader->constant_data_size - load_size;
524             offset = nir_umin(&b, offset, nir_imm_int(&b, max_offset));
525 
526             nir_ssa_def *const_data_base_addr = nir_pack_64_2x32_split(&b,
527                nir_load_reloc_const_intel(&b, BRW_SHADER_RELOC_CONST_DATA_ADDR_LOW),
528                nir_load_reloc_const_intel(&b, BRW_SHADER_RELOC_CONST_DATA_ADDR_HIGH));
529 
530             nir_ssa_def *data =
531                nir_load_global(&b, nir_iadd(&b, const_data_base_addr,
532                                                 nir_u2u64(&b, offset)),
533                                load_align,
534                                intrin->dest.ssa.num_components,
535                                intrin->dest.ssa.bit_size);
536 
537             nir_ssa_def_rewrite_uses(&intrin->dest.ssa,
538                                      data);
539             continue;
540          }
541          case nir_intrinsic_load_user_clip_plane: {
542             unsigned ucp = nir_intrinsic_ucp_id(intrin);
543 
544             if (ucp_idx[ucp] == -1) {
545                ucp_idx[ucp] = num_system_values;
546                num_system_values += 4;
547             }
548 
549             for (int i = 0; i < 4; i++) {
550                system_values[ucp_idx[ucp] + i] =
551                   BRW_PARAM_BUILTIN_CLIP_PLANE(ucp, i);
552             }
553 
554             b.cursor = nir_before_instr(instr);
555             offset = nir_imm_int(&b, system_values_start +
556                                      ucp_idx[ucp] * sizeof(uint32_t));
557             break;
558          }
559          case nir_intrinsic_load_patch_vertices_in:
560             if (patch_vert_idx == -1)
561                patch_vert_idx = num_system_values++;
562 
563             system_values[patch_vert_idx] =
564                BRW_PARAM_BUILTIN_PATCH_VERTICES_IN;
565 
566             b.cursor = nir_before_instr(instr);
567             offset = nir_imm_int(&b, system_values_start +
568                                      patch_vert_idx * sizeof(uint32_t));
569             break;
570          case nir_intrinsic_image_deref_load_param_intel: {
571             assert(devinfo->ver < 9);
572             nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]);
573             nir_variable *var = nir_deref_instr_get_variable(deref);
574 
575             if (img_idx[var->data.binding] == -1) {
576                /* GL only allows arrays of arrays of images. */
577                assert(glsl_type_is_image(glsl_without_array(var->type)));
578                unsigned num_images = MAX2(1, glsl_get_aoa_size(var->type));
579 
580                for (int i = 0; i < num_images; i++) {
581                   const unsigned img = var->data.binding + i;
582 
583                   img_idx[img] = num_system_values;
584                   num_system_values += BRW_IMAGE_PARAM_SIZE;
585 
586                   uint32_t *img_sv = &system_values[img_idx[img]];
587 
588                   setup_vec4_image_sysval(
589                      img_sv + BRW_IMAGE_PARAM_OFFSET_OFFSET, img,
590                      offsetof(struct brw_image_param, offset), 2);
591                   setup_vec4_image_sysval(
592                      img_sv + BRW_IMAGE_PARAM_SIZE_OFFSET, img,
593                      offsetof(struct brw_image_param, size), 3);
594                   setup_vec4_image_sysval(
595                      img_sv + BRW_IMAGE_PARAM_STRIDE_OFFSET, img,
596                      offsetof(struct brw_image_param, stride), 4);
597                   setup_vec4_image_sysval(
598                      img_sv + BRW_IMAGE_PARAM_TILING_OFFSET, img,
599                      offsetof(struct brw_image_param, tiling), 3);
600                   setup_vec4_image_sysval(
601                      img_sv + BRW_IMAGE_PARAM_SWIZZLING_OFFSET, img,
602                      offsetof(struct brw_image_param, swizzling), 2);
603                }
604             }
605 
606             b.cursor = nir_before_instr(instr);
607             offset = nir_iadd(&b,
608                get_aoa_deref_offset(&b, deref, BRW_IMAGE_PARAM_SIZE * 4),
609                nir_imm_int(&b, system_values_start +
610                                img_idx[var->data.binding] * 4 +
611                                nir_intrinsic_base(intrin) * 16));
612             break;
613          }
614          case nir_intrinsic_load_workgroup_size: {
615             assert(nir->info.workgroup_size_variable);
616             if (variable_group_size_idx == -1) {
617                variable_group_size_idx = num_system_values;
618                num_system_values += 3;
619                for (int i = 0; i < 3; i++) {
620                   system_values[variable_group_size_idx + i] =
621                      BRW_PARAM_BUILTIN_WORK_GROUP_SIZE_X + i;
622                }
623             }
624 
625             b.cursor = nir_before_instr(instr);
626             offset = nir_imm_int(&b, system_values_start +
627                                      variable_group_size_idx * sizeof(uint32_t));
628             break;
629          }
630          case nir_intrinsic_load_work_dim: {
631             if (work_dim_idx == -1) {
632                work_dim_idx = num_system_values++;
633                system_values[work_dim_idx] = BRW_PARAM_BUILTIN_WORK_DIM;
634             }
635             b.cursor = nir_before_instr(instr);
636             offset = nir_imm_int(&b, system_values_start +
637                                      work_dim_idx * sizeof(uint32_t));
638             break;
639          }
640          case nir_intrinsic_load_kernel_input: {
641             assert(nir_intrinsic_base(intrin) +
642                    nir_intrinsic_range(intrin) <= kernel_input_size);
643             b.cursor = nir_before_instr(instr);
644             offset = nir_iadd_imm(&b, intrin->src[0].ssa,
645                                       nir_intrinsic_base(intrin));
646             break;
647          }
648          default:
649             continue;
650          }
651 
652          nir_ssa_def *load =
653             nir_load_ubo(&b, intrin->dest.ssa.num_components, intrin->dest.ssa.bit_size,
654                          temp_ubo_name, offset,
655                          .align_mul = 4,
656                          .align_offset = 0,
657                          .range_base = 0,
658                          .range = ~0);
659 
660          nir_ssa_def_rewrite_uses(&intrin->dest.ssa,
661                                   load);
662          nir_instr_remove(instr);
663       }
664    }
665 
666    nir_validate_shader(nir, "before remapping");
667 
668    /* Uniforms are stored in constant buffer 0; the user-facing UBOs are
669     * shifted up by one index.  So if any constant buffer is needed,
670     * constant buffer 0 will be needed, so account for it.
671     */
672    unsigned num_cbufs = nir->info.num_ubos;
673    if (num_cbufs || nir->num_uniforms)
674       num_cbufs++;
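   /* For instance, a shader with two user UBOs and some default-block
    * uniforms reaches this point with num_cbufs == 3: cbuf0 for the
    * uniforms plus the two user UBOs.
    */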
675 
676    /* Place the new params in a new cbuf. */
677    if (num_system_values > 0 || kernel_input_size > 0) {
678       unsigned sysval_cbuf_index = num_cbufs;
679       num_cbufs++;
680 
681       system_values = reralloc(mem_ctx, system_values, enum brw_param_builtin,
682                                num_system_values);
683 
684       nir_foreach_block(block, impl) {
685          nir_foreach_instr_safe(instr, block) {
686             if (instr->type != nir_instr_type_intrinsic)
687                continue;
688 
689             nir_intrinsic_instr *load = nir_instr_as_intrinsic(instr);
690 
691             if (load->intrinsic != nir_intrinsic_load_ubo)
692                continue;
693 
694             b.cursor = nir_before_instr(instr);
695 
696             assert(load->src[0].is_ssa);
697 
698             if (load->src[0].ssa == temp_ubo_name) {
699                nir_ssa_def *imm = nir_imm_int(&b, sysval_cbuf_index);
700                nir_instr_rewrite_src(instr, &load->src[0],
701                                      nir_src_for_ssa(imm));
702             }
703          }
704       }
705 
706       /* We need to fold the new iadds for brw_nir_analyze_ubo_ranges */
707       nir_opt_constant_folding(nir);
708    } else {
709       ralloc_free(system_values);
710       system_values = NULL;
711    }
712 
713    assert(num_cbufs < PIPE_MAX_CONSTANT_BUFFERS);
714    nir_validate_shader(nir, "after remap");
715 
716    /* We don't use params[] but gallium leaves num_uniforms set.  We use this
717     * to detect when cbuf0 exists but we don't need it anymore when we get
718     * here.  Instead, zero it out so that the back-end doesn't get confused
719     * when nr_params * 4 != num_uniforms.
720     */
721    nir->num_uniforms = 0;
722 
723    *out_system_values = system_values;
724    *out_num_system_values = num_system_values;
725    *out_num_cbufs = num_cbufs;
726 }
727 
728 static const char *surface_group_names[] = {
729    [IRIS_SURFACE_GROUP_RENDER_TARGET]      = "render target",
730    [IRIS_SURFACE_GROUP_RENDER_TARGET_READ] = "non-coherent render target read",
731    [IRIS_SURFACE_GROUP_CS_WORK_GROUPS]     = "CS work groups",
732    [IRIS_SURFACE_GROUP_TEXTURE]            = "texture",
733    [IRIS_SURFACE_GROUP_UBO]                = "ubo",
734    [IRIS_SURFACE_GROUP_SSBO]               = "ssbo",
735    [IRIS_SURFACE_GROUP_IMAGE]              = "image",
736 };
737 
738 static void
739 iris_print_binding_table(FILE *fp, const char *name,
740                          const struct iris_binding_table *bt)
741 {
742    STATIC_ASSERT(ARRAY_SIZE(surface_group_names) == IRIS_SURFACE_GROUP_COUNT);
743 
744    uint32_t total = 0;
745    uint32_t compacted = 0;
746 
747    for (int i = 0; i < IRIS_SURFACE_GROUP_COUNT; i++) {
748       uint32_t size = bt->sizes[i];
749       total += size;
750       if (size)
751          compacted += util_bitcount64(bt->used_mask[i]);
752    }
753 
754    if (total == 0) {
755       fprintf(fp, "Binding table for %s is empty\n\n", name);
756       return;
757    }
758 
759    if (total != compacted) {
760       fprintf(fp, "Binding table for %s "
761               "(compacted to %u entries from %u entries)\n",
762               name, compacted, total);
763    } else {
764       fprintf(fp, "Binding table for %s (%u entries)\n", name, total);
765    }
766 
767    uint32_t entry = 0;
768    for (int i = 0; i < IRIS_SURFACE_GROUP_COUNT; i++) {
769       uint64_t mask = bt->used_mask[i];
770       while (mask) {
771          int index = u_bit_scan64(&mask);
772          fprintf(fp, "  [%u] %s #%d\n", entry++, surface_group_names[i], index);
773       }
774    }
775    fprintf(fp, "\n");
776 }
777 
778 enum {
779    /* Max elements in a surface group. */
780    SURFACE_GROUP_MAX_ELEMENTS = 64,
781 };
782 
783 /**
784  * Map a <group, index> pair to a binding table index.
785  *
786  * For example: <UBO, 5> => binding table index 12
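 * (e.g., if the UBO group's binding table offset were 9 and its used_mask
 * had bits 0, 2, 3, and 5 set, index 5 would map to 9 + 3 == 12, since
 * three used entries precede it).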
787  */
788 uint32_t
789 iris_group_index_to_bti(const struct iris_binding_table *bt,
790                         enum iris_surface_group group, uint32_t index)
791 {
792    assert(index < bt->sizes[group]);
793    uint64_t mask = bt->used_mask[group];
794    uint64_t bit = 1ull << index;
795    if (bit & mask) {
796       return bt->offsets[group] + util_bitcount64((bit - 1) & mask);
797    } else {
798       return IRIS_SURFACE_NOT_USED;
799    }
800 }
801 
802 /**
803  * Map a binding table index back to a <group, index> pair.
804  *
805  * For example: binding table index 12 => <UBO, 5>
806  */
807 uint32_t
808 iris_bti_to_group_index(const struct iris_binding_table *bt,
809                         enum iris_surface_group group, uint32_t bti)
810 {
811    uint64_t used_mask = bt->used_mask[group];
812    assert(bti >= bt->offsets[group]);
813 
814    uint32_t c = bti - bt->offsets[group];
815    while (used_mask) {
816       int i = u_bit_scan64(&used_mask);
817       if (c == 0)
818          return i;
819       c--;
820    }
821 
822    return IRIS_SURFACE_NOT_USED;
823 }
824 
825 static void
826 rewrite_src_with_bti(nir_builder *b, struct iris_binding_table *bt,
827                      nir_instr *instr, nir_src *src,
828                      enum iris_surface_group group)
829 {
830    assert(bt->sizes[group] > 0);
831 
832    b->cursor = nir_before_instr(instr);
833    nir_ssa_def *bti;
834    if (nir_src_is_const(*src)) {
835       uint32_t index = nir_src_as_uint(*src);
836       bti = nir_imm_intN_t(b, iris_group_index_to_bti(bt, group, index),
837                            src->ssa->bit_size);
838    } else {
839       /* Indirect usage makes all the surfaces of the group available,
840        * so we can just add the base.
841        */
842       assert(bt->used_mask[group] == BITFIELD64_MASK(bt->sizes[group]));
843       bti = nir_iadd_imm(b, src->ssa, bt->offsets[group]);
844    }
845    nir_instr_rewrite_src(instr, src, nir_src_for_ssa(bti));
846 }
847 
848 static void
849 mark_used_with_src(struct iris_binding_table *bt, nir_src *src,
850                    enum iris_surface_group group)
851 {
852    assert(bt->sizes[group] > 0);
853 
854    if (nir_src_is_const(*src)) {
855       uint64_t index = nir_src_as_uint(*src);
856       assert(index < bt->sizes[group]);
857       bt->used_mask[group] |= 1ull << index;
858    } else {
859       /* There's an indirect usage, so we need all the surfaces. */
860       bt->used_mask[group] = BITFIELD64_MASK(bt->sizes[group]);
861    }
862 }
863 
864 static bool
865 skip_compacting_binding_tables(void)
866 {
867    static int skip = -1;
868    if (skip < 0)
869       skip = env_var_as_boolean("INTEL_DISABLE_COMPACT_BINDING_TABLE", false);
870    return skip;
871 }
872 
873 /**
874  * Set up the binding table indices and apply to the shader.
875  */
876 static void
877 iris_setup_binding_table(const struct intel_device_info *devinfo,
878                          struct nir_shader *nir,
879                          struct iris_binding_table *bt,
880                          unsigned num_render_targets,
881                          unsigned num_system_values,
882                          unsigned num_cbufs)
883 {
884    const struct shader_info *info = &nir->info;
885 
886    memset(bt, 0, sizeof(*bt));
887 
888    /* Set the sizes for each surface group.  For some groups, we already know
889     * upfront how many will be used, so mark them.
890     */
891    if (info->stage == MESA_SHADER_FRAGMENT) {
892       bt->sizes[IRIS_SURFACE_GROUP_RENDER_TARGET] = num_render_targets;
893       /* All render targets used. */
894       bt->used_mask[IRIS_SURFACE_GROUP_RENDER_TARGET] =
895          BITFIELD64_MASK(num_render_targets);
896 
897       /* Set up the render target read surface group in order to support
898        * non-coherent framebuffer fetch on Gfx8.
899        */
900       if (devinfo->ver == 8 && info->outputs_read) {
901          bt->sizes[IRIS_SURFACE_GROUP_RENDER_TARGET_READ] = num_render_targets;
902          bt->used_mask[IRIS_SURFACE_GROUP_RENDER_TARGET_READ] =
903             BITFIELD64_MASK(num_render_targets);
904       }
905    } else if (info->stage == MESA_SHADER_COMPUTE) {
906       bt->sizes[IRIS_SURFACE_GROUP_CS_WORK_GROUPS] = 1;
907    }
908 
909    bt->sizes[IRIS_SURFACE_GROUP_TEXTURE] = BITSET_LAST_BIT(info->textures_used);
910    bt->used_mask[IRIS_SURFACE_GROUP_TEXTURE] = info->textures_used[0];
911 
912    bt->sizes[IRIS_SURFACE_GROUP_IMAGE] = info->num_images;
913 
914    /* Allocate an extra slot in the UBO section for NIR constants.
915     * Binding table compaction will remove it if unnecessary.
916     *
917     * We don't include them in iris_compiled_shader::num_cbufs because
918     * they are uploaded separately from shs->constbuf[], but from a shader
919     * point of view, they're another UBO (at the end of the section).
920     */
921    bt->sizes[IRIS_SURFACE_GROUP_UBO] = num_cbufs + 1;
922 
923    bt->sizes[IRIS_SURFACE_GROUP_SSBO] = info->num_ssbos;
924 
925    for (int i = 0; i < IRIS_SURFACE_GROUP_COUNT; i++)
926       assert(bt->sizes[i] <= SURFACE_GROUP_MAX_ELEMENTS);
927 
928    /* Mark surfaces used for the cases where we don't have the information
929     * available upfront.
930     */
931    nir_function_impl *impl = nir_shader_get_entrypoint(nir);
932    nir_foreach_block (block, impl) {
933       nir_foreach_instr (instr, block) {
934          if (instr->type != nir_instr_type_intrinsic)
935             continue;
936 
937          nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
938          switch (intrin->intrinsic) {
939          case nir_intrinsic_load_num_workgroups:
940             bt->used_mask[IRIS_SURFACE_GROUP_CS_WORK_GROUPS] = 1;
941             break;
942 
943          case nir_intrinsic_load_output:
944             if (devinfo->ver == 8) {
945                mark_used_with_src(bt, &intrin->src[0],
946                                   IRIS_SURFACE_GROUP_RENDER_TARGET_READ);
947             }
948             break;
949 
950          case nir_intrinsic_image_size:
951          case nir_intrinsic_image_load:
952          case nir_intrinsic_image_store:
953          case nir_intrinsic_image_atomic_add:
954          case nir_intrinsic_image_atomic_imin:
955          case nir_intrinsic_image_atomic_umin:
956          case nir_intrinsic_image_atomic_imax:
957          case nir_intrinsic_image_atomic_umax:
958          case nir_intrinsic_image_atomic_and:
959          case nir_intrinsic_image_atomic_or:
960          case nir_intrinsic_image_atomic_xor:
961          case nir_intrinsic_image_atomic_exchange:
962          case nir_intrinsic_image_atomic_comp_swap:
963          case nir_intrinsic_image_load_raw_intel:
964          case nir_intrinsic_image_store_raw_intel:
965             mark_used_with_src(bt, &intrin->src[0], IRIS_SURFACE_GROUP_IMAGE);
966             break;
967 
968          case nir_intrinsic_load_ubo:
969             mark_used_with_src(bt, &intrin->src[0], IRIS_SURFACE_GROUP_UBO);
970             break;
971 
972          case nir_intrinsic_store_ssbo:
973             mark_used_with_src(bt, &intrin->src[1], IRIS_SURFACE_GROUP_SSBO);
974             break;
975 
976          case nir_intrinsic_get_ssbo_size:
977          case nir_intrinsic_ssbo_atomic_add:
978          case nir_intrinsic_ssbo_atomic_imin:
979          case nir_intrinsic_ssbo_atomic_umin:
980          case nir_intrinsic_ssbo_atomic_imax:
981          case nir_intrinsic_ssbo_atomic_umax:
982          case nir_intrinsic_ssbo_atomic_and:
983          case nir_intrinsic_ssbo_atomic_or:
984          case nir_intrinsic_ssbo_atomic_xor:
985          case nir_intrinsic_ssbo_atomic_exchange:
986          case nir_intrinsic_ssbo_atomic_comp_swap:
987          case nir_intrinsic_ssbo_atomic_fmin:
988          case nir_intrinsic_ssbo_atomic_fmax:
989          case nir_intrinsic_ssbo_atomic_fcomp_swap:
990          case nir_intrinsic_load_ssbo:
991             mark_used_with_src(bt, &intrin->src[0], IRIS_SURFACE_GROUP_SSBO);
992             break;
993 
994          default:
995             break;
996          }
997       }
998    }
999 
1000    /* When disabled we just mark everything as used. */
1001    if (unlikely(skip_compacting_binding_tables())) {
1002       for (int i = 0; i < IRIS_SURFACE_GROUP_COUNT; i++)
1003          bt->used_mask[i] = BITFIELD64_MASK(bt->sizes[i]);
1004    }
1005 
1006    /* Calculate the offsets and the binding table size based on the used
1007     * surfaces.  After this point, the functions to go between "group indices"
1008     * and binding table indices can be used.
1009     */
1010    uint32_t next = 0;
1011    for (int i = 0; i < IRIS_SURFACE_GROUP_COUNT; i++) {
1012       if (bt->used_mask[i] != 0) {
1013          bt->offsets[i] = next;
1014          next += util_bitcount64(bt->used_mask[i]);
1015       }
1016    }
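   /* Each binding table entry is a 32-bit offset to a SURFACE_STATE. */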
1017    bt->size_bytes = next * 4;
1018 
1019    if (INTEL_DEBUG(DEBUG_BT)) {
1020       iris_print_binding_table(stderr, gl_shader_stage_name(info->stage), bt);
1021    }
1022 
1023    /* Apply the binding table indices.  The backend compiler is not expected
1024     * to change those, as we haven't set any of the *_start entries in brw
1025     * binding_table.
1026     */
1027    nir_builder b;
1028    nir_builder_init(&b, impl);
1029 
1030    nir_foreach_block (block, impl) {
1031       nir_foreach_instr (instr, block) {
1032          if (instr->type == nir_instr_type_tex) {
1033             nir_tex_instr *tex = nir_instr_as_tex(instr);
1034             tex->texture_index =
1035                iris_group_index_to_bti(bt, IRIS_SURFACE_GROUP_TEXTURE,
1036                                        tex->texture_index);
1037             continue;
1038          }
1039 
1040          if (instr->type != nir_instr_type_intrinsic)
1041             continue;
1042 
1043          nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
1044          switch (intrin->intrinsic) {
1045          case nir_intrinsic_image_size:
1046          case nir_intrinsic_image_load:
1047          case nir_intrinsic_image_store:
1048          case nir_intrinsic_image_atomic_add:
1049          case nir_intrinsic_image_atomic_imin:
1050          case nir_intrinsic_image_atomic_umin:
1051          case nir_intrinsic_image_atomic_imax:
1052          case nir_intrinsic_image_atomic_umax:
1053          case nir_intrinsic_image_atomic_and:
1054          case nir_intrinsic_image_atomic_or:
1055          case nir_intrinsic_image_atomic_xor:
1056          case nir_intrinsic_image_atomic_exchange:
1057          case nir_intrinsic_image_atomic_comp_swap:
1058          case nir_intrinsic_image_load_raw_intel:
1059          case nir_intrinsic_image_store_raw_intel:
1060             rewrite_src_with_bti(&b, bt, instr, &intrin->src[0],
1061                                  IRIS_SURFACE_GROUP_IMAGE);
1062             break;
1063 
1064          case nir_intrinsic_load_ubo:
1065             rewrite_src_with_bti(&b, bt, instr, &intrin->src[0],
1066                                  IRIS_SURFACE_GROUP_UBO);
1067             break;
1068 
1069          case nir_intrinsic_store_ssbo:
1070             rewrite_src_with_bti(&b, bt, instr, &intrin->src[1],
1071                                  IRIS_SURFACE_GROUP_SSBO);
1072             break;
1073 
1074          case nir_intrinsic_load_output:
1075             if (devinfo->ver == 8) {
1076                rewrite_src_with_bti(&b, bt, instr, &intrin->src[0],
1077                                     IRIS_SURFACE_GROUP_RENDER_TARGET_READ);
1078             }
1079             break;
1080 
1081          case nir_intrinsic_get_ssbo_size:
1082          case nir_intrinsic_ssbo_atomic_add:
1083          case nir_intrinsic_ssbo_atomic_imin:
1084          case nir_intrinsic_ssbo_atomic_umin:
1085          case nir_intrinsic_ssbo_atomic_imax:
1086          case nir_intrinsic_ssbo_atomic_umax:
1087          case nir_intrinsic_ssbo_atomic_and:
1088          case nir_intrinsic_ssbo_atomic_or:
1089          case nir_intrinsic_ssbo_atomic_xor:
1090          case nir_intrinsic_ssbo_atomic_exchange:
1091          case nir_intrinsic_ssbo_atomic_comp_swap:
1092          case nir_intrinsic_ssbo_atomic_fmin:
1093          case nir_intrinsic_ssbo_atomic_fmax:
1094          case nir_intrinsic_ssbo_atomic_fcomp_swap:
1095          case nir_intrinsic_load_ssbo:
1096             rewrite_src_with_bti(&b, bt, instr, &intrin->src[0],
1097                                  IRIS_SURFACE_GROUP_SSBO);
1098             break;
1099 
1100          default:
1101             break;
1102          }
1103       }
1104    }
1105 }
1106 
1107 static void
1108 iris_debug_recompile(struct iris_screen *screen,
1109                      struct pipe_debug_callback *dbg,
1110                      struct iris_uncompiled_shader *ish,
1111                      const struct brw_base_prog_key *key)
1112 {
1113    if (!ish || list_is_empty(&ish->variants)
1114             || list_is_singular(&ish->variants))
1115       return;
1116 
1117    const struct intel_device_info *devinfo = &screen->devinfo;
1118    const struct brw_compiler *c = screen->compiler;
1119    const struct shader_info *info = &ish->nir->info;
1120 
1121    brw_shader_perf_log(c, dbg, "Recompiling %s shader for program %s: %s\n",
1122                        _mesa_shader_stage_to_string(info->stage),
1123                        info->name ? info->name : "(no identifier)",
1124                        info->label ? info->label : "");
1125 
1126    struct iris_compiled_shader *shader =
1127       list_first_entry(&ish->variants, struct iris_compiled_shader, link);
1128    const void *old_iris_key = &shader->key;
1129 
1130    union brw_any_prog_key old_key;
1131 
1132    switch (info->stage) {
1133    case MESA_SHADER_VERTEX:
1134       old_key.vs = iris_to_brw_vs_key(devinfo, old_iris_key);
1135       break;
1136    case MESA_SHADER_TESS_CTRL:
1137       old_key.tcs = iris_to_brw_tcs_key(devinfo, old_iris_key);
1138       break;
1139    case MESA_SHADER_TESS_EVAL:
1140       old_key.tes = iris_to_brw_tes_key(devinfo, old_iris_key);
1141       break;
1142    case MESA_SHADER_GEOMETRY:
1143       old_key.gs = iris_to_brw_gs_key(devinfo, old_iris_key);
1144       break;
1145    case MESA_SHADER_FRAGMENT:
1146       old_key.wm = iris_to_brw_fs_key(devinfo, old_iris_key);
1147       break;
1148    case MESA_SHADER_COMPUTE:
1149       old_key.cs = iris_to_brw_cs_key(devinfo, old_iris_key);
1150       break;
1151    default:
1152       unreachable("invalid shader stage");
1153    }
1154 
1155    brw_debug_key_recompile(c, dbg, info->stage, &old_key.base, key);
1156 }
1157 
1158 static void
1159 check_urb_size(struct iris_context *ice,
1160                unsigned needed_size,
1161                gl_shader_stage stage)
1162 {
1163    unsigned last_allocated_size = ice->shaders.urb.size[stage];
1164 
1165    /* If the last URB allocation wasn't large enough for our needs,
1166     * flag it as needing to be reconfigured.  Otherwise, we can use
1167     * the existing config.  However, if the URB is constrained, and
1168     * we can shrink our size for this stage, we may be able to gain
1169     * extra concurrency by reconfiguring it to be smaller.  Do so.
1170     */
1171    if (last_allocated_size < needed_size ||
1172        (ice->shaders.urb.constrained && last_allocated_size > needed_size)) {
1173       ice->state.dirty |= IRIS_DIRTY_URB;
1174    }
1175 }
1176 
1177 /**
1178  * Get the shader for the last enabled geometry stage.
1179  *
1180  * This stage is the one which will feed stream output and the rasterizer.
1181  */
1182 static gl_shader_stage
1183 last_vue_stage(struct iris_context *ice)
1184 {
1185    if (ice->shaders.uncompiled[MESA_SHADER_GEOMETRY])
1186       return MESA_SHADER_GEOMETRY;
1187 
1188    if (ice->shaders.uncompiled[MESA_SHADER_TESS_EVAL])
1189       return MESA_SHADER_TESS_EVAL;
1190 
1191    return MESA_SHADER_VERTEX;
1192 }
1193 
1194 /**
1195  * \param added  Set to \c true if the variant was added to the list (i.e., a
1196  *               variant matching \c key was not found).  Set to \c false
1197  *               otherwise.
1198  */
1199 static inline struct iris_compiled_shader *
1200 find_or_add_variant(const struct iris_screen *screen,
1201                     struct iris_uncompiled_shader *ish,
1202                     enum iris_program_cache_id cache_id,
1203                     const void *key, unsigned key_size,
1204                     bool *added)
1205 {
1206    struct list_head *start = ish->variants.next;
1207 
1208    *added = false;
1209 
1210    if (screen->precompile) {
1211       /* Check the first list entry.  There will always be at least one
1212        * variant in the list (most likely the precompile variant), and
1213        * other contexts only append new variants, so we can safely check
1214        * it without locking, saving that cost in the common case.
1215        */
1216       struct iris_compiled_shader *first =
1217          list_first_entry(&ish->variants, struct iris_compiled_shader, link);
1218 
1219       if (memcmp(&first->key, key, key_size) == 0) {
1220          util_queue_fence_wait(&first->ready);
1221          return first;
1222       }
1223 
1224       /* Skip this one in the loop below */
1225       start = first->link.next;
1226    }
1227 
1228    struct iris_compiled_shader *variant = NULL;
1229 
1230    /* If it doesn't match, we have to walk the list; other contexts may be
1231     * concurrently appending shaders to it, so we need to lock here.
1232     */
1233    simple_mtx_lock(&ish->lock);
1234 
1235    list_for_each_entry_from(struct iris_compiled_shader, v, start,
1236                             &ish->variants, link) {
1237       if (memcmp(&v->key, key, key_size) == 0) {
1238          variant = v;
1239          break;
1240       }
1241    }
1242 
1243    if (variant == NULL) {
1244       variant = iris_create_shader_variant(screen, NULL, cache_id,
1245                                            key_size, key);
1246 
1247       /* Append our new variant to the shader's variant list. */
1248       list_addtail(&variant->link, &ish->variants);
1249       *added = true;
1250 
1251       simple_mtx_unlock(&ish->lock);
1252    } else {
1253       simple_mtx_unlock(&ish->lock);
1254 
1255       util_queue_fence_wait(&variant->ready);
1256    }
1257 
1258    return variant;
1259 }
1260 
1261 static void
1262 iris_threaded_compile_job_delete(void *_job, UNUSED void *_gdata,
1263                                  UNUSED int thread_index)
1264 {
1265    free(_job);
1266 }
1267 
1268 static void
1269 iris_schedule_compile(struct iris_screen *screen,
1270                       struct util_queue_fence *ready_fence,
1271                       struct pipe_debug_callback *dbg,
1272                       struct iris_threaded_compile_job *job,
1273                       util_queue_execute_func execute)
1274 
1275 {
1276    util_queue_fence_init(ready_fence);
1277 
1278    struct util_async_debug_callback async_debug;
1279 
1280    if (dbg) {
1281       u_async_debug_init(&async_debug);
1282       job->dbg = &async_debug.base;
1283    }
1284 
1285    util_queue_add_job(&screen->shader_compiler_queue, job, ready_fence, execute,
1286                       iris_threaded_compile_job_delete, 0);
1287 
1288    if (screen->driconf.sync_compile || dbg)
1289       util_queue_fence_wait(ready_fence);
1290 
1291    if (dbg) {
1292       u_async_debug_drain(&async_debug, dbg);
1293       u_async_debug_cleanup(&async_debug);
1294    }
1295 }
1296 
1297 /**
1298  * Compile a vertex shader, and upload the assembly.
1299  */
1300 static void
1301 iris_compile_vs(struct iris_screen *screen,
1302                 struct u_upload_mgr *uploader,
1303                 struct pipe_debug_callback *dbg,
1304                 struct iris_uncompiled_shader *ish,
1305                 struct iris_compiled_shader *shader)
1306 {
1307    const struct brw_compiler *compiler = screen->compiler;
1308    const struct intel_device_info *devinfo = &screen->devinfo;
1309    void *mem_ctx = ralloc_context(NULL);
1310    struct brw_vs_prog_data *vs_prog_data =
1311       rzalloc(mem_ctx, struct brw_vs_prog_data);
1312    struct brw_vue_prog_data *vue_prog_data = &vs_prog_data->base;
1313    struct brw_stage_prog_data *prog_data = &vue_prog_data->base;
1314    enum brw_param_builtin *system_values;
1315    unsigned num_system_values;
1316    unsigned num_cbufs;
1317 
1318    nir_shader *nir = nir_shader_clone(mem_ctx, ish->nir);
1319    const struct iris_vs_prog_key *const key = &shader->key.vs;
1320 
1321    if (key->vue.nr_userclip_plane_consts) {
1322       nir_function_impl *impl = nir_shader_get_entrypoint(nir);
1323       nir_lower_clip_vs(nir, (1 << key->vue.nr_userclip_plane_consts) - 1,
1324                         true, false, NULL);
1325       nir_lower_io_to_temporaries(nir, impl, true, false);
1326       nir_lower_global_vars_to_local(nir);
1327       nir_lower_vars_to_ssa(nir);
1328       nir_shader_gather_info(nir, impl);
1329    }
1330 
1331    prog_data->use_alt_mode = nir->info.is_arb_asm;
1332 
1333    iris_setup_uniforms(compiler, mem_ctx, nir, prog_data, 0, &system_values,
1334                        &num_system_values, &num_cbufs);
1335 
1336    struct iris_binding_table bt;
1337    iris_setup_binding_table(devinfo, nir, &bt, /* num_render_targets */ 0,
1338                             num_system_values, num_cbufs);
1339 
1340    brw_nir_analyze_ubo_ranges(compiler, nir, NULL, prog_data->ubo_ranges);
1341 
1342    brw_compute_vue_map(devinfo,
1343                        &vue_prog_data->vue_map, nir->info.outputs_written,
1344                        nir->info.separate_shader, /* pos_slots */ 1);
1345 
1346    struct brw_vs_prog_key brw_key = iris_to_brw_vs_key(devinfo, key);
1347 
1348    struct brw_compile_vs_params params = {
1349       .nir = nir,
1350       .key = &brw_key,
1351       .prog_data = vs_prog_data,
1352       .log_data = dbg,
1353    };
1354 
1355    const unsigned *program = brw_compile_vs(compiler, mem_ctx, &params);
1356    if (program == NULL) {
1357       dbg_printf("Failed to compile vertex shader: %s\n", params.error_str);
1358       ralloc_free(mem_ctx);
1359 
1360       shader->compilation_failed = true;
1361       util_queue_fence_signal(&shader->ready);
1362 
1363       return;
1364    }
1365 
1366    shader->compilation_failed = false;
1367 
1368    iris_debug_recompile(screen, dbg, ish, &brw_key.base);
1369 
1370    uint32_t *so_decls =
1371       screen->vtbl.create_so_decl_list(&ish->stream_output,
1372                                     &vue_prog_data->vue_map);
1373 
1374    iris_finalize_program(shader, prog_data, so_decls, system_values,
1375                          num_system_values, 0, num_cbufs, &bt);
1376 
1377    iris_upload_shader(screen, ish, shader, NULL, uploader, IRIS_CACHE_VS,
1378                       sizeof(*key), key, program);
1379 
1380    iris_disk_cache_store(screen->disk_cache, ish, shader, key, sizeof(*key));
1381 
1382    ralloc_free(mem_ctx);
1383 }
1384 
1385 /**
1386  * Update the current vertex shader variant.
1387  *
1388  * Fill out the key, look in the cache, compile and bind if needed.
1389  */
1390 static void
1391 iris_update_compiled_vs(struct iris_context *ice)
1392 {
1393    struct iris_screen *screen = (struct iris_screen *)ice->ctx.screen;
1394    struct iris_shader_state *shs = &ice->state.shaders[MESA_SHADER_VERTEX];
1395    struct u_upload_mgr *uploader = ice->shaders.uploader_driver;
1396    struct iris_uncompiled_shader *ish =
1397       ice->shaders.uncompiled[MESA_SHADER_VERTEX];
1398 
1399    struct iris_vs_prog_key key = { KEY_ID(vue.base) };
1400    screen->vtbl.populate_vs_key(ice, &ish->nir->info, last_vue_stage(ice), &key);
1401 
1402    struct iris_compiled_shader *old = ice->shaders.prog[IRIS_CACHE_VS];
1403    bool added;
1404    struct iris_compiled_shader *shader =
1405       find_or_add_variant(screen, ish, IRIS_CACHE_VS, &key, sizeof(key), &added);
1406 
1407    if (added && !iris_disk_cache_retrieve(screen, uploader, ish, shader,
1408                                           &key, sizeof(key))) {
1409       iris_compile_vs(screen, uploader, &ice->dbg, ish, shader);
1410    }
1411 
1412    if (shader->compilation_failed)
1413       shader = NULL;
1414 
1415    if (old != shader) {
1416       iris_shader_variant_reference(&ice->shaders.prog[MESA_SHADER_VERTEX],
1417                                     shader);
1418       ice->state.dirty |= IRIS_DIRTY_VF_SGVS;
1419       ice->state.stage_dirty |= IRIS_STAGE_DIRTY_VS |
1420                                 IRIS_STAGE_DIRTY_BINDINGS_VS |
1421                                 IRIS_STAGE_DIRTY_CONSTANTS_VS;
1422       shs->sysvals_need_upload = true;
1423 
1424       unsigned urb_entry_size = shader ?
1425          ((struct brw_vue_prog_data *) shader->prog_data)->urb_entry_size : 0;
1426       check_urb_size(ice, urb_entry_size, MESA_SHADER_VERTEX);
1427    }
1428 }
1429 
1430 /**
1431  * Get the shader_info for a given stage, or NULL if the stage is disabled.
1432  */
1433 const struct shader_info *
1434 iris_get_shader_info(const struct iris_context *ice, gl_shader_stage stage)
1435 {
1436    const struct iris_uncompiled_shader *ish = ice->shaders.uncompiled[stage];
1437 
1438    if (!ish)
1439       return NULL;
1440 
1441    const nir_shader *nir = ish->nir;
1442    return &nir->info;
1443 }
1444 
1445 /**
1446  * Get the union of TCS output and TES input slots.
1447  *
1448  * TCS and TES need to agree on a common URB entry layout.  In particular,
1449  * the data for all patch vertices is stored in a single URB entry (unlike
1450  * GS which has one entry per input vertex).  This means that per-vertex
1451  * array indexing needs a stride.
1452  *
1453  * SSO requires locations to match, but doesn't require the number of
1454  * outputs/inputs to match (in fact, the TCS often has extra outputs).
1455  * So, we need to take the extra step of unifying these on the fly.
1456  */
1457 static void
1458 get_unified_tess_slots(const struct iris_context *ice,
1459                        uint64_t *per_vertex_slots,
1460                        uint32_t *per_patch_slots)
1461 {
1462    const struct shader_info *tcs =
1463       iris_get_shader_info(ice, MESA_SHADER_TESS_CTRL);
1464    const struct shader_info *tes =
1465       iris_get_shader_info(ice, MESA_SHADER_TESS_EVAL);
1466 
1467    *per_vertex_slots = tes->inputs_read;
1468    *per_patch_slots = tes->patch_inputs_read;
1469 
1470    if (tcs) {
1471       *per_vertex_slots |= tcs->outputs_written;
1472       *per_patch_slots |= tcs->patch_outputs_written;
1473    }
1474 }
1475 
1476 /**
1477  * Compile a tessellation control shader, and upload the assembly.
1478  */
1479 static void
1480 iris_compile_tcs(struct iris_screen *screen,
1481                  struct hash_table *passthrough_ht,
1482                  struct u_upload_mgr *uploader,
1483                  struct pipe_debug_callback *dbg,
1484                  struct iris_uncompiled_shader *ish,
1485                  struct iris_compiled_shader *shader)
1486 {
1487    const struct brw_compiler *compiler = screen->compiler;
1488    const struct nir_shader_compiler_options *options =
1489       compiler->glsl_compiler_options[MESA_SHADER_TESS_CTRL].NirOptions;
1490    void *mem_ctx = ralloc_context(NULL);
1491    struct brw_tcs_prog_data *tcs_prog_data =
1492       rzalloc(mem_ctx, struct brw_tcs_prog_data);
1493    struct brw_vue_prog_data *vue_prog_data = &tcs_prog_data->base;
1494    struct brw_stage_prog_data *prog_data = &vue_prog_data->base;
1495    const struct intel_device_info *devinfo = &screen->devinfo;
1496    enum brw_param_builtin *system_values = NULL;
1497    unsigned num_system_values = 0;
1498    unsigned num_cbufs = 0;
1499 
1500    nir_shader *nir;
1501 
1502    struct iris_binding_table bt;
1503 
1504    const struct iris_tcs_prog_key *const key = &shader->key.tcs;
1505    struct brw_tcs_prog_key brw_key = iris_to_brw_tcs_key(devinfo, key);
1506 
1507    if (ish) {
1508       nir = nir_shader_clone(mem_ctx, ish->nir);
1509 
1510       iris_setup_uniforms(compiler, mem_ctx, nir, prog_data, 0, &system_values,
1511                           &num_system_values, &num_cbufs);
1512       iris_setup_binding_table(devinfo, nir, &bt, /* num_render_targets */ 0,
1513                                num_system_values, num_cbufs);
1514       brw_nir_analyze_ubo_ranges(compiler, nir, NULL, prog_data->ubo_ranges);
1515    } else {
1516       nir =
1517          brw_nir_create_passthrough_tcs(mem_ctx, compiler, options, &brw_key);
1518 
1519       /* Reserve space for passing the default tess levels as constants. */
1520       num_cbufs = 1;
1521       num_system_values = 8;
1522       system_values =
1523          rzalloc_array(mem_ctx, enum brw_param_builtin, num_system_values);
1524       prog_data->param = rzalloc_array(mem_ctx, uint32_t, num_system_values);
1525       prog_data->nr_params = num_system_values;
1526 
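           /* Lay out the eight parameter slots the way the passthrough TCS
            * (brw_nir_create_passthrough_tcs) reads the default tessellation
            * levels: the outer levels fill the top slots, and the inner
            * levels (when the domain has them) sit just below.
            */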
1527       if (key->tes_primitive_mode == GL_QUADS) {
1528          for (int i = 0; i < 4; i++)
1529             system_values[7 - i] = BRW_PARAM_BUILTIN_TESS_LEVEL_OUTER_X + i;
1530 
1531          system_values[3] = BRW_PARAM_BUILTIN_TESS_LEVEL_INNER_X;
1532          system_values[2] = BRW_PARAM_BUILTIN_TESS_LEVEL_INNER_Y;
1533       } else if (key->tes_primitive_mode == GL_TRIANGLES) {
1534          for (int i = 0; i < 3; i++)
1535             system_values[7 - i] = BRW_PARAM_BUILTIN_TESS_LEVEL_OUTER_X + i;
1536 
1537          system_values[4] = BRW_PARAM_BUILTIN_TESS_LEVEL_INNER_X;
1538       } else {
1539          assert(key->tes_primitive_mode == GL_ISOLINES);
1540          system_values[7] = BRW_PARAM_BUILTIN_TESS_LEVEL_OUTER_Y;
1541          system_values[6] = BRW_PARAM_BUILTIN_TESS_LEVEL_OUTER_X;
1542       }
1543 
1544       /* Manually set up the TCS binding table. */
1545       memset(&bt, 0, sizeof(bt));
1546       bt.sizes[IRIS_SURFACE_GROUP_UBO] = 1;
1547       bt.used_mask[IRIS_SURFACE_GROUP_UBO] = 1;
1548       bt.size_bytes = 4;
1549 
1550       prog_data->ubo_ranges[0].length = 1;
1551    }
1552 
1553    char *error_str = NULL;
1554    const unsigned *program =
1555       brw_compile_tcs(compiler, dbg, mem_ctx, &brw_key, tcs_prog_data,
1556                       nir, -1, NULL, &error_str);
1557    if (program == NULL) {
1558       dbg_printf("Failed to compile control shader: %s\n", error_str);
1559       ralloc_free(mem_ctx);
1560 
1561       shader->compilation_failed = true;
1562       util_queue_fence_signal(&shader->ready);
1563 
1564       return;
1565    }
1566 
1567    shader->compilation_failed = false;
1568 
1569    iris_debug_recompile(screen, dbg, ish, &brw_key.base);
1570 
1571    iris_finalize_program(shader, prog_data, NULL, system_values,
1572                          num_system_values, 0, num_cbufs, &bt);
1573 
1574    iris_upload_shader(screen, ish, shader, passthrough_ht, uploader,
1575                       IRIS_CACHE_TCS, sizeof(*key), key, program);
1576 
1577    if (ish)
1578       iris_disk_cache_store(screen->disk_cache, ish, shader, key, sizeof(*key));
1579 
1580    ralloc_free(mem_ctx);
1581 }
1582 
1583 /**
1584  * Update the current tessellation control shader variant.
1585  *
1586  * Fill out the key, look in the cache, compile and bind if needed.
1587  */
1588 static void
1589 iris_update_compiled_tcs(struct iris_context *ice)
1590 {
1591    struct iris_shader_state *shs = &ice->state.shaders[MESA_SHADER_TESS_CTRL];
1592    struct iris_uncompiled_shader *tcs =
1593       ice->shaders.uncompiled[MESA_SHADER_TESS_CTRL];
1594    struct iris_screen *screen = (struct iris_screen *)ice->ctx.screen;
1595    struct u_upload_mgr *uploader = ice->shaders.uploader_driver;
1596    const struct brw_compiler *compiler = screen->compiler;
1597    const struct intel_device_info *devinfo = &screen->devinfo;
1598 
1599    const struct shader_info *tes_info =
1600       iris_get_shader_info(ice, MESA_SHADER_TESS_EVAL);
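        /* input_vertices only affects compilation for the passthrough TCS or
         * for 8_PATCH mode; leaving it zero otherwise keeps the key stable
         * across patch size changes and avoids creating needless variants.
         */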
1601    struct iris_tcs_prog_key key = {
1602       .vue.base.program_string_id = tcs ? tcs->program_id : 0,
1603       .tes_primitive_mode = tes_info->tess.primitive_mode,
1604       .input_vertices =
1605          !tcs || compiler->use_tcs_8_patch ? ice->state.vertices_per_patch : 0,
1606       .quads_workaround = devinfo->ver < 9 &&
1607                           tes_info->tess.primitive_mode == GL_QUADS &&
1608                           tes_info->tess.spacing == TESS_SPACING_EQUAL,
1609    };
1610    get_unified_tess_slots(ice, &key.outputs_written,
1611                           &key.patch_outputs_written);
1612    screen->vtbl.populate_tcs_key(ice, &key);
1613 
1614    struct iris_compiled_shader *old = ice->shaders.prog[IRIS_CACHE_TCS];
1615    struct iris_compiled_shader *shader;
1616    bool added = false;
1617 
1618    if (tcs != NULL) {
1619       shader = find_or_add_variant(screen, tcs, IRIS_CACHE_TCS, &key,
1620                                    sizeof(key), &added);
1621    } else {
1622       /* Look for and possibly create a passthrough TCS */
1623       shader = iris_find_cached_shader(ice, IRIS_CACHE_TCS, sizeof(key), &key);
1624
1626       if (shader == NULL) {
1627          shader = iris_create_shader_variant(screen, ice->shaders.cache,
1628                                              IRIS_CACHE_TCS, sizeof(key), &key);
1629          added = true;
1630       }
1631 
1632    }
1633 
1634    /* If the variant was just added (i.e. not found in whichever cache we
1635     * searched), compile it now, unless we have a real TCS (tcs != NULL)
1636     * whose binary can be retrieved from the disk cache.
1637     */
1638    if (added &&
1639        (tcs == NULL || !iris_disk_cache_retrieve(screen, uploader, tcs, shader,
1640                                                  &key, sizeof(key)))) {
1641       iris_compile_tcs(screen, ice->shaders.cache, uploader, &ice->dbg, tcs,
1642                        shader);
1643    }
1644 
1645    if (shader->compilation_failed)
1646       shader = NULL;
1647 
1648    if (old != shader) {
1649       iris_shader_variant_reference(&ice->shaders.prog[MESA_SHADER_TESS_CTRL],
1650                                     shader);
1651       ice->state.stage_dirty |= IRIS_STAGE_DIRTY_TCS |
1652                                 IRIS_STAGE_DIRTY_BINDINGS_TCS |
1653                                 IRIS_STAGE_DIRTY_CONSTANTS_TCS;
1654       shs->sysvals_need_upload = true;
1655 
1656       unsigned urb_entry_size = shader ?
1657          ((struct brw_vue_prog_data *) shader->prog_data)->urb_entry_size : 0;
1658       check_urb_size(ice, urb_entry_size, MESA_SHADER_TESS_CTRL);
1659    }
1660 }
1661 
1662 /**
1663  * Compile a tessellation evaluation shader, and upload the assembly.
1664  */
1665 static void
1666 iris_compile_tes(struct iris_screen *screen,
1667                  struct u_upload_mgr *uploader,
1668                  struct pipe_debug_callback *dbg,
1669                  struct iris_uncompiled_shader *ish,
1670                  struct iris_compiled_shader *shader)
1671 {
1672    const struct brw_compiler *compiler = screen->compiler;
1673    void *mem_ctx = ralloc_context(NULL);
1674    struct brw_tes_prog_data *tes_prog_data =
1675       rzalloc(mem_ctx, struct brw_tes_prog_data);
1676    struct brw_vue_prog_data *vue_prog_data = &tes_prog_data->base;
1677    struct brw_stage_prog_data *prog_data = &vue_prog_data->base;
1678    enum brw_param_builtin *system_values;
1679    const struct intel_device_info *devinfo = &screen->devinfo;
1680    unsigned num_system_values;
1681    unsigned num_cbufs;
1682 
1683    nir_shader *nir = nir_shader_clone(mem_ctx, ish->nir);
1684    const struct iris_tes_prog_key *const key = &shader->key.tes;
1685 
1686    if (key->vue.nr_userclip_plane_consts) {
1687       nir_function_impl *impl = nir_shader_get_entrypoint(nir);
1688       nir_lower_clip_vs(nir, (1 << key->vue.nr_userclip_plane_consts) - 1,
1689                         true, false, NULL);
1690       nir_lower_io_to_temporaries(nir, impl, true, false);
1691       nir_lower_global_vars_to_local(nir);
1692       nir_lower_vars_to_ssa(nir);
1693       nir_shader_gather_info(nir, impl);
1694    }
1695 
1696    iris_setup_uniforms(compiler, mem_ctx, nir, prog_data, 0, &system_values,
1697                        &num_system_values, &num_cbufs);
1698 
1699    struct iris_binding_table bt;
1700    iris_setup_binding_table(devinfo, nir, &bt, /* num_render_targets */ 0,
1701                             num_system_values, num_cbufs);
1702 
1703    brw_nir_analyze_ubo_ranges(compiler, nir, NULL, prog_data->ubo_ranges);
1704 
1705    struct brw_vue_map input_vue_map;
1706    brw_compute_tess_vue_map(&input_vue_map, key->inputs_read,
1707                             key->patch_inputs_read);
1708 
1709    struct brw_tes_prog_key brw_key = iris_to_brw_tes_key(devinfo, key);
1710 
1711    char *error_str = NULL;
1712    const unsigned *program =
1713       brw_compile_tes(compiler, dbg, mem_ctx, &brw_key, &input_vue_map,
1714                       tes_prog_data, nir, -1, NULL, &error_str);
1715    if (program == NULL) {
1716       dbg_printf("Failed to compile evaluation shader: %s\n", error_str);
1717       ralloc_free(mem_ctx);
1718 
1719       shader->compilation_failed = true;
1720       util_queue_fence_signal(&shader->ready);
1721 
1722       return;
1723    }
1724 
1725    shader->compilation_failed = false;
1726 
1727    iris_debug_recompile(screen, dbg, ish, &brw_key.base);
1728 
1729    uint32_t *so_decls =
1730       screen->vtbl.create_so_decl_list(&ish->stream_output,
1731                                     &vue_prog_data->vue_map);
1732 
1733    iris_finalize_program(shader, prog_data, so_decls, system_values,
1734                          num_system_values, 0, num_cbufs, &bt);
1735 
1736    iris_upload_shader(screen, ish, shader, NULL, uploader, IRIS_CACHE_TES,
1737                       sizeof(*key), key, program);
1738 
1739    iris_disk_cache_store(screen->disk_cache, ish, shader, key, sizeof(*key));
1740 
1741    ralloc_free(mem_ctx);
1742 }
1743 
1744 /**
1745  * Update the current tessellation evaluation shader variant.
1746  *
1747  * Fill out the key, look in the cache, compile and bind if needed.
1748  */
1749 static void
1750 iris_update_compiled_tes(struct iris_context *ice)
1751 {
1752    struct iris_screen *screen = (struct iris_screen *)ice->ctx.screen;
1753    struct u_upload_mgr *uploader = ice->shaders.uploader_driver;
1754    struct iris_shader_state *shs = &ice->state.shaders[MESA_SHADER_TESS_EVAL];
1755    struct iris_uncompiled_shader *ish =
1756       ice->shaders.uncompiled[MESA_SHADER_TESS_EVAL];
1757 
1758    struct iris_tes_prog_key key = { KEY_ID(vue.base) };
1759    get_unified_tess_slots(ice, &key.inputs_read, &key.patch_inputs_read);
1760    screen->vtbl.populate_tes_key(ice, &ish->nir->info, last_vue_stage(ice), &key);
1761 
1762    struct iris_compiled_shader *old = ice->shaders.prog[IRIS_CACHE_TES];
1763    bool added;
1764    struct iris_compiled_shader *shader =
1765       find_or_add_variant(screen, ish, IRIS_CACHE_TES, &key, sizeof(key), &added);
1766 
1767    if (added && !iris_disk_cache_retrieve(screen, uploader, ish, shader,
1768                                           &key, sizeof(key))) {
1769       iris_compile_tes(screen, uploader, &ice->dbg, ish, shader);
1770    }
1771 
1772    if (shader->compilation_failed)
1773       shader = NULL;
1774 
1775    if (old != shader) {
1776       iris_shader_variant_reference(&ice->shaders.prog[MESA_SHADER_TESS_EVAL],
1777                                     shader);
1778       ice->state.stage_dirty |= IRIS_STAGE_DIRTY_TES |
1779                                 IRIS_STAGE_DIRTY_BINDINGS_TES |
1780                                 IRIS_STAGE_DIRTY_CONSTANTS_TES;
1781       shs->sysvals_need_upload = true;
1782 
1783       unsigned urb_entry_size = shader ?
1784          ((struct brw_vue_prog_data *) shader->prog_data)->urb_entry_size : 0;
1785       check_urb_size(ice, urb_entry_size, MESA_SHADER_TESS_EVAL);
1786    }
1787 
1788    /* TODO: Could compare and avoid flagging this. */
1789    const struct shader_info *tes_info = &ish->nir->info;
1790    if (BITSET_TEST(tes_info->system_values_read, SYSTEM_VALUE_VERTICES_IN)) {
1791       ice->state.stage_dirty |= IRIS_STAGE_DIRTY_CONSTANTS_TES;
1792       ice->state.shaders[MESA_SHADER_TESS_EVAL].sysvals_need_upload = true;
1793    }
1794 }
1795 
1796 /**
1797  * Compile a geometry shader, and upload the assembly.
1798  */
1799 static void
1800 iris_compile_gs(struct iris_screen *screen,
1801                 struct u_upload_mgr *uploader,
1802                 struct pipe_debug_callback *dbg,
1803                 struct iris_uncompiled_shader *ish,
1804                 struct iris_compiled_shader *shader)
1805 {
1806    const struct brw_compiler *compiler = screen->compiler;
1807    const struct intel_device_info *devinfo = &screen->devinfo;
1808    void *mem_ctx = ralloc_context(NULL);
1809    struct brw_gs_prog_data *gs_prog_data =
1810       rzalloc(mem_ctx, struct brw_gs_prog_data);
1811    struct brw_vue_prog_data *vue_prog_data = &gs_prog_data->base;
1812    struct brw_stage_prog_data *prog_data = &vue_prog_data->base;
1813    enum brw_param_builtin *system_values;
1814    unsigned num_system_values;
1815    unsigned num_cbufs;
1816 
1817    nir_shader *nir = nir_shader_clone(mem_ctx, ish->nir);
1818    const struct iris_gs_prog_key *const key = &shader->key.gs;
1819 
1820    if (key->vue.nr_userclip_plane_consts) {
1821       nir_function_impl *impl = nir_shader_get_entrypoint(nir);
1822       nir_lower_clip_gs(nir, (1 << key->vue.nr_userclip_plane_consts) - 1,
1823                         false, NULL);
1824       nir_lower_io_to_temporaries(nir, impl, true, false);
1825       nir_lower_global_vars_to_local(nir);
1826       nir_lower_vars_to_ssa(nir);
1827       nir_shader_gather_info(nir, impl);
1828    }
1829 
1830    iris_setup_uniforms(compiler, mem_ctx, nir, prog_data, 0, &system_values,
1831                        &num_system_values, &num_cbufs);
1832 
1833    struct iris_binding_table bt;
1834    iris_setup_binding_table(devinfo, nir, &bt, /* num_render_targets */ 0,
1835                             num_system_values, num_cbufs);
1836 
1837    brw_nir_analyze_ubo_ranges(compiler, nir, NULL, prog_data->ubo_ranges);
1838 
1839    brw_compute_vue_map(devinfo,
1840                        &vue_prog_data->vue_map, nir->info.outputs_written,
1841                        nir->info.separate_shader, /* pos_slots */ 1);
1842 
1843    struct brw_gs_prog_key brw_key = iris_to_brw_gs_key(devinfo, key);
1844 
1845    char *error_str = NULL;
1846    const unsigned *program =
1847       brw_compile_gs(compiler, dbg, mem_ctx, &brw_key, gs_prog_data,
1848                      nir, -1, NULL, &error_str);
1849    if (program == NULL) {
1850       dbg_printf("Failed to compile geometry shader: %s\n", error_str);
1851       ralloc_free(mem_ctx);
1852 
1853       shader->compilation_failed = true;
1854       util_queue_fence_signal(&shader->ready);
1855 
1856       return;
1857    }
1858 
1859    shader->compilation_failed = false;
1860 
1861    iris_debug_recompile(screen, dbg, ish, &brw_key.base);
1862 
1863    uint32_t *so_decls =
1864       screen->vtbl.create_so_decl_list(&ish->stream_output,
1865                                     &vue_prog_data->vue_map);
1866 
1867    iris_finalize_program(shader, prog_data, so_decls, system_values,
1868                          num_system_values, 0, num_cbufs, &bt);
1869 
1870    iris_upload_shader(screen, ish, shader, NULL, uploader, IRIS_CACHE_GS,
1871                       sizeof(*key), key, program);
1872 
1873    iris_disk_cache_store(screen->disk_cache, ish, shader, key, sizeof(*key));
1874 
1875    ralloc_free(mem_ctx);
1876 }
1877 
1878 /**
1879  * Update the current geometry shader variant.
1880  *
1881  * Fill out the key, look in the cache, compile and bind if needed.
1882  */
1883 static void
1884 iris_update_compiled_gs(struct iris_context *ice)
1885 {
1886    struct iris_shader_state *shs = &ice->state.shaders[MESA_SHADER_GEOMETRY];
1887    struct u_upload_mgr *uploader = ice->shaders.uploader_driver;
1888    struct iris_uncompiled_shader *ish =
1889       ice->shaders.uncompiled[MESA_SHADER_GEOMETRY];
1890    struct iris_compiled_shader *old = ice->shaders.prog[IRIS_CACHE_GS];
1891    struct iris_compiled_shader *shader = NULL;
1892    struct iris_screen *screen = (struct iris_screen *)ice->ctx.screen;
1893 
1894    if (ish) {
1895       struct iris_gs_prog_key key = { KEY_ID(vue.base) };
1896       screen->vtbl.populate_gs_key(ice, &ish->nir->info, last_vue_stage(ice), &key);
1897 
1898       bool added;
1899 
1900       shader = find_or_add_variant(screen, ish, IRIS_CACHE_GS, &key,
1901                                    sizeof(key), &added);
1902 
1903       if (added && !iris_disk_cache_retrieve(screen, uploader, ish, shader,
1904                                              &key, sizeof(key))) {
1905          iris_compile_gs(screen, uploader, &ice->dbg, ish, shader);
1906       }
1907 
1908       if (shader->compilation_failed)
1909          shader = NULL;
1910    }
1911 
1912    if (old != shader) {
1913       iris_shader_variant_reference(&ice->shaders.prog[MESA_SHADER_GEOMETRY],
1914                                     shader);
1915       ice->state.stage_dirty |= IRIS_STAGE_DIRTY_GS |
1916                                 IRIS_STAGE_DIRTY_BINDINGS_GS |
1917                                 IRIS_STAGE_DIRTY_CONSTANTS_GS;
1918       shs->sysvals_need_upload = true;
1919 
1920       unsigned urb_entry_size = shader ?
1921          ((struct brw_vue_prog_data *) shader->prog_data)->urb_entry_size : 0;
1922       check_urb_size(ice, urb_entry_size, MESA_SHADER_GEOMETRY);
1923    }
1924 }
1925 
1926 /**
1927  * Compile a fragment (pixel) shader, and upload the assembly.
1928  */
1929 static void
1930 iris_compile_fs(struct iris_screen *screen,
1931                 struct u_upload_mgr *uploader,
1932                 struct pipe_debug_callback *dbg,
1933                 struct iris_uncompiled_shader *ish,
1934                 struct iris_compiled_shader *shader,
1935                 struct brw_vue_map *vue_map)
1936 {
1937    const struct brw_compiler *compiler = screen->compiler;
1938    void *mem_ctx = ralloc_context(NULL);
1939    struct brw_wm_prog_data *fs_prog_data =
1940       rzalloc(mem_ctx, struct brw_wm_prog_data);
1941    struct brw_stage_prog_data *prog_data = &fs_prog_data->base;
1942    enum brw_param_builtin *system_values;
1943    const struct intel_device_info *devinfo = &screen->devinfo;
1944    unsigned num_system_values;
1945    unsigned num_cbufs;
1946 
1947    nir_shader *nir = nir_shader_clone(mem_ctx, ish->nir);
1948    const struct iris_fs_prog_key *const key = &shader->key.fs;
1949 
1950    prog_data->use_alt_mode = nir->info.is_arb_asm;
1951 
1952    iris_setup_uniforms(compiler, mem_ctx, nir, prog_data, 0, &system_values,
1953                        &num_system_values, &num_cbufs);
1954 
1955    /* Lower output variables to load_output intrinsics before setting up
1956     * binding tables, so iris_setup_binding_table can map any load_output
1957     * intrinsics to IRIS_SURFACE_GROUP_RENDER_TARGET_READ on Gfx8 for
1958     * non-coherent framebuffer fetches.
1959     */
1960    brw_nir_lower_fs_outputs(nir);
1961 
1962    /* On Gfx11+, shader RT write messages have a "Null Render Target" bit
1963     * and do not need a binding table entry with a null surface.  Earlier
1964     * generations need an entry for a null surface.
1965     */
1966    int null_rts = devinfo->ver < 11 ? 1 : 0;
1967 
1968    struct iris_binding_table bt;
1969    iris_setup_binding_table(devinfo, nir, &bt,
1970                             MAX2(key->nr_color_regions, null_rts),
1971                             num_system_values, num_cbufs);
1972 
1973    brw_nir_analyze_ubo_ranges(compiler, nir, NULL, prog_data->ubo_ranges);
1974 
1975    struct brw_wm_prog_key brw_key = iris_to_brw_fs_key(devinfo, key);
1976 
1977    struct brw_compile_fs_params params = {
1978       .nir = nir,
1979       .key = &brw_key,
1980       .prog_data = fs_prog_data,
1981 
1982       .allow_spilling = true,
1983       .vue_map = vue_map,
1984 
1985       .log_data = dbg,
1986    };
1987 
1988    const unsigned *program = brw_compile_fs(compiler, mem_ctx, &params);
1989    if (program == NULL) {
1990       dbg_printf("Failed to compile fragment shader: %s\n", params.error_str);
1991       ralloc_free(mem_ctx);
1992 
1993       shader->compilation_failed = true;
1994       util_queue_fence_signal(&shader->ready);
1995 
1996       return;
1997    }
1998 
1999    shader->compilation_failed = false;
2000 
2001    iris_debug_recompile(screen, dbg, ish, &brw_key.base);
2002 
2003    iris_finalize_program(shader, prog_data, NULL, system_values,
2004                          num_system_values, 0, num_cbufs, &bt);
2005 
2006    iris_upload_shader(screen, ish, shader, NULL, uploader, IRIS_CACHE_FS,
2007                       sizeof(*key), key, program);
2008 
2009    iris_disk_cache_store(screen->disk_cache, ish, shader, key, sizeof(*key));
2010 
2011    ralloc_free(mem_ctx);
2012 }
2013 
2014 /**
2015  * Update the current fragment shader variant.
2016  *
2017  * Fill out the key, look in the cache, compile and bind if needed.
2018  */
2019 static void
2020 iris_update_compiled_fs(struct iris_context *ice)
2021 {
2022    struct iris_shader_state *shs = &ice->state.shaders[MESA_SHADER_FRAGMENT];
2023    struct u_upload_mgr *uploader = ice->shaders.uploader_driver;
2024    struct iris_uncompiled_shader *ish =
2025       ice->shaders.uncompiled[MESA_SHADER_FRAGMENT];
2026    struct iris_fs_prog_key key = { KEY_ID(base) };
2027    struct iris_screen *screen = (struct iris_screen *)ice->ctx.screen;
2028    screen->vtbl.populate_fs_key(ice, &ish->nir->info, &key);
2029 
2030    struct brw_vue_map *last_vue_map =
2031       &brw_vue_prog_data(ice->shaders.last_vue_shader->prog_data)->vue_map;
2032 
2033    if (ish->nos & (1ull << IRIS_NOS_LAST_VUE_MAP))
2034       key.input_slots_valid = last_vue_map->slots_valid;
2035 
2036    struct iris_compiled_shader *old = ice->shaders.prog[IRIS_CACHE_FS];
2037    bool added;
2038    struct iris_compiled_shader *shader =
2039       find_or_add_variant(screen, ish, IRIS_CACHE_FS, &key,
2040                           sizeof(key), &added);
2041 
2042    if (added && !iris_disk_cache_retrieve(screen, uploader, ish, shader,
2043                                           &key, sizeof(key))) {
2044       iris_compile_fs(screen, uploader, &ice->dbg, ish, shader, last_vue_map);
2045    }
2046 
2047    if (shader->compilation_failed)
2048       shader = NULL;
2049 
2050    if (old != shader) {
2051       // XXX: only need to flag CLIP if barycentric has NONPERSPECTIVE
2052       // toggles.  might be able to avoid flagging SBE too.
2053       iris_shader_variant_reference(&ice->shaders.prog[MESA_SHADER_FRAGMENT],
2054                                     shader);
2055       ice->state.dirty |= IRIS_DIRTY_WM |
2056                           IRIS_DIRTY_CLIP |
2057                           IRIS_DIRTY_SBE;
2058       ice->state.stage_dirty |= IRIS_STAGE_DIRTY_FS |
2059                                 IRIS_STAGE_DIRTY_BINDINGS_FS |
2060                                 IRIS_STAGE_DIRTY_CONSTANTS_FS;
2061       shs->sysvals_need_upload = true;
2062    }
2063 }
2064 
2065 /**
2066  * Update the last enabled stage's VUE map.
2067  *
2068  * When the shader feeding the rasterizer's output interface changes, we
2069  * need to re-emit various packets.
2070  */
2071 static void
2072 update_last_vue_map(struct iris_context *ice,
2073                     struct iris_compiled_shader *shader)
2074 {
2075    struct brw_vue_prog_data *vue_prog_data = (void *) shader->prog_data;
2076    struct brw_vue_map *vue_map = &vue_prog_data->vue_map;
2077    struct brw_vue_map *old_map = !ice->shaders.last_vue_shader ? NULL :
2078       &brw_vue_prog_data(ice->shaders.last_vue_shader->prog_data)->vue_map;
2079    const uint64_t changed_slots =
2080       (old_map ? old_map->slots_valid : 0ull) ^ vue_map->slots_valid;
2081 
2082    if (changed_slots & VARYING_BIT_VIEWPORT) {
2083       ice->state.num_viewports =
2084          (vue_map->slots_valid & VARYING_BIT_VIEWPORT) ? IRIS_MAX_VIEWPORTS : 1;
2085       ice->state.dirty |= IRIS_DIRTY_CLIP |
2086                           IRIS_DIRTY_SF_CL_VIEWPORT |
2087                           IRIS_DIRTY_CC_VIEWPORT |
2088                           IRIS_DIRTY_SCISSOR_RECT;
2089       ice->state.stage_dirty |= IRIS_STAGE_DIRTY_UNCOMPILED_FS |
2090          ice->state.stage_dirty_for_nos[IRIS_NOS_LAST_VUE_MAP];
2091    }
2092 
2093    if (changed_slots || (old_map && old_map->separate != vue_map->separate)) {
2094       ice->state.dirty |= IRIS_DIRTY_SBE;
2095    }
2096 
2097    iris_shader_variant_reference(&ice->shaders.last_vue_shader, shader);
2098 }
2099 
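     /**
      * Ensure surface states exist for any constant buffers the shader may
      * access with pull loads, and flag the stage's bindings dirty if new
      * descriptors had to be uploaded.
      */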
2100 static void
2101 iris_update_pull_constant_descriptors(struct iris_context *ice,
2102                                       gl_shader_stage stage)
2103 {
2104    struct iris_compiled_shader *shader = ice->shaders.prog[stage];
2105 
2106    if (!shader || !shader->prog_data->has_ubo_pull)
2107       return;
2108 
2109    struct iris_shader_state *shs = &ice->state.shaders[stage];
2110    bool any_new_descriptors =
2111       shader->num_system_values > 0 && shs->sysvals_need_upload;
2112 
2113    unsigned bound_cbufs = shs->bound_cbufs;
2114 
2115    while (bound_cbufs) {
2116       const int i = u_bit_scan(&bound_cbufs);
2117       struct pipe_shader_buffer *cbuf = &shs->constbuf[i];
2118       struct iris_state_ref *surf_state = &shs->constbuf_surf_state[i];
2119       if (!surf_state->res && cbuf->buffer) {
2120          iris_upload_ubo_ssbo_surf_state(ice, cbuf, surf_state,
2121                                          ISL_SURF_USAGE_CONSTANT_BUFFER_BIT);
2122          any_new_descriptors = true;
2123       }
2124    }
2125 
2126    if (any_new_descriptors)
2127       ice->state.stage_dirty |= IRIS_STAGE_DIRTY_BINDINGS_VS << stage;
2128 }
2129 
2130 /**
2131  * Update the current shader variants for the given state.
2132  *
2133  * This should be called on every draw call to ensure that the correct
2134  * shaders are bound.  It will also flag any dirty state triggered by
2135  * swapping out those shaders.
2136  */
2137 void
2138 iris_update_compiled_shaders(struct iris_context *ice)
2139 {
2140    const uint64_t stage_dirty = ice->state.stage_dirty;
2141 
2142    if (stage_dirty & (IRIS_STAGE_DIRTY_UNCOMPILED_TCS |
2143                       IRIS_STAGE_DIRTY_UNCOMPILED_TES)) {
2144       struct iris_uncompiled_shader *tes =
2145          ice->shaders.uncompiled[MESA_SHADER_TESS_EVAL];
2146       if (tes) {
2147          iris_update_compiled_tcs(ice);
2148          iris_update_compiled_tes(ice);
2149       } else {
2150          iris_shader_variant_reference(&ice->shaders.prog[MESA_SHADER_TESS_CTRL], NULL);
2151          iris_shader_variant_reference(&ice->shaders.prog[MESA_SHADER_TESS_EVAL], NULL);
2152          ice->state.stage_dirty |=
2153             IRIS_STAGE_DIRTY_TCS | IRIS_STAGE_DIRTY_TES |
2154             IRIS_STAGE_DIRTY_BINDINGS_TCS | IRIS_STAGE_DIRTY_BINDINGS_TES |
2155             IRIS_STAGE_DIRTY_CONSTANTS_TCS | IRIS_STAGE_DIRTY_CONSTANTS_TES;
2156 
2157          if (ice->shaders.urb.constrained)
2158             ice->state.dirty |= IRIS_DIRTY_URB;
2159       }
2160    }
2161 
2162    if (stage_dirty & IRIS_STAGE_DIRTY_UNCOMPILED_VS)
2163       iris_update_compiled_vs(ice);
2164    if (stage_dirty & IRIS_STAGE_DIRTY_UNCOMPILED_GS)
2165       iris_update_compiled_gs(ice);
2166 
2167    if (stage_dirty & (IRIS_STAGE_DIRTY_UNCOMPILED_GS |
2168                       IRIS_STAGE_DIRTY_UNCOMPILED_TES)) {
2169       const struct iris_compiled_shader *gs =
2170          ice->shaders.prog[MESA_SHADER_GEOMETRY];
2171       const struct iris_compiled_shader *tes =
2172          ice->shaders.prog[MESA_SHADER_TESS_EVAL];
2173 
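           /* The last pre-rasterization stage's output topology feeds the
            * ViewportXYClipTestEnable decision in 3DSTATE_CLIP (the XY clip
            * test has to be disabled for point/line output), so track it and
            * flag CLIP when it changes.
            */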
2174       bool points_or_lines = false;
2175 
2176       if (gs) {
2177          const struct brw_gs_prog_data *gs_prog_data = (void *) gs->prog_data;
2178          points_or_lines =
2179             gs_prog_data->output_topology == _3DPRIM_POINTLIST ||
2180             gs_prog_data->output_topology == _3DPRIM_LINESTRIP;
2181       } else if (tes) {
2182          const struct brw_tes_prog_data *tes_data = (void *) tes->prog_data;
2183          points_or_lines =
2184             tes_data->output_topology == BRW_TESS_OUTPUT_TOPOLOGY_LINE ||
2185             tes_data->output_topology == BRW_TESS_OUTPUT_TOPOLOGY_POINT;
2186       }
2187 
2188       if (ice->shaders.output_topology_is_points_or_lines != points_or_lines) {
2189          /* Outbound to XY Clip enables */
2190          ice->shaders.output_topology_is_points_or_lines = points_or_lines;
2191          ice->state.dirty |= IRIS_DIRTY_CLIP;
2192       }
2193    }
2194 
2195    gl_shader_stage last_stage = last_vue_stage(ice);
2196    struct iris_compiled_shader *shader = ice->shaders.prog[last_stage];
2197    struct iris_uncompiled_shader *ish = ice->shaders.uncompiled[last_stage];
2198    update_last_vue_map(ice, shader);
2199    if (ice->state.streamout != shader->streamout) {
2200       ice->state.streamout = shader->streamout;
2201       ice->state.dirty |= IRIS_DIRTY_SO_DECL_LIST | IRIS_DIRTY_STREAMOUT;
2202    }
2203 
2204    if (ice->state.streamout_active) {
2205       for (int i = 0; i < PIPE_MAX_SO_BUFFERS; i++) {
2206          struct iris_stream_output_target *so =
2207             (void *) ice->state.so_target[i];
2208          if (so)
2209             so->stride = ish->stream_output.stride[i] * sizeof(uint32_t);
2210       }
2211    }
2212 
2213    if (stage_dirty & IRIS_STAGE_DIRTY_UNCOMPILED_FS)
2214       iris_update_compiled_fs(ice);
2215 
2216    for (int i = MESA_SHADER_VERTEX; i <= MESA_SHADER_FRAGMENT; i++) {
2217       if (ice->state.stage_dirty & (IRIS_STAGE_DIRTY_CONSTANTS_VS << i))
2218          iris_update_pull_constant_descriptors(ice, i);
2219    }
2220 }
2221 
2222 static void
2223 iris_compile_cs(struct iris_screen *screen,
2224                 struct u_upload_mgr *uploader,
2225                 struct pipe_debug_callback *dbg,
2226                 struct iris_uncompiled_shader *ish,
2227                 struct iris_compiled_shader *shader)
2228 {
2229    const struct brw_compiler *compiler = screen->compiler;
2230    void *mem_ctx = ralloc_context(NULL);
2231    struct brw_cs_prog_data *cs_prog_data =
2232       rzalloc(mem_ctx, struct brw_cs_prog_data);
2233    struct brw_stage_prog_data *prog_data = &cs_prog_data->base;
2234    enum brw_param_builtin *system_values;
2235    const struct intel_device_info *devinfo = &screen->devinfo;
2236    unsigned num_system_values;
2237    unsigned num_cbufs;
2238 
2239    nir_shader *nir = nir_shader_clone(mem_ctx, ish->nir);
2240    const struct iris_cs_prog_key *const key = &shader->key.cs;
2241 
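        /* Lower compute-specific system value intrinsics (local invocation
         * ID/index and friends) into calculations the backend can handle
         * before setting up uniforms.
         */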
2242    NIR_PASS_V(nir, brw_nir_lower_cs_intrinsics);
2243 
2244    iris_setup_uniforms(compiler, mem_ctx, nir, prog_data,
2245                        ish->kernel_input_size,
2246                        &system_values, &num_system_values, &num_cbufs);
2247 
2248    struct iris_binding_table bt;
2249    iris_setup_binding_table(devinfo, nir, &bt, /* num_render_targets */ 0,
2250                             num_system_values, num_cbufs);
2251 
2252    struct brw_cs_prog_key brw_key = iris_to_brw_cs_key(devinfo, key);
2253 
2254    struct brw_compile_cs_params params = {
2255       .nir = nir,
2256       .key = &brw_key,
2257       .prog_data = cs_prog_data,
2258       .log_data = dbg,
2259    };
2260 
2261    const unsigned *program = brw_compile_cs(compiler, mem_ctx, &params);
2262    if (program == NULL) {
2263       dbg_printf("Failed to compile compute shader: %s\n", params.error_str);
2264 
2265       shader->compilation_failed = true;
2266       util_queue_fence_signal(&shader->ready);
2267 
2268       return;
2269    }
2270 
2271    shader->compilation_failed = false;
2272 
2273    iris_debug_recompile(screen, dbg, ish, &brw_key.base);
2274 
2275    iris_finalize_program(shader, prog_data, NULL, system_values,
2276                          num_system_values, ish->kernel_input_size, num_cbufs,
2277                          &bt);
2278 
2279    iris_upload_shader(screen, ish, shader, NULL, uploader, IRIS_CACHE_CS,
2280                       sizeof(*key), key, program);
2281 
2282    iris_disk_cache_store(screen->disk_cache, ish, shader, key, sizeof(*key));
2283 
2284    ralloc_free(mem_ctx);
2285 }
2286 
2287 static void
2288 iris_update_compiled_cs(struct iris_context *ice)
2289 {
2290    struct iris_shader_state *shs = &ice->state.shaders[MESA_SHADER_COMPUTE];
2291    struct u_upload_mgr *uploader = ice->shaders.uploader_driver;
2292    struct iris_uncompiled_shader *ish =
2293       ice->shaders.uncompiled[MESA_SHADER_COMPUTE];
2294 
2295    struct iris_cs_prog_key key = { KEY_ID(base) };
2296    struct iris_screen *screen = (struct iris_screen *)ice->ctx.screen;
2297    screen->vtbl.populate_cs_key(ice, &key);
2298 
2299    struct iris_compiled_shader *old = ice->shaders.prog[IRIS_CACHE_CS];
2300    bool added;
2301    struct iris_compiled_shader *shader =
2302       find_or_add_variant(screen, ish, IRIS_CACHE_CS, &key,
2303                           sizeof(key), &added);
2304 
2305    if (added && !iris_disk_cache_retrieve(screen, uploader, ish, shader,
2306                                           &key, sizeof(key))) {
2307       iris_compile_cs(screen, uploader, &ice->dbg, ish, shader);
2308    }
2309 
2310    if (shader->compilation_failed)
2311       shader = NULL;
2312 
2313    if (old != shader) {
2314       iris_shader_variant_reference(&ice->shaders.prog[MESA_SHADER_COMPUTE],
2315                                     shader);
2316       ice->state.stage_dirty |= IRIS_STAGE_DIRTY_CS |
2317                                 IRIS_STAGE_DIRTY_BINDINGS_CS |
2318                                 IRIS_STAGE_DIRTY_CONSTANTS_CS;
2319       shs->sysvals_need_upload = true;
2320    }
2321 }
2322 
2323 void
2324 iris_update_compiled_compute_shader(struct iris_context *ice)
2325 {
2326    if (ice->state.stage_dirty & IRIS_STAGE_DIRTY_UNCOMPILED_CS)
2327       iris_update_compiled_cs(ice);
2328 
2329    if (ice->state.stage_dirty & IRIS_STAGE_DIRTY_CONSTANTS_CS)
2330       iris_update_pull_constant_descriptors(ice, MESA_SHADER_COMPUTE);
2331 }
2332 
2333 void
2334 iris_fill_cs_push_const_buffer(struct brw_cs_prog_data *cs_prog_data,
2335                                unsigned threads,
2336                                uint32_t *dst)
2337 {
2338    assert(brw_cs_push_const_total_size(cs_prog_data, threads) > 0);
2339    assert(cs_prog_data->push.cross_thread.size == 0);
2340    assert(cs_prog_data->push.per_thread.dwords == 1);
2341    assert(cs_prog_data->base.param[0] == BRW_PARAM_BUILTIN_SUBGROUP_ID);
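        /* With no cross-thread constants and one per-thread dword, each
         * thread's push constant block is a single 32B register (8 dwords)
         * whose first dword holds that thread's subgroup ID.
         */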
2342    for (unsigned t = 0; t < threads; t++)
2343       dst[8 * t] = t;
2344 }
2345 
2346 /**
2347  * Allocate scratch BOs as needed for the given per-thread size and stage.
2348  */
2349 struct iris_bo *
2350 iris_get_scratch_space(struct iris_context *ice,
2351                        unsigned per_thread_scratch,
2352                        gl_shader_stage stage)
2353 {
2354    struct iris_screen *screen = (struct iris_screen *)ice->ctx.screen;
2355    struct iris_bufmgr *bufmgr = screen->bufmgr;
2356    const struct intel_device_info *devinfo = &screen->devinfo;
2357 
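        /* per_thread_scratch is a power of two of at least 1KB; encode it as
         * log2(size in KB) (1KB -> 0, 2KB -> 1, ...) so it can index our
         * array of cached scratch BOs.
         */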
2358    unsigned encoded_size = ffs(per_thread_scratch) - 11;
2359    assert(encoded_size < ARRAY_SIZE(ice->shaders.scratch_bos));
2360    assert(per_thread_scratch == 1 << (encoded_size + 10));
2361 
2362    /* On GFX version 12.5, scratch access changed to a surface-based model.
2363     * Instead of each shader type having its own layout based on IDs passed
2364     * from the relevant fixed-function unit, all scratch access is based on
2365     * thread IDs like it always has been for compute.
2366     */
2367    if (devinfo->verx10 >= 125)
2368       stage = MESA_SHADER_COMPUTE;
2369 
2370    struct iris_bo **bop = &ice->shaders.scratch_bos[encoded_size][stage];
2371 
2372    if (!*bop) {
2373       assert(stage < ARRAY_SIZE(devinfo->max_scratch_ids));
2374       uint32_t size = per_thread_scratch * devinfo->max_scratch_ids[stage];
2375       *bop = iris_bo_alloc(bufmgr, "scratch", size, 1, IRIS_MEMZONE_SHADER, 0);
2376    }
2377 
2378    return *bop;
2379 }
2380 
2381 const struct iris_state_ref *
2382 iris_get_scratch_surf(struct iris_context *ice,
2383                       unsigned per_thread_scratch)
2384 {
2385    struct iris_screen *screen = (struct iris_screen *)ice->ctx.screen;
2386    ASSERTED const struct intel_device_info *devinfo = &screen->devinfo;
2387 
2388    assert(devinfo->verx10 >= 125);
2389 
2390    unsigned encoded_size = ffs(per_thread_scratch) - 11;
2391    assert(encoded_size < ARRAY_SIZE(ice->shaders.scratch_surfs));
2392    assert(per_thread_scratch == 1 << (encoded_size + 10));
2393 
2394    struct iris_state_ref *ref = &ice->shaders.scratch_surfs[encoded_size];
2395 
2396    if (ref->res)
2397       return ref;
2398 
2399    struct iris_bo *scratch_bo =
2400       iris_get_scratch_space(ice, per_thread_scratch, MESA_SHADER_COMPUTE);
2401 
2402    void *map = upload_state(ice->state.bindless_uploader, ref,
2403                             screen->isl_dev.ss.size, 64);
2404 
2405    isl_buffer_fill_state(&screen->isl_dev, map,
2406                          .address = scratch_bo->address,
2407                          .size_B = scratch_bo->size,
2408                          .format = ISL_FORMAT_RAW,
2409                          .swizzle = ISL_SWIZZLE_IDENTITY,
2410                          .mocs = iris_mocs(scratch_bo, &screen->isl_dev, 0),
2411                          .stride_B = per_thread_scratch,
2412                          .is_scratch = true);
2413 
2414    return ref;
2415 }
2416 
2417 /* ------------------------------------------------------------------- */
2418 
2419 /**
2420  * The pipe->create_[stage]_state() driver hooks.
2421  *
2422  * Performs basic NIR preprocessing, records any state dependencies, and
2423  * returns an iris_uncompiled_shader as the Gallium CSO.
2424  *
2425  * Actual shader compilation to assembly happens later, at first use.
2426  */
2427 static void *
2428 iris_create_uncompiled_shader(struct iris_screen *screen,
2429                               nir_shader *nir,
2430                               const struct pipe_stream_output_info *so_info)
2431 {
2432    struct iris_uncompiled_shader *ish =
2433       calloc(1, sizeof(struct iris_uncompiled_shader));
2434    if (!ish)
2435       return NULL;
2436 
2437    pipe_reference_init(&ish->ref, 1);
2438    list_inithead(&ish->variants);
2439    simple_mtx_init(&ish->lock, mtx_plain);
2440 
2441    ish->uses_atomic_load_store = iris_uses_image_atomic(nir);
2442 
2443    ish->program_id = get_new_program_id(screen);
2444    ish->nir = nir;
2445    if (so_info) {
2446       memcpy(&ish->stream_output, so_info, sizeof(*so_info));
2447       update_so_info(&ish->stream_output, nir->info.outputs_written);
2448    }
2449 
2450    if (screen->disk_cache) {
2451       /* Serialize the NIR to a binary blob that we can hash for the disk
2452        * cache.  Drop unnecessary information (like variable names)
2453        * so the serialized NIR is smaller, and also to let us detect more
2454        * isomorphic shaders when hashing, increasing cache hits.
2455        */
2456       struct blob blob;
2457       blob_init(&blob);
2458       nir_serialize(&blob, nir, true);
2459       _mesa_sha1_compute(blob.data, blob.size, ish->nir_sha1);
2460       blob_finish(&blob);
2461    }
2462 
2463    return ish;
2464 }
2465 
2466 static void *
2467 iris_create_compute_state(struct pipe_context *ctx,
2468                           const struct pipe_compute_state *state)
2469 {
2470    struct iris_context *ice = (void *) ctx;
2471    struct iris_screen *screen = (void *) ctx->screen;
2472    struct u_upload_mgr *uploader = ice->shaders.uploader_unsync;
2473    const nir_shader_compiler_options *options =
2474       screen->compiler->glsl_compiler_options[MESA_SHADER_COMPUTE].NirOptions;
2475 
2476    nir_shader *nir;
2477    switch (state->ir_type) {
2478    case PIPE_SHADER_IR_NIR:
2479       nir = (void *)state->prog;
2480       break;
2481 
2482    case PIPE_SHADER_IR_NIR_SERIALIZED: {
2483       struct blob_reader reader;
2484       const struct pipe_binary_program_header *hdr = state->prog;
2485       blob_reader_init(&reader, hdr->blob, hdr->num_bytes);
2486       nir = nir_deserialize(NULL, options, &reader);
2487       break;
2488    }
2489 
2490    default:
2491       unreachable("Unsupported IR");
2492    }
2493 
2494    /* Most of iris doesn't really care about the difference between compute
2495     * shaders and kernels.  We also tend to hard-code COMPUTE everywhere so
2496     * it's way easier if we just normalize to COMPUTE here.
2497     */
2498    assert(nir->info.stage == MESA_SHADER_COMPUTE ||
2499           nir->info.stage == MESA_SHADER_KERNEL);
2500    nir->info.stage = MESA_SHADER_COMPUTE;
2501 
2502    struct iris_uncompiled_shader *ish =
2503       iris_create_uncompiled_shader(screen, nir, NULL);
2504    ish->kernel_input_size = state->req_input_mem;
2505    ish->kernel_shared_size = state->req_local_mem;
2506 
2507    // XXX: disallow more than 64KB of shared variables
2508 
2509    if (screen->precompile) {
2510       struct iris_cs_prog_key key = { KEY_ID(base) };
2511 
2512       struct iris_compiled_shader *shader =
2513          iris_create_shader_variant(screen, NULL, IRIS_CACHE_CS,
2514                                     sizeof(key), &key);
2515 
2516       /* Append our new variant to the shader's variant list. */
2517       list_addtail(&shader->link, &ish->variants);
2518 
2519       if (!iris_disk_cache_retrieve(screen, uploader, ish, shader,
2520                                     &key, sizeof(key))) {
2521          iris_compile_cs(screen, uploader, &ice->dbg, ish, shader);
2522       }
2523    }
2524 
2525    return ish;
2526 }
2527 
2528 static void
2529 iris_compile_shader(void *_job, UNUSED void *_gdata, UNUSED int thread_index)
2530 {
2531    const struct iris_threaded_compile_job *job =
2532       (struct iris_threaded_compile_job *) _job;
2533 
2534    struct iris_screen *screen = job->screen;
2535    struct u_upload_mgr *uploader = job->uploader;
2536    struct pipe_debug_callback *dbg = job->dbg;
2537    struct iris_uncompiled_shader *ish = job->ish;
2538    struct iris_compiled_shader *shader = job->shader;
2539 
2540    switch (ish->nir->info.stage) {
2541    case MESA_SHADER_VERTEX:
2542       iris_compile_vs(screen, uploader, dbg, ish, shader);
2543       break;
2544    case MESA_SHADER_TESS_CTRL:
2545       iris_compile_tcs(screen, NULL, uploader, dbg, ish, shader);
2546       break;
2547    case MESA_SHADER_TESS_EVAL:
2548       iris_compile_tes(screen, uploader, dbg, ish, shader);
2549       break;
2550    case MESA_SHADER_GEOMETRY:
2551       iris_compile_gs(screen, uploader, dbg, ish, shader);
2552       break;
2553    case MESA_SHADER_FRAGMENT:
2554       iris_compile_fs(screen, uploader, dbg, ish, shader, NULL);
2555       break;
2556 
2557    default:
2558       unreachable("Invalid shader stage.");
2559    }
2560 }
2561 
2562 static void *
2563 iris_create_shader_state(struct pipe_context *ctx,
2564                          const struct pipe_shader_state *state)
2565 {
2566    struct iris_context *ice = (void *) ctx;
2567    struct iris_screen *screen = (void *) ctx->screen;
2568    struct nir_shader *nir;
2569 
2570    if (state->type == PIPE_SHADER_IR_TGSI)
2571       nir = tgsi_to_nir(state->tokens, ctx->screen, false);
2572    else
2573       nir = state->ir.nir;
2574 
2575    const struct shader_info *const info = &nir->info;
2576    struct iris_uncompiled_shader *ish =
2577       iris_create_uncompiled_shader(screen, nir, &state->stream_output);
2578 
2579    union iris_any_prog_key key;
2580    unsigned key_size = 0;
2581 
2582    memset(&key, 0, sizeof(key));
2583 
2584    switch (info->stage) {
2585    case MESA_SHADER_VERTEX:
2586       /* User clip planes */
2587       if (info->clip_distance_array_size == 0)
2588          ish->nos |= (1ull << IRIS_NOS_RASTERIZER);
2589 
2590       key.vs = (struct iris_vs_prog_key) { KEY_ID(vue.base) };
2591       key_size = sizeof(key.vs);
2592       break;
2593 
2594    case MESA_SHADER_TESS_CTRL: {
2595       const unsigned _GL_TRIANGLES = 0x0004;
2596 
2597       key.tcs = (struct iris_tcs_prog_key) {
2598          KEY_ID(vue.base),
2599          // XXX: make sure the linker fills this out from the TES...
2600          .tes_primitive_mode =
2601          info->tess.primitive_mode ? info->tess.primitive_mode
2602                                    : _GL_TRIANGLES,
2603          .outputs_written = info->outputs_written,
2604          .patch_outputs_written = info->patch_outputs_written,
2605       };
2606 
2607       /* 8_PATCH mode needs the key to contain the input patch dimensionality.
2608        * We don't have that information, so we guess that the input and
2609        * output patches are the same size.  This is a bad guess, but we
2610        * can't do much better.
2611        */
2612       if (screen->compiler->use_tcs_8_patch)
2613          key.tcs.input_vertices = info->tess.tcs_vertices_out;
2614 
2615       key_size = sizeof(key.tcs);
2616       break;
2617    }
2618 
2619    case MESA_SHADER_TESS_EVAL:
2620       /* User clip planes */
2621       if (info->clip_distance_array_size == 0)
2622          ish->nos |= (1ull << IRIS_NOS_RASTERIZER);
2623 
2624       key.tes = (struct iris_tes_prog_key) {
2625          KEY_ID(vue.base),
2626          // XXX: not ideal, need TCS output/TES input unification
2627          .inputs_read = info->inputs_read,
2628          .patch_inputs_read = info->patch_inputs_read,
2629       };
2630 
2631       key_size = sizeof(key.tes);
2632       break;
2633 
2634    case MESA_SHADER_GEOMETRY:
2635       /* User clip planes */
2636       if (info->clip_distance_array_size == 0)
2637          ish->nos |= (1ull << IRIS_NOS_RASTERIZER);
2638 
2639       key.gs = (struct iris_gs_prog_key) { KEY_ID(vue.base) };
2640       key_size = sizeof(key.gs);
2641       break;
2642 
2643    case MESA_SHADER_FRAGMENT:
2644       ish->nos |= (1ull << IRIS_NOS_FRAMEBUFFER) |
2645                   (1ull << IRIS_NOS_DEPTH_STENCIL_ALPHA) |
2646                   (1ull << IRIS_NOS_RASTERIZER) |
2647                   (1ull << IRIS_NOS_BLEND);
2648 
2649       /* The program key needs the VUE map if there are > 16 inputs */
2650       if (util_bitcount64(info->inputs_read & BRW_FS_VARYING_INPUT_MASK) > 16) {
2651          ish->nos |= (1ull << IRIS_NOS_LAST_VUE_MAP);
2652       }
2653 
2654       const uint64_t color_outputs = info->outputs_written &
2655          ~(BITFIELD64_BIT(FRAG_RESULT_DEPTH) |
2656            BITFIELD64_BIT(FRAG_RESULT_STENCIL) |
2657            BITFIELD64_BIT(FRAG_RESULT_SAMPLE_MASK));
2658 
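           /* SBE attribute swizzling can remap up to 16 FS inputs, so below
            * that limit the key doesn't need to bake in the previous stage's
            * VUE slot layout (input_slots_valid stays 0).
            */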
2659       bool can_rearrange_varyings =
2660          util_bitcount64(info->inputs_read & BRW_FS_VARYING_INPUT_MASK) <= 16;
2661 
2662       const struct intel_device_info *devinfo = &screen->devinfo;
2663 
2664       key.fs = (struct iris_fs_prog_key) {
2665          KEY_ID(base),
2666          .nr_color_regions = util_bitcount(color_outputs),
2667          .coherent_fb_fetch = devinfo->ver >= 9,
2668          .input_slots_valid =
2669             can_rearrange_varyings ? 0 : info->inputs_read | VARYING_BIT_POS,
2670       };
2671 
2672       key_size = sizeof(key.fs);
2673       break;
2674 
2675    default:
2676       unreachable("Invalid shader stage.");
2677    }

   if (screen->precompile) {
      struct u_upload_mgr *uploader = ice->shaders.uploader_unsync;

      struct iris_compiled_shader *shader =
         iris_create_shader_variant(screen, NULL,
                                    (enum iris_program_cache_id) info->stage,
                                    key_size, &key);

      /* Append our new variant to the shader's variant list. */
      list_addtail(&shader->link, &ish->variants);

      if (!iris_disk_cache_retrieve(screen, uploader, ish, shader,
                                    &key, key_size)) {
         assert(!util_queue_fence_is_signalled(&shader->ready));

         struct iris_threaded_compile_job *job = calloc(1, sizeof(*job));

         job->screen = screen;
         job->uploader = uploader;
         job->ish = ish;
         job->shader = shader;

         iris_schedule_compile(screen, &ish->ready, &ice->dbg, job,
                               iris_compile_shader);
      }
   }

   return ish;
}

/**
 * Called when the refcount on the iris_uncompiled_shader reaches 0.
 *
 * Frees the iris_uncompiled_shader.
 *
 * \sa iris_delete_shader_state
 */
void
iris_destroy_shader_state(struct pipe_context *ctx, void *state)
{
   struct iris_uncompiled_shader *ish = state;

   /* No need to take ish->lock; we hold the last reference to ish */
   list_for_each_entry_safe(struct iris_compiled_shader, shader,
                            &ish->variants, link) {
      list_del(&shader->link);

      iris_shader_variant_reference(&shader, NULL);
   }

   simple_mtx_destroy(&ish->lock);
   util_queue_fence_destroy(&ish->ready);

   ralloc_free(ish->nir);
   free(ish);
}

/**
 * The pipe->delete_[stage]_state() driver hooks.
 *
 * \sa iris_destroy_shader_state
 */
static void
iris_delete_shader_state(struct pipe_context *ctx, void *state)
{
   struct iris_uncompiled_shader *ish = state;
   struct iris_context *ice = (void *) ctx;

   const gl_shader_stage stage = ish->nir->info.stage;

   if (ice->shaders.uncompiled[stage] == ish) {
      ice->shaders.uncompiled[stage] = NULL;
      ice->state.stage_dirty |= IRIS_STAGE_DIRTY_UNCOMPILED_VS << stage;
   }

   if (pipe_reference(&ish->ref, NULL))
      iris_destroy_shader_state(ctx, state);
}

/**
 * The pipe->bind_[stage]_state() driver hook.
 *
 * Binds an uncompiled shader as the current one for a particular stage.
 * Updates dirty tracking to account for the shader's NOS.
 */
static void
bind_shader_state(struct iris_context *ice,
                  struct iris_uncompiled_shader *ish,
                  gl_shader_stage stage)
{
   uint64_t stage_dirty_bit = IRIS_STAGE_DIRTY_UNCOMPILED_VS << stage;
   const uint64_t nos = ish ? ish->nos : 0;

   const struct shader_info *old_info = iris_get_shader_info(ice, stage);
   const struct shader_info *new_info = ish ? &ish->nir->info : NULL;

   if ((old_info ? BITSET_LAST_BIT(old_info->textures_used) : 0) !=
       (new_info ? BITSET_LAST_BIT(new_info->textures_used) : 0)) {
      ice->state.stage_dirty |= IRIS_STAGE_DIRTY_SAMPLER_STATES_VS << stage;
   }

   ice->shaders.uncompiled[stage] = ish;
   ice->state.stage_dirty |= stage_dirty_bit;

   /* Record that CSOs need to mark IRIS_STAGE_DIRTY_UNCOMPILED_XS when they
    * change (or that they no longer need to do so).
    */
   for (int i = 0; i < IRIS_NOS_COUNT; i++) {
      if (nos & (1 << i))
         ice->state.stage_dirty_for_nos[i] |= stage_dirty_bit;
      else
         ice->state.stage_dirty_for_nos[i] &= ~stage_dirty_bit;
   }
}

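/**
 * The pipe->bind_vs_state() driver hook.
 *
 * In addition to the common bind_shader_state() handling, this flags
 * clip/raster/viewport state dirty when the shader's use of window space
 * positions changes, and vertex buffer/element state dirty when its use of
 * draw-parameter system values or edge flags changes.
 */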
static void
iris_bind_vs_state(struct pipe_context *ctx, void *state)
{
   struct iris_context *ice = (struct iris_context *)ctx;
   struct iris_uncompiled_shader *ish = state;

   if (ish) {
      const struct shader_info *info = &ish->nir->info;
      if (ice->state.window_space_position != info->vs.window_space_position) {
         ice->state.window_space_position = info->vs.window_space_position;

         ice->state.dirty |= IRIS_DIRTY_CLIP |
                             IRIS_DIRTY_RASTER |
                             IRIS_DIRTY_CC_VIEWPORT;
      }

      const bool uses_draw_params =
         BITSET_TEST(info->system_values_read, SYSTEM_VALUE_FIRST_VERTEX) ||
         BITSET_TEST(info->system_values_read, SYSTEM_VALUE_BASE_INSTANCE);
      const bool uses_derived_draw_params =
         BITSET_TEST(info->system_values_read, SYSTEM_VALUE_DRAW_ID) ||
         BITSET_TEST(info->system_values_read, SYSTEM_VALUE_IS_INDEXED_DRAW);
      const bool needs_sgvs_element = uses_draw_params ||
         BITSET_TEST(info->system_values_read, SYSTEM_VALUE_INSTANCE_ID) ||
         BITSET_TEST(info->system_values_read,
                     SYSTEM_VALUE_VERTEX_ID_ZERO_BASE);

      if (ice->state.vs_uses_draw_params != uses_draw_params ||
          ice->state.vs_uses_derived_draw_params != uses_derived_draw_params ||
          ice->state.vs_needs_edge_flag != info->vs.needs_edge_flag) {
         ice->state.dirty |= IRIS_DIRTY_VERTEX_BUFFERS |
                             IRIS_DIRTY_VERTEX_ELEMENTS;
      }

      ice->state.vs_uses_draw_params = uses_draw_params;
      ice->state.vs_uses_derived_draw_params = uses_derived_draw_params;
      ice->state.vs_needs_sgvs_element = needs_sgvs_element;
      ice->state.vs_needs_edge_flag = info->vs.needs_edge_flag;
   }

   bind_shader_state((void *) ctx, state, MESA_SHADER_VERTEX);
}

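/**
 * The pipe->bind_tcs_state() driver hook.
 */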
static void
iris_bind_tcs_state(struct pipe_context *ctx, void *state)
{
   bind_shader_state((void *) ctx, state, MESA_SHADER_TESS_CTRL);
}

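/**
 * The pipe->bind_tes_state() driver hook.
 */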
static void
iris_bind_tes_state(struct pipe_context *ctx, void *state)
{
   struct iris_context *ice = (struct iris_context *)ctx;

   /* Enabling/disabling optional stages requires a URB reconfiguration. */
   if (!!state != !!ice->shaders.uncompiled[MESA_SHADER_TESS_EVAL])
      ice->state.dirty |= IRIS_DIRTY_URB;

   bind_shader_state((void *) ctx, state, MESA_SHADER_TESS_EVAL);
}

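/**
 * The pipe->bind_gs_state() driver hook.
 */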
static void
iris_bind_gs_state(struct pipe_context *ctx, void *state)
{
   struct iris_context *ice = (struct iris_context *)ctx;

   /* Enabling/disabling optional stages requires a URB reconfiguration. */
   if (!!state != !!ice->shaders.uncompiled[MESA_SHADER_GEOMETRY])
      ice->state.dirty |= IRIS_DIRTY_URB;

   bind_shader_state((void *) ctx, state, MESA_SHADER_GEOMETRY);
}

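/**
 * The pipe->bind_fs_state() driver hook.
 *
 * Flags blend state dirty when the set of written color outputs changes
 * (which affects HasWriteableRT), and the PMA fix dirty on Gfx8.
 */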
static void
iris_bind_fs_state(struct pipe_context *ctx, void *state)
{
   struct iris_context *ice = (struct iris_context *) ctx;
   struct iris_screen *screen = (struct iris_screen *) ctx->screen;
   const struct intel_device_info *devinfo = &screen->devinfo;
   struct iris_uncompiled_shader *old_ish =
      ice->shaders.uncompiled[MESA_SHADER_FRAGMENT];
   struct iris_uncompiled_shader *new_ish = state;

   const unsigned color_bits =
      BITFIELD64_BIT(FRAG_RESULT_COLOR) |
      BITFIELD64_RANGE(FRAG_RESULT_DATA0, BRW_MAX_DRAW_BUFFERS);

   /* Fragment shader outputs influence HasWriteableRT */
   if (!old_ish || !new_ish ||
       (old_ish->nir->info.outputs_written & color_bits) !=
       (new_ish->nir->info.outputs_written & color_bits))
      ice->state.dirty |= IRIS_DIRTY_PS_BLEND;

   if (devinfo->ver == 8)
      ice->state.dirty |= IRIS_DIRTY_PMA_FIX;

   bind_shader_state((void *) ctx, state, MESA_SHADER_FRAGMENT);
}

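/**
 * The pipe->bind_compute_state() driver hook.
 */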
static void
iris_bind_cs_state(struct pipe_context *ctx, void *state)
{
   bind_shader_state((void *) ctx, state, MESA_SHADER_COMPUTE);
}

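/**
 * The pipe_screen->finalize_nir() driver hook.
 *
 * Runs device-specific NIR preprocessing and lowering passes on the shader.
 * Returns NULL to indicate success.
 */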
static char *
iris_finalize_nir(struct pipe_screen *_screen, void *nirptr)
{
   struct iris_screen *screen = (struct iris_screen *)_screen;
   struct nir_shader *nir = (struct nir_shader *) nirptr;
   const struct intel_device_info *devinfo = &screen->devinfo;

   NIR_PASS_V(nir, iris_fix_edge_flags);

   brw_preprocess_nir(screen->compiler, nir, NULL);

   NIR_PASS_V(nir, brw_nir_lower_storage_image, devinfo);
   NIR_PASS_V(nir, iris_lower_storage_image_derefs);

   nir_sweep(nir);

   return NULL;
}

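/**
 * The pipe_screen->set_max_shader_compiler_threads() driver hook.
 *
 * Adjusts the number of worker threads in the shader compiler queue.
 */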
static void
iris_set_max_shader_compiler_threads(struct pipe_screen *pscreen,
                                     unsigned max_threads)
{
   struct iris_screen *screen = (struct iris_screen *) pscreen;
   util_queue_adjust_num_threads(&screen->shader_compiler_queue, max_threads);
}

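/**
 * The pipe_screen->is_parallel_shader_compilation_finished() driver hook.
 *
 * Returns whether the precompile variant of a shader has finished compiling.
 */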
static bool
iris_is_parallel_shader_compilation_finished(struct pipe_screen *pscreen,
                                             void *v_shader,
                                             enum pipe_shader_type p_stage)
{
   struct iris_screen *screen = (struct iris_screen *) pscreen;

   /* Threaded compilation is only used for the precompile.  If precompile is
    * disabled, threaded compilation is "done."
    */
   if (!screen->precompile)
      return true;

   struct iris_uncompiled_shader *ish = v_shader;

   /* When precompile is enabled, the first entry is the precompile variant.
    * Check the ready fence of the precompile variant.
    */
   struct iris_compiled_shader *first =
      list_first_entry(&ish->variants, struct iris_compiled_shader, link);

   return util_queue_fence_is_signalled(&first->ready);
}

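/**
 * Fills in the screen-level (context-independent) shader compiler hooks.
 */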
void
iris_init_screen_program_functions(struct pipe_screen *pscreen)
{
   pscreen->is_parallel_shader_compilation_finished =
      iris_is_parallel_shader_compilation_finished;
   pscreen->set_max_shader_compiler_threads =
      iris_set_max_shader_compiler_threads;
   pscreen->finalize_nir = iris_finalize_nir;
}

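/**
 * Fills in the pipe_context shader create/delete/bind hooks.
 */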
void
iris_init_program_functions(struct pipe_context *ctx)
{
   ctx->create_vs_state  = iris_create_shader_state;
   ctx->create_tcs_state = iris_create_shader_state;
   ctx->create_tes_state = iris_create_shader_state;
   ctx->create_gs_state  = iris_create_shader_state;
   ctx->create_fs_state  = iris_create_shader_state;
   ctx->create_compute_state = iris_create_compute_state;

   ctx->delete_vs_state  = iris_delete_shader_state;
   ctx->delete_tcs_state = iris_delete_shader_state;
   ctx->delete_tes_state = iris_delete_shader_state;
   ctx->delete_gs_state  = iris_delete_shader_state;
   ctx->delete_fs_state  = iris_delete_shader_state;
   ctx->delete_compute_state = iris_delete_shader_state;

   ctx->bind_vs_state  = iris_bind_vs_state;
   ctx->bind_tcs_state = iris_bind_tcs_state;
   ctx->bind_tes_state = iris_bind_tes_state;
   ctx->bind_gs_state  = iris_bind_gs_state;
   ctx->bind_fs_state  = iris_bind_fs_state;
   ctx->bind_compute_state = iris_bind_cs_state;
}