/*
 * Copyright 2018 Collabora Ltd.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * on the rights to use, copy, modify, merge, publish, distribute, sub
 * license, and/or sell copies of the Software, and to permit persons to whom
 * the Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
 * USE OR OTHER DEALINGS IN THE SOFTWARE.
 */

#include "zink_context.h"
#include "zink_compiler.h"
#include "zink_program.h"
#include "zink_screen.h"
#include "nir_to_spirv/nir_to_spirv.h"

#include "pipe/p_state.h"

#include "nir.h"
#include "compiler/nir/nir_builder.h"

#include "nir/tgsi_to_nir.h"
#include "tgsi/tgsi_dump.h"
#include "tgsi/tgsi_from_mesa.h"

#include "util/u_memory.h"

static void
create_vs_pushconst(nir_shader *nir)
{
   nir_variable *vs_pushconst;
   /* create compatible layout for the ntv push constant loader */
   struct glsl_struct_field *fields = rzalloc_array(nir, struct glsl_struct_field, 2);
   fields[0].type = glsl_array_type(glsl_uint_type(), 1, 0);
   fields[0].name = ralloc_asprintf(nir, "draw_mode_is_indexed");
   fields[0].offset = offsetof(struct zink_gfx_push_constant, draw_mode_is_indexed);
   fields[1].type = glsl_array_type(glsl_uint_type(), 1, 0);
   fields[1].name = ralloc_asprintf(nir, "draw_id");
   fields[1].offset = offsetof(struct zink_gfx_push_constant, draw_id);
   vs_pushconst = nir_variable_create(nir, nir_var_mem_push_const,
                                      glsl_struct_type(fields, 2, "struct", false), "vs_pushconst");
   vs_pushconst->data.location = INT_MAX; //doesn't really matter
}

static void
create_cs_pushconst(nir_shader *nir)
{
   nir_variable *cs_pushconst;
   /* create compatible layout for the ntv push constant loader */
   struct glsl_struct_field *fields = rzalloc_size(nir, 1 * sizeof(struct glsl_struct_field));
   fields[0].type = glsl_array_type(glsl_uint_type(), 1, 0);
   fields[0].name = ralloc_asprintf(nir, "work_dim");
   fields[0].offset = 0;
   cs_pushconst = nir_variable_create(nir, nir_var_mem_push_const,
                                      glsl_struct_type(fields, 1, "struct", false), "cs_pushconst");
   cs_pushconst->data.location = INT_MAX; //doesn't really matter
}
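
/* For reference, a sketch of the push constant block these nir structs
 * mirror; the real struct is defined elsewhere in zink, and only the two
 * fields actually referenced via offsetof() above are certain here:
 *
 *    struct zink_gfx_push_constant {
 *       unsigned draw_mode_is_indexed;
 *       unsigned draw_id;
 *       ...
 *    };
 */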

static bool
reads_work_dim(nir_shader *shader)
{
   return BITSET_TEST(shader->info.system_values_read, SYSTEM_VALUE_WORK_DIM);
}

static bool
lower_discard_if_instr(nir_builder *b, nir_instr *instr_, UNUSED void *cb_data)
{
   if (instr_->type != nir_instr_type_intrinsic)
      return false;

   nir_intrinsic_instr *instr = nir_instr_as_intrinsic(instr_);

   if (instr->intrinsic == nir_intrinsic_discard_if) {
      b->cursor = nir_before_instr(&instr->instr);

      nir_if *if_stmt = nir_push_if(b, nir_ssa_for_src(b, instr->src[0], 1));
      nir_discard(b);
      nir_pop_if(b, if_stmt);
      nir_instr_remove(&instr->instr);
      return true;
   }
   /* a shader like this (shaders@glsl-fs-discard-04):

      uniform int j, k;

      void main()
      {
       for (int i = 0; i < j; i++) {
        if (i > k)
         continue;
        discard;
       }
       gl_FragColor = vec4(0.0, 1.0, 0.0, 0.0);
      }

      will generate nir like:

      loop {
         //snip
         if ssa_11 {
            block block_5:
            / preds: block_4 /
            vec1 32 ssa_17 = iadd ssa_50, ssa_31
            / succs: block_7 /
         } else {
            block block_6:
            / preds: block_4 /
            intrinsic discard () () <-- not last instruction
            vec1 32 ssa_23 = iadd ssa_50, ssa_31 <-- dead code loop itr increment
            / succs: block_7 /
         }
         //snip
      }

      which means that we can't assert like this:

      assert(instr->intrinsic != nir_intrinsic_discard ||
             nir_block_last_instr(instr->instr.block) == &instr->instr);

      and it's unnecessary anyway since post-vtn optimizing will dce the instructions following the discard
    */

   return false;
}

static bool
lower_discard_if(nir_shader *shader)
{
   return nir_shader_instructions_pass(shader,
                                       lower_discard_if_instr,
                                       nir_metadata_dominance,
                                       NULL);
}
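
/* A minimal sketch of the rewrite performed by this pass, in NIR-like
 * pseudocode:
 *
 *    intrinsic discard_if (ssa_cond)
 *
 * becomes
 *
 *    if ssa_cond {
 *       intrinsic discard ()
 *    }
 *
 * leaving only unconditional discards for nir_to_spirv to translate, since
 * SPIR-V's OpKill has no conditional form.
 */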

static bool
lower_work_dim_instr(nir_builder *b, nir_instr *in, void *data)
{
   if (in->type != nir_instr_type_intrinsic)
      return false;
   nir_intrinsic_instr *instr = nir_instr_as_intrinsic(in);
   if (instr->intrinsic != nir_intrinsic_load_work_dim)
      return false;

   /* the early return above guarantees this is a load_work_dim */
   b->cursor = nir_after_instr(&instr->instr);
   nir_intrinsic_instr *load = nir_intrinsic_instr_create(b->shader, nir_intrinsic_load_push_constant);
   load->src[0] = nir_src_for_ssa(nir_imm_int(b, 0));
   nir_intrinsic_set_range(load, 3 * sizeof(uint32_t));
   load->num_components = 1;
   nir_ssa_dest_init(&load->instr, &load->dest, 1, 32, "work_dim");
   nir_builder_instr_insert(b, &load->instr);

   nir_ssa_def_rewrite_uses(&instr->dest.ssa, &load->dest.ssa);

   return true;
}

static bool
lower_work_dim(nir_shader *shader)
{
   if (shader->info.stage != MESA_SHADER_KERNEL)
      return false;

   if (!reads_work_dim(shader))
      return false;

   return nir_shader_instructions_pass(shader, lower_work_dim_instr, nir_metadata_dominance, NULL);
}

static bool
lower_64bit_vertex_attribs_instr(nir_builder *b, nir_instr *instr, void *data)
{
   if (instr->type != nir_instr_type_deref)
      return false;
   nir_deref_instr *deref = nir_instr_as_deref(instr);
   if (deref->deref_type != nir_deref_type_var)
      return false;
   nir_variable *var = nir_deref_instr_get_variable(deref);
   if (var->data.mode != nir_var_shader_in)
      return false;
   if (!glsl_type_is_64bit(var->type) || !glsl_type_is_vector(var->type) || glsl_get_vector_elements(var->type) < 3)
      return false;

   /* create second variable for the split */
   nir_variable *var2 = nir_variable_clone(var, b->shader);
   /* split new variable into second slot */
   var2->data.driver_location++;
   nir_shader_add_variable(b->shader, var2);

   unsigned total_num_components = glsl_get_vector_elements(var->type);
   /* new variable is the second half of the dvec */
   var2->type = glsl_vector_type(glsl_get_base_type(var->type), glsl_get_vector_elements(var->type) - 2);
   /* clamp original variable to a dvec2 */
   deref->type = var->type = glsl_vector_type(glsl_get_base_type(var->type), 2);

   /* create deref instr for new variable */
   b->cursor = nir_after_instr(instr);
   nir_deref_instr *deref2 = nir_build_deref_var(b, var2);

   nir_foreach_use_safe(use_src, &deref->dest.ssa) {
      nir_instr *use_instr = use_src->parent_instr;
      assert(use_instr->type == nir_instr_type_intrinsic &&
             nir_instr_as_intrinsic(use_instr)->intrinsic == nir_intrinsic_load_deref);

      /* this is a load instruction for the deref, and we need to split it into two instructions that we can
       * then zip back into a single ssa def */
      nir_intrinsic_instr *intr = nir_instr_as_intrinsic(use_instr);
      /* clamp the first load to 2 64bit components */
      intr->num_components = intr->dest.ssa.num_components = 2;
      b->cursor = nir_after_instr(use_instr);
      /* this is the second load instruction for the second half of the dvec3/4 components */
      nir_intrinsic_instr *intr2 = nir_intrinsic_instr_create(b->shader, nir_intrinsic_load_deref);
      intr2->src[0] = nir_src_for_ssa(&deref2->dest.ssa);
      intr2->num_components = total_num_components - 2;
      nir_ssa_dest_init(&intr2->instr, &intr2->dest, intr2->num_components, 64, NULL);
      nir_builder_instr_insert(b, &intr2->instr);

      nir_ssa_def *def[4];
      /* create a new dvec3/4 comprised of all the loaded components from both variables */
      def[0] = nir_vector_extract(b, &intr->dest.ssa, nir_imm_int(b, 0));
      def[1] = nir_vector_extract(b, &intr->dest.ssa, nir_imm_int(b, 1));
      def[2] = nir_vector_extract(b, &intr2->dest.ssa, nir_imm_int(b, 0));
      if (total_num_components == 4)
         def[3] = nir_vector_extract(b, &intr2->dest.ssa, nir_imm_int(b, 1));
      nir_ssa_def *new_vec = nir_vec(b, def, total_num_components);
      /* use the assembled dvec3/4 for all other uses of the load */
      nir_ssa_def_rewrite_uses_after(&intr->dest.ssa, new_vec,
                                     new_vec->parent_instr);
   }

   return true;
}

/* "64-bit three- and four-component vectors consume two consecutive locations."
 *  - 14.1.4. Location Assignment
 *
 * this pass splits dvec3 and dvec4 vertex inputs into a dvec2 and a double/dvec2 which
 * are assigned to consecutive locations, loaded separately, and then assembled back into a
 * composite value that's used in place of the original loaded ssa src
 */
static bool
lower_64bit_vertex_attribs(nir_shader *shader)
{
   if (shader->info.stage != MESA_SHADER_VERTEX)
      return false;

   return nir_shader_instructions_pass(shader, lower_64bit_vertex_attribs_instr, nir_metadata_dominance, NULL);
}
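
/* A rough sketch of the effect on a shader, in GLSL terms:
 *
 *    in dvec4 attr;            // one variable spanning two Vulkan locations
 *
 * becomes, conceptually,
 *
 *    in dvec2 attr;            // location N
 *    in dvec2 attr_hi;         // location N + 1 (hypothetical name)
 *    ... dvec4(attr.xy, attr_hi.xy) ...
 *
 * with the two halves loaded separately and re-vectorized at each use.
 */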

static bool
lower_basevertex_instr(nir_builder *b, nir_instr *in, void *data)
{
   if (in->type != nir_instr_type_intrinsic)
      return false;
   nir_intrinsic_instr *instr = nir_instr_as_intrinsic(in);
   if (instr->intrinsic != nir_intrinsic_load_base_vertex)
      return false;

   b->cursor = nir_after_instr(&instr->instr);
   nir_intrinsic_instr *load = nir_intrinsic_instr_create(b->shader, nir_intrinsic_load_push_constant);
   load->src[0] = nir_src_for_ssa(nir_imm_int(b, 0));
   nir_intrinsic_set_range(load, 4);
   load->num_components = 1;
   nir_ssa_dest_init(&load->instr, &load->dest, 1, 32, "draw_mode_is_indexed");
   nir_builder_instr_insert(b, &load->instr);

   nir_ssa_def *composite = nir_build_alu(b, nir_op_bcsel,
                                          nir_build_alu(b, nir_op_ieq, &load->dest.ssa, nir_imm_int(b, 1), NULL, NULL),
                                          &instr->dest.ssa,
                                          nir_imm_int(b, 0),
                                          NULL);

   nir_ssa_def_rewrite_uses_after(&instr->dest.ssa, composite,
                                  composite->parent_instr);
   return true;
}

static bool
lower_basevertex(nir_shader *shader)
{
   if (shader->info.stage != MESA_SHADER_VERTEX)
      return false;

   if (!BITSET_TEST(shader->info.system_values_read, SYSTEM_VALUE_BASE_VERTEX))
      return false;

   return nir_shader_instructions_pass(shader, lower_basevertex_instr, nir_metadata_dominance, NULL);
}
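
/* A sketch of why the bcsel above is needed: GL requires gl_BaseVertex to
 * read as 0 for non-indexed draws, while Vulkan's BaseVertex built-in always
 * carries the draw's vertex offset, so the lowered value is
 *
 *    base_vertex = (draw_mode_is_indexed == 1) ? BaseVertex : 0
 *
 * with draw_mode_is_indexed supplied through the vs push constants created
 * earlier.
 */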

static bool
lower_drawid_instr(nir_builder *b, nir_instr *in, void *data)
{
   if (in->type != nir_instr_type_intrinsic)
      return false;
   nir_intrinsic_instr *instr = nir_instr_as_intrinsic(in);
   if (instr->intrinsic != nir_intrinsic_load_draw_id)
      return false;

   b->cursor = nir_before_instr(&instr->instr);
   nir_intrinsic_instr *load = nir_intrinsic_instr_create(b->shader, nir_intrinsic_load_push_constant);
   load->src[0] = nir_src_for_ssa(nir_imm_int(b, 1));
   nir_intrinsic_set_range(load, 4);
   load->num_components = 1;
   nir_ssa_dest_init(&load->instr, &load->dest, 1, 32, "draw_id");
   nir_builder_instr_insert(b, &load->instr);

   nir_ssa_def_rewrite_uses(&instr->dest.ssa, &load->dest.ssa);

   return true;
}

static bool
lower_drawid(nir_shader *shader)
{
   if (shader->info.stage != MESA_SHADER_VERTEX)
      return false;

   if (!BITSET_TEST(shader->info.system_values_read, SYSTEM_VALUE_DRAW_ID))
      return false;

   return nir_shader_instructions_pass(shader, lower_drawid_instr, nir_metadata_dominance, NULL);
}

static bool
lower_dual_blend(nir_shader *shader)
{
   bool progress = false;
   nir_variable *var = nir_find_variable_with_location(shader, nir_var_shader_out, FRAG_RESULT_DATA1);
   if (var) {
      var->data.location = FRAG_RESULT_DATA0;
      var->data.index = 1;
      progress = true;
   }
   nir_shader_preserve_all_metadata(shader);
   return progress;
}
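
/* The rewrite above mirrors what explicit dual-source GLSL would declare
 * (a sketch):
 *
 *    layout(location = 0, index = 0) out vec4 color0;  // FRAG_RESULT_DATA0
 *    layout(location = 0, index = 1) out vec4 color1;  // DATA1 -> DATA0, index 1
 *
 * i.e. both outputs share location 0 and are distinguished only by the
 * blend index.
 */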

void
zink_screen_init_compiler(struct zink_screen *screen)
{
   static const struct nir_shader_compiler_options
   default_options = {
      .lower_ffma16 = true,
      .lower_ffma32 = true,
      .lower_ffma64 = true,
      .lower_scmp = true,
      .lower_fdph = true,
      .lower_flrp32 = true,
      .lower_fpow = true,
      .lower_fsat = true,
      .lower_extract_byte = true,
      .lower_extract_word = true,
      .lower_insert_byte = true,
      .lower_insert_word = true,
      .lower_mul_high = true,
      .lower_rotate = true,
      .lower_uadd_carry = true,
      .lower_pack_64_2x32_split = true,
      .lower_unpack_64_2x32_split = true,
      .lower_pack_32_2x16_split = true,
      .lower_unpack_32_2x16_split = true,
      .lower_vector_cmp = true,
      .lower_int64_options = 0,
      .lower_doubles_options = ~nir_lower_fp64_full_software,
      .lower_uniforms_to_ubo = true,
      .has_fsub = true,
      .has_isub = true,
      .lower_mul_2x32_64 = true,
      .support_16bit_alu = true, /* not quite what it sounds like */
   };

   screen->nir_options = default_options;

   if (!screen->info.feats.features.shaderInt64)
      screen->nir_options.lower_int64_options = ~0;

   if (!screen->info.feats.features.shaderFloat64) {
      screen->nir_options.lower_doubles_options = ~0;
      screen->nir_options.lower_flrp64 = true;
      screen->nir_options.lower_ffma64 = true;
   }
}

const void *
zink_get_compiler_options(struct pipe_screen *pscreen,
                          enum pipe_shader_ir ir,
                          enum pipe_shader_type shader)
{
   assert(ir == PIPE_SHADER_IR_NIR);
   return &zink_screen(pscreen)->nir_options;
}

struct nir_shader *
zink_tgsi_to_nir(struct pipe_screen *screen, const struct tgsi_token *tokens)
{
   if (zink_debug & ZINK_DEBUG_TGSI) {
      fprintf(stderr, "TGSI shader:\n---8<---\n");
      tgsi_dump_to_file(tokens, 0, stderr);
      fprintf(stderr, "---8<---\n\n");
   }

   return tgsi_to_nir(tokens, screen, false);
}

static void
optimize_nir(struct nir_shader *s)
{
   bool progress;
   do {
      progress = false;
      NIR_PASS_V(s, nir_lower_vars_to_ssa);
      NIR_PASS(progress, s, nir_copy_prop);
      NIR_PASS(progress, s, nir_opt_remove_phis);
      NIR_PASS(progress, s, nir_opt_dce);
      NIR_PASS(progress, s, nir_opt_dead_cf);
      NIR_PASS(progress, s, nir_opt_cse);
      NIR_PASS(progress, s, nir_opt_peephole_select, 8, true, true);
      NIR_PASS(progress, s, nir_opt_algebraic);
      NIR_PASS(progress, s, nir_opt_constant_folding);
      NIR_PASS(progress, s, nir_opt_undef);
      NIR_PASS(progress, s, zink_nir_lower_b2b);
   } while (progress);

   do {
      progress = false;
      NIR_PASS(progress, s, nir_opt_algebraic_late);
      if (progress) {
         NIR_PASS_V(s, nir_copy_prop);
         NIR_PASS_V(s, nir_opt_dce);
         NIR_PASS_V(s, nir_opt_cse);
      }
   } while (progress);
}

/* - copy the lowered fbfetch variable
 * - set the new one up as an input attachment for descriptor 0.6
 * - load it as an image
 * - overwrite the previous load
 */
static bool
lower_fbfetch_instr(nir_builder *b, nir_instr *instr, void *data)
{
   if (instr->type != nir_instr_type_intrinsic)
      return false;
   nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
   if (intr->intrinsic != nir_intrinsic_load_deref)
      return false;
   nir_variable *var = nir_deref_instr_get_variable(nir_src_as_deref(intr->src[0]));
   if (var != data)
      return false;
   b->cursor = nir_after_instr(instr);
   nir_variable *fbfetch = nir_variable_clone(data, b->shader);
   /* If Dim is SubpassData, ... Image Format must be Unknown
    * - SPIRV OpTypeImage specification
    */
   fbfetch->data.image.format = 0;
   fbfetch->data.index = 0; /* fix this if more than 1 fbfetch target is supported */
   fbfetch->data.mode = nir_var_uniform;
   fbfetch->data.binding = ZINK_FBFETCH_BINDING;
   fbfetch->type = glsl_image_type(GLSL_SAMPLER_DIM_SUBPASS, false, GLSL_TYPE_FLOAT);
   nir_shader_add_variable(b->shader, fbfetch);
   nir_ssa_def *deref = &nir_build_deref_var(b, fbfetch)->dest.ssa;
   nir_ssa_def *load = nir_image_deref_load(b, 4, 32, deref, nir_imm_vec4(b, 0, 0, 0, 1), nir_ssa_undef(b, 1, 32), nir_imm_int(b, 0));
   unsigned swiz[4] = {2, 1, 0, 3};
   nir_ssa_def *swizzle = nir_swizzle(b, load, swiz, 4);
   nir_ssa_def_rewrite_uses(&intr->dest.ssa, swizzle);
   return true;
}
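
/* Roughly the GLSL-level equivalent of the rewrite above (a sketch):
 *
 *    vec4 prev = gl_LastFragData[0];
 *
 * becomes
 *
 *    vec4 prev = subpassLoad(u_fbfetch).bgra;
 *
 * where u_fbfetch (hypothetical name) is the subpass input attachment bound
 * at ZINK_FBFETCH_BINDING, and .bgra matches the {2, 1, 0, 3} swizzle.
 */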

static bool
lower_fbfetch(nir_shader *shader, nir_variable **fbfetch)
{
   nir_foreach_shader_out_variable(var, shader) {
      if (var->data.fb_fetch_output) {
         *fbfetch = var;
         break;
      }
   }
   assert(*fbfetch);
   if (!*fbfetch)
      return false;
   return nir_shader_instructions_pass(shader, lower_fbfetch_instr, nir_metadata_dominance, *fbfetch);
}

/* check for a genuine gl_PointSize output vs one from nir_lower_point_size_mov */
static bool
check_psiz(struct nir_shader *s)
{
   nir_foreach_shader_out_variable(var, s) {
      if (var->data.location == VARYING_SLOT_PSIZ) {
         /* genuine PSIZ outputs will have this set */
         return !!var->data.explicit_location;
      }
   }
   return false;
}

static void
update_so_info(struct zink_shader *zs, const struct pipe_stream_output_info *so_info,
               uint64_t outputs_written, bool have_psiz)
{
   uint8_t reverse_map[64] = {0};
   unsigned slot = 0;
   /* semi-copied from iris */
   while (outputs_written) {
      int bit = u_bit_scan64(&outputs_written);
      /* PSIZ from nir_lower_point_size_mov breaks stream output, so always skip it */
      if (bit == VARYING_SLOT_PSIZ && !have_psiz)
         continue;
      reverse_map[slot++] = bit;
   }

   nir_foreach_shader_out_variable(var, zs->nir)
      var->data.explicit_xfb_buffer = 0;

   bool inlined[64] = {0};
   for (unsigned i = 0; i < so_info->num_outputs; i++) {
      const struct pipe_stream_output *output = &so_info->output[i];
      unsigned slot = reverse_map[output->register_index];
      /* always set stride to be used during draw */
      zs->streamout.so_info.stride[output->output_buffer] = so_info->stride[output->output_buffer];
      if ((zs->nir->info.stage != MESA_SHADER_GEOMETRY || util_bitcount(zs->nir->info.gs.active_stream_mask) == 1) &&
          !output->start_component) {
         nir_variable *var = NULL;
         while (!var)
            var = nir_find_variable_with_location(zs->nir, nir_var_shader_out, slot--);
         slot++;
         if (inlined[slot])
            continue;
         assert(var && var->data.location == slot);
         /* if this is the entire variable, try to blast it out during the initial declaration */
         if (glsl_get_components(var->type) == output->num_components) {
            var->data.explicit_xfb_buffer = 1;
            var->data.xfb.buffer = output->output_buffer;
            var->data.xfb.stride = so_info->stride[output->output_buffer] * 4;
            var->data.offset = output->dst_offset * 4;
            var->data.stream = output->stream;
            inlined[slot] = true;
            continue;
         }
      }
      zs->streamout.so_info.output[zs->streamout.so_info.num_outputs] = *output;
      /* Map Gallium's condensed "slots" back to real VARYING_SLOT_* enums */
      zs->streamout.so_info_slots[zs->streamout.so_info.num_outputs++] = reverse_map[output->register_index];
   }
   zs->streamout.have_xfb = !!zs->streamout.so_info.num_outputs;
}
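
/* A worked example of reverse_map (a sketch): for a shader writing POS plus
 * two generic varyings, outputs_written has the VARYING_SLOT_POS, VAR0, and
 * VAR1 bits set, so the scan loop above produces
 *
 *    reverse_map[0] = VARYING_SLOT_POS
 *    reverse_map[1] = VARYING_SLOT_VAR0
 *    reverse_map[2] = VARYING_SLOT_VAR1
 *
 * i.e. it maps Gallium's densely-packed register_index values back to the
 * VARYING_SLOT_* enums used by the nir variables.
 */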

struct decompose_state {
  nir_variable **split;
  bool needs_w;
};

static bool
lower_attrib(nir_builder *b, nir_instr *instr, void *data)
{
   struct decompose_state *state = data;
   nir_variable **split = state->split;
   if (instr->type != nir_instr_type_intrinsic)
      return false;
   nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
   if (intr->intrinsic != nir_intrinsic_load_deref)
      return false;
   nir_deref_instr *deref = nir_src_as_deref(intr->src[0]);
   nir_variable *var = nir_deref_instr_get_variable(deref);
   if (var != split[0])
      return false;
   unsigned num_components = glsl_get_vector_elements(split[0]->type);
   b->cursor = nir_after_instr(instr);
   nir_ssa_def *loads[4];
   for (unsigned i = 0; i < (state->needs_w ? num_components - 1 : num_components); i++)
      loads[i] = nir_load_deref(b, nir_build_deref_var(b, split[i+1]));
   if (state->needs_w) {
      /* oob load w component to get correct value for int/float */
      loads[3] = nir_channel(b, loads[0], 3);
      loads[0] = nir_channel(b, loads[0], 0);
   }
   nir_ssa_def *new_load = nir_vec(b, loads, num_components);
   nir_ssa_def_rewrite_uses(&intr->dest.ssa, new_load);
   nir_instr_remove_v(instr);
   return true;
}

static bool
decompose_attribs(nir_shader *nir, uint32_t decomposed_attrs, uint32_t decomposed_attrs_without_w)
{
   uint32_t bits = 0;
   nir_foreach_variable_with_modes(var, nir, nir_var_shader_in)
      bits |= BITFIELD_BIT(var->data.driver_location);
   bits = ~bits;
   u_foreach_bit(location, decomposed_attrs | decomposed_attrs_without_w) {
      nir_variable *split[5];
      struct decompose_state state;
      state.split = split;
      nir_variable *var = nir_find_variable_with_driver_location(nir, nir_var_shader_in, location);
      assert(var);
      split[0] = var;
      bits |= BITFIELD_BIT(var->data.driver_location);
      const struct glsl_type *new_type = glsl_type_is_scalar(var->type) ? var->type : glsl_get_array_element(var->type);
      unsigned num_components = glsl_get_vector_elements(var->type);
      state.needs_w = (decomposed_attrs_without_w & BITFIELD_BIT(location)) != 0 && num_components == 4;
      for (unsigned i = 0; i < (state.needs_w ? num_components - 1 : num_components); i++) {
         split[i+1] = nir_variable_clone(var, nir);
         split[i+1]->name = ralloc_asprintf(nir, "%s_split%u", var->name, i);
         if (decomposed_attrs_without_w & BITFIELD_BIT(location))
            split[i+1]->type = !i && num_components == 4 ? var->type : new_type;
         else
            split[i+1]->type = new_type;
         split[i+1]->data.driver_location = ffs(bits) - 1;
         bits &= ~BITFIELD_BIT(split[i+1]->data.driver_location);
         nir_shader_add_variable(nir, split[i+1]);
      }
      var->data.mode = nir_var_shader_temp;
      nir_shader_instructions_pass(nir, lower_attrib, nir_metadata_dominance, &state);
   }
   nir_fixup_deref_modes(nir);
   NIR_PASS_V(nir, nir_remove_dead_variables, nir_var_shader_temp, NULL);
   optimize_nir(nir);
   return true;
}
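
/* An illustration of the decomposition (a sketch): for a vec4 attribute
 * whose vertex format has to be fetched per-component,
 *
 *    in vec4 attr;                          // driver_location N
 *
 * is replaced by up to four inputs named attr_split0..attr_split3 on free
 * driver_locations, and each load_deref of attr is rebuilt as a nir_vec of
 * their loads. In the needs_w case only three replacements are created; the
 * first keeps the vec4 type and .w is read out-of-bounds from it, which
 * yields the format's default w value (the point of the comment above).
 */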

static void
assign_producer_var_io(gl_shader_stage stage, nir_variable *var, unsigned *reserved, unsigned char *slot_map)
{
   unsigned slot = var->data.location;
   switch (var->data.location) {
   case VARYING_SLOT_POS:
   case VARYING_SLOT_PNTC:
   case VARYING_SLOT_PSIZ:
   case VARYING_SLOT_LAYER:
   case VARYING_SLOT_PRIMITIVE_ID:
   case VARYING_SLOT_CLIP_DIST0:
   case VARYING_SLOT_CULL_DIST0:
   case VARYING_SLOT_VIEWPORT:
   case VARYING_SLOT_FACE:
   case VARYING_SLOT_TESS_LEVEL_OUTER:
   case VARYING_SLOT_TESS_LEVEL_INNER:
      /* use a sentinel value to avoid counting later */
      var->data.driver_location = UINT_MAX;
      break;

   default:
      if (var->data.patch) {
         assert(var->data.location >= VARYING_SLOT_PATCH0);
         slot = var->data.location - VARYING_SLOT_PATCH0;
      } else if (var->data.location >= VARYING_SLOT_VAR0 &&
                 var->data.mode == nir_var_shader_in &&
                 stage == MESA_SHADER_TESS_EVAL) {
         slot = var->data.location - VARYING_SLOT_VAR0;
      } else {
         if (slot_map[var->data.location] == 0xff) {
            assert(*reserved < MAX_VARYING);
            slot_map[var->data.location] = *reserved;
            *reserved += glsl_count_vec4_slots(var->type, false, false);
         }
         slot = slot_map[var->data.location];
         assert(slot < MAX_VARYING);
      }
      var->data.driver_location = slot;
   }
}

ALWAYS_INLINE static bool
is_texcoord(gl_shader_stage stage, const nir_variable *var)
{
   if (stage != MESA_SHADER_FRAGMENT)
      return false;
   return var->data.location >= VARYING_SLOT_TEX0 &&
          var->data.location <= VARYING_SLOT_TEX7;
}

static bool
assign_consumer_var_io(gl_shader_stage stage, nir_variable *var, unsigned *reserved, unsigned char *slot_map)
{
   switch (var->data.location) {
   case VARYING_SLOT_POS:
   case VARYING_SLOT_PNTC:
   case VARYING_SLOT_PSIZ:
   case VARYING_SLOT_LAYER:
   case VARYING_SLOT_PRIMITIVE_ID:
   case VARYING_SLOT_CLIP_DIST0:
   case VARYING_SLOT_CULL_DIST0:
   case VARYING_SLOT_VIEWPORT:
   case VARYING_SLOT_FACE:
   case VARYING_SLOT_TESS_LEVEL_OUTER:
   case VARYING_SLOT_TESS_LEVEL_INNER:
      /* use a sentinel value to avoid counting later */
      var->data.driver_location = UINT_MAX;
      break;
   default:
      if (var->data.patch) {
         assert(var->data.location >= VARYING_SLOT_PATCH0);
         var->data.driver_location = var->data.location - VARYING_SLOT_PATCH0;
      } else if (var->data.location >= VARYING_SLOT_VAR0 &&
                 stage == MESA_SHADER_TESS_CTRL &&
                 var->data.mode == nir_var_shader_out)
         var->data.driver_location = var->data.location - VARYING_SLOT_VAR0;
      else {
         if (slot_map[var->data.location] == (unsigned char)-1) {
            if (!is_texcoord(stage, var))
               /* dead io */
               return false;
            /* texcoords can't be eliminated in fs due to GL_COORD_REPLACE */
            slot_map[var->data.location] = (*reserved)++;
         }
         var->data.driver_location = slot_map[var->data.location];
      }
   }
   return true;
}


static bool
rewrite_and_discard_read(nir_builder *b, nir_instr *instr, void *data)
{
   nir_variable *var = data;
   if (instr->type != nir_instr_type_intrinsic)
      return false;

   nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
   if (intr->intrinsic != nir_intrinsic_load_deref)
      return false;
   nir_variable *deref_var = nir_intrinsic_get_var(intr, 0);
   if (deref_var != var)
      return false;
   nir_ssa_def *undef = nir_ssa_undef(b, nir_dest_num_components(intr->dest), nir_dest_bit_size(intr->dest));
   nir_ssa_def_rewrite_uses(&intr->dest.ssa, undef);
   return true;
}

void
zink_compiler_assign_io(nir_shader *producer, nir_shader *consumer)
{
   unsigned reserved = 0;
   unsigned char slot_map[VARYING_SLOT_MAX];
   memset(slot_map, -1, sizeof(slot_map));
   bool do_fixup = false;
   nir_shader *nir = producer->info.stage == MESA_SHADER_TESS_CTRL ? producer : consumer;
   if (producer->info.stage == MESA_SHADER_TESS_CTRL) {
      /* never assign from tcs -> tes, always invert */
      nir_foreach_variable_with_modes(var, consumer, nir_var_shader_in)
         assign_producer_var_io(consumer->info.stage, var, &reserved, slot_map);
      nir_foreach_variable_with_modes_safe(var, producer, nir_var_shader_out) {
         if (!assign_consumer_var_io(producer->info.stage, var, &reserved, slot_map))
            /* this is an output, nothing more needs to be done for it to be dropped */
            do_fixup = true;
      }
   } else {
      nir_foreach_variable_with_modes(var, producer, nir_var_shader_out)
         assign_producer_var_io(producer->info.stage, var, &reserved, slot_map);
      nir_foreach_variable_with_modes_safe(var, consumer, nir_var_shader_in) {
         if (!assign_consumer_var_io(consumer->info.stage, var, &reserved, slot_map)) {
            do_fixup = true;
            /* input needs to be rewritten as an undef to ensure the entire deref chain is deleted */
            nir_shader_instructions_pass(consumer, rewrite_and_discard_read, nir_metadata_dominance, var);
         }
      }
   }
   if (!do_fixup)
      return;
   nir_fixup_deref_modes(nir);
   NIR_PASS_V(nir, nir_remove_dead_variables, nir_var_shader_temp, NULL);
   optimize_nir(nir);
}

VkShaderModule
zink_shader_compile(struct zink_screen *screen, struct zink_shader *zs, nir_shader *base_nir, const struct zink_shader_key *key)
{
   VkShaderModule mod = VK_NULL_HANDLE;
   void *streamout = NULL;
   nir_shader *nir = nir_shader_clone(NULL, base_nir);

   if (key) {
      if (key->inline_uniforms) {
         NIR_PASS_V(nir, nir_inline_uniforms,
                    nir->info.num_inlinable_uniforms,
                    key->base.inlined_uniform_values,
                    nir->info.inlinable_uniform_dw_offsets);

         optimize_nir(nir);

         /* This must be done again. */
         NIR_PASS_V(nir, nir_io_add_const_offset_to_base, nir_var_shader_in |
                                                          nir_var_shader_out);
      }

      /* TODO: use a separate mem ctx here for ralloc */
      switch (zs->nir->info.stage) {
      case MESA_SHADER_VERTEX: {
         uint32_t decomposed_attrs = 0, decomposed_attrs_without_w = 0;
         const struct zink_vs_key *vs_key = zink_vs_key(key);
         switch (vs_key->size) {
         case 4:
            decomposed_attrs = vs_key->u32.decomposed_attrs;
            decomposed_attrs_without_w = vs_key->u32.decomposed_attrs_without_w;
            break;
         case 2:
            decomposed_attrs = vs_key->u16.decomposed_attrs;
            decomposed_attrs_without_w = vs_key->u16.decomposed_attrs_without_w;
            break;
         case 1:
            decomposed_attrs = vs_key->u8.decomposed_attrs;
            decomposed_attrs_without_w = vs_key->u8.decomposed_attrs_without_w;
            break;
         default: break;
         }
         if (decomposed_attrs || decomposed_attrs_without_w)
            NIR_PASS_V(nir, decompose_attribs, decomposed_attrs, decomposed_attrs_without_w);
         FALLTHROUGH;
      }
      case MESA_SHADER_TESS_EVAL:
      case MESA_SHADER_GEOMETRY:
         if (zink_vs_key_base(key)->last_vertex_stage) {
            if (zs->streamout.have_xfb)
               streamout = &zs->streamout;

            if (!zink_vs_key_base(key)->clip_halfz) {
               NIR_PASS_V(nir, nir_lower_clip_halfz);
            }
            if (zink_vs_key_base(key)->push_drawid) {
               NIR_PASS_V(nir, lower_drawid);
            }
         }
         break;
      case MESA_SHADER_FRAGMENT:
         if (!zink_fs_key(key)->samples &&
             nir->info.outputs_written & BITFIELD64_BIT(FRAG_RESULT_SAMPLE_MASK)) {
            /* VK will always use gl_SampleMask[] values even if sample count is 0,
             * so we need to skip this write here to mimic GL's behavior of ignoring it
             */
            nir_foreach_shader_out_variable(var, nir) {
               if (var->data.location == FRAG_RESULT_SAMPLE_MASK)
                  var->data.mode = nir_var_shader_temp;
            }
            nir_fixup_deref_modes(nir);
            NIR_PASS_V(nir, nir_remove_dead_variables, nir_var_shader_temp, NULL);
            optimize_nir(nir);
         }
         if (zink_fs_key(key)->force_dual_color_blend && nir->info.outputs_written & BITFIELD64_BIT(FRAG_RESULT_DATA1)) {
            NIR_PASS_V(nir, lower_dual_blend);
         }
         if (zink_fs_key(key)->coord_replace_bits) {
            NIR_PASS_V(nir, nir_lower_texcoord_replace, zink_fs_key(key)->coord_replace_bits,
                       false, zink_fs_key(key)->coord_replace_yinvert);
         }
         if (nir->info.fs.uses_fbfetch_output) {
            nir_variable *fbfetch = NULL;
            NIR_PASS_V(nir, lower_fbfetch, &fbfetch);
            /* old variable must be deleted to avoid spirv errors */
            fbfetch->data.mode = nir_var_shader_temp;
            nir_fixup_deref_modes(nir);
            NIR_PASS_V(nir, nir_remove_dead_variables, nir_var_shader_temp, NULL);
            optimize_nir(nir);
         }
         break;
      default: break;
      }
   }
   NIR_PASS_V(nir, nir_convert_from_ssa, true);

   struct spirv_shader *spirv = nir_to_spirv(nir, streamout, screen->spirv_version);
   if (!spirv)
      goto done;

   if (zink_debug & ZINK_DEBUG_SPIRV) {
      char buf[256];
      static int i;
      snprintf(buf, sizeof(buf), "dump%02d.spv", i++);
      FILE *fp = fopen(buf, "wb");
      if (fp) {
         fwrite(spirv->words, sizeof(uint32_t), spirv->num_words, fp);
         fclose(fp);
         fprintf(stderr, "wrote '%s'...\n", buf);
      }
   }

   VkShaderModuleCreateInfo smci = {0};
   smci.sType = VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO;
   smci.codeSize = spirv->num_words * sizeof(uint32_t);
   smci.pCode = spirv->words;

   if (VKSCR(CreateShaderModule)(screen->dev, &smci, NULL, &mod) != VK_SUCCESS)
      mod = VK_NULL_HANDLE;

done:
   ralloc_free(nir);

   /* TODO: determine if there's any reason to cache spirv output? */
   ralloc_free(spirv);
   return mod;
}

static bool
lower_baseinstance_instr(nir_builder *b, nir_instr *instr, void *data)
{
   if (instr->type != nir_instr_type_intrinsic)
      return false;
   nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
   if (intr->intrinsic != nir_intrinsic_load_instance_id)
      return false;
   b->cursor = nir_after_instr(instr);
   nir_ssa_def *def = nir_isub(b, &intr->dest.ssa, nir_load_base_instance(b));
   nir_ssa_def_rewrite_uses_after(&intr->dest.ssa, def, def->parent_instr);
   return true;
}

static bool
lower_baseinstance(nir_shader *shader)
{
   if (shader->info.stage != MESA_SHADER_VERTEX)
      return false;
   return nir_shader_instructions_pass(shader, lower_baseinstance_instr, nir_metadata_dominance, NULL);
}
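
/* The subtraction above encodes the GL/Vulkan semantic difference
 * (a sketch): GL's gl_InstanceID is zero-based regardless of the draw's
 * firstInstance, while Vulkan's InstanceIndex includes it, so
 *
 *    gl_InstanceID = InstanceIndex - BaseInstance
 */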

bool nir_lower_dynamic_bo_access(nir_shader *shader);

/* gl_nir_lower_buffers makes variables unusable for all UBO/SSBO access
 * so instead we delete all those broken variables and just make new ones
 */
static bool
unbreak_bos(nir_shader *shader)
{
   uint32_t ssbo_used = 0;
   uint32_t ubo_used = 0;
   uint64_t max_ssbo_size = 0;
   uint64_t max_ubo_size = 0;
   bool ssbo_sizes[PIPE_MAX_SHADER_BUFFERS] = {false};

   if (!shader->info.num_ssbos && !shader->info.num_ubos && !shader->num_uniforms)
      return false;
   nir_function_impl *impl = nir_shader_get_entrypoint(shader);
   nir_foreach_block(block, impl) {
      nir_foreach_instr(instr, block) {
         if (instr->type != nir_instr_type_intrinsic)
            continue;

         nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
         switch (intrin->intrinsic) {
         case nir_intrinsic_store_ssbo:
            ssbo_used |= BITFIELD_BIT(nir_src_as_uint(intrin->src[1]));
            break;

         case nir_intrinsic_get_ssbo_size: {
            uint32_t slot = nir_src_as_uint(intrin->src[0]);
            ssbo_used |= BITFIELD_BIT(slot);
            ssbo_sizes[slot] = true;
            break;
         }
         case nir_intrinsic_ssbo_atomic_add:
         case nir_intrinsic_ssbo_atomic_imin:
         case nir_intrinsic_ssbo_atomic_umin:
         case nir_intrinsic_ssbo_atomic_imax:
         case nir_intrinsic_ssbo_atomic_umax:
         case nir_intrinsic_ssbo_atomic_and:
         case nir_intrinsic_ssbo_atomic_or:
         case nir_intrinsic_ssbo_atomic_xor:
         case nir_intrinsic_ssbo_atomic_exchange:
         case nir_intrinsic_ssbo_atomic_comp_swap:
         case nir_intrinsic_ssbo_atomic_fmin:
         case nir_intrinsic_ssbo_atomic_fmax:
         case nir_intrinsic_ssbo_atomic_fcomp_swap:
         case nir_intrinsic_load_ssbo:
            ssbo_used |= BITFIELD_BIT(nir_src_as_uint(intrin->src[0]));
            break;
         case nir_intrinsic_load_ubo:
         case nir_intrinsic_load_ubo_vec4:
            ubo_used |= BITFIELD_BIT(nir_src_as_uint(intrin->src[0]));
            break;
         default:
            break;
         }
      }
   }

   nir_foreach_variable_with_modes(var, shader, nir_var_mem_ssbo | nir_var_mem_ubo) {
      const struct glsl_type *type = glsl_without_array(var->type);
      if (type_is_counter(type))
         continue;
      unsigned size = glsl_count_attribute_slots(glsl_type_is_array(var->type) ? var->type : type, false);
      if (var->data.mode == nir_var_mem_ubo)
         max_ubo_size = MAX2(max_ubo_size, size);
      else
         max_ssbo_size = MAX2(max_ssbo_size, size);
      var->data.mode = nir_var_shader_temp;
   }
   nir_fixup_deref_modes(shader);
   NIR_PASS_V(shader, nir_remove_dead_variables, nir_var_shader_temp, NULL);
   optimize_nir(shader);

   if (!ssbo_used && !ubo_used)
      return false;

   struct glsl_struct_field *fields = rzalloc_array(shader, struct glsl_struct_field, 2);
   fields[0].name = ralloc_strdup(shader, "base");
   fields[1].name = ralloc_strdup(shader, "unsized");
   if (ubo_used) {
      const struct glsl_type *ubo_type = glsl_array_type(glsl_uint_type(), max_ubo_size * 4, 4);
      fields[0].type = ubo_type;
      u_foreach_bit(slot, ubo_used) {
         char buf[64];
         snprintf(buf, sizeof(buf), "ubo_slot_%u", slot);
         nir_variable *var = nir_variable_create(shader, nir_var_mem_ubo, glsl_struct_type(fields, 1, "struct", false), buf);
         var->interface_type = var->type;
         var->data.driver_location = slot;
      }
   }
   if (ssbo_used) {
      const struct glsl_type *ssbo_type = glsl_array_type(glsl_uint_type(), max_ssbo_size * 4, 4);
      const struct glsl_type *unsized = glsl_array_type(glsl_uint_type(), 0, 4);
      fields[0].type = ssbo_type;
      u_foreach_bit(slot, ssbo_used) {
         char buf[64];
         snprintf(buf, sizeof(buf), "ssbo_slot_%u", slot);
         if (ssbo_sizes[slot])
            fields[1].type = unsized;
         else
            fields[1].type = NULL;
         nir_variable *var = nir_variable_create(shader, nir_var_mem_ssbo,
                                                 glsl_struct_type(fields, 1 + !!ssbo_sizes[slot], "struct", false), buf);
         var->interface_type = var->type;
         var->data.driver_location = slot;
      }
   }
   return true;
}
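
/* The replacement variables created above look roughly like this in GLSL
 * terms (a sketch; every slot shares one worst-case size):
 *
 *    uniform struct { uint base[max_ubo_size * 4]; } ubo_slot_N;
 *    buffer  struct { uint base[max_ssbo_size * 4];
 *                     uint unsized[]; } ssbo_slot_N;
 *
 * so all buffer access can be expressed as uint array derefs regardless of
 * how the original interface blocks were declared.
 */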

/* this is a "default" bindless texture used if the shader has no texture variables */
static nir_variable *
create_bindless_texture(nir_shader *nir, nir_tex_instr *tex)
{
   unsigned binding = tex->sampler_dim == GLSL_SAMPLER_DIM_BUF ? 1 : 0;
   nir_variable *var;

   const struct glsl_type *sampler_type = glsl_sampler_type(tex->sampler_dim, tex->is_shadow, tex->is_array, GLSL_TYPE_FLOAT);
   var = nir_variable_create(nir, nir_var_uniform, glsl_array_type(sampler_type, ZINK_MAX_BINDLESS_HANDLES, 0), "bindless_texture");
   var->data.descriptor_set = ZINK_DESCRIPTOR_BINDLESS;
   var->data.driver_location = var->data.binding = binding;
   return var;
}

/* this is a "default" bindless image used if the shader has no image variables */
static nir_variable *
create_bindless_image(nir_shader *nir, enum glsl_sampler_dim dim)
{
   unsigned binding = dim == GLSL_SAMPLER_DIM_BUF ? 3 : 2;
   nir_variable *var;

   const struct glsl_type *image_type = glsl_image_type(dim, false, GLSL_TYPE_FLOAT);
   var = nir_variable_create(nir, nir_var_uniform, glsl_array_type(image_type, ZINK_MAX_BINDLESS_HANDLES, 0), "bindless_image");
   var->data.descriptor_set = ZINK_DESCRIPTOR_BINDLESS;
   var->data.driver_location = var->data.binding = binding;
   var->data.image.format = PIPE_FORMAT_R8G8B8A8_UNORM;
   return var;
}

/* rewrite bindless instructions as array deref instructions */
static bool
lower_bindless_instr(nir_builder *b, nir_instr *in, void *data)
{
   nir_variable **bindless = data;

   if (in->type == nir_instr_type_tex) {
      nir_tex_instr *tex = nir_instr_as_tex(in);
      int idx = nir_tex_instr_src_index(tex, nir_tex_src_texture_handle);
      if (idx == -1)
         return false;

      nir_variable *var = tex->sampler_dim == GLSL_SAMPLER_DIM_BUF ? bindless[1] : bindless[0];
      if (!var)
         var = create_bindless_texture(b->shader, tex);
      b->cursor = nir_before_instr(in);
      nir_deref_instr *deref = nir_build_deref_var(b, var);
      if (glsl_type_is_array(var->type))
         deref = nir_build_deref_array(b, deref, nir_u2uN(b, tex->src[idx].src.ssa, 32));
      nir_instr_rewrite_src_ssa(in, &tex->src[idx].src, &deref->dest.ssa);

      /* bindless sampling uses the variable type directly, which means the tex instr has to exactly
       * match up with it in contrast to normal sampler ops where things are a bit more flexible;
       * this results in cases where a shader is passed with sampler2DArray but the tex instr only has
       * 2 components, which explodes spirv compilation even though it doesn't trigger validation errors
       *
       * to fix this, pad the coord src here and fix the tex instr so that ntv will do the "right" thing
       * - Warhammer 40k: Dawn of War III
       */
      unsigned needed_components = glsl_get_sampler_coordinate_components(glsl_without_array(var->type));
      unsigned c = nir_tex_instr_src_index(tex, nir_tex_src_coord);
      unsigned coord_components = nir_src_num_components(tex->src[c].src);
      if (coord_components < needed_components) {
         nir_ssa_def *def = nir_pad_vector(b, tex->src[c].src.ssa, needed_components);
         nir_instr_rewrite_src_ssa(in, &tex->src[c].src, def);
         tex->coord_components = needed_components;
      }
      return true;
   }
   if (in->type != nir_instr_type_intrinsic)
      return false;
   nir_intrinsic_instr *instr = nir_instr_as_intrinsic(in);

   nir_intrinsic_op op;
#define OP_SWAP(OP) \
   case nir_intrinsic_bindless_image_##OP: \
      op = nir_intrinsic_image_deref_##OP; \
      break;

   /* convert bindless intrinsics to deref intrinsics */
   switch (instr->intrinsic) {
   OP_SWAP(atomic_add)
   OP_SWAP(atomic_and)
   OP_SWAP(atomic_comp_swap)
   OP_SWAP(atomic_dec_wrap)
   OP_SWAP(atomic_exchange)
   OP_SWAP(atomic_fadd)
   OP_SWAP(atomic_fmax)
   OP_SWAP(atomic_fmin)
   OP_SWAP(atomic_imax)
   OP_SWAP(atomic_imin)
   OP_SWAP(atomic_inc_wrap)
   OP_SWAP(atomic_or)
   OP_SWAP(atomic_umax)
   OP_SWAP(atomic_umin)
   OP_SWAP(atomic_xor)
   OP_SWAP(format)
   OP_SWAP(load)
   OP_SWAP(order)
   OP_SWAP(samples)
   OP_SWAP(size)
   OP_SWAP(store)
   default:
      return false;
   }

   enum glsl_sampler_dim dim = nir_intrinsic_image_dim(instr);
   nir_variable *var = dim == GLSL_SAMPLER_DIM_BUF ? bindless[3] : bindless[2];
   if (!var)
      var = create_bindless_image(b->shader, dim);
   instr->intrinsic = op;
   b->cursor = nir_before_instr(in);
   nir_deref_instr *deref = nir_build_deref_var(b, var);
   if (glsl_type_is_array(var->type))
      deref = nir_build_deref_array(b, deref, nir_u2uN(b, instr->src[0].ssa, 32));
   nir_instr_rewrite_src_ssa(in, &instr->src[0], &deref->dest.ssa);
   return true;
}

static bool
lower_bindless(nir_shader *shader, nir_variable **bindless)
{
   if (!nir_shader_instructions_pass(shader, lower_bindless_instr, nir_metadata_dominance, bindless))
      return false;
   nir_fixup_deref_modes(shader);
   NIR_PASS_V(shader, nir_remove_dead_variables, nir_var_shader_temp, NULL);
   optimize_nir(shader);
   return true;
}
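
/* The net effect in NIR terms (a sketch):
 *
 *    intrinsic bindless_image_load (handle, coord, ...)
 *
 * becomes
 *
 *    deref = &bindless_image[u2u32(handle)];
 *    intrinsic image_deref_load (deref, coord, ...)
 *
 * i.e. every bindless handle is treated as an index into one large
 * ZINK_MAX_BINDLESS_HANDLES-sized descriptor array on the bindless set.
 */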

/* convert shader image/texture io variables to int64 handles for bindless indexing */
static bool
lower_bindless_io_instr(nir_builder *b, nir_instr *in, void *data)
{
   if (in->type != nir_instr_type_intrinsic)
      return false;
   nir_intrinsic_instr *instr = nir_instr_as_intrinsic(in);
   if (instr->intrinsic != nir_intrinsic_load_deref &&
       instr->intrinsic != nir_intrinsic_store_deref)
      return false;

   nir_deref_instr *src_deref = nir_src_as_deref(instr->src[0]);
   nir_variable *var = nir_deref_instr_get_variable(src_deref);
   if (var->data.bindless)
      return false;
   if (var->data.mode != nir_var_shader_in && var->data.mode != nir_var_shader_out)
      return false;
   if (!glsl_type_is_image(var->type) && !glsl_type_is_sampler(var->type))
      return false;

   var->type = glsl_int64_t_type();
   var->data.bindless = 1;
   b->cursor = nir_before_instr(in);
   nir_deref_instr *deref = nir_build_deref_var(b, var);
   if (instr->intrinsic == nir_intrinsic_load_deref) {
      nir_ssa_def *def = nir_load_deref(b, deref);
      nir_instr_rewrite_src_ssa(in, &instr->src[0], def);
      nir_ssa_def_rewrite_uses(&instr->dest.ssa, def);
   } else {
      nir_store_deref(b, deref, instr->src[1].ssa, nir_intrinsic_write_mask(instr));
   }
   nir_instr_remove(in);
   nir_instr_remove(&src_deref->instr);
   return true;
}

static bool
lower_bindless_io(nir_shader *shader)
{
   return nir_shader_instructions_pass(shader, lower_bindless_io_instr, nir_metadata_dominance, NULL);
}

static uint32_t
zink_binding(gl_shader_stage stage, VkDescriptorType type, int index)
{
   if (stage == MESA_SHADER_NONE) {
      unreachable("not supported");
   } else {
      switch (type) {
      case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER:
      case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC:
         assert(index < PIPE_MAX_CONSTANT_BUFFERS);
         return (stage * PIPE_MAX_CONSTANT_BUFFERS) + index;

      case VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER:
      case VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER:
         assert(index < PIPE_MAX_SAMPLERS);
         return (stage * PIPE_MAX_SAMPLERS) + index;

      case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER:
         assert(index < PIPE_MAX_SHADER_BUFFERS);
         return (stage * PIPE_MAX_SHADER_BUFFERS) + index;

      case VK_DESCRIPTOR_TYPE_STORAGE_IMAGE:
      case VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER:
         assert(index < PIPE_MAX_SHADER_IMAGES);
         return (stage * PIPE_MAX_SHADER_IMAGES) + index;

      default:
         unreachable("unexpected type");
      }
   }
}
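
/* A worked example of the binding arithmetic above (a sketch, assuming
 * gallium's PIPE_MAX_CONSTANT_BUFFERS == 32): a UBO at index 2 in the
 * fragment stage (MESA_SHADER_FRAGMENT == 4) lands at binding
 * 4 * 32 + 2 == 130, giving each stage its own non-overlapping binding
 * range per descriptor type.
 */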
1248 
1249 static void
handle_bindless_var(nir_shader * nir,nir_variable * var,const struct glsl_type * type,nir_variable ** bindless)1250 handle_bindless_var(nir_shader *nir, nir_variable *var, const struct glsl_type *type, nir_variable **bindless)
1251 {
1252    if (glsl_type_is_struct(type)) {
1253       for (unsigned i = 0; i < glsl_get_length(type); i++)
1254          handle_bindless_var(nir, var, glsl_get_struct_field(type, i), bindless);
1255       return;
1256    }
1257 
1258    /* just a random scalar in a struct */
1259    if (!glsl_type_is_image(type) && !glsl_type_is_sampler(type))
1260       return;
1261 
1262    VkDescriptorType vktype = glsl_type_is_image(type) ? zink_image_type(type) : zink_sampler_type(type);
1263    unsigned binding;
1264    switch (vktype) {
1265       case VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER:
1266          binding = 0;
1267          break;
1268       case VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER:
1269          binding = 1;
1270          break;
1271       case VK_DESCRIPTOR_TYPE_STORAGE_IMAGE:
1272          binding = 2;
1273          break;
1274       case VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER:
1275          binding = 3;
1276          break;
1277       default:
1278          unreachable("unknown");
1279    }
1280    if (!bindless[binding]) {
1281       bindless[binding] = nir_variable_clone(var, nir);
1282       bindless[binding]->data.bindless = 0;
1283       bindless[binding]->data.descriptor_set = ZINK_DESCRIPTOR_BINDLESS;
1284       bindless[binding]->type = glsl_array_type(type, ZINK_MAX_BINDLESS_HANDLES, 0);
1285       bindless[binding]->data.driver_location = bindless[binding]->data.binding = binding;
1286       if (!bindless[binding]->data.image.format)
1287          bindless[binding]->data.image.format = PIPE_FORMAT_R8G8B8A8_UNORM;
1288       nir_shader_add_variable(nir, bindless[binding]);
1289    } else {
1290       assert(glsl_get_sampler_dim(glsl_without_array(bindless[binding]->type)) == glsl_get_sampler_dim(glsl_without_array(var->type)));
1291    }
1292    var->data.mode = nir_var_shader_temp;
1293 }
1294 
1295 static enum pipe_prim_type
gl_prim_to_pipe(unsigned primitive_type)1296 gl_prim_to_pipe(unsigned primitive_type)
1297 {
1298    switch (primitive_type) {
1299    case GL_POINTS:
1300       return PIPE_PRIM_POINTS;
1301    case GL_LINES:
1302    case GL_LINE_LOOP:
1303    case GL_LINE_STRIP:
1304    case GL_LINES_ADJACENCY:
1305    case GL_LINE_STRIP_ADJACENCY:
1306    case GL_ISOLINES:
1307       return PIPE_PRIM_LINES;
1308    default:
1309       return PIPE_PRIM_TRIANGLES;
1310    }
1311 }
1312 
static enum pipe_prim_type
get_shader_base_prim_type(struct nir_shader *nir)
{
   switch (nir->info.stage) {
   case MESA_SHADER_GEOMETRY:
      return gl_prim_to_pipe(nir->info.gs.output_primitive);
   case MESA_SHADER_TESS_EVAL:
      return nir->info.tess.point_mode ? PIPE_PRIM_POINTS : gl_prim_to_pipe(nir->info.tess.primitive_mode);
   default:
      break;
   }
   return PIPE_PRIM_MAX;
}

struct zink_shader *
zink_shader_create(struct zink_screen *screen, struct nir_shader *nir,
                   const struct pipe_stream_output_info *so_info)
{
   struct zink_shader *ret = CALLOC_STRUCT(zink_shader);
   bool have_psiz = false;

   ret->hash = _mesa_hash_pointer(ret);
   ret->reduced_prim = get_shader_base_prim_type(nir);

   ret->programs = _mesa_pointer_set_create(NULL);
   simple_mtx_init(&ret->lock, mtx_plain);

   nir_variable_mode indirect_derefs_modes = nir_var_function_temp;
   if (nir->info.stage == MESA_SHADER_TESS_CTRL ||
       nir->info.stage == MESA_SHADER_TESS_EVAL)
      indirect_derefs_modes |= nir_var_shader_in | nir_var_shader_out;

   NIR_PASS_V(nir, nir_lower_indirect_derefs, indirect_derefs_modes,
              UINT32_MAX);

   if (nir->info.stage == MESA_SHADER_VERTEX)
      create_vs_pushconst(nir);
   else if (nir->info.stage == MESA_SHADER_TESS_CTRL ||
            nir->info.stage == MESA_SHADER_TESS_EVAL)
      NIR_PASS_V(nir, nir_lower_io_arrays_to_elements_no_indirects, false);
   else if (nir->info.stage == MESA_SHADER_KERNEL)
      create_cs_pushconst(nir);

   if (nir->info.stage < MESA_SHADER_FRAGMENT)
      have_psiz = check_psiz(nir);
   NIR_PASS_V(nir, lower_basevertex);
   NIR_PASS_V(nir, lower_work_dim);
   NIR_PASS_V(nir, nir_lower_regs_to_ssa);
   NIR_PASS_V(nir, lower_baseinstance);

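   /* lower subgroup ops into forms ntv can emit: scalarized, with ballots
    * returned as a uvec4 of 32-bit words, which is enough to cover every
    * subgroup size the device can report via the Vulkan 1.1 core properties
    */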
   {
      nir_lower_subgroups_options subgroup_options = {0};
      subgroup_options.lower_to_scalar = true;
      subgroup_options.subgroup_size = screen->info.props11.subgroupSize;
      subgroup_options.ballot_bit_size = 32;
      subgroup_options.ballot_components = 4;
      subgroup_options.lower_subgroup_masks = true;
      NIR_PASS_V(nir, nir_lower_subgroups, &subgroup_options);
   }

   optimize_nir(nir);
   NIR_PASS_V(nir, nir_remove_dead_variables, nir_var_function_temp, NULL);
   NIR_PASS_V(nir, lower_discard_if);
   NIR_PASS_V(nir, nir_lower_fragcolor,
         nir->info.fs.color_is_dual_source ? 1 : 8);
   NIR_PASS_V(nir, lower_64bit_vertex_attribs);
   NIR_PASS_V(nir, unbreak_bos);

   if (zink_debug & ZINK_DEBUG_NIR) {
      fprintf(stderr, "NIR shader:\n---8<---\n");
      nir_print_shader(nir, stderr);
      fprintf(stderr, "---8<---\n");
   }

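   /* image/sampler types in the shader_in/shader_out lists here are bindless
    * handles being passed between stages; lower_bindless_io rewrites them
    * into plain handle values
    */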
   nir_variable *bindless[4] = {0};
   bool has_bindless_io = false;
   nir_foreach_variable_with_modes(var, nir, nir_var_shader_in | nir_var_shader_out) {
      if (glsl_type_is_image(var->type) || glsl_type_is_sampler(var->type)) {
         has_bindless_io = true;
         break;
      }
   }
   if (has_bindless_io)
      NIR_PASS_V(nir, lower_bindless_io);

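   /* assign Vulkan descriptor sets/bindings and record them for pipeline
    * layout creation: UBO 0 of every stage becomes a dynamic UBO in set 0
    * (the push descriptor set); all other resources land in set ztype + 1
    * for their zink_descriptor_type
    */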
   foreach_list_typed_reverse_safe(nir_variable, var, node, &nir->variables) {
      if (_nir_shader_variable_has_mode(var, nir_var_uniform |
                                        nir_var_mem_ubo |
                                        nir_var_mem_ssbo)) {
         enum zink_descriptor_type ztype;
         const struct glsl_type *type = glsl_without_array(var->type);
         if (var->data.mode == nir_var_mem_ubo) {
            ztype = ZINK_DESCRIPTOR_TYPE_UBO;
            /* buffer 0 is a push descriptor */
            var->data.descriptor_set = !!var->data.driver_location;
            var->data.binding = !var->data.driver_location ? nir->info.stage :
                                zink_binding(nir->info.stage,
                                             VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER,
                                             var->data.driver_location);
            assert(var->data.driver_location || var->data.binding < 10);
            VkDescriptorType vktype = !var->data.driver_location ? VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC : VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER;
            int binding = var->data.binding;

            ret->bindings[ztype][ret->num_bindings[ztype]].index = var->data.driver_location;
            ret->bindings[ztype][ret->num_bindings[ztype]].binding = binding;
            ret->bindings[ztype][ret->num_bindings[ztype]].type = vktype;
            ret->bindings[ztype][ret->num_bindings[ztype]].size = 1;
            ret->ubos_used |= (1 << ret->bindings[ztype][ret->num_bindings[ztype]].index);
            ret->num_bindings[ztype]++;
         } else if (var->data.mode == nir_var_mem_ssbo) {
            ztype = ZINK_DESCRIPTOR_TYPE_SSBO;
            var->data.descriptor_set = ztype + 1;
            var->data.binding = zink_binding(nir->info.stage,
                                             VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
                                             var->data.driver_location);
            ret->bindings[ztype][ret->num_bindings[ztype]].index = var->data.driver_location;
            ret->ssbos_used |= (1 << ret->bindings[ztype][ret->num_bindings[ztype]].index);
            ret->bindings[ztype][ret->num_bindings[ztype]].binding = var->data.binding;
            ret->bindings[ztype][ret->num_bindings[ztype]].type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER;
            ret->bindings[ztype][ret->num_bindings[ztype]].size = 1;
            ret->num_bindings[ztype]++;
         } else {
            assert(var->data.mode == nir_var_uniform);
            if (var->data.bindless) {
               ret->bindless = true;
               handle_bindless_var(nir, var, type, bindless);
            } else if (glsl_type_is_sampler(type) || glsl_type_is_image(type)) {
               VkDescriptorType vktype = glsl_type_is_image(type) ? zink_image_type(type) : zink_sampler_type(type);
               ztype = zink_desc_type_from_vktype(vktype);
               if (vktype == VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER)
                  ret->num_texel_buffers++;
               var->data.driver_location = var->data.binding;
               var->data.descriptor_set = ztype + 1;
               var->data.binding = zink_binding(nir->info.stage, vktype, var->data.driver_location);
               ret->bindings[ztype][ret->num_bindings[ztype]].index = var->data.driver_location;
               ret->bindings[ztype][ret->num_bindings[ztype]].binding = var->data.binding;
               ret->bindings[ztype][ret->num_bindings[ztype]].type = vktype;
               if (glsl_type_is_array(var->type))
                  ret->bindings[ztype][ret->num_bindings[ztype]].size = glsl_get_aoa_size(var->type);
               else
                  ret->bindings[ztype][ret->num_bindings[ztype]].size = 1;
               ret->num_bindings[ztype]++;
            }
         }
      }
   }
   bool bindless_lowered = false;
   NIR_PASS(bindless_lowered, nir, lower_bindless, bindless);
   ret->bindless |= bindless_lowered;

   ret->nir = nir;
   if (so_info && nir->info.outputs_written && nir->info.has_transform_feedback_varyings)
      update_so_info(ret, so_info, nir->info.outputs_written, have_psiz);

   return ret;
}

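/* screen-dependent lowering that must run once before a shader reaches
 * zink_shader_create(); this is installed as the screen's finalize_nir hook
 * (hence the char * return, unused here)
 */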
char *
zink_shader_finalize(struct pipe_screen *pscreen, void *nirptr)
{
   struct zink_screen *screen = zink_screen(pscreen);
   nir_shader *nir = nirptr;

   if (!screen->info.feats.features.shaderImageGatherExtended) {
      nir_lower_tex_options tex_opts = {0};
      tex_opts.lower_tg4_offsets = true;
      NIR_PASS_V(nir, nir_lower_tex, &tex_opts);
   }
   NIR_PASS_V(nir, nir_lower_uniforms_to_ubo, true, false);
   if (nir->info.stage == MESA_SHADER_GEOMETRY)
      NIR_PASS_V(nir, nir_lower_gs_intrinsics, nir_lower_gs_intrinsics_per_stream);
   optimize_nir(nir);
   if (nir->info.num_ubos || nir->info.num_ssbos)
      NIR_PASS_V(nir, nir_lower_dynamic_bo_access);
   nir_shader_gather_info(nir, nir_shader_get_entrypoint(nir));
   if (screen->driconf.inline_uniforms)
      nir_find_inlinable_uniforms(nir);

   return NULL;
}

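/* unlink the shader from every program still holding it and drop those
 * program references; a TCS generated by zink_shader_tcs_create() is owned
 * by its TES and freed along with it below
 */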
void
zink_shader_free(struct zink_context *ctx, struct zink_shader *shader)
{
   struct zink_screen *screen = zink_screen(ctx->base.screen);
   set_foreach(shader->programs, entry) {
      if (shader->nir->info.stage == MESA_SHADER_COMPUTE) {
         struct zink_compute_program *comp = (void*)entry->key;
         if (!comp->base.removed) {
            _mesa_hash_table_remove_key(&ctx->compute_program_cache, comp->shader);
            comp->base.removed = true;
         }
         comp->shader = NULL;
         zink_compute_program_reference(screen, &comp, NULL);
      } else {
         struct zink_gfx_program *prog = (void*)entry->key;
         enum pipe_shader_type pstage = pipe_shader_type_from_mesa(shader->nir->info.stage);
         assert(pstage < ZINK_SHADER_COUNT);
         if (!prog->base.removed && (shader->nir->info.stage != MESA_SHADER_TESS_CTRL || !shader->is_generated)) {
            _mesa_hash_table_remove_key(&ctx->program_cache[prog->stages_present >> 2], prog->shaders);
            prog->base.removed = true;
         }
         prog->shaders[pstage] = NULL;
         if (shader->nir->info.stage == MESA_SHADER_TESS_EVAL && shader->generated)
            /* automatically destroy generated tcs shaders when tes is destroyed */
            zink_shader_free(ctx, shader->generated);
         zink_gfx_program_reference(screen, &prog, NULL);
      }
   }
   _mesa_set_destroy(shader->programs, NULL);
   ralloc_free(shader->nir);
   FREE(shader);
}


/* creating a passthrough tcs shader that's roughly:

#version 150
#extension GL_ARB_tessellation_shader : require

in vec4 some_var[gl_MaxPatchVertices];
out vec4 some_var_out;

layout(push_constant) uniform tcsPushConstants {
    layout(offset = 0) float TessLevelInner[2];
    layout(offset = 8) float TessLevelOuter[4];
} u_tcsPushConstants;
layout(vertices = $vertices_per_patch) out;
void main()
{
  gl_TessLevelInner = u_tcsPushConstants.TessLevelInner;
  gl_TessLevelOuter = u_tcsPushConstants.TessLevelOuter;
  some_var_out = some_var[gl_InvocationID];
}

*/
struct zink_shader *
zink_shader_tcs_create(struct zink_screen *screen, struct zink_shader *vs, unsigned vertices_per_patch)
{
   struct zink_shader *ret = CALLOC_STRUCT(zink_shader);
   ret->hash = _mesa_hash_pointer(ret);
   ret->programs = _mesa_pointer_set_create(NULL);
   simple_mtx_init(&ret->lock, mtx_plain);

   nir_shader *nir = nir_shader_create(NULL, MESA_SHADER_TESS_CTRL, &screen->nir_options, NULL);
   nir_function *fn = nir_function_create(nir, "main");
   fn->is_entrypoint = true;
   nir_function_impl *impl = nir_function_impl_create(fn);

   nir_builder b;
   nir_builder_init(&b, impl);
   b.cursor = nir_before_block(nir_start_block(impl));

   nir_ssa_def *invocation_id = nir_load_invocation_id(&b);

   nir_foreach_shader_out_variable(var, vs->nir) {
      const struct glsl_type *type = var->type;
      const struct glsl_type *in_type = var->type;
      const struct glsl_type *out_type = var->type;
      char buf[1024];
      snprintf(buf, sizeof(buf), "%s_out", var->name);
      in_type = glsl_array_type(type, 32 /* MAX_PATCH_VERTICES */, 0);
      out_type = glsl_array_type(type, vertices_per_patch, 0);

      nir_variable *in = nir_variable_create(nir, nir_var_shader_in, in_type, var->name);
      nir_variable *out = nir_variable_create(nir, nir_var_shader_out, out_type, buf);
      out->data.location = in->data.location = var->data.location;
      out->data.location_frac = in->data.location_frac = var->data.location_frac;

      /* gl_in[] receives values from equivalent built-in output
         variables written by the vertex shader (section 2.14.7).  Each array
         element of gl_in[] is a structure holding values for a specific vertex of
         the input patch.  The length of gl_in[] is equal to the
         implementation-dependent maximum patch size (gl_MaxPatchVertices).
         - ARB_tessellation_shader
       */
      for (unsigned i = 0; i < vertices_per_patch; i++) {
         /* we need to load the invocation-specific value of the vertex output and then store it to the per-patch output */
         nir_if *start_block = nir_push_if(&b, nir_ieq(&b, invocation_id, nir_imm_int(&b, i)));
         nir_deref_instr *in_array_var = nir_build_deref_array(&b, nir_build_deref_var(&b, in), invocation_id);
         nir_ssa_def *load = nir_load_deref(&b, in_array_var);
         nir_deref_instr *out_array_var = nir_build_deref_array_imm(&b, nir_build_deref_var(&b, out), i);
         nir_store_deref(&b, out_array_var, load, 0xff);
         nir_pop_if(&b, start_block);
      }
   }
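
   /* with no app-provided TCS there is nothing to write the tess levels, so
    * load the defaults the driver places in the gfx push constants (the
    * default_inner_level/default_outer_level fields referenced below) and
    * copy them into the gl_TessLevel* built-ins
    */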
   nir_variable *gl_TessLevelInner = nir_variable_create(nir, nir_var_shader_out, glsl_array_type(glsl_float_type(), 2, 0), "gl_TessLevelInner");
   gl_TessLevelInner->data.location = VARYING_SLOT_TESS_LEVEL_INNER;
   gl_TessLevelInner->data.patch = 1;
   nir_variable *gl_TessLevelOuter = nir_variable_create(nir, nir_var_shader_out, glsl_array_type(glsl_float_type(), 4, 0), "gl_TessLevelOuter");
   gl_TessLevelOuter->data.location = VARYING_SLOT_TESS_LEVEL_OUTER;
   gl_TessLevelOuter->data.patch = 1;

   /* hacks so we can size these right for now */
   struct glsl_struct_field *fields = rzalloc_array(nir, struct glsl_struct_field, 3);
   /* just use a single blob for padding here because it's easier */
   fields[0].type = glsl_array_type(glsl_uint_type(), offsetof(struct zink_gfx_push_constant, default_inner_level) / 4, 0);
   fields[0].name = ralloc_asprintf(nir, "padding");
   fields[0].offset = 0;
   fields[1].type = glsl_array_type(glsl_uint_type(), 2, 0);
   fields[1].name = ralloc_asprintf(nir, "gl_TessLevelInner");
   fields[1].offset = offsetof(struct zink_gfx_push_constant, default_inner_level);
   fields[2].type = glsl_array_type(glsl_uint_type(), 4, 0);
   fields[2].name = ralloc_asprintf(nir, "gl_TessLevelOuter");
   fields[2].offset = offsetof(struct zink_gfx_push_constant, default_outer_level);
   nir_variable *pushconst = nir_variable_create(nir, nir_var_mem_push_const,
                                                 glsl_struct_type(fields, 3, "struct", false), "pushconst");
   pushconst->data.location = VARYING_SLOT_VAR0;

   nir_ssa_def *load_inner = nir_load_push_constant(&b, 2, 32, nir_imm_int(&b, 1), .base = 1, .range = 8);
   nir_ssa_def *load_outer = nir_load_push_constant(&b, 4, 32, nir_imm_int(&b, 2), .base = 2, .range = 16);

   for (unsigned i = 0; i < 2; i++) {
      nir_deref_instr *store_idx = nir_build_deref_array_imm(&b, nir_build_deref_var(&b, gl_TessLevelInner), i);
      nir_store_deref(&b, store_idx, nir_channel(&b, load_inner, i), 0xff);
   }
   for (unsigned i = 0; i < 4; i++) {
      nir_deref_instr *store_idx = nir_build_deref_array_imm(&b, nir_build_deref_var(&b, gl_TessLevelOuter), i);
      nir_store_deref(&b, store_idx, nir_channel(&b, load_outer, i), 0xff);
   }

   nir->info.tess.tcs_vertices_out = vertices_per_patch;
   nir_validate_shader(nir, "created");

   NIR_PASS_V(nir, nir_lower_regs_to_ssa);
   optimize_nir(nir);
   NIR_PASS_V(nir, nir_remove_dead_variables, nir_var_function_temp, NULL);
   NIR_PASS_V(nir, lower_discard_if);
   NIR_PASS_V(nir, nir_convert_from_ssa, true);

   ret->nir = nir;
   ret->is_generated = true;
   return ret;
}
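
/* hypothetical caller sketch (not part of this file): when a TES is bound
 * without a TCS, the context can generate the passthrough TCS on the fly,
 * e.g. something like:
 *
 *    if (ctx->gfx_stages[PIPE_SHADER_TESS_EVAL] &&
 *        !ctx->gfx_stages[PIPE_SHADER_TESS_CTRL])
 *       ctx->gfx_stages[PIPE_SHADER_TESS_CTRL] =
 *          zink_shader_tcs_create(screen,
 *                                 ctx->gfx_stages[PIPE_SHADER_VERTEX],
 *                                 vertices_per_patch);
 *
 * the ctx field names here are assumptions about the surrounding driver code
 */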