1 /*
2 * Copyright 2018 Collabora Ltd.
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * on the rights to use, copy, modify, merge, publish, distribute, sub
8 * license, and/or sell copies of the Software, and to permit persons to whom
9 * the Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
19 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
20 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
21 * USE OR OTHER DEALINGS IN THE SOFTWARE.
22 */
23
24 #include "zink_context.h"
25 #include "zink_compiler.h"
26 #include "zink_program.h"
27 #include "zink_screen.h"
28 #include "nir_to_spirv/nir_to_spirv.h"
29
30 #include "pipe/p_state.h"
31
32 #include "nir.h"
33 #include "compiler/nir/nir_builder.h"
34
35 #include "nir/tgsi_to_nir.h"
36 #include "tgsi/tgsi_dump.h"
37 #include "tgsi/tgsi_from_mesa.h"
38
39 #include "util/u_memory.h"
40
/* add a push-constant block to a vertex-stage shader whose field layout matches
 * struct zink_gfx_push_constant so the ntv push-constant loader can read
 * draw_mode_is_indexed and draw_id from it
 */
static void
create_vs_pushconst(nir_shader *nir)
{
   nir_variable *vs_pushconst;
   /* create compatible layout for the ntv push constant loader */
   struct glsl_struct_field *fields = rzalloc_array(nir, struct glsl_struct_field, 2);
   /* consumed by lower_basevertex_instr */
   fields[0].type = glsl_array_type(glsl_uint_type(), 1, 0);
   fields[0].name = ralloc_asprintf(nir, "draw_mode_is_indexed");
   fields[0].offset = offsetof(struct zink_gfx_push_constant, draw_mode_is_indexed);
   /* consumed by lower_drawid_instr */
   fields[1].type = glsl_array_type(glsl_uint_type(), 1, 0);
   fields[1].name = ralloc_asprintf(nir, "draw_id");
   fields[1].offset = offsetof(struct zink_gfx_push_constant, draw_id);
   vs_pushconst = nir_variable_create(nir, nir_var_mem_push_const,
                                      glsl_struct_type(fields, 2, "struct", false), "vs_pushconst");
   vs_pushconst->data.location = INT_MAX; //doesn't really matter
}
57
58 static void
create_cs_pushconst(nir_shader * nir)59 create_cs_pushconst(nir_shader *nir)
60 {
61 nir_variable *cs_pushconst;
62 /* create compatible layout for the ntv push constant loader */
63 struct glsl_struct_field *fields = rzalloc_size(nir, 1 * sizeof(struct glsl_struct_field));
64 fields[0].type = glsl_array_type(glsl_uint_type(), 1, 0);
65 fields[0].name = ralloc_asprintf(nir, "work_dim");
66 fields[0].offset = 0;
67 cs_pushconst = nir_variable_create(nir, nir_var_mem_push_const,
68 glsl_struct_type(fields, 1, "struct", false), "cs_pushconst");
69 cs_pushconst->data.location = INT_MAX; //doesn't really matter
70 }
71
72 static bool
reads_work_dim(nir_shader * shader)73 reads_work_dim(nir_shader *shader)
74 {
75 return BITSET_TEST(shader->info.system_values_read, SYSTEM_VALUE_WORK_DIM);
76 }
77
/* rewrite discard_if intrinsics into an if-block wrapping an unconditional
 * discard; returns true when the instruction was rewritten
 */
static bool
lower_discard_if_instr(nir_builder *b, nir_instr *instr_, UNUSED void *cb_data)
{
   if (instr_->type != nir_instr_type_intrinsic)
      return false;

   nir_intrinsic_instr *instr = nir_instr_as_intrinsic(instr_);

   if (instr->intrinsic == nir_intrinsic_discard_if) {
      b->cursor = nir_before_instr(&instr->instr);

      /* push an if using the original condition, emit a plain discard inside it,
       * then delete the original discard_if */
      nir_if *if_stmt = nir_push_if(b, nir_ssa_for_src(b, instr->src[0], 1));
      nir_discard(b);
      nir_pop_if(b, if_stmt);
      nir_instr_remove(&instr->instr);
      return true;
   }
   /* a shader like this (shaders@glsl-fs-discard-04):

      uniform int j, k;

      void main()
      {
       for (int i = 0; i < j; i++) {
        if (i > k)
         continue;
        discard;
       }
       gl_FragColor = vec4(0.0, 1.0, 0.0, 0.0);
      }



      will generate nir like:

      loop {
         //snip
         if  ssa_11 {
            block block_5:
            / preds: block_4 /
            vec1 32 ssa_17 = iadd ssa_50, ssa_31
            / succs: block_7 /
         } else {
            block block_6:
            / preds: block_4 /
            intrinsic discard () () <-- not last instruction
            vec1 32 ssa_23 = iadd ssa_50, ssa_31 <-- dead code loop itr increment
            / succs: block_7 /
         }
         //snip
      }

      which means that we can't assert like this:

      assert(instr->intrinsic != nir_intrinsic_discard ||
             nir_block_last_instr(instr->instr.block) == &instr->instr);


      and it's unnecessary anyway since post-vtn optimizing will dce the instructions following the discard
    */

   return false;
}
141
142 static bool
lower_discard_if(nir_shader * shader)143 lower_discard_if(nir_shader *shader)
144 {
145 return nir_shader_instructions_pass(shader,
146 lower_discard_if_instr,
147 nir_metadata_dominance,
148 NULL);
149 }
150
151 static bool
lower_work_dim_instr(nir_builder * b,nir_instr * in,void * data)152 lower_work_dim_instr(nir_builder *b, nir_instr *in, void *data)
153 {
154 if (in->type != nir_instr_type_intrinsic)
155 return false;
156 nir_intrinsic_instr *instr = nir_instr_as_intrinsic(in);
157 if (instr->intrinsic != nir_intrinsic_load_work_dim)
158 return false;
159
160 if (instr->intrinsic == nir_intrinsic_load_work_dim) {
161 b->cursor = nir_after_instr(&instr->instr);
162 nir_intrinsic_instr *load = nir_intrinsic_instr_create(b->shader, nir_intrinsic_load_push_constant);
163 load->src[0] = nir_src_for_ssa(nir_imm_int(b, 0));
164 nir_intrinsic_set_range(load, 3 * sizeof(uint32_t));
165 load->num_components = 1;
166 nir_ssa_dest_init(&load->instr, &load->dest, 1, 32, "work_dim");
167 nir_builder_instr_insert(b, &load->instr);
168
169 nir_ssa_def_rewrite_uses(&instr->dest.ssa, &load->dest.ssa);
170 }
171
172 return true;
173 }
174
175 static bool
lower_work_dim(nir_shader * shader)176 lower_work_dim(nir_shader *shader)
177 {
178 if (shader->info.stage != MESA_SHADER_KERNEL)
179 return false;
180
181 if (!reads_work_dim(shader))
182 return false;
183
184 return nir_shader_instructions_pass(shader, lower_work_dim_instr, nir_metadata_dominance, NULL);
185 }
186
/* split a dvec3/dvec4 vertex input deref into two variables on consecutive
 * locations and reassemble the loaded halves into the original composite
 * value (see the pass-level comment on lower_64bit_vertex_attribs below)
 */
static bool
lower_64bit_vertex_attribs_instr(nir_builder *b, nir_instr *instr, void *data)
{
   if (instr->type != nir_instr_type_deref)
      return false;
   nir_deref_instr *deref = nir_instr_as_deref(instr);
   if (deref->deref_type != nir_deref_type_var)
      return false;
   nir_variable *var = nir_deref_instr_get_variable(deref);
   if (var->data.mode != nir_var_shader_in)
      return false;
   /* only 64-bit vectors with 3+ components consume two locations */
   if (!glsl_type_is_64bit(var->type) || !glsl_type_is_vector(var->type) || glsl_get_vector_elements(var->type) < 3)
      return false;

   /* create second variable for the split */
   nir_variable *var2 = nir_variable_clone(var, b->shader);
   /* split new variable into second slot */
   var2->data.driver_location++;
   nir_shader_add_variable(b->shader, var2);

   unsigned total_num_components = glsl_get_vector_elements(var->type);
   /* new variable is the second half of the dvec */
   var2->type = glsl_vector_type(glsl_get_base_type(var->type), glsl_get_vector_elements(var->type) - 2);
   /* clamp original variable to a dvec2 */
   deref->type = var->type = glsl_vector_type(glsl_get_base_type(var->type), 2);

   /* create deref instr for new variable */
   b->cursor = nir_after_instr(instr);
   nir_deref_instr *deref2 = nir_build_deref_var(b, var2);

   /* safe iteration: each use is rewritten/removed as we go */
   nir_foreach_use_safe(use_src, &deref->dest.ssa) {
      nir_instr *use_instr = use_src->parent_instr;
      assert(use_instr->type == nir_instr_type_intrinsic &&
             nir_instr_as_intrinsic(use_instr)->intrinsic == nir_intrinsic_load_deref);

      /* this is a load instruction for the deref, and we need to split it into two instructions that we can
       * then zip back into a single ssa def */
      nir_intrinsic_instr *intr = nir_instr_as_intrinsic(use_instr);
      /* clamp the first load to 2 64bit components */
      intr->num_components = intr->dest.ssa.num_components = 2;
      b->cursor = nir_after_instr(use_instr);
      /* this is the second load instruction for the second half of the dvec3/4 components */
      nir_intrinsic_instr *intr2 = nir_intrinsic_instr_create(b->shader, nir_intrinsic_load_deref);
      intr2->src[0] = nir_src_for_ssa(&deref2->dest.ssa);
      intr2->num_components = total_num_components - 2;
      nir_ssa_dest_init(&intr2->instr, &intr2->dest, intr2->num_components, 64, NULL);
      nir_builder_instr_insert(b, &intr2->instr);

      nir_ssa_def *def[4];
      /* create a new dvec3/4 comprised of all the loaded components from both variables */
      def[0] = nir_vector_extract(b, &intr->dest.ssa, nir_imm_int(b, 0));
      def[1] = nir_vector_extract(b, &intr->dest.ssa, nir_imm_int(b, 1));
      def[2] = nir_vector_extract(b, &intr2->dest.ssa, nir_imm_int(b, 0));
      if (total_num_components == 4)
         def[3] = nir_vector_extract(b, &intr2->dest.ssa, nir_imm_int(b, 1));
      nir_ssa_def *new_vec = nir_vec(b, def, total_num_components);
      /* use the assembled dvec3/4 for all other uses of the load */
      nir_ssa_def_rewrite_uses_after(&intr->dest.ssa, new_vec,
                                     new_vec->parent_instr);
   }

   return true;
}
250
251 /* "64-bit three- and four-component vectors consume two consecutive locations."
252 * - 14.1.4. Location Assignment
253 *
254 * this pass splits dvec3 and dvec4 vertex inputs into a dvec2 and a double/dvec2 which
255 * are assigned to consecutive locations, loaded separately, and then assembled back into a
256 * composite value that's used in place of the original loaded ssa src
257 */
258 static bool
lower_64bit_vertex_attribs(nir_shader * shader)259 lower_64bit_vertex_attribs(nir_shader *shader)
260 {
261 if (shader->info.stage != MESA_SHADER_VERTEX)
262 return false;
263
264 return nir_shader_instructions_pass(shader, lower_64bit_vertex_attribs_instr, nir_metadata_dominance, NULL);
265 }
266
/* rewrite load_base_vertex so it yields 0 for non-indexed draws:
 * reads the draw_mode_is_indexed push constant (offset 0, set up in
 * create_vs_pushconst) and bcsel's between the original value and 0
 */
static bool
lower_basevertex_instr(nir_builder *b, nir_instr *in, void *data)
{
   if (in->type != nir_instr_type_intrinsic)
      return false;
   nir_intrinsic_instr *instr = nir_instr_as_intrinsic(in);
   if (instr->intrinsic != nir_intrinsic_load_base_vertex)
      return false;

   /* insert after the original load so its ssa def can feed the bcsel */
   b->cursor = nir_after_instr(&instr->instr);
   nir_intrinsic_instr *load = nir_intrinsic_instr_create(b->shader, nir_intrinsic_load_push_constant);
   load->src[0] = nir_src_for_ssa(nir_imm_int(b, 0));
   nir_intrinsic_set_range(load, 4);
   load->num_components = 1;
   nir_ssa_dest_init(&load->instr, &load->dest, 1, 32, "draw_mode_is_indexed");
   nir_builder_instr_insert(b, &load->instr);

   /* composite = (draw_mode_is_indexed == 1) ? base_vertex : 0 */
   nir_ssa_def *composite = nir_build_alu(b, nir_op_bcsel,
                                          nir_build_alu(b, nir_op_ieq, &load->dest.ssa, nir_imm_int(b, 1), NULL, NULL),
                                          &instr->dest.ssa,
                                          nir_imm_int(b, 0),
                                          NULL);

   /* rewrite uses only after the bcsel so the bcsel itself keeps reading the
    * original base_vertex */
   nir_ssa_def_rewrite_uses_after(&instr->dest.ssa, composite,
                                  composite->parent_instr);
   return true;
}
294
295 static bool
lower_basevertex(nir_shader * shader)296 lower_basevertex(nir_shader *shader)
297 {
298 if (shader->info.stage != MESA_SHADER_VERTEX)
299 return false;
300
301 if (!BITSET_TEST(shader->info.system_values_read, SYSTEM_VALUE_BASE_VERTEX))
302 return false;
303
304 return nir_shader_instructions_pass(shader, lower_basevertex_instr, nir_metadata_dominance, NULL);
305 }
306
307
/* replace load_draw_id with a load of the draw_id push constant
 * (offset 1, set up in create_vs_pushconst)
 */
static bool
lower_drawid_instr(nir_builder *b, nir_instr *in, void *data)
{
   if (in->type != nir_instr_type_intrinsic)
      return false;
   nir_intrinsic_instr *instr = nir_instr_as_intrinsic(in);
   if (instr->intrinsic != nir_intrinsic_load_draw_id)
      return false;

   /* unlike lower_basevertex_instr, the replacement fully supersedes the
    * original load, so insert before it and rewrite all uses */
   b->cursor = nir_before_instr(&instr->instr);
   nir_intrinsic_instr *load = nir_intrinsic_instr_create(b->shader, nir_intrinsic_load_push_constant);
   load->src[0] = nir_src_for_ssa(nir_imm_int(b, 1));
   nir_intrinsic_set_range(load, 4);
   load->num_components = 1;
   nir_ssa_dest_init(&load->instr, &load->dest, 1, 32, "draw_id");
   nir_builder_instr_insert(b, &load->instr);

   nir_ssa_def_rewrite_uses(&instr->dest.ssa, &load->dest.ssa);

   return true;
}
329
330 static bool
lower_drawid(nir_shader * shader)331 lower_drawid(nir_shader *shader)
332 {
333 if (shader->info.stage != MESA_SHADER_VERTEX)
334 return false;
335
336 if (!BITSET_TEST(shader->info.system_values_read, SYSTEM_VALUE_DRAW_ID))
337 return false;
338
339 return nir_shader_instructions_pass(shader, lower_drawid_instr, nir_metadata_dominance, NULL);
340 }
341
342 static bool
lower_dual_blend(nir_shader * shader)343 lower_dual_blend(nir_shader *shader)
344 {
345 bool progress = false;
346 nir_variable *var = nir_find_variable_with_location(shader, nir_var_shader_out, FRAG_RESULT_DATA1);
347 if (var) {
348 var->data.location = FRAG_RESULT_DATA0;
349 var->data.index = 1;
350 progress = true;
351 }
352 nir_shader_preserve_all_metadata(shader);
353 return progress;
354 }
355
/* populate screen->nir_options: start from the static defaults and widen the
 * int64/double lowering masks when the device lacks the matching features
 */
void
zink_screen_init_compiler(struct zink_screen *screen)
{
   static const struct nir_shader_compiler_options
   default_options = {
      .lower_ffma16 = true,
      .lower_ffma32 = true,
      .lower_ffma64 = true,
      .lower_scmp = true,
      .lower_fdph = true,
      .lower_flrp32 = true,
      .lower_fpow = true,
      .lower_fsat = true,
      .lower_extract_byte = true,
      .lower_extract_word = true,
      .lower_insert_byte = true,
      .lower_insert_word = true,
      .lower_mul_high = true,
      .lower_rotate = true,
      .lower_uadd_carry = true,
      .lower_pack_64_2x32_split = true,
      .lower_unpack_64_2x32_split = true,
      .lower_pack_32_2x16_split = true,
      .lower_unpack_32_2x16_split = true,
      .lower_vector_cmp = true,
      .lower_int64_options = 0,
      /* everything except full-software fp64 lowering */
      .lower_doubles_options = ~nir_lower_fp64_full_software,
      .lower_uniforms_to_ubo = true,
      .has_fsub = true,
      .has_isub = true,
      .lower_mul_2x32_64 = true,
      .support_16bit_alu = true, /* not quite what it sounds like */
   };

   screen->nir_options = default_options;

   /* no shaderInt64 feature: lower every int64 operation */
   if (!screen->info.feats.features.shaderInt64)
      screen->nir_options.lower_int64_options = ~0;

   /* no shaderFloat64 feature: lower every double operation */
   if (!screen->info.feats.features.shaderFloat64) {
      screen->nir_options.lower_doubles_options = ~0;
      screen->nir_options.lower_flrp64 = true;
      screen->nir_options.lower_ffma64 = true;
   }
}
401
402 const void *
zink_get_compiler_options(struct pipe_screen * pscreen,enum pipe_shader_ir ir,enum pipe_shader_type shader)403 zink_get_compiler_options(struct pipe_screen *pscreen,
404 enum pipe_shader_ir ir,
405 enum pipe_shader_type shader)
406 {
407 assert(ir == PIPE_SHADER_IR_NIR);
408 return &zink_screen(pscreen)->nir_options;
409 }
410
411 struct nir_shader *
zink_tgsi_to_nir(struct pipe_screen * screen,const struct tgsi_token * tokens)412 zink_tgsi_to_nir(struct pipe_screen *screen, const struct tgsi_token *tokens)
413 {
414 if (zink_debug & ZINK_DEBUG_TGSI) {
415 fprintf(stderr, "TGSI shader:\n---8<---\n");
416 tgsi_dump_to_file(tokens, 0, stderr);
417 fprintf(stderr, "---8<---\n\n");
418 }
419
420 return tgsi_to_nir(tokens, screen, false);
421 }
422
/* run the standard NIR optimization loop until no pass makes progress,
 * then iterate late algebraic opts with cleanup after each round
 */
static void
optimize_nir(struct nir_shader *s)
{
   bool progress;
   do {
      progress = false;
      /* run unconditionally so the passes below always see SSA'd vars */
      NIR_PASS_V(s, nir_lower_vars_to_ssa);
      NIR_PASS(progress, s, nir_copy_prop);
      NIR_PASS(progress, s, nir_opt_remove_phis);
      NIR_PASS(progress, s, nir_opt_dce);
      NIR_PASS(progress, s, nir_opt_dead_cf);
      NIR_PASS(progress, s, nir_opt_cse);
      NIR_PASS(progress, s, nir_opt_peephole_select, 8, true, true);
      NIR_PASS(progress, s, nir_opt_algebraic);
      NIR_PASS(progress, s, nir_opt_constant_folding);
      NIR_PASS(progress, s, nir_opt_undef);
      /* zink-specific bool lowering */
      NIR_PASS(progress, s, zink_nir_lower_b2b);
   } while (progress);

   do {
      progress = false;
      NIR_PASS(progress, s, nir_opt_algebraic_late);
      /* late algebraic can expose new copy-prop/dce/cse opportunities */
      if (progress) {
         NIR_PASS_V(s, nir_copy_prop);
         NIR_PASS_V(s, nir_opt_dce);
         NIR_PASS_V(s, nir_opt_cse);
      }
   } while (progress);
}
452
453 /* - copy the lowered fbfetch variable
454 * - set the new one up as an input attachment for descriptor 0.6
455 * - load it as an image
456 * - overwrite the previous load
457 */
/* rewrite a load of the fbfetch output variable (passed via 'data') into an
 * input-attachment image load; see the pass-level comment above
 */
static bool
lower_fbfetch_instr(nir_builder *b, nir_instr *instr, void *data)
{
   if (instr->type != nir_instr_type_intrinsic)
      return false;
   nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
   if (intr->intrinsic != nir_intrinsic_load_deref)
      return false;
   /* only rewrite loads of the fbfetch variable itself */
   nir_variable *var = nir_deref_instr_get_variable(nir_src_as_deref(intr->src[0]));
   if (var != data)
      return false;
   b->cursor = nir_after_instr(instr);
   nir_variable *fbfetch = nir_variable_clone(data, b->shader);
   /* If Dim is SubpassData, ... Image Format must be Unknown
    * - SPIRV OpTypeImage specification
    */
   fbfetch->data.image.format = 0;
   fbfetch->data.index = 0; /* fix this if more than 1 fbfetch target is supported */
   fbfetch->data.mode = nir_var_uniform;
   fbfetch->data.binding = ZINK_FBFETCH_BINDING;
   fbfetch->type = glsl_image_type(GLSL_SAMPLER_DIM_SUBPASS, false, GLSL_TYPE_FLOAT);
   nir_shader_add_variable(b->shader, fbfetch);
   nir_ssa_def *deref = &nir_build_deref_var(b, fbfetch)->dest.ssa;
   nir_ssa_def *load = nir_image_deref_load(b, 4, 32, deref, nir_imm_vec4(b, 0, 0, 0, 1), nir_ssa_undef(b, 1, 32), nir_imm_int(b, 0));
   /* swizzle the loaded value (2,1,0,3) before replacing the original load */
   unsigned swiz[4] = {2, 1, 0, 3};
   nir_ssa_def *swizzle = nir_swizzle(b, load, swiz, 4);
   nir_ssa_def_rewrite_uses(&intr->dest.ssa, swizzle);
   return true;
}
487
488 static bool
lower_fbfetch(nir_shader * shader,nir_variable ** fbfetch)489 lower_fbfetch(nir_shader *shader, nir_variable **fbfetch)
490 {
491 nir_foreach_shader_out_variable(var, shader) {
492 if (var->data.fb_fetch_output) {
493 *fbfetch = var;
494 break;
495 }
496 }
497 assert(*fbfetch);
498 if (!*fbfetch)
499 return false;
500 return nir_shader_instructions_pass(shader, lower_fbfetch_instr, nir_metadata_dominance, *fbfetch);
501 }
502
503 /* check for a genuine gl_PointSize output vs one from nir_lower_point_size_mov */
504 static bool
check_psiz(struct nir_shader * s)505 check_psiz(struct nir_shader *s)
506 {
507 nir_foreach_shader_out_variable(var, s) {
508 if (var->data.location == VARYING_SLOT_PSIZ) {
509 /* genuine PSIZ outputs will have this set */
510 return !!var->data.explicit_location;
511 }
512 }
513 return false;
514 }
515
/* translate gallium stream-output info into NIR xfb decorations: outputs that
 * cover a whole variable are "inlined" as explicit_xfb_buffer decorations,
 * everything else is accumulated into zs->streamout.so_info for later emission
 */
static void
update_so_info(struct zink_shader *zs, const struct pipe_stream_output_info *so_info,
               uint64_t outputs_written, bool have_psiz)
{
   /* maps condensed gallium register indices back to VARYING_SLOT_* values */
   uint8_t reverse_map[64] = {0};
   unsigned slot = 0;
   /* semi-copied from iris */
   while (outputs_written) {
      int bit = u_bit_scan64(&outputs_written);
      /* PSIZ from nir_lower_point_size_mov breaks stream output, so always skip it */
      if (bit == VARYING_SLOT_PSIZ && !have_psiz)
         continue;
      reverse_map[slot++] = bit;
   }

   /* clear any stale decorations before re-applying */
   nir_foreach_shader_out_variable(var, zs->nir)
      var->data.explicit_xfb_buffer = 0;

   bool inlined[64] = {0};
   for (unsigned i = 0; i < so_info->num_outputs; i++) {
      const struct pipe_stream_output *output = &so_info->output[i];
      /* NOTE(review): this intentionally shadows the outer 'slot', which is no
       * longer used after the reverse_map loop above */
      unsigned slot = reverse_map[output->register_index];
      /* always set stride to be used during draw */
      zs->streamout.so_info.stride[output->output_buffer] = so_info->stride[output->output_buffer];
      /* inlining only works for component-0 outputs, and for GS only with a
       * single active stream */
      if ((zs->nir->info.stage != MESA_SHADER_GEOMETRY || util_bitcount(zs->nir->info.gs.active_stream_mask) == 1) &&
          !output->start_component) {
         nir_variable *var = NULL;
         /* scan downward in case the variable starts at a lower slot */
         while (!var)
            var = nir_find_variable_with_location(zs->nir, nir_var_shader_out, slot--);
         slot++;
         if (inlined[slot])
            continue;
         assert(var && var->data.location == slot);
         /* if this is the entire variable, try to blast it out during the initial declaration */
         if (glsl_get_components(var->type) == output->num_components) {
            var->data.explicit_xfb_buffer = 1;
            var->data.xfb.buffer = output->output_buffer;
            var->data.xfb.stride = so_info->stride[output->output_buffer] * 4;
            var->data.offset = output->dst_offset * 4;
            var->data.stream = output->stream;
            inlined[slot] = true;
            continue;
         }
      }
      /* not inlinable: keep the raw output for manual emission */
      zs->streamout.so_info.output[zs->streamout.so_info.num_outputs] = *output;
      /* Map Gallium's condensed "slots" back to real VARYING_SLOT_* enums */
      zs->streamout.so_info_slots[zs->streamout.so_info.num_outputs++] = reverse_map[output->register_index];
   }
   zs->streamout.have_xfb = !!zs->streamout.so_info.num_outputs;
}
566
/* state shared between decompose_attribs and lower_attrib:
 * split[0] is the original variable, split[1..] the per-slot clones;
 * needs_w is set when the w component must be synthesized from an oob load */
struct decompose_state {
   nir_variable **split;
   bool needs_w;
};
571
/* rewrite loads of a decomposed attrib variable (split[0]) into per-slot loads
 * of its clones (split[1..]) zipped back into a single vector
 */
static bool
lower_attrib(nir_builder *b, nir_instr *instr, void *data)
{
   struct decompose_state *state = data;
   nir_variable **split = state->split;
   if (instr->type != nir_instr_type_intrinsic)
      return false;
   nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
   if (intr->intrinsic != nir_intrinsic_load_deref)
      return false;
   nir_deref_instr *deref = nir_src_as_deref(intr->src[0]);
   nir_variable *var = nir_deref_instr_get_variable(deref);
   /* only loads of the original (pre-split) variable are rewritten */
   if (var != split[0])
      return false;
   unsigned num_components = glsl_get_vector_elements(split[0]->type);
   b->cursor = nir_after_instr(instr);
   nir_ssa_def *loads[4];
   for (unsigned i = 0; i < (state->needs_w ? num_components - 1 : num_components); i++)
      loads[i] = nir_load_deref(b, nir_build_deref_var(b, split[i+1]));
   if (state->needs_w) {
      /* oob load w component to get correct value for int/float */
      loads[3] = nir_channel(b, loads[0], 3);
      loads[0] = nir_channel(b, loads[0], 0);
   }
   nir_ssa_def *new_load = nir_vec(b, loads, num_components);
   nir_ssa_def_rewrite_uses(&intr->dest.ssa, new_load);
   nir_instr_remove_v(instr);
   return true;
}
601
/* split each attrib in the decomposed masks into per-component variables at
 * freshly-allocated driver_locations, then rewrite its loads via lower_attrib;
 * decomposed_attrs_without_w marks attribs whose w must be synthesized
 */
static bool
decompose_attribs(nir_shader *nir, uint32_t decomposed_attrs, uint32_t decomposed_attrs_without_w)
{
   /* 'bits' tracks free driver_locations: a set bit means the slot is unused */
   uint32_t bits = 0;
   nir_foreach_variable_with_modes(var, nir, nir_var_shader_in)
      bits |= BITFIELD_BIT(var->data.driver_location);
   bits = ~bits;
   u_foreach_bit(location, decomposed_attrs | decomposed_attrs_without_w) {
      nir_variable *split[5];
      struct decompose_state state;
      state.split = split;
      nir_variable *var = nir_find_variable_with_driver_location(nir, nir_var_shader_in, location);
      assert(var);
      split[0] = var;
      /* the original's slot becomes reusable by one of the clones */
      bits |= BITFIELD_BIT(var->data.driver_location);
      const struct glsl_type *new_type = glsl_type_is_scalar(var->type) ? var->type : glsl_get_array_element(var->type);
      unsigned num_components = glsl_get_vector_elements(var->type);
      state.needs_w = (decomposed_attrs_without_w & BITFIELD_BIT(location)) != 0 && num_components == 4;
      for (unsigned i = 0; i < (state.needs_w ? num_components - 1 : num_components); i++) {
         split[i+1] = nir_variable_clone(var, nir);
         split[i+1]->name = ralloc_asprintf(nir, "%s_split%u", var->name, i);
         /* without-w attribs keep the full type on the first clone so the w
          * channel can be read from it in lower_attrib */
         if (decomposed_attrs_without_w & BITFIELD_BIT(location))
            split[i+1]->type = !i && num_components == 4 ? var->type : new_type;
         else
            split[i+1]->type = new_type;
         /* claim the lowest free driver_location for this clone */
         split[i+1]->data.driver_location = ffs(bits) - 1;
         bits &= ~BITFIELD_BIT(split[i+1]->data.driver_location);
         nir_shader_add_variable(nir, split[i+1]);
      }
      /* demote the original so dead-variable removal can delete it */
      var->data.mode = nir_var_shader_temp;
      nir_shader_instructions_pass(nir, lower_attrib, nir_metadata_dominance, &state);
   }
   nir_fixup_deref_modes(nir);
   NIR_PASS_V(nir, nir_remove_dead_variables, nir_var_shader_temp, NULL);
   optimize_nir(nir);
   return true;
}
639
/* assign a driver_location to one producer-side (output) variable: builtins
 * get a sentinel, patch/tes-input vars get fixed slots, and everything else is
 * packed via slot_map/reserved
 */
static void
assign_producer_var_io(gl_shader_stage stage, nir_variable *var, unsigned *reserved, unsigned char *slot_map)
{
   unsigned slot = var->data.location;
   switch (var->data.location) {
   case VARYING_SLOT_POS:
   case VARYING_SLOT_PNTC:
   case VARYING_SLOT_PSIZ:
   case VARYING_SLOT_LAYER:
   case VARYING_SLOT_PRIMITIVE_ID:
   case VARYING_SLOT_CLIP_DIST0:
   case VARYING_SLOT_CULL_DIST0:
   case VARYING_SLOT_VIEWPORT:
   case VARYING_SLOT_FACE:
   case VARYING_SLOT_TESS_LEVEL_OUTER:
   case VARYING_SLOT_TESS_LEVEL_INNER:
      /* use a sentinel value to avoid counting later */
      var->data.driver_location = UINT_MAX;
      break;

   default:
      if (var->data.patch) {
         assert(var->data.location >= VARYING_SLOT_PATCH0);
         slot = var->data.location - VARYING_SLOT_PATCH0;
      } else if (var->data.location >= VARYING_SLOT_VAR0 &&
                 var->data.mode == nir_var_shader_in &&
                  stage == MESA_SHADER_TESS_EVAL) {
         /* tes inputs use their raw VAR index (tcs outputs match; see
          * assign_consumer_var_io) */
         slot = var->data.location - VARYING_SLOT_VAR0;
      } else {
         /* first time this location is seen: reserve consecutive vec4 slots */
         if (slot_map[var->data.location] == 0xff) {
            assert(*reserved < MAX_VARYING);
            slot_map[var->data.location] = *reserved;
            *reserved += glsl_count_vec4_slots(var->type, false, false);
         }
         slot = slot_map[var->data.location];
         assert(slot < MAX_VARYING);
      }
      var->data.driver_location = slot;
   }
}
680
681 ALWAYS_INLINE static bool
is_texcoord(gl_shader_stage stage,const nir_variable * var)682 is_texcoord(gl_shader_stage stage, const nir_variable *var)
683 {
684 if (stage != MESA_SHADER_FRAGMENT)
685 return false;
686 return var->data.location >= VARYING_SLOT_TEX0 &&
687 var->data.location <= VARYING_SLOT_TEX7;
688 }
689
/* assign a driver_location to one consumer-side (input) variable using the
 * slot_map built by assign_producer_var_io; returns false when the input has
 * no matching producer output (dead io) and should be dropped by the caller
 */
static bool
assign_consumer_var_io(gl_shader_stage stage, nir_variable *var, unsigned *reserved, unsigned char *slot_map)
{
   switch (var->data.location) {
   case VARYING_SLOT_POS:
   case VARYING_SLOT_PNTC:
   case VARYING_SLOT_PSIZ:
   case VARYING_SLOT_LAYER:
   case VARYING_SLOT_PRIMITIVE_ID:
   case VARYING_SLOT_CLIP_DIST0:
   case VARYING_SLOT_CULL_DIST0:
   case VARYING_SLOT_VIEWPORT:
   case VARYING_SLOT_FACE:
   case VARYING_SLOT_TESS_LEVEL_OUTER:
   case VARYING_SLOT_TESS_LEVEL_INNER:
      /* use a sentinel value to avoid counting later */
      var->data.driver_location = UINT_MAX;
      break;
   default:
      if (var->data.patch) {
         assert(var->data.location >= VARYING_SLOT_PATCH0);
         var->data.driver_location = var->data.location - VARYING_SLOT_PATCH0;
      } else if (var->data.location >= VARYING_SLOT_VAR0 &&
                  stage == MESA_SHADER_TESS_CTRL &&
                  var->data.mode == nir_var_shader_out)
         /* tcs outputs use their raw VAR index to match tes inputs */
         var->data.driver_location = var->data.location - VARYING_SLOT_VAR0;
      else {
         /* location never reserved by the producer */
         if (slot_map[var->data.location] == (unsigned char)-1) {
            if (!is_texcoord(stage, var))
               /* dead io */
               return false;
            /* texcoords can't be eliminated in fs due to GL_COORD_REPLACE */
            slot_map[var->data.location] = (*reserved)++;
         }
         var->data.driver_location = slot_map[var->data.location];
      }
   }
   return true;
}
729
730
731 static bool
rewrite_and_discard_read(nir_builder * b,nir_instr * instr,void * data)732 rewrite_and_discard_read(nir_builder *b, nir_instr *instr, void *data)
733 {
734 nir_variable *var = data;
735 if (instr->type != nir_instr_type_intrinsic)
736 return false;
737
738 nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
739 if (intr->intrinsic != nir_intrinsic_load_deref)
740 return false;
741 nir_variable *deref_var = nir_intrinsic_get_var(intr, 0);
742 if (deref_var != var)
743 return false;
744 nir_ssa_def *undef = nir_ssa_undef(b, nir_dest_num_components(intr->dest), nir_dest_bit_size(intr->dest));
745 nir_ssa_def_rewrite_uses(&intr->dest.ssa, undef);
746 return true;
747 }
748
/* pack the varying interface between a producer/consumer shader pair:
 * producer outputs reserve slots, consumer inputs look them up; unmatched
 * consumer inputs (and tcs outputs) are dropped as dead io
 */
void
zink_compiler_assign_io(nir_shader *producer, nir_shader *consumer)
{
   unsigned reserved = 0;
   unsigned char slot_map[VARYING_SLOT_MAX];
   /* 0xff == slot not yet assigned */
   memset(slot_map, -1, sizeof(slot_map));
   bool do_fixup = false;
   /* the shader that may have dead vars demoted and thus needs cleanup */
   nir_shader *nir = producer->info.stage == MESA_SHADER_TESS_CTRL ? producer : consumer;
   if (producer->info.stage == MESA_SHADER_TESS_CTRL) {
      /* never assign from tcs -> tes, always invert */
      nir_foreach_variable_with_modes(var, consumer, nir_var_shader_in)
         assign_producer_var_io(consumer->info.stage, var, &reserved, slot_map);
      nir_foreach_variable_with_modes_safe(var, producer, nir_var_shader_out) {
         if (!assign_consumer_var_io(producer->info.stage, var, &reserved, slot_map))
            /* this is an output, nothing more needs to be done for it to be dropped */
            do_fixup = true;
      }
   } else {
      nir_foreach_variable_with_modes(var, producer, nir_var_shader_out)
         assign_producer_var_io(producer->info.stage, var, &reserved, slot_map);
      nir_foreach_variable_with_modes_safe(var, consumer, nir_var_shader_in) {
         if (!assign_consumer_var_io(consumer->info.stage, var, &reserved, slot_map)) {
            do_fixup = true;
            /* input needs to be rewritten as an undef to ensure the entire deref chain is deleted */
            nir_shader_instructions_pass(consumer, rewrite_and_discard_read, nir_metadata_dominance, var);
         }
      }
   }
   if (!do_fixup)
      return;
   nir_fixup_deref_modes(nir);
   NIR_PASS_V(nir, nir_remove_dead_variables, nir_var_shader_temp, NULL);
   optimize_nir(nir);
}
783
/* Compile a (possibly key-specialized) clone of @base_nir to SPIR-V and
 * create a VkShaderModule from it.
 *
 * @key may be NULL, in which case no shader-key lowering is applied.
 * Returns VK_NULL_HANDLE on failure; the cloned nir and the spirv blob are
 * freed before returning in all cases.
 */
VkShaderModule
zink_shader_compile(struct zink_screen *screen, struct zink_shader *zs, nir_shader *base_nir, const struct zink_shader_key *key)
{
   VkShaderModule mod = VK_NULL_HANDLE;
   void *streamout = NULL;
   /* clone so key-based lowering never mutates the stored base shader */
   nir_shader *nir = nir_shader_clone(NULL, base_nir);

   if (key) {
      if (key->inline_uniforms) {
         NIR_PASS_V(nir, nir_inline_uniforms,
                    nir->info.num_inlinable_uniforms,
                    key->base.inlined_uniform_values,
                    nir->info.inlinable_uniform_dw_offsets);

         optimize_nir(nir);

         /* This must be done again. */
         NIR_PASS_V(nir, nir_io_add_const_offset_to_base, nir_var_shader_in |
                                                          nir_var_shader_out);
      }

      /* TODO: use a separate mem ctx here for ralloc */
      switch (zs->nir->info.stage) {
      case MESA_SHADER_VERTEX: {
         /* the vs key stores the decomposed-attrib masks at a width chosen by
          * vs_key->size; unpack the matching union member
          */
         uint32_t decomposed_attrs = 0, decomposed_attrs_without_w = 0;
         const struct zink_vs_key *vs_key = zink_vs_key(key);
         switch (vs_key->size) {
         case 4:
            decomposed_attrs = vs_key->u32.decomposed_attrs;
            decomposed_attrs_without_w = vs_key->u32.decomposed_attrs_without_w;
            break;
         case 2:
            decomposed_attrs = vs_key->u16.decomposed_attrs;
            decomposed_attrs_without_w = vs_key->u16.decomposed_attrs_without_w;
            break;
         case 1:
            decomposed_attrs = vs_key->u8.decomposed_attrs;
            decomposed_attrs_without_w = vs_key->u8.decomposed_attrs_without_w;
            break;
         default: break;
         }
         if (decomposed_attrs || decomposed_attrs_without_w)
            NIR_PASS_V(nir, decompose_attribs, decomposed_attrs, decomposed_attrs_without_w);
         FALLTHROUGH;
      }
      /* vertex, tes, and gs share the "last vertex stage" lowering below */
      case MESA_SHADER_TESS_EVAL:
      case MESA_SHADER_GEOMETRY:
         if (zink_vs_key_base(key)->last_vertex_stage) {
            if (zs->streamout.have_xfb)
               streamout = &zs->streamout;

            if (!zink_vs_key_base(key)->clip_halfz) {
               NIR_PASS_V(nir, nir_lower_clip_halfz);
            }
            if (zink_vs_key_base(key)->push_drawid) {
               NIR_PASS_V(nir, lower_drawid);
            }
         }
         break;
      case MESA_SHADER_FRAGMENT:
         if (!zink_fs_key(key)->samples &&
             nir->info.outputs_written & BITFIELD64_BIT(FRAG_RESULT_SAMPLE_MASK)) {
            /* VK will always use gl_SampleMask[] values even if sample count is 0,
             * so we need to skip this write here to mimic GL's behavior of ignoring it
             */
            nir_foreach_shader_out_variable(var, nir) {
               if (var->data.location == FRAG_RESULT_SAMPLE_MASK)
                  var->data.mode = nir_var_shader_temp;
            }
            nir_fixup_deref_modes(nir);
            NIR_PASS_V(nir, nir_remove_dead_variables, nir_var_shader_temp, NULL);
            optimize_nir(nir);
         }
         if (zink_fs_key(key)->force_dual_color_blend && nir->info.outputs_written & BITFIELD64_BIT(FRAG_RESULT_DATA1)) {
            NIR_PASS_V(nir, lower_dual_blend);
         }
         if (zink_fs_key(key)->coord_replace_bits) {
            NIR_PASS_V(nir, nir_lower_texcoord_replace, zink_fs_key(key)->coord_replace_bits,
                       false, zink_fs_key(key)->coord_replace_yinvert);
         }
         if (nir->info.fs.uses_fbfetch_output) {
            nir_variable *fbfetch = NULL;
            NIR_PASS_V(nir, lower_fbfetch, &fbfetch);
            /* old variable must be deleted to avoid spirv errors */
            /* NOTE(review): assumes lower_fbfetch always finds the fbfetch
             * variable when uses_fbfetch_output is set; fbfetch would be NULL
             * here otherwise — confirm
             */
            fbfetch->data.mode = nir_var_shader_temp;
            nir_fixup_deref_modes(nir);
            NIR_PASS_V(nir, nir_remove_dead_variables, nir_var_shader_temp, NULL);
            optimize_nir(nir);
         }
         break;
      default: break;
      }
   }
   /* take the shader out of SSA form before handing it to ntv */
   NIR_PASS_V(nir, nir_convert_from_ssa, true);

   struct spirv_shader *spirv = nir_to_spirv(nir, streamout, screen->spirv_version);
   if (!spirv)
      goto done;

   /* optional debug dump of the raw spirv words to dumpNN.spv */
   if (zink_debug & ZINK_DEBUG_SPIRV) {
      char buf[256];
      static int i;
      snprintf(buf, sizeof(buf), "dump%02d.spv", i++);
      FILE *fp = fopen(buf, "wb");
      if (fp) {
         fwrite(spirv->words, sizeof(uint32_t), spirv->num_words, fp);
         fclose(fp);
         fprintf(stderr, "wrote '%s'...\n", buf);
      }
   }

   VkShaderModuleCreateInfo smci = {0};
   smci.sType = VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO;
   smci.codeSize = spirv->num_words * sizeof(uint32_t);
   smci.pCode = spirv->words;

   if (VKSCR(CreateShaderModule)(screen->dev, &smci, NULL, &mod) != VK_SUCCESS)
      mod = VK_NULL_HANDLE;

done:
   ralloc_free(nir);

   /* TODO: determine if there's any reason to cache spirv output? */
   ralloc_free(spirv);
   return mod;
}
910
911 static bool
lower_baseinstance_instr(nir_builder * b,nir_instr * instr,void * data)912 lower_baseinstance_instr(nir_builder *b, nir_instr *instr, void *data)
913 {
914 if (instr->type != nir_instr_type_intrinsic)
915 return false;
916 nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
917 if (intr->intrinsic != nir_intrinsic_load_instance_id)
918 return false;
919 b->cursor = nir_after_instr(instr);
920 nir_ssa_def *def = nir_isub(b, &intr->dest.ssa, nir_load_base_instance(b));
921 nir_ssa_def_rewrite_uses_after(&intr->dest.ssa, def, def->parent_instr);
922 return true;
923 }
924
925 static bool
lower_baseinstance(nir_shader * shader)926 lower_baseinstance(nir_shader *shader)
927 {
928 if (shader->info.stage != MESA_SHADER_VERTEX)
929 return false;
930 return nir_shader_instructions_pass(shader, lower_baseinstance_instr, nir_metadata_dominance, NULL);
931 }
932
933 bool nir_lower_dynamic_bo_access(nir_shader *shader);
934
935 /* gl_nir_lower_buffers makes variables unusable for all UBO/SSBO access
936 * so instead we delete all those broken variables and just make new ones
937 */
static bool
unbreak_bos(nir_shader *shader)
{
   /* Phase 1: scan all buffer intrinsics to learn which UBO/SSBO slots are
    * actually accessed and which SSBOs need a runtime-sized tail.
    * Phase 2: demote the existing (broken) buffer variables to shader_temp,
    * clean them up, then recreate one uniformly-typed variable per used slot.
    * Returns true iff replacement variables were created.
    */
   uint32_t ssbo_used = 0;
   uint32_t ubo_used = 0;
   uint64_t max_ssbo_size = 0;
   uint64_t max_ubo_size = 0;
   /* slots needing an unsized trailing array (get_ssbo_size was seen) */
   bool ssbo_sizes[PIPE_MAX_SHADER_BUFFERS] = {false};

   if (!shader->info.num_ssbos && !shader->info.num_ubos && !shader->num_uniforms)
      return false;
   nir_function_impl *impl = nir_shader_get_entrypoint(shader);
   nir_foreach_block(block, impl) {
      nir_foreach_instr(instr, block) {
         if (instr->type != nir_instr_type_intrinsic)
            continue;

         nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
         switch (intrin->intrinsic) {
         case nir_intrinsic_store_ssbo:
            /* for stores the buffer index is src[1] (src[0] is the value) */
            ssbo_used |= BITFIELD_BIT(nir_src_as_uint(intrin->src[1]));
            break;

         case nir_intrinsic_get_ssbo_size: {
            uint32_t slot = nir_src_as_uint(intrin->src[0]);
            ssbo_used |= BITFIELD_BIT(slot);
            ssbo_sizes[slot] = true;
            break;
         }
         /* all loads/atomics carry the buffer index in src[0] */
         case nir_intrinsic_ssbo_atomic_add:
         case nir_intrinsic_ssbo_atomic_imin:
         case nir_intrinsic_ssbo_atomic_umin:
         case nir_intrinsic_ssbo_atomic_imax:
         case nir_intrinsic_ssbo_atomic_umax:
         case nir_intrinsic_ssbo_atomic_and:
         case nir_intrinsic_ssbo_atomic_or:
         case nir_intrinsic_ssbo_atomic_xor:
         case nir_intrinsic_ssbo_atomic_exchange:
         case nir_intrinsic_ssbo_atomic_comp_swap:
         case nir_intrinsic_ssbo_atomic_fmin:
         case nir_intrinsic_ssbo_atomic_fmax:
         case nir_intrinsic_ssbo_atomic_fcomp_swap:
         case nir_intrinsic_load_ssbo:
            ssbo_used |= BITFIELD_BIT(nir_src_as_uint(intrin->src[0]));
            break;
         case nir_intrinsic_load_ubo:
         case nir_intrinsic_load_ubo_vec4:
            ubo_used |= BITFIELD_BIT(nir_src_as_uint(intrin->src[0]));
            break;
         default:
            break;
         }
      }
   }

   /* measure the largest buffer (in attribute slots, i.e. vec4s) and demote
    * every existing buffer variable; atomic counters are left alone
    */
   nir_foreach_variable_with_modes(var, shader, nir_var_mem_ssbo | nir_var_mem_ubo) {
      const struct glsl_type *type = glsl_without_array(var->type);
      if (type_is_counter(type))
         continue;
      unsigned size = glsl_count_attribute_slots(glsl_type_is_array(var->type) ? var->type : type, false);
      if (var->data.mode == nir_var_mem_ubo)
         max_ubo_size = MAX2(max_ubo_size, size);
      else
         max_ssbo_size = MAX2(max_ssbo_size, size);
      var->data.mode = nir_var_shader_temp;
   }
   nir_fixup_deref_modes(shader);
   NIR_PASS_V(shader, nir_remove_dead_variables, nir_var_shader_temp, NULL);
   optimize_nir(shader);

   if (!ssbo_used && !ubo_used)
      return false;

   /* every replacement is struct { uint base[N]; [uint unsized[];] } */
   struct glsl_struct_field *fields = rzalloc_array(shader, struct glsl_struct_field, 2);
   fields[0].name = ralloc_strdup(shader, "base");
   fields[1].name = ralloc_strdup(shader, "unsized");
   if (ubo_used) {
      /* max_ubo_size is in vec4 slots -> *4 uints */
      const struct glsl_type *ubo_type = glsl_array_type(glsl_uint_type(), max_ubo_size * 4, 4);
      fields[0].type = ubo_type;
      u_foreach_bit(slot, ubo_used) {
         char buf[64];
         snprintf(buf, sizeof(buf), "ubo_slot_%u", slot);
         nir_variable *var = nir_variable_create(shader, nir_var_mem_ubo, glsl_struct_type(fields, 1, "struct", false), buf);
         var->interface_type = var->type;
         var->data.driver_location = slot;
      }
   }
   if (ssbo_used) {
      const struct glsl_type *ssbo_type = glsl_array_type(glsl_uint_type(), max_ssbo_size * 4, 4);
      const struct glsl_type *unsized = glsl_array_type(glsl_uint_type(), 0, 4);
      fields[0].type = ssbo_type;
      u_foreach_bit(slot, ssbo_used) {
         char buf[64];
         snprintf(buf, sizeof(buf), "ssbo_slot_%u", slot);
         /* only slots queried with get_ssbo_size get the unsized tail */
         if (ssbo_sizes[slot])
            fields[1].type = unsized;
         else
            fields[1].type = NULL;
         nir_variable *var = nir_variable_create(shader, nir_var_mem_ssbo,
                                                glsl_struct_type(fields, 1 + !!ssbo_sizes[slot], "struct", false), buf);
         var->interface_type = var->type;
         var->data.driver_location = slot;
      }
   }
   return true;
}
1044
1045 /* this is a "default" bindless texture used if the shader has no texture variables */
1046 static nir_variable *
create_bindless_texture(nir_shader * nir,nir_tex_instr * tex)1047 create_bindless_texture(nir_shader *nir, nir_tex_instr *tex)
1048 {
1049 unsigned binding = tex->sampler_dim == GLSL_SAMPLER_DIM_BUF ? 1 : 0;
1050 nir_variable *var;
1051
1052 const struct glsl_type *sampler_type = glsl_sampler_type(tex->sampler_dim, tex->is_shadow, tex->is_array, GLSL_TYPE_FLOAT);
1053 var = nir_variable_create(nir, nir_var_uniform, glsl_array_type(sampler_type, ZINK_MAX_BINDLESS_HANDLES, 0), "bindless_texture");
1054 var->data.descriptor_set = ZINK_DESCRIPTOR_BINDLESS;
1055 var->data.driver_location = var->data.binding = binding;
1056 return var;
1057 }
1058
1059 /* this is a "default" bindless image used if the shader has no image variables */
1060 static nir_variable *
create_bindless_image(nir_shader * nir,enum glsl_sampler_dim dim)1061 create_bindless_image(nir_shader *nir, enum glsl_sampler_dim dim)
1062 {
1063 unsigned binding = dim == GLSL_SAMPLER_DIM_BUF ? 3 : 2;
1064 nir_variable *var;
1065
1066 const struct glsl_type *image_type = glsl_image_type(dim, false, GLSL_TYPE_FLOAT);
1067 var = nir_variable_create(nir, nir_var_uniform, glsl_array_type(image_type, ZINK_MAX_BINDLESS_HANDLES, 0), "bindless_image");
1068 var->data.descriptor_set = ZINK_DESCRIPTOR_BINDLESS;
1069 var->data.driver_location = var->data.binding = binding;
1070 var->data.image.format = PIPE_FORMAT_R8G8B8A8_UNORM;
1071 return var;
1072 }
1073
1074 /* rewrite bindless instructions as array deref instructions */
1075 static bool
lower_bindless_instr(nir_builder * b,nir_instr * in,void * data)1076 lower_bindless_instr(nir_builder *b, nir_instr *in, void *data)
1077 {
1078 nir_variable **bindless = data;
1079
1080 if (in->type == nir_instr_type_tex) {
1081 nir_tex_instr *tex = nir_instr_as_tex(in);
1082 int idx = nir_tex_instr_src_index(tex, nir_tex_src_texture_handle);
1083 if (idx == -1)
1084 return false;
1085
1086 nir_variable *var = tex->sampler_dim == GLSL_SAMPLER_DIM_BUF ? bindless[1] : bindless[0];
1087 if (!var)
1088 var = create_bindless_texture(b->shader, tex);
1089 b->cursor = nir_before_instr(in);
1090 nir_deref_instr *deref = nir_build_deref_var(b, var);
1091 if (glsl_type_is_array(var->type))
1092 deref = nir_build_deref_array(b, deref, nir_u2uN(b, tex->src[idx].src.ssa, 32));
1093 nir_instr_rewrite_src_ssa(in, &tex->src[idx].src, &deref->dest.ssa);
1094
1095 /* bindless sampling uses the variable type directly, which means the tex instr has to exactly
1096 * match up with it in contrast to normal sampler ops where things are a bit more flexible;
1097 * this results in cases where a shader is passed with sampler2DArray but the tex instr only has
1098 * 2 components, which explodes spirv compilation even though it doesn't trigger validation errors
1099 *
1100 * to fix this, pad the coord src here and fix the tex instr so that ntv will do the "right" thing
1101 * - Warhammer 40k: Dawn of War III
1102 */
1103 unsigned needed_components = glsl_get_sampler_coordinate_components(glsl_without_array(var->type));
1104 unsigned c = nir_tex_instr_src_index(tex, nir_tex_src_coord);
1105 unsigned coord_components = nir_src_num_components(tex->src[c].src);
1106 if (coord_components < needed_components) {
1107 nir_ssa_def *def = nir_pad_vector(b, tex->src[c].src.ssa, needed_components);
1108 nir_instr_rewrite_src_ssa(in, &tex->src[c].src, def);
1109 tex->coord_components = needed_components;
1110 }
1111 return true;
1112 }
1113 if (in->type != nir_instr_type_intrinsic)
1114 return false;
1115 nir_intrinsic_instr *instr = nir_instr_as_intrinsic(in);
1116
1117 nir_intrinsic_op op;
1118 #define OP_SWAP(OP) \
1119 case nir_intrinsic_bindless_image_##OP: \
1120 op = nir_intrinsic_image_deref_##OP; \
1121 break;
1122
1123
1124 /* convert bindless intrinsics to deref intrinsics */
1125 switch (instr->intrinsic) {
1126 OP_SWAP(atomic_add)
1127 OP_SWAP(atomic_and)
1128 OP_SWAP(atomic_comp_swap)
1129 OP_SWAP(atomic_dec_wrap)
1130 OP_SWAP(atomic_exchange)
1131 OP_SWAP(atomic_fadd)
1132 OP_SWAP(atomic_fmax)
1133 OP_SWAP(atomic_fmin)
1134 OP_SWAP(atomic_imax)
1135 OP_SWAP(atomic_imin)
1136 OP_SWAP(atomic_inc_wrap)
1137 OP_SWAP(atomic_or)
1138 OP_SWAP(atomic_umax)
1139 OP_SWAP(atomic_umin)
1140 OP_SWAP(atomic_xor)
1141 OP_SWAP(format)
1142 OP_SWAP(load)
1143 OP_SWAP(order)
1144 OP_SWAP(samples)
1145 OP_SWAP(size)
1146 OP_SWAP(store)
1147 default:
1148 return false;
1149 }
1150
1151 enum glsl_sampler_dim dim = nir_intrinsic_image_dim(instr);
1152 nir_variable *var = dim == GLSL_SAMPLER_DIM_BUF ? bindless[3] : bindless[2];
1153 if (!var)
1154 var = create_bindless_image(b->shader, dim);
1155 instr->intrinsic = op;
1156 b->cursor = nir_before_instr(in);
1157 nir_deref_instr *deref = nir_build_deref_var(b, var);
1158 if (glsl_type_is_array(var->type))
1159 deref = nir_build_deref_array(b, deref, nir_u2uN(b, instr->src[0].ssa, 32));
1160 nir_instr_rewrite_src_ssa(in, &instr->src[0], &deref->dest.ssa);
1161 return true;
1162 }
1163
1164 static bool
lower_bindless(nir_shader * shader,nir_variable ** bindless)1165 lower_bindless(nir_shader *shader, nir_variable **bindless)
1166 {
1167 if (!nir_shader_instructions_pass(shader, lower_bindless_instr, nir_metadata_dominance, bindless))
1168 return false;
1169 nir_fixup_deref_modes(shader);
1170 NIR_PASS_V(shader, nir_remove_dead_variables, nir_var_shader_temp, NULL);
1171 optimize_nir(shader);
1172 return true;
1173 }
1174
1175 /* convert shader image/texture io variables to int64 handles for bindless indexing */
static bool
lower_bindless_io_instr(nir_builder *b, nir_instr *in, void *data)
{
   /* Rewrite loads/stores of non-bindless image/sampler io variables so the
    * variable becomes a plain int64 handle; the variable's type is mutated
    * in place and the old instruction + deref chain are removed.
    */
   if (in->type != nir_instr_type_intrinsic)
      return false;
   nir_intrinsic_instr *instr = nir_instr_as_intrinsic(in);
   if (instr->intrinsic != nir_intrinsic_load_deref &&
       instr->intrinsic != nir_intrinsic_store_deref)
      return false;

   /* src[0] is the deref for both load_deref and store_deref */
   nir_deref_instr *src_deref = nir_src_as_deref(instr->src[0]);
   nir_variable *var = nir_deref_instr_get_variable(src_deref);
   if (var->data.bindless)
      return false;
   if (var->data.mode != nir_var_shader_in && var->data.mode != nir_var_shader_out)
      return false;
   if (!glsl_type_is_image(var->type) && !glsl_type_is_sampler(var->type))
      return false;

   /* retype the variable itself as a 64-bit handle and mark it bindless */
   var->type = glsl_int64_t_type();
   var->data.bindless = 1;
   b->cursor = nir_before_instr(in);
   nir_deref_instr *deref = nir_build_deref_var(b, var);
   if (instr->intrinsic == nir_intrinsic_load_deref) {
      nir_ssa_def *def = nir_load_deref(b, deref);
      nir_instr_rewrite_src_ssa(in, &instr->src[0], def);
      nir_ssa_def_rewrite_uses(&instr->dest.ssa, def);
   } else {
      nir_store_deref(b, deref, instr->src[1].ssa, nir_intrinsic_write_mask(instr));
   }
   /* the original access and its old deref are now dead */
   nir_instr_remove(in);
   nir_instr_remove(&src_deref->instr);
   return true;
}
1210
1211 static bool
lower_bindless_io(nir_shader * shader)1212 lower_bindless_io(nir_shader *shader)
1213 {
1214 return nir_shader_instructions_pass(shader, lower_bindless_io_instr, nir_metadata_dominance, NULL);
1215 }
1216
1217 static uint32_t
zink_binding(gl_shader_stage stage,VkDescriptorType type,int index)1218 zink_binding(gl_shader_stage stage, VkDescriptorType type, int index)
1219 {
1220 if (stage == MESA_SHADER_NONE) {
1221 unreachable("not supported");
1222 } else {
1223 switch (type) {
1224 case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER:
1225 case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC:
1226 assert(index < PIPE_MAX_CONSTANT_BUFFERS);
1227 return (stage * PIPE_MAX_CONSTANT_BUFFERS) + index;
1228
1229 case VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER:
1230 case VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER:
1231 assert(index < PIPE_MAX_SAMPLERS);
1232 return (stage * PIPE_MAX_SAMPLERS) + index;
1233
1234 case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER:
1235 assert(index < PIPE_MAX_SHADER_BUFFERS);
1236 return (stage * PIPE_MAX_SHADER_BUFFERS) + index;
1237
1238 case VK_DESCRIPTOR_TYPE_STORAGE_IMAGE:
1239 case VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER:
1240 assert(index < PIPE_MAX_SHADER_IMAGES);
1241 return (stage * PIPE_MAX_SHADER_IMAGES) + index;
1242
1243 default:
1244 unreachable("unexpected type");
1245 }
1246 }
1247 }
1248
/* Route a bindless uniform variable onto one of the four shared bindless
 * descriptor arrays, cloning @var into the array variable on first use.
 * Struct types are walked recursively so nested image/sampler members are
 * handled; non-resource members are ignored.  The original variable is
 * demoted to shader_temp (removed later by dead-variable elimination).
 */
static void
handle_bindless_var(nir_shader *nir, nir_variable *var, const struct glsl_type *type, nir_variable **bindless)
{
   if (glsl_type_is_struct(type)) {
      for (unsigned i = 0; i < glsl_get_length(type); i++)
         handle_bindless_var(nir, var, glsl_get_struct_field(type, i), bindless);
      return;
   }

   /* just a random scalar in a struct */
   if (!glsl_type_is_image(type) && !glsl_type_is_sampler(type))
      return;

   VkDescriptorType vktype = glsl_type_is_image(type) ? zink_image_type(type) : zink_sampler_type(type);
   /* binding layout matches create_bindless_texture()/create_bindless_image():
    * 0 = sampler, 1 = buffer sampler, 2 = image, 3 = buffer image
    */
   unsigned binding;
   switch (vktype) {
      case VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER:
         binding = 0;
         break;
      case VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER:
         binding = 1;
         break;
      case VK_DESCRIPTOR_TYPE_STORAGE_IMAGE:
         binding = 2;
         break;
      case VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER:
         binding = 3;
         break;
      default:
         unreachable("unknown");
   }
   if (!bindless[binding]) {
      /* first resource of this class: clone it into an array-typed variable
       * in the bindless descriptor set
       */
      bindless[binding] = nir_variable_clone(var, nir);
      bindless[binding]->data.bindless = 0;
      bindless[binding]->data.descriptor_set = ZINK_DESCRIPTOR_BINDLESS;
      bindless[binding]->type = glsl_array_type(type, ZINK_MAX_BINDLESS_HANDLES, 0);
      bindless[binding]->data.driver_location = bindless[binding]->data.binding = binding;
      if (!bindless[binding]->data.image.format)
         bindless[binding]->data.image.format = PIPE_FORMAT_R8G8B8A8_UNORM;
      nir_shader_add_variable(nir, bindless[binding]);
   } else {
      /* all vars sharing a binding must agree on sampler dim */
      assert(glsl_get_sampler_dim(glsl_without_array(bindless[binding]->type)) == glsl_get_sampler_dim(glsl_without_array(var->type)));
   }
   var->data.mode = nir_var_shader_temp;
}
1294
1295 static enum pipe_prim_type
gl_prim_to_pipe(unsigned primitive_type)1296 gl_prim_to_pipe(unsigned primitive_type)
1297 {
1298 switch (primitive_type) {
1299 case GL_POINTS:
1300 return PIPE_PRIM_POINTS;
1301 case GL_LINES:
1302 case GL_LINE_LOOP:
1303 case GL_LINE_STRIP:
1304 case GL_LINES_ADJACENCY:
1305 case GL_LINE_STRIP_ADJACENCY:
1306 case GL_ISOLINES:
1307 return PIPE_PRIM_LINES;
1308 default:
1309 return PIPE_PRIM_TRIANGLES;
1310 }
1311 }
1312
1313 static enum pipe_prim_type
get_shader_base_prim_type(struct nir_shader * nir)1314 get_shader_base_prim_type(struct nir_shader *nir)
1315 {
1316 switch (nir->info.stage) {
1317 case MESA_SHADER_GEOMETRY:
1318 return gl_prim_to_pipe(nir->info.gs.output_primitive);
1319 case MESA_SHADER_TESS_EVAL:
1320 return nir->info.tess.point_mode ? PIPE_PRIM_POINTS : gl_prim_to_pipe(nir->info.tess.primitive_mode);
1321 default:
1322 break;
1323 }
1324 return PIPE_PRIM_MAX;
1325 }
1326
/* Wrap a gallium nir shader in a zink_shader: runs all key-independent
 * zink lowering passes and records the descriptor bindings
 * (UBO/SSBO/sampler/image) that the shader uses.
 * The returned zink_shader holds @nir (freed by zink_shader_free).
 * @so_info may be NULL when there are no stream-output varyings.
 */
struct zink_shader *
zink_shader_create(struct zink_screen *screen, struct nir_shader *nir,
                   const struct pipe_stream_output_info *so_info)
{
   struct zink_shader *ret = CALLOC_STRUCT(zink_shader);
   bool have_psiz = false;

   ret->hash = _mesa_hash_pointer(ret);
   ret->reduced_prim = get_shader_base_prim_type(nir);

   ret->programs = _mesa_pointer_set_create(NULL);
   simple_mtx_init(&ret->lock, mtx_plain);

   /* tess stages additionally need io indirects lowered */
   nir_variable_mode indirect_derefs_modes = nir_var_function_temp;
   if (nir->info.stage == MESA_SHADER_TESS_CTRL ||
       nir->info.stage == MESA_SHADER_TESS_EVAL)
      indirect_derefs_modes |= nir_var_shader_in | nir_var_shader_out;

   NIR_PASS_V(nir, nir_lower_indirect_derefs, indirect_derefs_modes,
              UINT32_MAX);

   /* stage-specific setup: push-constant layouts and tess io element split */
   if (nir->info.stage == MESA_SHADER_VERTEX)
      create_vs_pushconst(nir);
   else if (nir->info.stage == MESA_SHADER_TESS_CTRL ||
            nir->info.stage == MESA_SHADER_TESS_EVAL)
      NIR_PASS_V(nir, nir_lower_io_arrays_to_elements_no_indirects, false);
   else if (nir->info.stage == MESA_SHADER_KERNEL)
      create_cs_pushconst(nir);

   if (nir->info.stage < MESA_SHADER_FRAGMENT)
      have_psiz = check_psiz(nir);
   NIR_PASS_V(nir, lower_basevertex);
   NIR_PASS_V(nir, lower_work_dim);
   NIR_PASS_V(nir, nir_lower_regs_to_ssa);
   NIR_PASS_V(nir, lower_baseinstance);

   {
      nir_lower_subgroups_options subgroup_options = {0};
      subgroup_options.lower_to_scalar = true;
      subgroup_options.subgroup_size = screen->info.props11.subgroupSize;
      subgroup_options.ballot_bit_size = 32;
      subgroup_options.ballot_components = 4;
      subgroup_options.lower_subgroup_masks = true;
      NIR_PASS_V(nir, nir_lower_subgroups, &subgroup_options);
   }

   optimize_nir(nir);
   NIR_PASS_V(nir, nir_remove_dead_variables, nir_var_function_temp, NULL);
   NIR_PASS_V(nir, lower_discard_if);
   NIR_PASS_V(nir, nir_lower_fragcolor,
              nir->info.fs.color_is_dual_source ? 1 : 8);
   NIR_PASS_V(nir, lower_64bit_vertex_attribs);
   NIR_PASS_V(nir, unbreak_bos);

   if (zink_debug & ZINK_DEBUG_NIR) {
      fprintf(stderr, "NIR shader:\n---8<---\n");
      nir_print_shader(nir, stderr);
      fprintf(stderr, "---8<---\n");
   }

   /* bindless[]: 0 = sampler, 1 = buffer sampler, 2 = image, 3 = buffer image */
   nir_variable *bindless[4] = {0};
   bool has_bindless_io = false;
   nir_foreach_variable_with_modes(var, nir, nir_var_shader_in | nir_var_shader_out) {
      if (glsl_type_is_image(var->type) || glsl_type_is_sampler(var->type)) {
         has_bindless_io = true;
         break;
      }
   }
   if (has_bindless_io)
      NIR_PASS_V(nir, lower_bindless_io);

   /* assign descriptor sets/bindings and record them on the zink_shader */
   foreach_list_typed_reverse_safe(nir_variable, var, node, &nir->variables) {
      if (_nir_shader_variable_has_mode(var, nir_var_uniform |
                                        nir_var_mem_ubo |
                                        nir_var_mem_ssbo)) {
         enum zink_descriptor_type ztype;
         const struct glsl_type *type = glsl_without_array(var->type);
         if (var->data.mode == nir_var_mem_ubo) {
            ztype = ZINK_DESCRIPTOR_TYPE_UBO;
            /* buffer 0 is a push descriptor */
            var->data.descriptor_set = !!var->data.driver_location;
            var->data.binding = !var->data.driver_location ? nir->info.stage :
                                zink_binding(nir->info.stage,
                                             VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER,
                                             var->data.driver_location);
            assert(var->data.driver_location || var->data.binding < 10);
            VkDescriptorType vktype = !var->data.driver_location ? VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC : VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER;
            int binding = var->data.binding;

            ret->bindings[ztype][ret->num_bindings[ztype]].index = var->data.driver_location;
            ret->bindings[ztype][ret->num_bindings[ztype]].binding = binding;
            ret->bindings[ztype][ret->num_bindings[ztype]].type = vktype;
            ret->bindings[ztype][ret->num_bindings[ztype]].size = 1;
            ret->ubos_used |= (1 << ret->bindings[ztype][ret->num_bindings[ztype]].index);
            ret->num_bindings[ztype]++;
         } else if (var->data.mode == nir_var_mem_ssbo) {
            ztype = ZINK_DESCRIPTOR_TYPE_SSBO;
            var->data.descriptor_set = ztype + 1;
            var->data.binding = zink_binding(nir->info.stage,
                                             VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
                                             var->data.driver_location);
            ret->bindings[ztype][ret->num_bindings[ztype]].index = var->data.driver_location;
            ret->ssbos_used |= (1 << ret->bindings[ztype][ret->num_bindings[ztype]].index);
            ret->bindings[ztype][ret->num_bindings[ztype]].binding = var->data.binding;
            ret->bindings[ztype][ret->num_bindings[ztype]].type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER;
            ret->bindings[ztype][ret->num_bindings[ztype]].size = 1;
            ret->num_bindings[ztype]++;
         } else {
            assert(var->data.mode == nir_var_uniform);
            if (var->data.bindless) {
               /* bindless resources don't get per-shader bindings; they are
                * collected into the shared bindless arrays instead
                */
               ret->bindless = true;
               handle_bindless_var(nir, var, type, bindless);
            } else if (glsl_type_is_sampler(type) || glsl_type_is_image(type)) {
               VkDescriptorType vktype = glsl_type_is_image(type) ? zink_image_type(type) : zink_sampler_type(type);
               ztype = zink_desc_type_from_vktype(vktype);
               if (vktype == VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER)
                  ret->num_texel_buffers++;
               var->data.driver_location = var->data.binding;
               var->data.descriptor_set = ztype + 1;
               var->data.binding = zink_binding(nir->info.stage, vktype, var->data.driver_location);
               ret->bindings[ztype][ret->num_bindings[ztype]].index = var->data.driver_location;
               ret->bindings[ztype][ret->num_bindings[ztype]].binding = var->data.binding;
               ret->bindings[ztype][ret->num_bindings[ztype]].type = vktype;
               /* arrays of samplers/images occupy one binding with size > 1 */
               if (glsl_type_is_array(var->type))
                  ret->bindings[ztype][ret->num_bindings[ztype]].size = glsl_get_aoa_size(var->type);
               else
                  ret->bindings[ztype][ret->num_bindings[ztype]].size = 1;
               ret->num_bindings[ztype]++;
            }
         }
      }
   }
   bool bindless_lowered = false;
   NIR_PASS(bindless_lowered, nir, lower_bindless, bindless);
   ret->bindless |= bindless_lowered;

   ret->nir = nir;
   if (so_info && nir->info.outputs_written && nir->info.has_transform_feedback_varyings)
      update_so_info(ret, so_info, nir->info.outputs_written, have_psiz);

   return ret;
}
1469
1470 char *
zink_shader_finalize(struct pipe_screen * pscreen,void * nirptr)1471 zink_shader_finalize(struct pipe_screen *pscreen, void *nirptr)
1472 {
1473 struct zink_screen *screen = zink_screen(pscreen);
1474 nir_shader *nir = nirptr;
1475
1476 if (!screen->info.feats.features.shaderImageGatherExtended) {
1477 nir_lower_tex_options tex_opts = {0};
1478 tex_opts.lower_tg4_offsets = true;
1479 NIR_PASS_V(nir, nir_lower_tex, &tex_opts);
1480 }
1481 NIR_PASS_V(nir, nir_lower_uniforms_to_ubo, true, false);
1482 if (nir->info.stage == MESA_SHADER_GEOMETRY)
1483 NIR_PASS_V(nir, nir_lower_gs_intrinsics, nir_lower_gs_intrinsics_per_stream);
1484 optimize_nir(nir);
1485 if (nir->info.num_ubos || nir->info.num_ssbos)
1486 NIR_PASS_V(nir, nir_lower_dynamic_bo_access);
1487 nir_shader_gather_info(nir, nir_shader_get_entrypoint(nir));
1488 if (screen->driconf.inline_uniforms)
1489 nir_find_inlinable_uniforms(nir);
1490
1491 return NULL;
1492 }
1493
/* Destroy a zink_shader: detach it from every program still referencing it
 * (evicting those programs from the context caches), then free the nir and
 * the shader struct itself.
 */
void
zink_shader_free(struct zink_context *ctx, struct zink_shader *shader)
{
   struct zink_screen *screen = zink_screen(ctx->base.screen);
   set_foreach(shader->programs, entry) {
      if (shader->nir->info.stage == MESA_SHADER_COMPUTE) {
         struct zink_compute_program *comp = (void*)entry->key;
         /* evict from the compute program cache exactly once */
         if (!comp->base.removed) {
            _mesa_hash_table_remove_key(&ctx->compute_program_cache, comp->shader);
            comp->base.removed = true;
         }
         comp->shader = NULL;
         zink_compute_program_reference(screen, &comp, NULL);
      } else {
         struct zink_gfx_program *prog = (void*)entry->key;
         enum pipe_shader_type pstage = pipe_shader_type_from_mesa(shader->nir->info.stage);
         assert(pstage < ZINK_SHADER_COUNT);
         /* internally-generated tcs (is_generated) skips cache eviction —
          * NOTE(review): presumably because it never owns a cache entry; confirm
          */
         if (!prog->base.removed && (shader->nir->info.stage != MESA_SHADER_TESS_CTRL || !shader->is_generated)) {
            _mesa_hash_table_remove_key(&ctx->program_cache[prog->stages_present >> 2], prog->shaders);
            prog->base.removed = true;
         }
         prog->shaders[pstage] = NULL;
         if (shader->nir->info.stage == MESA_SHADER_TESS_EVAL && shader->generated)
            /* automatically destroy generated tcs shaders when tes is destroyed */
            zink_shader_free(ctx, shader->generated);
         zink_gfx_program_reference(screen, &prog, NULL);
      }
   }
   _mesa_set_destroy(shader->programs, NULL);
   ralloc_free(shader->nir);
   FREE(shader);
}
1526
1527
1528 /* creating a passthrough tcs shader that's roughly:
1529
1530 #version 150
1531 #extension GL_ARB_tessellation_shader : require
1532
1533 in vec4 some_var[gl_MaxPatchVertices];
1534 out vec4 some_var_out;
1535
1536 layout(push_constant) uniform tcsPushConstants {
1537 layout(offset = 0) float TessLevelInner[2];
1538 layout(offset = 8) float TessLevelOuter[4];
1539 } u_tcsPushConstants;
1540 layout(vertices = $vertices_per_patch) out;
1541 void main()
1542 {
1543 gl_TessLevelInner = u_tcsPushConstants.TessLevelInner;
1544 gl_TessLevelOuter = u_tcsPushConstants.TessLevelOuter;
1545 some_var_out = some_var[gl_InvocationID];
1546 }
1547
1548 */
/* Build the passthrough tcs described in the comment above: copies each vs
 * output through to a per-patch-vertex output and feeds the tess levels from
 * push constants.  @vertices_per_patch sizes the output patch.
 */
struct zink_shader *
zink_shader_tcs_create(struct zink_screen *screen, struct zink_shader *vs, unsigned vertices_per_patch)
{
   struct zink_shader *ret = CALLOC_STRUCT(zink_shader);
   ret->hash = _mesa_hash_pointer(ret);
   ret->programs = _mesa_pointer_set_create(NULL);
   simple_mtx_init(&ret->lock, mtx_plain);

   /* hand-build the shader with nir_builder: main() entrypoint only */
   nir_shader *nir = nir_shader_create(NULL, MESA_SHADER_TESS_CTRL, &screen->nir_options, NULL);
   nir_function *fn = nir_function_create(nir, "main");
   fn->is_entrypoint = true;
   nir_function_impl *impl = nir_function_impl_create(fn);

   nir_builder b;
   nir_builder_init(&b, impl);
   b.cursor = nir_before_block(nir_start_block(impl));

   nir_ssa_def *invocation_id = nir_load_invocation_id(&b);

   /* mirror every vs output as a tcs in/out pair and copy it through */
   nir_foreach_shader_out_variable(var, vs->nir) {
      const struct glsl_type *type = var->type;
      const struct glsl_type *in_type = var->type;
      const struct glsl_type *out_type = var->type;
      char buf[1024];
      snprintf(buf, sizeof(buf), "%s_out", var->name);
      in_type = glsl_array_type(type, 32 /* MAX_PATCH_VERTICES */, 0);
      out_type = glsl_array_type(type, vertices_per_patch, 0);

      nir_variable *in = nir_variable_create(nir, nir_var_shader_in, in_type, var->name);
      nir_variable *out = nir_variable_create(nir, nir_var_shader_out, out_type, buf);
      out->data.location = in->data.location = var->data.location;
      out->data.location_frac = in->data.location_frac = var->data.location_frac;

      /* gl_in[] receives values from equivalent built-in output
         variables written by the vertex shader (section 2.14.7).  Each array
         element of gl_in[] is a structure holding values for a specific vertex of
         the input patch.  The length of gl_in[] is equal to the
         implementation-dependent maximum patch size (gl_MaxPatchVertices).
         - ARB_tessellation_shader
       */
      for (unsigned i = 0; i < vertices_per_patch; i++) {
         /* we need to load the invocation-specific value of the vertex output and then store it to the per-patch output */
         nir_if *start_block = nir_push_if(&b, nir_ieq(&b, invocation_id, nir_imm_int(&b, i)));
         nir_deref_instr *in_array_var = nir_build_deref_array(&b, nir_build_deref_var(&b, in), invocation_id);
         nir_ssa_def *load = nir_load_deref(&b, in_array_var);
         nir_deref_instr *out_array_var = nir_build_deref_array_imm(&b, nir_build_deref_var(&b, out), i);
         nir_store_deref(&b, out_array_var, load, 0xff);
         nir_pop_if(&b, start_block);
      }
   }
   /* declare the built-in patch-level tess factor outputs */
   nir_variable *gl_TessLevelInner = nir_variable_create(nir, nir_var_shader_out, glsl_array_type(glsl_float_type(), 2, 0), "gl_TessLevelInner");
   gl_TessLevelInner->data.location = VARYING_SLOT_TESS_LEVEL_INNER;
   gl_TessLevelInner->data.patch = 1;
   nir_variable *gl_TessLevelOuter = nir_variable_create(nir, nir_var_shader_out, glsl_array_type(glsl_float_type(), 4, 0), "gl_TessLevelOuter");
   gl_TessLevelOuter->data.location = VARYING_SLOT_TESS_LEVEL_OUTER;
   gl_TessLevelOuter->data.patch = 1;

   /* hacks so we can size these right for now */
   struct glsl_struct_field *fields = rzalloc_array(nir, struct glsl_struct_field, 3);
   /* just use a single blob for padding here because it's easier */
   fields[0].type = glsl_array_type(glsl_uint_type(), offsetof(struct zink_gfx_push_constant, default_inner_level) / 4, 0);
   fields[0].name = ralloc_asprintf(nir, "padding");
   fields[0].offset = 0;
   fields[1].type = glsl_array_type(glsl_uint_type(), 2, 0);
   fields[1].name = ralloc_asprintf(nir, "gl_TessLevelInner");
   fields[1].offset = offsetof(struct zink_gfx_push_constant, default_inner_level);
   fields[2].type = glsl_array_type(glsl_uint_type(), 4, 0);
   fields[2].name = ralloc_asprintf(nir, "gl_TessLevelOuter");
   fields[2].offset = offsetof(struct zink_gfx_push_constant, default_outer_level);
   nir_variable *pushconst = nir_variable_create(nir, nir_var_mem_push_const,
                                                 glsl_struct_type(fields, 3, "struct", false), "pushconst");
   pushconst->data.location = VARYING_SLOT_VAR0;

   /* read the default tess levels from the push constant block —
    * NOTE(review): .base values here look like field indices into the struct
    * above rather than byte offsets; confirm against ntv's push-constant
    * loading convention
    */
   nir_ssa_def *load_inner = nir_load_push_constant(&b, 2, 32, nir_imm_int(&b, 1), .base = 1, .range = 8);
   nir_ssa_def *load_outer = nir_load_push_constant(&b, 4, 32, nir_imm_int(&b, 2), .base = 2, .range = 16);

   for (unsigned i = 0; i < 2; i++) {
      nir_deref_instr *store_idx = nir_build_deref_array_imm(&b, nir_build_deref_var(&b, gl_TessLevelInner), i);
      nir_store_deref(&b, store_idx, nir_channel(&b, load_inner, i), 0xff);
   }
   for (unsigned i = 0; i < 4; i++) {
      nir_deref_instr *store_idx = nir_build_deref_array_imm(&b, nir_build_deref_var(&b, gl_TessLevelOuter), i);
      nir_store_deref(&b, store_idx, nir_channel(&b, load_outer, i), 0xff);
   }

   nir->info.tess.tcs_vertices_out = vertices_per_patch;
   nir_validate_shader(nir, "created");

   /* minimal pass run: this shader goes straight to zink_shader_compile */
   NIR_PASS_V(nir, nir_lower_regs_to_ssa);
   optimize_nir(nir);
   NIR_PASS_V(nir, nir_remove_dead_variables, nir_var_function_temp, NULL);
   NIR_PASS_V(nir, lower_discard_if);
   NIR_PASS_V(nir, nir_convert_from_ssa, true);

   ret->nir = nir;
   /* mark as generated so zink_shader_free treats it specially */
   ret->is_generated = true;
   return ret;
}
1647