1 /*
2 * Copyright © 2014 Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 *
23 * Authors:
24 * Connor Abbott (cwabbott0@gmail.com)
25 * Jason Ekstrand (jason@jlekstrand.net)
26 *
27 */
28
29 /*
30 * This lowering pass converts loads and stores of input/output variable
31 * derefs into actual input/output intrinsics.
32 */
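
/*
 * Roughly speaking (an illustrative sketch only; the exact intrinsic, base,
 * component and offset depend on the shader stage, the driver's type_size
 * callback and the options passed to nir_lower_io):
 *
 *    vec4 ssa_2 = load_deref &color            (color is a shader_in variable)
 *
 * becomes something like
 *
 *    vec4 ssa_2 = load_input (ssa_offset) (base=driver_location, component=0)
 */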
33
34 #include "nir.h"
35 #include "nir_builder.h"
36 #include "nir_deref.h"
37 #include "nir_xfb_info.h"
38
39 #include "util/u_math.h"
40
41 struct lower_io_state {
42 void *dead_ctx; /* ralloc context freed at the end of the pass */
43 nir_builder builder;
44 int (*type_size)(const struct glsl_type *type, bool); /* driver callback: size of a type in driver_location units */
45 nir_variable_mode modes; /* variable modes being lowered by this run */
46 nir_lower_io_options options;
47 };
48
49 static nir_intrinsic_op
50 ssbo_atomic_for_deref(nir_intrinsic_op deref_op)
51 {
52 switch (deref_op) {
53 #define OP(O) case nir_intrinsic_deref_##O: return nir_intrinsic_ssbo_##O;
54 OP(atomic_exchange)
55 OP(atomic_comp_swap)
56 OP(atomic_add)
57 OP(atomic_imin)
58 OP(atomic_umin)
59 OP(atomic_imax)
60 OP(atomic_umax)
61 OP(atomic_and)
62 OP(atomic_or)
63 OP(atomic_xor)
64 OP(atomic_fadd)
65 OP(atomic_fmin)
66 OP(atomic_fmax)
67 OP(atomic_fcomp_swap)
68 #undef OP
69 default:
70 unreachable("Invalid SSBO atomic");
71 }
72 }
73
74 static nir_intrinsic_op
75 global_atomic_for_deref(nir_intrinsic_op deref_op)
76 {
77 switch (deref_op) {
78 #define OP(O) case nir_intrinsic_deref_##O: return nir_intrinsic_global_##O;
79 OP(atomic_exchange)
80 OP(atomic_comp_swap)
81 OP(atomic_add)
82 OP(atomic_imin)
83 OP(atomic_umin)
84 OP(atomic_imax)
85 OP(atomic_umax)
86 OP(atomic_and)
87 OP(atomic_or)
88 OP(atomic_xor)
89 OP(atomic_fadd)
90 OP(atomic_fmin)
91 OP(atomic_fmax)
92 OP(atomic_fcomp_swap)
93 #undef OP
94 default:
95 unreachable("Invalid global atomic");
96 }
97 }
98
99 static nir_intrinsic_op
100 shared_atomic_for_deref(nir_intrinsic_op deref_op)
101 {
102 switch (deref_op) {
103 #define OP(O) case nir_intrinsic_deref_##O: return nir_intrinsic_shared_##O;
104 OP(atomic_exchange)
105 OP(atomic_comp_swap)
106 OP(atomic_add)
107 OP(atomic_imin)
108 OP(atomic_umin)
109 OP(atomic_imax)
110 OP(atomic_umax)
111 OP(atomic_and)
112 OP(atomic_or)
113 OP(atomic_xor)
114 OP(atomic_fadd)
115 OP(atomic_fmin)
116 OP(atomic_fmax)
117 OP(atomic_fcomp_swap)
118 #undef OP
119 default:
120 unreachable("Invalid shared atomic");
121 }
122 }
123
124 void
125 nir_assign_var_locations(nir_shader *shader, nir_variable_mode mode,
126 unsigned *size,
127 int (*type_size)(const struct glsl_type *, bool))
128 {
129 unsigned location = 0;
130
131 nir_foreach_variable_with_modes(var, shader, mode) {
132 var->data.driver_location = location;
133 bool bindless_type_size = var->data.mode == nir_var_shader_in ||
134 var->data.mode == nir_var_shader_out ||
135 var->data.bindless;
136 location += type_size(var->type, bindless_type_size);
137 }
138
139 *size = location;
140 }
141
142 /**
143 * Some inputs and outputs are arrayed, meaning that there is an extra level
144 * of array indexing to handle mismatches between the shader interface and the
145 * dispatch pattern of the shader. For instance, geometry shaders are
146 * executed per-primitive while their inputs and outputs are specified
147 * per-vertex so all inputs and outputs have to be additionally indexed with
148 * the vertex index within the primitive.
149 */
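
/*
 * For example, a geometry shader input declared in GLSL as
 *
 *    in vec4 color[];
 *
 * is logically a single vec4 per vertex; the outer array dimension only
 * reflects the number of vertices in the input primitive and is handled
 * separately from the variable's own type.
 */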
150 bool
151 nir_is_arrayed_io(const nir_variable *var, gl_shader_stage stage)
152 {
153 if (var->data.patch || !glsl_type_is_array(var->type))
154 return false;
155
156 if (stage == MESA_SHADER_MESH) {
157 /* NV_mesh_shader: this is a flat array for the whole workgroup. */
158 if (var->data.location == VARYING_SLOT_PRIMITIVE_INDICES)
159 return var->data.per_primitive;
160 }
161
162 if (var->data.mode == nir_var_shader_in)
163 return stage == MESA_SHADER_GEOMETRY ||
164 stage == MESA_SHADER_TESS_CTRL ||
165 stage == MESA_SHADER_TESS_EVAL;
166
167 if (var->data.mode == nir_var_shader_out)
168 return stage == MESA_SHADER_TESS_CTRL ||
169 stage == MESA_SHADER_MESH;
170
171 return false;
172 }
173
174 static unsigned get_number_of_slots(struct lower_io_state *state,
175 const nir_variable *var)
176 {
177 const struct glsl_type *type = var->type;
178
179 if (nir_is_arrayed_io(var, state->builder.shader->info.stage)) {
180 assert(glsl_type_is_array(type));
181 type = glsl_get_array_element(type);
182 }
183
184 /* NV_mesh_shader:
185 * PRIMITIVE_INDICES is a flat array, not a proper arrayed output,
186 * as opposed to D3D-style mesh shaders where it's addressed by
187 * the primitive index.
188 * Prevent assigning several slots to primitive indices,
189 * to avoid some issues.
190 */
191 if (state->builder.shader->info.stage == MESA_SHADER_MESH &&
192 var->data.location == VARYING_SLOT_PRIMITIVE_INDICES &&
193 !nir_is_arrayed_io(var, state->builder.shader->info.stage))
194 return 1;
195
196 return state->type_size(type, var->data.bindless);
197 }
198
199 static nir_ssa_def *
200 get_io_offset(nir_builder *b, nir_deref_instr *deref,
201 nir_ssa_def **array_index,
202 int (*type_size)(const struct glsl_type *, bool),
203 unsigned *component, bool bts)
204 {
205 nir_deref_path path;
206 nir_deref_path_init(&path, deref, NULL);
207
208 assert(path.path[0]->deref_type == nir_deref_type_var);
209 nir_deref_instr **p = &path.path[1];
210
211 /* For arrayed I/O (e.g., per-vertex input arrays in geometry shader
212 * inputs), skip the outermost array index. Process the rest normally.
213 */
214 if (array_index != NULL) {
215 assert((*p)->deref_type == nir_deref_type_array);
216 *array_index = nir_ssa_for_src(b, (*p)->arr.index, 1);
217 p++;
218 }
219
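/* "Compact" variables are scalar arrays that are packed into vec4 slots
 * (e.g. clip/cull distances and tessellation levels), so the constant
 * element index folds into a slot offset plus a component within the slot.
 */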
220 if (path.path[0]->var->data.compact) {
221 assert((*p)->deref_type == nir_deref_type_array);
222 assert(glsl_type_is_scalar((*p)->type));
223
224 /* We always lower indirect dereferences for "compact" array vars. */
225 const unsigned index = nir_src_as_uint((*p)->arr.index);
226 const unsigned total_offset = *component + index;
227 const unsigned slot_offset = total_offset / 4;
228 *component = total_offset % 4;
229 return nir_imm_int(b, type_size(glsl_vec4_type(), bts) * slot_offset);
230 }
231
232 /* Just emit code and let constant-folding go to town */
233 nir_ssa_def *offset = nir_imm_int(b, 0);
234
235 for (; *p; p++) {
236 if ((*p)->deref_type == nir_deref_type_array) {
237 unsigned size = type_size((*p)->type, bts);
238
239 nir_ssa_def *mul =
240 nir_amul_imm(b, nir_ssa_for_src(b, (*p)->arr.index, 1), size);
241
242 offset = nir_iadd(b, offset, mul);
243 } else if ((*p)->deref_type == nir_deref_type_struct) {
244 /* p starts at path[1], so this is safe */
245 nir_deref_instr *parent = *(p - 1);
246
247 unsigned field_offset = 0;
248 for (unsigned i = 0; i < (*p)->strct.index; i++) {
249 field_offset += type_size(glsl_get_struct_field(parent->type, i), bts);
250 }
251 offset = nir_iadd_imm(b, offset, field_offset);
252 } else {
253 unreachable("Unsupported deref type");
254 }
255 }
256
257 nir_deref_path_finish(&path);
258
259 return offset;
260 }
261
262 static nir_ssa_def *
263 emit_load(struct lower_io_state *state,
264 nir_ssa_def *array_index, nir_variable *var, nir_ssa_def *offset,
265 unsigned component, unsigned num_components, unsigned bit_size,
266 nir_alu_type dest_type)
267 {
268 nir_builder *b = &state->builder;
269 const nir_shader *nir = b->shader;
270 nir_variable_mode mode = var->data.mode;
271 nir_ssa_def *barycentric = NULL;
272
273 nir_intrinsic_op op;
274 switch (mode) {
275 case nir_var_shader_in:
276 if (nir->info.stage == MESA_SHADER_FRAGMENT &&
277 nir->options->use_interpolated_input_intrinsics &&
278 var->data.interpolation != INTERP_MODE_FLAT &&
279 !var->data.per_primitive) {
280 if (var->data.interpolation == INTERP_MODE_EXPLICIT) {
281 assert(array_index != NULL);
282 op = nir_intrinsic_load_input_vertex;
283 } else {
284 assert(array_index == NULL);
285
286 nir_intrinsic_op bary_op;
287 if (var->data.sample ||
288 (state->options & nir_lower_io_force_sample_interpolation))
289 bary_op = nir_intrinsic_load_barycentric_sample;
290 else if (var->data.centroid)
291 bary_op = nir_intrinsic_load_barycentric_centroid;
292 else
293 bary_op = nir_intrinsic_load_barycentric_pixel;
294
295 barycentric = nir_load_barycentric(&state->builder, bary_op,
296 var->data.interpolation);
297 op = nir_intrinsic_load_interpolated_input;
298 }
299 } else {
300 op = array_index ? nir_intrinsic_load_per_vertex_input :
301 nir_intrinsic_load_input;
302 }
303 break;
304 case nir_var_shader_out:
305 op = !array_index ? nir_intrinsic_load_output :
306 var->data.per_primitive ? nir_intrinsic_load_per_primitive_output :
307 nir_intrinsic_load_per_vertex_output;
308 break;
309 case nir_var_uniform:
310 op = nir_intrinsic_load_uniform;
311 break;
312 default:
313 unreachable("Unknown variable mode");
314 }
315
316 nir_intrinsic_instr *load =
317 nir_intrinsic_instr_create(state->builder.shader, op);
318 load->num_components = num_components;
319
320 nir_intrinsic_set_base(load, var->data.driver_location);
321 if (mode == nir_var_shader_in || mode == nir_var_shader_out)
322 nir_intrinsic_set_component(load, component);
323
324 if (load->intrinsic == nir_intrinsic_load_uniform)
325 nir_intrinsic_set_range(load,
326 state->type_size(var->type, var->data.bindless));
327
328 if (nir_intrinsic_has_access(load))
329 nir_intrinsic_set_access(load, var->data.access);
330
331 nir_intrinsic_set_dest_type(load, dest_type);
332
333 if (load->intrinsic != nir_intrinsic_load_uniform) {
334 nir_io_semantics semantics = {0};
335 semantics.location = var->data.location;
336 semantics.num_slots = get_number_of_slots(state, var);
337 semantics.fb_fetch_output = var->data.fb_fetch_output;
338 semantics.medium_precision =
339 var->data.precision == GLSL_PRECISION_MEDIUM ||
340 var->data.precision == GLSL_PRECISION_LOW;
341 nir_intrinsic_set_io_semantics(load, semantics);
342 }
343
344 if (array_index) {
345 load->src[0] = nir_src_for_ssa(array_index);
346 load->src[1] = nir_src_for_ssa(offset);
347 } else if (barycentric) {
348 load->src[0] = nir_src_for_ssa(barycentric);
349 load->src[1] = nir_src_for_ssa(offset);
350 } else {
351 load->src[0] = nir_src_for_ssa(offset);
352 }
353
354 nir_ssa_dest_init(&load->instr, &load->dest,
355 num_components, bit_size, NULL);
356 nir_builder_instr_insert(b, &load->instr);
357
358 return &load->dest.ssa;
359 }
360
361 static nir_ssa_def *
362 lower_load(nir_intrinsic_instr *intrin, struct lower_io_state *state,
363 nir_ssa_def *array_index, nir_variable *var, nir_ssa_def *offset,
364 unsigned component, const struct glsl_type *type)
365 {
366 assert(intrin->dest.is_ssa);
367 if (intrin->dest.ssa.bit_size == 64 &&
368 (state->options & nir_lower_io_lower_64bit_to_32)) {
369 nir_builder *b = &state->builder;
370
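/* Split the 64-bit load into a sequence of 32-bit loads, one vec4 slot at
 * a time, and re-pack each pair of 32-bit components into a 64-bit value.
 */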
371 const unsigned slot_size = state->type_size(glsl_dvec_type(2), false);
372
373 nir_ssa_def *comp64[4];
374 assert(component == 0 || component == 2);
375 unsigned dest_comp = 0;
376 while (dest_comp < intrin->dest.ssa.num_components) {
377 const unsigned num_comps =
378 MIN2(intrin->dest.ssa.num_components - dest_comp,
379 (4 - component) / 2);
380
381 nir_ssa_def *data32 =
382 emit_load(state, array_index, var, offset, component,
383 num_comps * 2, 32, nir_type_uint32);
384 for (unsigned i = 0; i < num_comps; i++) {
385 comp64[dest_comp + i] =
386 nir_pack_64_2x32(b, nir_channels(b, data32, 3 << (i * 2)));
387 }
388
389 /* Only the first store has a component offset */
390 component = 0;
391 dest_comp += num_comps;
392 offset = nir_iadd_imm(b, offset, slot_size);
393 }
394
395 return nir_vec(b, comp64, intrin->dest.ssa.num_components);
396 } else if (intrin->dest.ssa.bit_size == 1) {
397 /* Booleans are 32-bit */
398 assert(glsl_type_is_boolean(type));
399 return nir_b2b1(&state->builder,
400 emit_load(state, array_index, var, offset, component,
401 intrin->dest.ssa.num_components, 32,
402 nir_type_bool32));
403 } else {
404 return emit_load(state, array_index, var, offset, component,
405 intrin->dest.ssa.num_components,
406 intrin->dest.ssa.bit_size,
407 nir_get_nir_type_for_glsl_type(type));
408 }
409 }
410
411 static void
412 emit_store(struct lower_io_state *state, nir_ssa_def *data,
413 nir_ssa_def *array_index, nir_variable *var, nir_ssa_def *offset,
414 unsigned component, unsigned num_components,
415 nir_component_mask_t write_mask, nir_alu_type src_type)
416 {
417 nir_builder *b = &state->builder;
418
419 assert(var->data.mode == nir_var_shader_out);
420 nir_intrinsic_op op =
421 !array_index ? nir_intrinsic_store_output :
422 var->data.per_primitive ? nir_intrinsic_store_per_primitive_output :
423 nir_intrinsic_store_per_vertex_output;
424
425 nir_intrinsic_instr *store =
426 nir_intrinsic_instr_create(state->builder.shader, op);
427 store->num_components = num_components;
428
429 store->src[0] = nir_src_for_ssa(data);
430
431 nir_intrinsic_set_base(store, var->data.driver_location);
432 nir_intrinsic_set_component(store, component);
433 nir_intrinsic_set_src_type(store, src_type);
434
435 nir_intrinsic_set_write_mask(store, write_mask);
436
437 if (nir_intrinsic_has_access(store))
438 nir_intrinsic_set_access(store, var->data.access);
439
440 if (array_index)
441 store->src[1] = nir_src_for_ssa(array_index);
442
443 store->src[array_index ? 2 : 1] = nir_src_for_ssa(offset);
444
445 unsigned gs_streams = 0;
446 if (state->builder.shader->info.stage == MESA_SHADER_GEOMETRY) {
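/* The io_semantics field packs one 2-bit stream index per component.
 * NIR_STREAM_PACKED means var->data.stream already holds that packed
 * form; otherwise it is a single stream index that applies to every
 * component being written.
 */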
447 if (var->data.stream & NIR_STREAM_PACKED) {
448 gs_streams = var->data.stream & ~NIR_STREAM_PACKED;
449 } else {
450 assert(var->data.stream < 4);
451 gs_streams = 0;
452 for (unsigned i = 0; i < num_components; ++i)
453 gs_streams |= var->data.stream << (2 * i);
454 }
455 }
456
457 nir_io_semantics semantics = {0};
458 semantics.location = var->data.location;
459 semantics.num_slots = get_number_of_slots(state, var);
460 semantics.dual_source_blend_index = var->data.index;
461 semantics.gs_streams = gs_streams;
462 semantics.medium_precision =
463 var->data.precision == GLSL_PRECISION_MEDIUM ||
464 var->data.precision == GLSL_PRECISION_LOW;
465 semantics.per_view = var->data.per_view;
466 semantics.invariant = var->data.invariant;
467
468 nir_intrinsic_set_io_semantics(store, semantics);
469
470 nir_builder_instr_insert(b, &store->instr);
471 }
472
473 static void
474 lower_store(nir_intrinsic_instr *intrin, struct lower_io_state *state,
475 nir_ssa_def *array_index, nir_variable *var, nir_ssa_def *offset,
476 unsigned component, const struct glsl_type *type)
477 {
478 assert(intrin->src[1].is_ssa);
479 if (intrin->src[1].ssa->bit_size == 64 &&
480 (state->options & nir_lower_io_lower_64bit_to_32)) {
481 nir_builder *b = &state->builder;
482
483 const unsigned slot_size = state->type_size(glsl_dvec_type(2), false);
484
485 assert(component == 0 || component == 2);
486 unsigned src_comp = 0;
487 nir_component_mask_t write_mask = nir_intrinsic_write_mask(intrin);
488 while (src_comp < intrin->num_components) {
489 const unsigned num_comps =
490 MIN2(intrin->num_components - src_comp,
491 (4 - component) / 2);
492
493 if (write_mask & BITFIELD_MASK(num_comps)) {
494 nir_ssa_def *data =
495 nir_channels(b, intrin->src[1].ssa,
496 BITFIELD_RANGE(src_comp, num_comps));
497 nir_ssa_def *data32 = nir_bitcast_vector(b, data, 32);
498
499 nir_component_mask_t write_mask32 = 0;
500 for (unsigned i = 0; i < num_comps; i++) {
501 if (write_mask & BITFIELD_MASK(num_comps) & (1 << i))
502 write_mask32 |= 3 << (i * 2);
503 }
504
505 emit_store(state, data32, array_index, var, offset,
506 component, data32->num_components, write_mask32,
507 nir_type_uint32);
508 }
509
510 /* Only the first store has a component offset */
511 component = 0;
512 src_comp += num_comps;
513 write_mask >>= num_comps;
514 offset = nir_iadd_imm(b, offset, slot_size);
515 }
516 } else if (intrin->dest.ssa.bit_size == 1) {
517 /* Booleans are 32-bit */
518 assert(glsl_type_is_boolean(type));
519 nir_ssa_def *b32_val = nir_b2b32(&state->builder, intrin->src[1].ssa);
520 emit_store(state, b32_val, array_index, var, offset,
521 component, intrin->num_components,
522 nir_intrinsic_write_mask(intrin),
523 nir_type_bool32);
524 } else {
525 emit_store(state, intrin->src[1].ssa, array_index, var, offset,
526 component, intrin->num_components,
527 nir_intrinsic_write_mask(intrin),
528 nir_get_nir_type_for_glsl_type(type));
529 }
530 }
531
532 static nir_ssa_def *
533 lower_interpolate_at(nir_intrinsic_instr *intrin, struct lower_io_state *state,
534 nir_variable *var, nir_ssa_def *offset, unsigned component,
535 const struct glsl_type *type)
536 {
537 nir_builder *b = &state->builder;
538 assert(var->data.mode == nir_var_shader_in);
539
540 /* Ignore interpolateAt() for flat variables - flat is flat. Lower
541 * interpolateAtVertex() for explicit variables.
542 */
543 if (var->data.interpolation == INTERP_MODE_FLAT ||
544 var->data.interpolation == INTERP_MODE_EXPLICIT) {
545 nir_ssa_def *vertex_index = NULL;
546
547 if (var->data.interpolation == INTERP_MODE_EXPLICIT) {
548 assert(intrin->intrinsic == nir_intrinsic_interp_deref_at_vertex);
549 vertex_index = intrin->src[1].ssa;
550 }
551
552 return lower_load(intrin, state, vertex_index, var, offset, component, type);
553 }
554
555 /* None of the supported APIs allow interpolation on 64-bit things */
556 assert(intrin->dest.is_ssa && intrin->dest.ssa.bit_size <= 32);
557
558 nir_intrinsic_op bary_op;
559 switch (intrin->intrinsic) {
560 case nir_intrinsic_interp_deref_at_centroid:
561 bary_op = (state->options & nir_lower_io_force_sample_interpolation) ?
562 nir_intrinsic_load_barycentric_sample :
563 nir_intrinsic_load_barycentric_centroid;
564 break;
565 case nir_intrinsic_interp_deref_at_sample:
566 bary_op = nir_intrinsic_load_barycentric_at_sample;
567 break;
568 case nir_intrinsic_interp_deref_at_offset:
569 bary_op = nir_intrinsic_load_barycentric_at_offset;
570 break;
571 default:
572 unreachable("Bogus interpolateAt() intrinsic.");
573 }
574
575 nir_intrinsic_instr *bary_setup =
576 nir_intrinsic_instr_create(state->builder.shader, bary_op);
577
578 nir_ssa_dest_init(&bary_setup->instr, &bary_setup->dest, 2, 32, NULL);
579 nir_intrinsic_set_interp_mode(bary_setup, var->data.interpolation);
580
581 if (intrin->intrinsic == nir_intrinsic_interp_deref_at_sample ||
582 intrin->intrinsic == nir_intrinsic_interp_deref_at_offset ||
583 intrin->intrinsic == nir_intrinsic_interp_deref_at_vertex)
584 nir_src_copy(&bary_setup->src[0], &intrin->src[1]);
585
586 nir_builder_instr_insert(b, &bary_setup->instr);
587
588 nir_io_semantics semantics = {0};
589 semantics.location = var->data.location;
590 semantics.num_slots = get_number_of_slots(state, var);
591 semantics.medium_precision =
592 var->data.precision == GLSL_PRECISION_MEDIUM ||
593 var->data.precision == GLSL_PRECISION_LOW;
594
595 assert(intrin->dest.is_ssa);
596 nir_ssa_def *load =
597 nir_load_interpolated_input(&state->builder,
598 intrin->dest.ssa.num_components,
599 intrin->dest.ssa.bit_size,
600 &bary_setup->dest.ssa,
601 offset,
602 .base = var->data.driver_location,
603 .component = component,
604 .io_semantics = semantics);
605
606 return load;
607 }
608
609 static bool
610 nir_lower_io_block(nir_block *block,
611 struct lower_io_state *state)
612 {
613 nir_builder *b = &state->builder;
614 const nir_shader_compiler_options *options = b->shader->options;
615 bool progress = false;
616
617 nir_foreach_instr_safe(instr, block) {
618 if (instr->type != nir_instr_type_intrinsic)
619 continue;
620
621 nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
622
623 switch (intrin->intrinsic) {
624 case nir_intrinsic_load_deref:
625 case nir_intrinsic_store_deref:
626 /* We can lower the I/O for this NIR intrinsic */
627 break;
628 case nir_intrinsic_interp_deref_at_centroid:
629 case nir_intrinsic_interp_deref_at_sample:
630 case nir_intrinsic_interp_deref_at_offset:
631 case nir_intrinsic_interp_deref_at_vertex:
632 /* We can optionally lower these to load_interpolated_input */
633 if (options->use_interpolated_input_intrinsics ||
634 options->lower_interpolate_at)
635 break;
636 FALLTHROUGH;
637 default:
638 /* We can't lower the I/O for this NIR intrinsic, so skip it */
639 continue;
640 }
641
642 nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]);
643 if (!nir_deref_mode_is_one_of(deref, state->modes))
644 continue;
645
646 nir_variable *var = nir_deref_instr_get_variable(deref);
647
648 b->cursor = nir_before_instr(instr);
649
650 const bool is_arrayed = nir_is_arrayed_io(var, b->shader->info.stage);
651
652 nir_ssa_def *offset;
653 nir_ssa_def *array_index = NULL;
654 unsigned component_offset = var->data.location_frac;
655 bool bindless_type_size = var->data.mode == nir_var_shader_in ||
656 var->data.mode == nir_var_shader_out ||
657 var->data.bindless;
658
659 if (nir_deref_instr_is_known_out_of_bounds(deref)) {
660 /* Section 5.11 (Out-of-Bounds Accesses) of the GLSL 4.60 spec says:
661 *
662 * In the subsections described above for array, vector, matrix and
663 * structure accesses, any out-of-bounds access produced undefined
664 * behavior....
665 * Out-of-bounds reads return undefined values, which
666 * include values from other variables of the active program or zero.
667 * Out-of-bounds writes may be discarded or overwrite
668 * other variables of the active program.
669 *
670 * GL_KHR_robustness and GL_ARB_robustness encourage us to return zero
671 * for reads.
672 *
673 * Otherwise get_io_offset would return out-of-bound offset which may
674 * result in out-of-bound loading/storing of inputs/outputs,
675 * that could cause issues in drivers down the line.
676 */
677 if (intrin->intrinsic != nir_intrinsic_store_deref) {
678 nir_ssa_def *zero =
679 nir_imm_zero(b, intrin->dest.ssa.num_components,
680 intrin->dest.ssa.bit_size);
681 nir_ssa_def_rewrite_uses(&intrin->dest.ssa,
682 zero);
683 }
684
685 nir_instr_remove(&intrin->instr);
686 progress = true;
687 continue;
688 }
689
690 offset = get_io_offset(b, deref, is_arrayed ? &array_index : NULL,
691 state->type_size, &component_offset,
692 bindless_type_size);
693
694 nir_ssa_def *replacement = NULL;
695
696 switch (intrin->intrinsic) {
697 case nir_intrinsic_load_deref:
698 replacement = lower_load(intrin, state, array_index, var, offset,
699 component_offset, deref->type);
700 break;
701
702 case nir_intrinsic_store_deref:
703 lower_store(intrin, state, array_index, var, offset,
704 component_offset, deref->type);
705 break;
706
707 case nir_intrinsic_interp_deref_at_centroid:
708 case nir_intrinsic_interp_deref_at_sample:
709 case nir_intrinsic_interp_deref_at_offset:
710 case nir_intrinsic_interp_deref_at_vertex:
711 assert(array_index == NULL);
712 replacement = lower_interpolate_at(intrin, state, var, offset,
713 component_offset, deref->type);
714 break;
715
716 default:
717 continue;
718 }
719
720 if (replacement) {
721 nir_ssa_def_rewrite_uses(&intrin->dest.ssa,
722 replacement);
723 }
724 nir_instr_remove(&intrin->instr);
725 progress = true;
726 }
727
728 return progress;
729 }
730
731 static bool
732 nir_lower_io_impl(nir_function_impl *impl,
733 nir_variable_mode modes,
734 int (*type_size)(const struct glsl_type *, bool),
735 nir_lower_io_options options)
736 {
737 struct lower_io_state state;
738 bool progress = false;
739
740 nir_builder_init(&state.builder, impl);
741 state.dead_ctx = ralloc_context(NULL);
742 state.modes = modes;
743 state.type_size = type_size;
744 state.options = options;
745
746 ASSERTED nir_variable_mode supported_modes =
747 nir_var_shader_in | nir_var_shader_out | nir_var_uniform;
748 assert(!(modes & ~supported_modes));
749
750 nir_foreach_block(block, impl) {
751 progress |= nir_lower_io_block(block, &state);
752 }
753
754 ralloc_free(state.dead_ctx);
755
756 nir_metadata_preserve(impl, nir_metadata_none);
757
758 return progress;
759 }
760
761 /** Lower load/store_deref intrinsics on I/O variables to offset-based intrinsics
762 *
763 * This pass is intended to be used for cross-stage shader I/O and driver-
764 * managed uniforms to turn deref-based access into a simpler model using
765 * locations or offsets. For fragment shader inputs, it can optionally turn
766 * load_deref into an explicit interpolation using barycentrics coming from
767 * one of the load_barycentric_* intrinsics. This pass requires that all
768 * deref chains are complete and contain no casts.
769 */
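/* A typical driver invocation might look like the following sketch (the
 * type_size callback and option flags shown here are assumptions and vary
 * per driver):
 *
 *    NIR_PASS_V(nir, nir_lower_io,
 *               nir_var_shader_in | nir_var_shader_out,
 *               driver_type_size_vec4, nir_lower_io_lower_64bit_to_32);
 */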
770 bool
771 nir_lower_io(nir_shader *shader, nir_variable_mode modes,
772 int (*type_size)(const struct glsl_type *, bool),
773 nir_lower_io_options options)
774 {
775 bool progress = false;
776
777 nir_foreach_function(function, shader) {
778 if (function->impl) {
779 progress |= nir_lower_io_impl(function->impl, modes,
780 type_size, options);
781 }
782 }
783
784 return progress;
785 }
786
787 static unsigned
788 type_scalar_size_bytes(const struct glsl_type *type)
789 {
790 assert(glsl_type_is_vector_or_scalar(type) ||
791 glsl_type_is_matrix(type));
792 return glsl_type_is_boolean(type) ? 4 : glsl_get_bit_size(type) / 8;
793 }
794
795 static nir_ssa_def *
796 build_addr_iadd(nir_builder *b, nir_ssa_def *addr,
797 nir_address_format addr_format,
798 nir_variable_mode modes,
799 nir_ssa_def *offset)
800 {
801 assert(offset->num_components == 1);
802
803 switch (addr_format) {
804 case nir_address_format_32bit_global:
805 case nir_address_format_64bit_global:
806 case nir_address_format_32bit_offset:
807 assert(addr->bit_size == offset->bit_size);
808 assert(addr->num_components == 1);
809 return nir_iadd(b, addr, offset);
810
811 case nir_address_format_32bit_offset_as_64bit:
812 assert(addr->num_components == 1);
813 assert(offset->bit_size == 32);
814 return nir_u2u64(b, nir_iadd(b, nir_u2u32(b, addr), offset));
815
816 case nir_address_format_64bit_global_32bit_offset:
817 case nir_address_format_64bit_bounded_global:
818 assert(addr->num_components == 4);
819 assert(addr->bit_size == offset->bit_size);
820 return nir_vector_insert_imm(b, addr, nir_iadd(b, nir_channel(b, addr, 3), offset), 3);
821
822 case nir_address_format_32bit_index_offset:
823 assert(addr->num_components == 2);
824 assert(addr->bit_size == offset->bit_size);
825 return nir_vector_insert_imm(b, addr, nir_iadd(b, nir_channel(b, addr, 1), offset), 1);
826
827 case nir_address_format_32bit_index_offset_pack64:
828 assert(addr->num_components == 1);
829 assert(offset->bit_size == 32);
830 return nir_pack_64_2x32_split(b,
831 nir_iadd(b, nir_unpack_64_2x32_split_x(b, addr), offset),
832 nir_unpack_64_2x32_split_y(b, addr));
833
834 case nir_address_format_vec2_index_32bit_offset:
835 assert(addr->num_components == 3);
836 assert(offset->bit_size == 32);
837 return nir_vector_insert_imm(b, addr, nir_iadd(b, nir_channel(b, addr, 2), offset), 2);
838
839 case nir_address_format_62bit_generic:
840 assert(addr->num_components == 1);
841 assert(addr->bit_size == 64);
842 assert(offset->bit_size == 64);
843 if (!(modes & ~(nir_var_function_temp |
844 nir_var_shader_temp |
845 nir_var_mem_shared))) {
846 /* If we're sure it's one of these modes, we can do an easy 32-bit
847 * addition and don't need to bother with 64-bit math.
848 */
849 nir_ssa_def *addr32 = nir_unpack_64_2x32_split_x(b, addr);
850 nir_ssa_def *type = nir_unpack_64_2x32_split_y(b, addr);
851 addr32 = nir_iadd(b, addr32, nir_u2u32(b, offset));
852 return nir_pack_64_2x32_split(b, addr32, type);
853 } else {
854 return nir_iadd(b, addr, offset);
855 }
856
857 case nir_address_format_logical:
858 unreachable("Unsupported address format");
859 }
860 unreachable("Invalid address format");
861 }
862
863 static unsigned
864 addr_get_offset_bit_size(nir_ssa_def *addr, nir_address_format addr_format)
865 {
866 if (addr_format == nir_address_format_32bit_offset_as_64bit ||
867 addr_format == nir_address_format_32bit_index_offset_pack64)
868 return 32;
869 return addr->bit_size;
870 }
871
872 static nir_ssa_def *
873 build_addr_iadd_imm(nir_builder *b, nir_ssa_def *addr,
874 nir_address_format addr_format,
875 nir_variable_mode modes,
876 int64_t offset)
877 {
878 return build_addr_iadd(b, addr, addr_format, modes,
879 nir_imm_intN_t(b, offset,
880 addr_get_offset_bit_size(addr, addr_format)));
881 }
882
883 static nir_ssa_def *
884 build_addr_for_var(nir_builder *b, nir_variable *var,
885 nir_address_format addr_format)
886 {
887 assert(var->data.mode & (nir_var_uniform | nir_var_mem_shared |
888 nir_var_mem_task_payload |
889 nir_var_mem_global |
890 nir_var_shader_temp | nir_var_function_temp |
891 nir_var_mem_push_const | nir_var_mem_constant));
892
893 const unsigned num_comps = nir_address_format_num_components(addr_format);
894 const unsigned bit_size = nir_address_format_bit_size(addr_format);
895
896 switch (addr_format) {
897 case nir_address_format_32bit_global:
898 case nir_address_format_64bit_global: {
899 nir_ssa_def *base_addr;
900 switch (var->data.mode) {
901 case nir_var_shader_temp:
902 base_addr = nir_load_scratch_base_ptr(b, num_comps, bit_size, 0);
903 break;
904
905 case nir_var_function_temp:
906 base_addr = nir_load_scratch_base_ptr(b, num_comps, bit_size, 1);
907 break;
908
909 case nir_var_mem_constant:
910 base_addr = nir_load_constant_base_ptr(b, num_comps, bit_size);
911 break;
912
913 case nir_var_mem_shared:
914 base_addr = nir_load_shared_base_ptr(b, num_comps, bit_size);
915 break;
916
917 case nir_var_mem_global:
918 base_addr = nir_load_global_base_ptr(b, num_comps, bit_size);
919 break;
920
921 default:
922 unreachable("Unsupported variable mode");
923 }
924
925 return build_addr_iadd_imm(b, base_addr, addr_format, var->data.mode,
926 var->data.driver_location);
927 }
928
929 case nir_address_format_32bit_offset:
930 assert(var->data.driver_location <= UINT32_MAX);
931 return nir_imm_int(b, var->data.driver_location);
932
933 case nir_address_format_32bit_offset_as_64bit:
934 assert(var->data.driver_location <= UINT32_MAX);
935 return nir_imm_int64(b, var->data.driver_location);
936
937 case nir_address_format_62bit_generic:
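/* The top two bits of a 62bit_generic address encode the mode:
 * 0 (or 3) = global, 1 = shared, 2 = shader/function temp.  See
 * build_runtime_addr_mode_check() below.
 */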
938 switch (var->data.mode) {
939 case nir_var_shader_temp:
940 case nir_var_function_temp:
941 assert(var->data.driver_location <= UINT32_MAX);
942 return nir_imm_intN_t(b, var->data.driver_location | 2ull << 62, 64);
943
944 case nir_var_mem_shared:
945 assert(var->data.driver_location <= UINT32_MAX);
946 return nir_imm_intN_t(b, var->data.driver_location | 1ull << 62, 64);
947
948 case nir_var_mem_global:
949 return nir_iadd_imm(b, nir_load_global_base_ptr(b, num_comps, bit_size),
950 var->data.driver_location);
951
952 default:
953 unreachable("Unsupported variable mode");
954 }
955
956 default:
957 unreachable("Unsupported address format");
958 }
959 }
960
961 static nir_ssa_def *
962 build_runtime_addr_mode_check(nir_builder *b, nir_ssa_def *addr,
963 nir_address_format addr_format,
964 nir_variable_mode mode)
965 {
966 /* The compile-time check failed; do a run-time check */
967 switch (addr_format) {
968 case nir_address_format_62bit_generic: {
969 assert(addr->num_components == 1);
970 assert(addr->bit_size == 64);
971 nir_ssa_def *mode_enum = nir_ushr(b, addr, nir_imm_int(b, 62));
972 switch (mode) {
973 case nir_var_function_temp:
974 case nir_var_shader_temp:
975 return nir_ieq_imm(b, mode_enum, 0x2);
976
977 case nir_var_mem_shared:
978 return nir_ieq_imm(b, mode_enum, 0x1);
979
980 case nir_var_mem_global:
981 return nir_ior(b, nir_ieq_imm(b, mode_enum, 0x0),
982 nir_ieq_imm(b, mode_enum, 0x3));
983
984 default:
985 unreachable("Invalid mode check intrinsic");
986 }
987 }
988
989 default:
990 unreachable("Unsupported address mode");
991 }
992 }
993
994 unsigned
995 nir_address_format_bit_size(nir_address_format addr_format)
996 {
997 switch (addr_format) {
998 case nir_address_format_32bit_global: return 32;
999 case nir_address_format_64bit_global: return 64;
1000 case nir_address_format_64bit_global_32bit_offset: return 32;
1001 case nir_address_format_64bit_bounded_global: return 32;
1002 case nir_address_format_32bit_index_offset: return 32;
1003 case nir_address_format_32bit_index_offset_pack64: return 64;
1004 case nir_address_format_vec2_index_32bit_offset: return 32;
1005 case nir_address_format_62bit_generic: return 64;
1006 case nir_address_format_32bit_offset: return 32;
1007 case nir_address_format_32bit_offset_as_64bit: return 64;
1008 case nir_address_format_logical: return 32;
1009 }
1010 unreachable("Invalid address format");
1011 }
1012
1013 unsigned
1014 nir_address_format_num_components(nir_address_format addr_format)
1015 {
1016 switch (addr_format) {
1017 case nir_address_format_32bit_global: return 1;
1018 case nir_address_format_64bit_global: return 1;
1019 case nir_address_format_64bit_global_32bit_offset: return 4;
1020 case nir_address_format_64bit_bounded_global: return 4;
1021 case nir_address_format_32bit_index_offset: return 2;
1022 case nir_address_format_32bit_index_offset_pack64: return 1;
1023 case nir_address_format_vec2_index_32bit_offset: return 3;
1024 case nir_address_format_62bit_generic: return 1;
1025 case nir_address_format_32bit_offset: return 1;
1026 case nir_address_format_32bit_offset_as_64bit: return 1;
1027 case nir_address_format_logical: return 1;
1028 }
1029 unreachable("Invalid address format");
1030 }
1031
1032 static nir_ssa_def *
1033 addr_to_index(nir_builder *b, nir_ssa_def *addr,
1034 nir_address_format addr_format)
1035 {
1036 switch (addr_format) {
1037 case nir_address_format_32bit_index_offset:
1038 assert(addr->num_components == 2);
1039 return nir_channel(b, addr, 0);
1040 case nir_address_format_32bit_index_offset_pack64:
1041 return nir_unpack_64_2x32_split_y(b, addr);
1042 case nir_address_format_vec2_index_32bit_offset:
1043 assert(addr->num_components == 3);
1044 return nir_channels(b, addr, 0x3);
1045 default: unreachable("Invalid address format");
1046 }
1047 }
1048
1049 static nir_ssa_def *
1050 addr_to_offset(nir_builder *b, nir_ssa_def *addr,
1051 nir_address_format addr_format)
1052 {
1053 switch (addr_format) {
1054 case nir_address_format_32bit_index_offset:
1055 assert(addr->num_components == 2);
1056 return nir_channel(b, addr, 1);
1057 case nir_address_format_32bit_index_offset_pack64:
1058 return nir_unpack_64_2x32_split_x(b, addr);
1059 case nir_address_format_vec2_index_32bit_offset:
1060 assert(addr->num_components == 3);
1061 return nir_channel(b, addr, 2);
1062 case nir_address_format_32bit_offset:
1063 return addr;
1064 case nir_address_format_32bit_offset_as_64bit:
1065 case nir_address_format_62bit_generic:
1066 return nir_u2u32(b, addr);
1067 default:
1068 unreachable("Invalid address format");
1069 }
1070 }
1071
1072 /** Returns true if the given address format resolves to a global address */
1073 static bool
1074 addr_format_is_global(nir_address_format addr_format,
1075 nir_variable_mode mode)
1076 {
1077 if (addr_format == nir_address_format_62bit_generic)
1078 return mode == nir_var_mem_global;
1079
1080 return addr_format == nir_address_format_32bit_global ||
1081 addr_format == nir_address_format_64bit_global ||
1082 addr_format == nir_address_format_64bit_global_32bit_offset ||
1083 addr_format == nir_address_format_64bit_bounded_global;
1084 }
1085
1086 static bool
1087 addr_format_is_offset(nir_address_format addr_format,
1088 nir_variable_mode mode)
1089 {
1090 if (addr_format == nir_address_format_62bit_generic)
1091 return mode != nir_var_mem_global;
1092
1093 return addr_format == nir_address_format_32bit_offset ||
1094 addr_format == nir_address_format_32bit_offset_as_64bit;
1095 }
1096
1097 static nir_ssa_def *
1098 addr_to_global(nir_builder *b, nir_ssa_def *addr,
1099 nir_address_format addr_format)
1100 {
1101 switch (addr_format) {
1102 case nir_address_format_32bit_global:
1103 case nir_address_format_64bit_global:
1104 case nir_address_format_62bit_generic:
1105 assert(addr->num_components == 1);
1106 return addr;
1107
1108 case nir_address_format_64bit_global_32bit_offset:
1109 case nir_address_format_64bit_bounded_global:
1110 assert(addr->num_components == 4);
1111 return nir_iadd(b, nir_pack_64_2x32(b, nir_channels(b, addr, 0x3)),
1112 nir_u2u64(b, nir_channel(b, addr, 3)));
1113
1114 case nir_address_format_32bit_index_offset:
1115 case nir_address_format_32bit_index_offset_pack64:
1116 case nir_address_format_vec2_index_32bit_offset:
1117 case nir_address_format_32bit_offset:
1118 case nir_address_format_32bit_offset_as_64bit:
1119 case nir_address_format_logical:
1120 unreachable("Cannot get a 64-bit address with this address format");
1121 }
1122
1123 unreachable("Invalid address format");
1124 }
1125
1126 static bool
1127 addr_format_needs_bounds_check(nir_address_format addr_format)
1128 {
1129 return addr_format == nir_address_format_64bit_bounded_global;
1130 }
1131
1132 static nir_ssa_def *
1133 addr_is_in_bounds(nir_builder *b, nir_ssa_def *addr,
1134 nir_address_format addr_format, unsigned size)
1135 {
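/* A 64bit_bounded_global address is a vec4: components 0-1 hold the
 * 64-bit base pointer, component 2 the buffer size and component 3 the
 * offset (see addr_to_global() above), so the access is in bounds when
 * size >= offset + access size.
 */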
1136 assert(addr_format == nir_address_format_64bit_bounded_global);
1137 assert(addr->num_components == 4);
1138 return nir_ige(b, nir_channel(b, addr, 2),
1139 nir_iadd_imm(b, nir_channel(b, addr, 3), size));
1140 }
1141
1142 static void
1143 nir_get_explicit_deref_range(nir_deref_instr *deref,
1144 nir_address_format addr_format,
1145 uint32_t *out_base,
1146 uint32_t *out_range)
1147 {
1148 uint32_t base = 0;
1149 uint32_t range = glsl_get_explicit_size(deref->type, false);
1150
1151 while (true) {
1152 nir_deref_instr *parent = nir_deref_instr_parent(deref);
1153
1154 switch (deref->deref_type) {
1155 case nir_deref_type_array:
1156 case nir_deref_type_array_wildcard:
1157 case nir_deref_type_ptr_as_array: {
1158 const unsigned stride = nir_deref_instr_array_stride(deref);
1159 if (stride == 0)
1160 goto fail;
1161
1162 if (!parent)
1163 goto fail;
1164
1165 if (deref->deref_type != nir_deref_type_array_wildcard &&
1166 nir_src_is_const(deref->arr.index)) {
1167 base += stride * nir_src_as_uint(deref->arr.index);
1168 } else {
1169 if (glsl_get_length(parent->type) == 0)
1170 goto fail;
1171 range += stride * (glsl_get_length(parent->type) - 1);
1172 }
1173 break;
1174 }
1175
1176 case nir_deref_type_struct: {
1177 if (!parent)
1178 goto fail;
1179
1180 base += glsl_get_struct_field_offset(parent->type, deref->strct.index);
1181 break;
1182 }
1183
1184 case nir_deref_type_cast: {
1185 nir_instr *parent_instr = deref->parent.ssa->parent_instr;
1186
1187 switch (parent_instr->type) {
1188 case nir_instr_type_load_const: {
1189 nir_load_const_instr *load = nir_instr_as_load_const(parent_instr);
1190
1191 switch (addr_format) {
1192 case nir_address_format_32bit_offset:
1193 base += load->value[1].u32;
1194 break;
1195 case nir_address_format_32bit_index_offset:
1196 base += load->value[1].u32;
1197 break;
1198 case nir_address_format_vec2_index_32bit_offset:
1199 base += load->value[2].u32;
1200 break;
1201 default:
1202 goto fail;
1203 }
1204
1205 *out_base = base;
1206 *out_range = range;
1207 return;
1208 }
1209
1210 case nir_instr_type_intrinsic: {
1211 nir_intrinsic_instr *intr = nir_instr_as_intrinsic(parent_instr);
1212 switch (intr->intrinsic) {
1213 case nir_intrinsic_load_vulkan_descriptor:
1214 /* Assume that a load_vulkan_descriptor won't contribute to an
1215 * offset within the resource.
1216 */
1217 break;
1218 default:
1219 goto fail;
1220 }
1221
1222 *out_base = base;
1223 *out_range = range;
1224 return;
1225 }
1226
1227 default:
1228 goto fail;
1229 }
1230 }
1231
1232 default:
1233 goto fail;
1234 }
1235
1236 deref = parent;
1237 }
1238
1239 fail:
1240 *out_base = 0;
1241 *out_range = ~0;
1242 }
1243
1244 static nir_variable_mode
1245 canonicalize_generic_modes(nir_variable_mode modes)
1246 {
1247 assert(modes != 0);
1248 if (util_bitcount(modes) == 1)
1249 return modes;
1250
1251 assert(!(modes & ~(nir_var_function_temp | nir_var_shader_temp |
1252 nir_var_mem_shared | nir_var_mem_global)));
1253
1254 /* Canonicalize by converting shader_temp to function_temp */
1255 if (modes & nir_var_shader_temp) {
1256 modes &= ~nir_var_shader_temp;
1257 modes |= nir_var_function_temp;
1258 }
1259
1260 return modes;
1261 }
1262
1263 static nir_ssa_def *
1264 build_explicit_io_load(nir_builder *b, nir_intrinsic_instr *intrin,
1265 nir_ssa_def *addr, nir_address_format addr_format,
1266 nir_variable_mode modes,
1267 uint32_t align_mul, uint32_t align_offset,
1268 unsigned num_components)
1269 {
1270 nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]);
1271 modes = canonicalize_generic_modes(modes);
1272
1273 if (util_bitcount(modes) > 1) {
1274 if (addr_format_is_global(addr_format, modes)) {
1275 return build_explicit_io_load(b, intrin, addr, addr_format,
1276 nir_var_mem_global,
1277 align_mul, align_offset,
1278 num_components);
1279 } else if (modes & nir_var_function_temp) {
1280 nir_push_if(b, build_runtime_addr_mode_check(b, addr, addr_format,
1281 nir_var_function_temp));
1282 nir_ssa_def *res1 =
1283 build_explicit_io_load(b, intrin, addr, addr_format,
1284 nir_var_function_temp,
1285 align_mul, align_offset,
1286 num_components);
1287 nir_push_else(b, NULL);
1288 nir_ssa_def *res2 =
1289 build_explicit_io_load(b, intrin, addr, addr_format,
1290 modes & ~nir_var_function_temp,
1291 align_mul, align_offset,
1292 num_components);
1293 nir_pop_if(b, NULL);
1294 return nir_if_phi(b, res1, res2);
1295 } else {
1296 nir_push_if(b, build_runtime_addr_mode_check(b, addr, addr_format,
1297 nir_var_mem_shared));
1298 assert(modes & nir_var_mem_shared);
1299 nir_ssa_def *res1 =
1300 build_explicit_io_load(b, intrin, addr, addr_format,
1301 nir_var_mem_shared,
1302 align_mul, align_offset,
1303 num_components);
1304 nir_push_else(b, NULL);
1305 assert(modes & nir_var_mem_global);
1306 nir_ssa_def *res2 =
1307 build_explicit_io_load(b, intrin, addr, addr_format,
1308 nir_var_mem_global,
1309 align_mul, align_offset,
1310 num_components);
1311 nir_pop_if(b, NULL);
1312 return nir_if_phi(b, res1, res2);
1313 }
1314 }
1315
1316 assert(util_bitcount(modes) == 1);
1317 const nir_variable_mode mode = modes;
1318
1319 nir_intrinsic_op op;
1320 switch (intrin->intrinsic) {
1321 case nir_intrinsic_load_deref:
1322 switch (mode) {
1323 case nir_var_mem_ubo:
1324 if (addr_format == nir_address_format_64bit_global_32bit_offset)
1325 op = nir_intrinsic_load_global_constant_offset;
1326 else if (addr_format == nir_address_format_64bit_bounded_global)
1327 op = nir_intrinsic_load_global_constant_bounded;
1328 else if (addr_format_is_global(addr_format, mode))
1329 op = nir_intrinsic_load_global_constant;
1330 else
1331 op = nir_intrinsic_load_ubo;
1332 break;
1333 case nir_var_mem_ssbo:
1334 if (addr_format_is_global(addr_format, mode))
1335 op = nir_intrinsic_load_global;
1336 else
1337 op = nir_intrinsic_load_ssbo;
1338 break;
1339 case nir_var_mem_global:
1340 assert(addr_format_is_global(addr_format, mode));
1341 op = nir_intrinsic_load_global;
1342 break;
1343 case nir_var_uniform:
1344 assert(addr_format_is_offset(addr_format, mode));
1345 assert(b->shader->info.stage == MESA_SHADER_KERNEL);
1346 op = nir_intrinsic_load_kernel_input;
1347 break;
1348 case nir_var_mem_shared:
1349 assert(addr_format_is_offset(addr_format, mode));
1350 op = nir_intrinsic_load_shared;
1351 break;
1352 case nir_var_mem_task_payload:
1353 assert(addr_format_is_offset(addr_format, mode));
1354 op = nir_intrinsic_load_task_payload;
1355 break;
1356 case nir_var_shader_temp:
1357 case nir_var_function_temp:
1358 if (addr_format_is_offset(addr_format, mode)) {
1359 op = nir_intrinsic_load_scratch;
1360 } else {
1361 assert(addr_format_is_global(addr_format, mode));
1362 op = nir_intrinsic_load_global;
1363 }
1364 break;
1365 case nir_var_mem_push_const:
1366 assert(addr_format == nir_address_format_32bit_offset);
1367 op = nir_intrinsic_load_push_constant;
1368 break;
1369 case nir_var_mem_constant:
1370 if (addr_format_is_offset(addr_format, mode)) {
1371 op = nir_intrinsic_load_constant;
1372 } else {
1373 assert(addr_format_is_global(addr_format, mode));
1374 op = nir_intrinsic_load_global_constant;
1375 }
1376 break;
1377 default:
1378 unreachable("Unsupported explicit IO variable mode");
1379 }
1380 break;
1381
1382 case nir_intrinsic_load_deref_block_intel:
1383 switch (mode) {
1384 case nir_var_mem_ssbo:
1385 if (addr_format_is_global(addr_format, mode))
1386 op = nir_intrinsic_load_global_block_intel;
1387 else
1388 op = nir_intrinsic_load_ssbo_block_intel;
1389 break;
1390 case nir_var_mem_global:
1391 op = nir_intrinsic_load_global_block_intel;
1392 break;
1393 case nir_var_mem_shared:
1394 op = nir_intrinsic_load_shared_block_intel;
1395 break;
1396 default:
1397 unreachable("Unsupported explicit IO variable mode");
1398 }
1399 break;
1400
1401 default:
1402 unreachable("Invalid intrinsic");
1403 }
1404
1405 nir_intrinsic_instr *load = nir_intrinsic_instr_create(b->shader, op);
1406
1407 if (op == nir_intrinsic_load_global_constant_offset) {
1408 assert(addr_format == nir_address_format_64bit_global_32bit_offset);
1409 load->src[0] = nir_src_for_ssa(
1410 nir_pack_64_2x32(b, nir_channels(b, addr, 0x3)));
1411 load->src[1] = nir_src_for_ssa(nir_channel(b, addr, 3));
1412 } else if (op == nir_intrinsic_load_global_constant_bounded) {
1413 assert(addr_format == nir_address_format_64bit_bounded_global);
1414 load->src[0] = nir_src_for_ssa(
1415 nir_pack_64_2x32(b, nir_channels(b, addr, 0x3)));
1416 load->src[1] = nir_src_for_ssa(nir_channel(b, addr, 3));
1417 load->src[2] = nir_src_for_ssa(nir_channel(b, addr, 2));
1418 } else if (addr_format_is_global(addr_format, mode)) {
1419 load->src[0] = nir_src_for_ssa(addr_to_global(b, addr, addr_format));
1420 } else if (addr_format_is_offset(addr_format, mode)) {
1421 assert(addr->num_components == 1);
1422 load->src[0] = nir_src_for_ssa(addr_to_offset(b, addr, addr_format));
1423 } else {
1424 load->src[0] = nir_src_for_ssa(addr_to_index(b, addr, addr_format));
1425 load->src[1] = nir_src_for_ssa(addr_to_offset(b, addr, addr_format));
1426 }
1427
1428 if (nir_intrinsic_has_access(load))
1429 nir_intrinsic_set_access(load, nir_intrinsic_access(intrin));
1430
1431 if (op == nir_intrinsic_load_constant) {
1432 nir_intrinsic_set_base(load, 0);
1433 nir_intrinsic_set_range(load, b->shader->constant_data_size);
1434 } else if (mode == nir_var_mem_push_const) {
1435 /* Push-constant derefs are required to be chaseable back to the
1436 * variable so that we can provide a base/range.
1437 */
1438 nir_variable *var = nir_deref_instr_get_variable(deref);
1439 nir_intrinsic_set_base(load, 0);
1440 nir_intrinsic_set_range(load, glsl_get_explicit_size(var->type, false));
1441 }
1442
1443 unsigned bit_size = intrin->dest.ssa.bit_size;
1444 if (bit_size == 1) {
1445 /* TODO: Make the native bool bit_size an option. */
1446 bit_size = 32;
1447 }
1448
1449 if (nir_intrinsic_has_align(load))
1450 nir_intrinsic_set_align(load, align_mul, align_offset);
1451
1452 if (nir_intrinsic_has_range_base(load)) {
1453 unsigned base, range;
1454 nir_get_explicit_deref_range(deref, addr_format, &base, &range);
1455 nir_intrinsic_set_range_base(load, base);
1456 nir_intrinsic_set_range(load, range);
1457 }
1458
1459 assert(intrin->dest.is_ssa);
1460 load->num_components = num_components;
1461 nir_ssa_dest_init(&load->instr, &load->dest, num_components,
1462 bit_size, NULL);
1463
1464 assert(bit_size % 8 == 0);
1465
1466 nir_ssa_def *result;
1467 if (addr_format_needs_bounds_check(addr_format) &&
1468 op != nir_intrinsic_load_global_constant_bounded) {
1469 /* We don't need to bounds-check global_constant_bounded because bounds
1470 * checking is handled by the intrinsic itself.
1471 *
1472 * The Vulkan spec for robustBufferAccess gives us quite a few options
1473 * as to what we can do with an OOB read. Unfortunately, returning
1474 * undefined values isn't one of them so we return an actual zero.
1475 */
1476 nir_ssa_def *zero = nir_imm_zero(b, load->num_components, bit_size);
1477
1478 /* TODO: Better handle block_intel. */
1479 const unsigned load_size = (bit_size / 8) * load->num_components;
1480 nir_push_if(b, addr_is_in_bounds(b, addr, addr_format, load_size));
1481
1482 nir_builder_instr_insert(b, &load->instr);
1483
1484 nir_pop_if(b, NULL);
1485
1486 result = nir_if_phi(b, &load->dest.ssa, zero);
1487 } else {
1488 nir_builder_instr_insert(b, &load->instr);
1489 result = &load->dest.ssa;
1490 }
1491
1492 if (intrin->dest.ssa.bit_size == 1) {
1493 /* For shared, we can go ahead and use NIR's and/or the back-end's
1494 * standard encoding for booleans rather than forcing a 0/1 boolean.
1495 * This should save an instruction or two.
1496 */
1497 if (mode == nir_var_mem_shared ||
1498 mode == nir_var_shader_temp ||
1499 mode == nir_var_function_temp)
1500 result = nir_b2b1(b, result);
1501 else
1502 result = nir_i2b(b, result);
1503 }
1504
1505 return result;
1506 }
1507
1508 static void
1509 build_explicit_io_store(nir_builder *b, nir_intrinsic_instr *intrin,
1510 nir_ssa_def *addr, nir_address_format addr_format,
1511 nir_variable_mode modes,
1512 uint32_t align_mul, uint32_t align_offset,
1513 nir_ssa_def *value, nir_component_mask_t write_mask)
1514 {
1515 modes = canonicalize_generic_modes(modes);
1516
1517 if (util_bitcount(modes) > 1) {
1518 if (addr_format_is_global(addr_format, modes)) {
1519 build_explicit_io_store(b, intrin, addr, addr_format,
1520 nir_var_mem_global,
1521 align_mul, align_offset,
1522 value, write_mask);
1523 } else if (modes & nir_var_function_temp) {
1524 nir_push_if(b, build_runtime_addr_mode_check(b, addr, addr_format,
1525 nir_var_function_temp));
1526 build_explicit_io_store(b, intrin, addr, addr_format,
1527 nir_var_function_temp,
1528 align_mul, align_offset,
1529 value, write_mask);
1530 nir_push_else(b, NULL);
1531 build_explicit_io_store(b, intrin, addr, addr_format,
1532 modes & ~nir_var_function_temp,
1533 align_mul, align_offset,
1534 value, write_mask);
1535 nir_pop_if(b, NULL);
1536 } else {
1537 nir_push_if(b, build_runtime_addr_mode_check(b, addr, addr_format,
1538 nir_var_mem_shared));
1539 assert(modes & nir_var_mem_shared);
1540 build_explicit_io_store(b, intrin, addr, addr_format,
1541 nir_var_mem_shared,
1542 align_mul, align_offset,
1543 value, write_mask);
1544 nir_push_else(b, NULL);
1545 assert(modes & nir_var_mem_global);
1546 build_explicit_io_store(b, intrin, addr, addr_format,
1547 nir_var_mem_global,
1548 align_mul, align_offset,
1549 value, write_mask);
1550 nir_pop_if(b, NULL);
1551 }
1552 return;
1553 }
1554
1555 assert(util_bitcount(modes) == 1);
1556 const nir_variable_mode mode = modes;
1557
1558 nir_intrinsic_op op;
1559 switch (intrin->intrinsic) {
1560 case nir_intrinsic_store_deref:
1561 assert(write_mask != 0);
1562
1563 switch (mode) {
1564 case nir_var_mem_ssbo:
1565 if (addr_format_is_global(addr_format, mode))
1566 op = nir_intrinsic_store_global;
1567 else
1568 op = nir_intrinsic_store_ssbo;
1569 break;
1570 case nir_var_mem_global:
1571 assert(addr_format_is_global(addr_format, mode));
1572 op = nir_intrinsic_store_global;
1573 break;
1574 case nir_var_mem_shared:
1575 assert(addr_format_is_offset(addr_format, mode));
1576 op = nir_intrinsic_store_shared;
1577 break;
1578 case nir_var_mem_task_payload:
1579 assert(addr_format_is_offset(addr_format, mode));
1580 op = nir_intrinsic_store_task_payload;
1581 break;
1582 case nir_var_shader_temp:
1583 case nir_var_function_temp:
1584 if (addr_format_is_offset(addr_format, mode)) {
1585 op = nir_intrinsic_store_scratch;
1586 } else {
1587 assert(addr_format_is_global(addr_format, mode));
1588 op = nir_intrinsic_store_global;
1589 }
1590 break;
1591 default:
1592 unreachable("Unsupported explicit IO variable mode");
1593 }
1594 break;
1595
1596 case nir_intrinsic_store_deref_block_intel:
1597 assert(write_mask == 0);
1598
1599 switch (mode) {
1600 case nir_var_mem_ssbo:
1601 if (addr_format_is_global(addr_format, mode))
1602 op = nir_intrinsic_store_global_block_intel;
1603 else
1604 op = nir_intrinsic_store_ssbo_block_intel;
1605 break;
1606 case nir_var_mem_global:
1607 op = nir_intrinsic_store_global_block_intel;
1608 break;
1609 case nir_var_mem_shared:
1610 op = nir_intrinsic_store_shared_block_intel;
1611 break;
1612 default:
1613 unreachable("Unsupported explicit IO variable mode");
1614 }
1615 break;
1616
1617 default:
1618 unreachable("Invalid intrinsic");
1619 }
1620
1621 nir_intrinsic_instr *store = nir_intrinsic_instr_create(b->shader, op);
1622
1623 if (value->bit_size == 1) {
1624 /* For shared, we can go ahead and use NIR's and/or the back-end's
1625 * standard encoding for booleans rather than forcing a 0/1 boolean.
1626 * This should save an instruction or two.
1627 *
1628 * TODO: Make the native bool bit_size an option.
1629 */
1630 if (mode == nir_var_mem_shared ||
1631 mode == nir_var_shader_temp ||
1632 mode == nir_var_function_temp)
1633 value = nir_b2b32(b, value);
1634 else
1635 value = nir_b2i(b, value, 32);
1636 }
1637
1638 store->src[0] = nir_src_for_ssa(value);
1639 if (addr_format_is_global(addr_format, mode)) {
1640 store->src[1] = nir_src_for_ssa(addr_to_global(b, addr, addr_format));
1641 } else if (addr_format_is_offset(addr_format, mode)) {
1642 assert(addr->num_components == 1);
1643 store->src[1] = nir_src_for_ssa(addr_to_offset(b, addr, addr_format));
1644 } else {
1645 store->src[1] = nir_src_for_ssa(addr_to_index(b, addr, addr_format));
1646 store->src[2] = nir_src_for_ssa(addr_to_offset(b, addr, addr_format));
1647 }
1648
1649 nir_intrinsic_set_write_mask(store, write_mask);
1650
1651 if (nir_intrinsic_has_access(store))
1652 nir_intrinsic_set_access(store, nir_intrinsic_access(intrin));
1653
1654 nir_intrinsic_set_align(store, align_mul, align_offset);
1655
1656 assert(value->num_components == 1 ||
1657 value->num_components == intrin->num_components);
1658 store->num_components = value->num_components;
1659
1660 assert(value->bit_size % 8 == 0);
1661
1662 if (addr_format_needs_bounds_check(addr_format)) {
1663 /* TODO: Better handle block_intel. */
1664 const unsigned store_size = (value->bit_size / 8) * store->num_components;
1665 nir_push_if(b, addr_is_in_bounds(b, addr, addr_format, store_size));
1666
1667 nir_builder_instr_insert(b, &store->instr);
1668
1669 nir_pop_if(b, NULL);
1670 } else {
1671 nir_builder_instr_insert(b, &store->instr);
1672 }
1673 }
1674
1675 static nir_ssa_def *
build_explicit_io_atomic(nir_builder * b,nir_intrinsic_instr * intrin,nir_ssa_def * addr,nir_address_format addr_format,nir_variable_mode modes)1676 build_explicit_io_atomic(nir_builder *b, nir_intrinsic_instr *intrin,
1677 nir_ssa_def *addr, nir_address_format addr_format,
1678 nir_variable_mode modes)
1679 {
1680 modes = canonicalize_generic_modes(modes);
1681
1682 if (util_bitcount(modes) > 1) {
1683 if (addr_format_is_global(addr_format, modes)) {
1684 return build_explicit_io_atomic(b, intrin, addr, addr_format,
1685 nir_var_mem_global);
1686 } else if (modes & nir_var_function_temp) {
1687 nir_push_if(b, build_runtime_addr_mode_check(b, addr, addr_format,
1688 nir_var_function_temp));
1689 nir_ssa_def *res1 =
1690 build_explicit_io_atomic(b, intrin, addr, addr_format,
1691 nir_var_function_temp);
1692 nir_push_else(b, NULL);
1693 nir_ssa_def *res2 =
1694 build_explicit_io_atomic(b, intrin, addr, addr_format,
1695 modes & ~nir_var_function_temp);
1696 nir_pop_if(b, NULL);
1697 return nir_if_phi(b, res1, res2);
1698 } else {
1699 nir_push_if(b, build_runtime_addr_mode_check(b, addr, addr_format,
1700 nir_var_mem_shared));
1701 assert(modes & nir_var_mem_shared);
1702 nir_ssa_def *res1 =
1703 build_explicit_io_atomic(b, intrin, addr, addr_format,
1704 nir_var_mem_shared);
1705 nir_push_else(b, NULL);
1706 assert(modes & nir_var_mem_global);
1707 nir_ssa_def *res2 =
1708 build_explicit_io_atomic(b, intrin, addr, addr_format,
1709 nir_var_mem_global);
1710 nir_pop_if(b, NULL);
1711 return nir_if_phi(b, res1, res2);
1712 }
1713 }
1714
1715 assert(util_bitcount(modes) == 1);
1716 const nir_variable_mode mode = modes;
1717
1718 const unsigned num_data_srcs =
1719 nir_intrinsic_infos[intrin->intrinsic].num_srcs - 1;
1720
1721 nir_intrinsic_op op;
1722 switch (mode) {
1723 case nir_var_mem_ssbo:
1724 if (addr_format_is_global(addr_format, mode))
1725 op = global_atomic_for_deref(intrin->intrinsic);
1726 else
1727 op = ssbo_atomic_for_deref(intrin->intrinsic);
1728 break;
1729 case nir_var_mem_global:
1730 assert(addr_format_is_global(addr_format, mode));
1731 op = global_atomic_for_deref(intrin->intrinsic);
1732 break;
1733 case nir_var_mem_shared:
1734 assert(addr_format_is_offset(addr_format, mode));
1735 op = shared_atomic_for_deref(intrin->intrinsic);
1736 break;
1737 default:
1738 unreachable("Unsupported explicit IO variable mode");
1739 }
1740
1741 nir_intrinsic_instr *atomic = nir_intrinsic_instr_create(b->shader, op);
1742
1743 unsigned src = 0;
1744 if (addr_format_is_global(addr_format, mode)) {
1745 atomic->src[src++] = nir_src_for_ssa(addr_to_global(b, addr, addr_format));
1746 } else if (addr_format_is_offset(addr_format, mode)) {
1747 assert(addr->num_components == 1);
1748 atomic->src[src++] = nir_src_for_ssa(addr_to_offset(b, addr, addr_format));
1749 } else {
1750 atomic->src[src++] = nir_src_for_ssa(addr_to_index(b, addr, addr_format));
1751 atomic->src[src++] = nir_src_for_ssa(addr_to_offset(b, addr, addr_format));
1752 }
1753 for (unsigned i = 0; i < num_data_srcs; i++) {
1754 atomic->src[src++] = nir_src_for_ssa(intrin->src[1 + i].ssa);
1755 }
1756
1757 /* Global atomics don't have access flags because they assume that the
1758 * address may be non-uniform.
1759 */
1760 if (nir_intrinsic_has_access(atomic))
1761 nir_intrinsic_set_access(atomic, nir_intrinsic_access(intrin));
1762
1763 assert(intrin->dest.ssa.num_components == 1);
1764 nir_ssa_dest_init(&atomic->instr, &atomic->dest,
1765 1, intrin->dest.ssa.bit_size, NULL);
1766
1767 assert(atomic->dest.ssa.bit_size % 8 == 0);
1768
1769 if (addr_format_needs_bounds_check(addr_format)) {
1770 const unsigned atomic_size = atomic->dest.ssa.bit_size / 8;
1771 nir_push_if(b, addr_is_in_bounds(b, addr, addr_format, atomic_size));
1772
1773 nir_builder_instr_insert(b, &atomic->instr);
1774
1775 nir_pop_if(b, NULL);
1776 return nir_if_phi(b, &atomic->dest.ssa,
1777 nir_ssa_undef(b, 1, atomic->dest.ssa.bit_size));
1778 } else {
1779 nir_builder_instr_insert(b, &atomic->instr);
1780 return &atomic->dest.ssa;
1781 }
1782 }
1783
1784 nir_ssa_def *
nir_explicit_io_address_from_deref(nir_builder * b,nir_deref_instr * deref,nir_ssa_def * base_addr,nir_address_format addr_format)1785 nir_explicit_io_address_from_deref(nir_builder *b, nir_deref_instr *deref,
1786 nir_ssa_def *base_addr,
1787 nir_address_format addr_format)
1788 {
1789 assert(deref->dest.is_ssa);
1790 switch (deref->deref_type) {
1791 case nir_deref_type_var:
1792 return build_addr_for_var(b, deref->var, addr_format);
1793
1794 case nir_deref_type_array: {
1795 unsigned stride = nir_deref_instr_array_stride(deref);
1796 assert(stride > 0);
1797
1798 nir_ssa_def *index = nir_ssa_for_src(b, deref->arr.index, 1);
1799 index = nir_i2i(b, index, addr_get_offset_bit_size(base_addr, addr_format));
1800 return build_addr_iadd(b, base_addr, addr_format, deref->modes,
1801 nir_amul_imm(b, index, stride));
1802 }
1803
1804 case nir_deref_type_ptr_as_array: {
1805 nir_ssa_def *index = nir_ssa_for_src(b, deref->arr.index, 1);
1806 index = nir_i2i(b, index, addr_get_offset_bit_size(base_addr, addr_format));
1807 unsigned stride = nir_deref_instr_array_stride(deref);
1808 return build_addr_iadd(b, base_addr, addr_format, deref->modes,
1809 nir_amul_imm(b, index, stride));
1810 }
1811
1812 case nir_deref_type_array_wildcard:
1813 unreachable("Wildcards should be lowered by now");
1814 break;
1815
1816 case nir_deref_type_struct: {
1817 nir_deref_instr *parent = nir_deref_instr_parent(deref);
1818 int offset = glsl_get_struct_field_offset(parent->type,
1819 deref->strct.index);
1820 assert(offset >= 0);
1821 return build_addr_iadd_imm(b, base_addr, addr_format,
1822 deref->modes, offset);
1823 }
1824
1825 case nir_deref_type_cast:
1826 /* Nothing to do here */
1827 return base_addr;
1828 }
1829
1830 unreachable("Invalid NIR deref type");
1831 }
1832
1833 void
nir_lower_explicit_io_instr(nir_builder * b,nir_intrinsic_instr * intrin,nir_ssa_def * addr,nir_address_format addr_format)1834 nir_lower_explicit_io_instr(nir_builder *b,
1835 nir_intrinsic_instr *intrin,
1836 nir_ssa_def *addr,
1837 nir_address_format addr_format)
1838 {
1839 b->cursor = nir_after_instr(&intrin->instr);
1840
1841 nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]);
1842 unsigned vec_stride = glsl_get_explicit_stride(deref->type);
1843 unsigned scalar_size = type_scalar_size_bytes(deref->type);
1844 assert(vec_stride == 0 || glsl_type_is_vector(deref->type));
1845 assert(vec_stride == 0 || vec_stride >= scalar_size);
1846
1847 uint32_t align_mul, align_offset;
1848 if (!nir_get_explicit_deref_align(deref, true, &align_mul, &align_offset)) {
1849 /* If we don't have an alignment from the deref, assume scalar */
1850 align_mul = scalar_size;
1851 align_offset = 0;
1852 }
1853
1854 switch (intrin->intrinsic) {
1855 case nir_intrinsic_load_deref: {
1856 nir_ssa_def *value;
1857 if (vec_stride > scalar_size) {
1858 nir_ssa_def *comps[NIR_MAX_VEC_COMPONENTS] = { NULL, };
1859 for (unsigned i = 0; i < intrin->num_components; i++) {
1860 unsigned comp_offset = i * vec_stride;
1861 nir_ssa_def *comp_addr = build_addr_iadd_imm(b, addr, addr_format,
1862 deref->modes,
1863 comp_offset);
1864 comps[i] = build_explicit_io_load(b, intrin, comp_addr,
1865 addr_format, deref->modes,
1866 align_mul,
1867 (align_offset + comp_offset) %
1868 align_mul,
1869 1);
1870 }
1871 value = nir_vec(b, comps, intrin->num_components);
1872 } else {
1873 value = build_explicit_io_load(b, intrin, addr, addr_format,
1874 deref->modes, align_mul, align_offset,
1875 intrin->num_components);
1876 }
1877 nir_ssa_def_rewrite_uses(&intrin->dest.ssa, value);
1878 break;
1879 }
1880
1881 case nir_intrinsic_store_deref: {
1882 assert(intrin->src[1].is_ssa);
1883 nir_ssa_def *value = intrin->src[1].ssa;
1884 nir_component_mask_t write_mask = nir_intrinsic_write_mask(intrin);
1885 if (vec_stride > scalar_size) {
1886 for (unsigned i = 0; i < intrin->num_components; i++) {
1887 if (!(write_mask & (1 << i)))
1888 continue;
1889
1890 unsigned comp_offset = i * vec_stride;
1891 nir_ssa_def *comp_addr = build_addr_iadd_imm(b, addr, addr_format,
1892 deref->modes,
1893 comp_offset);
1894 build_explicit_io_store(b, intrin, comp_addr, addr_format,
1895 deref->modes, align_mul,
1896 (align_offset + comp_offset) % align_mul,
1897 nir_channel(b, value, i), 1);
1898 }
1899 } else {
1900 build_explicit_io_store(b, intrin, addr, addr_format,
1901 deref->modes, align_mul, align_offset,
1902 value, write_mask);
1903 }
1904 break;
1905 }
1906
1907 case nir_intrinsic_load_deref_block_intel: {
1908 nir_ssa_def *value = build_explicit_io_load(b, intrin, addr, addr_format,
1909 deref->modes,
1910 align_mul, align_offset,
1911 intrin->num_components);
1912 nir_ssa_def_rewrite_uses(&intrin->dest.ssa, value);
1913 break;
1914 }
1915
1916 case nir_intrinsic_store_deref_block_intel: {
1917 assert(intrin->src[1].is_ssa);
1918 nir_ssa_def *value = intrin->src[1].ssa;
1919 const nir_component_mask_t write_mask = 0;
1920 build_explicit_io_store(b, intrin, addr, addr_format,
1921 deref->modes, align_mul, align_offset,
1922 value, write_mask);
1923 break;
1924 }
1925
1926 default: {
1927 nir_ssa_def *value =
1928 build_explicit_io_atomic(b, intrin, addr, addr_format, deref->modes);
1929 nir_ssa_def_rewrite_uses(&intrin->dest.ssa, value);
1930 break;
1931 }
1932 }
1933
1934 nir_instr_remove(&intrin->instr);
1935 }
1936
1937 bool
nir_get_explicit_deref_align(nir_deref_instr * deref,bool default_to_type_align,uint32_t * align_mul,uint32_t * align_offset)1938 nir_get_explicit_deref_align(nir_deref_instr *deref,
1939 bool default_to_type_align,
1940 uint32_t *align_mul,
1941 uint32_t *align_offset)
1942 {
1943 if (deref->deref_type == nir_deref_type_var) {
1944 /* If we see a variable, align_mul is effectively infinite because we
1945 * know the offset exactly (up to the offset of the base pointer for the
1946 * given variable mode). We have to pick something so we choose 256B
1947 * as an arbitrary alignment which seems high enough for any reasonable
1948 * wide-load use-case. Back-ends should clamp alignments down if 256B
1949 * is too large for some reason.
1950 */
1951 *align_mul = 256;
1952 *align_offset = deref->var->data.driver_location % 256;
1953 return true;
1954 }
1955
1956 /* If we're a cast deref that has an alignment, use that. */
1957 if (deref->deref_type == nir_deref_type_cast && deref->cast.align_mul > 0) {
1958 *align_mul = deref->cast.align_mul;
1959 *align_offset = deref->cast.align_offset;
1960 return true;
1961 }
1962
1963 /* Otherwise, we need to compute the alignment based on the parent */
1964 nir_deref_instr *parent = nir_deref_instr_parent(deref);
1965 if (parent == NULL) {
1966 assert(deref->deref_type == nir_deref_type_cast);
1967 if (default_to_type_align) {
1968 /* If we don't have a parent, assume the type's alignment, if any. */
1969 unsigned type_align = glsl_get_explicit_alignment(deref->type);
1970 if (type_align == 0)
1971 return false;
1972
1973 *align_mul = type_align;
1974 *align_offset = 0;
1975 return true;
1976 } else {
1977 return false;
1978 }
1979 }
1980
1981 uint32_t parent_mul, parent_offset;
1982 if (!nir_get_explicit_deref_align(parent, default_to_type_align,
1983 &parent_mul, &parent_offset))
1984 return false;
1985
1986 switch (deref->deref_type) {
1987 case nir_deref_type_var:
1988 unreachable("Handled above");
1989
1990 case nir_deref_type_array:
1991 case nir_deref_type_array_wildcard:
1992 case nir_deref_type_ptr_as_array: {
1993 const unsigned stride = nir_deref_instr_array_stride(deref);
1994 if (stride == 0)
1995 return false;
1996
1997 if (deref->deref_type != nir_deref_type_array_wildcard &&
1998 nir_src_is_const(deref->arr.index)) {
1999 unsigned offset = nir_src_as_uint(deref->arr.index) * stride;
2000 *align_mul = parent_mul;
2001 *align_offset = (parent_offset + offset) % parent_mul;
2002 } else {
2003 /* If this is a wildcard or an indirect deref, we have to go with the
2004 * power-of-two gcd.
2005 */
2006 *align_mul = MIN2(parent_mul, 1 << (ffs(stride) - 1));
2007 *align_offset = parent_offset % *align_mul;
2008 }
2009 return true;
2010 }
2011
2012 case nir_deref_type_struct: {
2013 const int offset = glsl_get_struct_field_offset(parent->type,
2014 deref->strct.index);
2015 if (offset < 0)
2016 return false;
2017
2018 *align_mul = parent_mul;
2019 *align_offset = (parent_offset + offset) % parent_mul;
2020 return true;
2021 }
2022
2023 case nir_deref_type_cast:
2024 /* We handled the explicit alignment case above. */
2025 assert(deref->cast.align_mul == 0);
2026 *align_mul = parent_mul;
2027 *align_offset = parent_offset;
2028 return true;
2029 }
2030
2031 unreachable("Invalid deref_instr_type");
2032 }
2033
2034 static void
lower_explicit_io_deref(nir_builder * b,nir_deref_instr * deref,nir_address_format addr_format)2035 lower_explicit_io_deref(nir_builder *b, nir_deref_instr *deref,
2036 nir_address_format addr_format)
2037 {
2038 /* Just delete the deref if it's not used. We can't use
2039 * nir_deref_instr_remove_if_unused here because it may remove more than
2040 * one deref which could break our list walking since we walk the list
2041 * backwards.
2042 */
2043 assert(list_is_empty(&deref->dest.ssa.if_uses));
2044 if (list_is_empty(&deref->dest.ssa.uses)) {
2045 nir_instr_remove(&deref->instr);
2046 return;
2047 }
2048
2049 b->cursor = nir_after_instr(&deref->instr);
2050
2051 nir_ssa_def *base_addr = NULL;
2052 if (deref->deref_type != nir_deref_type_var) {
2053 assert(deref->parent.is_ssa);
2054 base_addr = deref->parent.ssa;
2055 }
2056
2057 nir_ssa_def *addr = nir_explicit_io_address_from_deref(b, deref, base_addr,
2058 addr_format);
2059 assert(addr->bit_size == deref->dest.ssa.bit_size);
2060 assert(addr->num_components == deref->dest.ssa.num_components);
2061
2062 nir_instr_remove(&deref->instr);
2063 nir_ssa_def_rewrite_uses(&deref->dest.ssa, addr);
2064 }
2065
2066 static void
lower_explicit_io_access(nir_builder * b,nir_intrinsic_instr * intrin,nir_address_format addr_format)2067 lower_explicit_io_access(nir_builder *b, nir_intrinsic_instr *intrin,
2068 nir_address_format addr_format)
2069 {
2070 assert(intrin->src[0].is_ssa);
2071 nir_lower_explicit_io_instr(b, intrin, intrin->src[0].ssa, addr_format);
2072 }
2073
2074 static void
lower_explicit_io_array_length(nir_builder * b,nir_intrinsic_instr * intrin,nir_address_format addr_format)2075 lower_explicit_io_array_length(nir_builder *b, nir_intrinsic_instr *intrin,
2076 nir_address_format addr_format)
2077 {
2078 b->cursor = nir_after_instr(&intrin->instr);
2079
2080 nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]);
2081
2082 assert(glsl_type_is_array(deref->type));
2083 assert(glsl_get_length(deref->type) == 0);
2084 assert(nir_deref_mode_is(deref, nir_var_mem_ssbo));
2085 unsigned stride = glsl_get_explicit_stride(deref->type);
2086 assert(stride > 0);
2087
2088 nir_ssa_def *addr = &deref->dest.ssa;
2089 nir_ssa_def *index = addr_to_index(b, addr, addr_format);
2090 nir_ssa_def *offset = addr_to_offset(b, addr, addr_format);
2091 unsigned access = nir_intrinsic_access(intrin);
2092
2093 nir_ssa_def *arr_size = nir_get_ssbo_size(b, index, .access=access);
2094 arr_size = nir_imax(b, nir_isub(b, arr_size, offset), nir_imm_int(b, 0u));
2095 arr_size = nir_idiv(b, arr_size, nir_imm_int(b, stride));
2096
2097 nir_ssa_def_rewrite_uses(&intrin->dest.ssa, arr_size);
2098 nir_instr_remove(&intrin->instr);
2099 }
2100
2101 static void
lower_explicit_io_mode_check(nir_builder * b,nir_intrinsic_instr * intrin,nir_address_format addr_format)2102 lower_explicit_io_mode_check(nir_builder *b, nir_intrinsic_instr *intrin,
2103 nir_address_format addr_format)
2104 {
2105 if (addr_format_is_global(addr_format, 0)) {
2106 /* If the address format is always global, then the driver can use
2107 * global addresses regardless of the mode. In that case, don't create
2108 * a check, just whack the intrinsic to addr_mode_is and delegate to the
2109 * driver lowering.
2110 */
2111 intrin->intrinsic = nir_intrinsic_addr_mode_is;
2112 return;
2113 }
2114
2115 assert(intrin->src[0].is_ssa);
2116 nir_ssa_def *addr = intrin->src[0].ssa;
2117
2118 b->cursor = nir_instr_remove(&intrin->instr);
2119
2120 nir_ssa_def *is_mode =
2121 build_runtime_addr_mode_check(b, addr, addr_format,
2122 nir_intrinsic_memory_modes(intrin));
2123
2124 nir_ssa_def_rewrite_uses(&intrin->dest.ssa, is_mode);
2125 }
2126
2127 static bool
nir_lower_explicit_io_impl(nir_function_impl * impl,nir_variable_mode modes,nir_address_format addr_format)2128 nir_lower_explicit_io_impl(nir_function_impl *impl, nir_variable_mode modes,
2129 nir_address_format addr_format)
2130 {
2131 bool progress = false;
2132
2133 nir_builder b;
2134 nir_builder_init(&b, impl);
2135
2136 /* Walk in reverse order so that we can see the full deref chain when we
2137 * lower the access operations. We lower them assuming that the derefs
2138 * will be turned into address calculations later.
2139 */
2140 nir_foreach_block_reverse(block, impl) {
2141 nir_foreach_instr_reverse_safe(instr, block) {
2142 switch (instr->type) {
2143 case nir_instr_type_deref: {
2144 nir_deref_instr *deref = nir_instr_as_deref(instr);
2145 if (nir_deref_mode_is_in_set(deref, modes)) {
2146 lower_explicit_io_deref(&b, deref, addr_format);
2147 progress = true;
2148 }
2149 break;
2150 }
2151
2152 case nir_instr_type_intrinsic: {
2153 nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
2154 switch (intrin->intrinsic) {
2155 case nir_intrinsic_load_deref:
2156 case nir_intrinsic_store_deref:
2157 case nir_intrinsic_load_deref_block_intel:
2158 case nir_intrinsic_store_deref_block_intel:
2159 case nir_intrinsic_deref_atomic_add:
2160 case nir_intrinsic_deref_atomic_imin:
2161 case nir_intrinsic_deref_atomic_umin:
2162 case nir_intrinsic_deref_atomic_imax:
2163 case nir_intrinsic_deref_atomic_umax:
2164 case nir_intrinsic_deref_atomic_and:
2165 case nir_intrinsic_deref_atomic_or:
2166 case nir_intrinsic_deref_atomic_xor:
2167 case nir_intrinsic_deref_atomic_exchange:
2168 case nir_intrinsic_deref_atomic_comp_swap:
2169 case nir_intrinsic_deref_atomic_fadd:
2170 case nir_intrinsic_deref_atomic_fmin:
2171 case nir_intrinsic_deref_atomic_fmax:
2172 case nir_intrinsic_deref_atomic_fcomp_swap: {
2173 nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]);
2174 if (nir_deref_mode_is_in_set(deref, modes)) {
2175 lower_explicit_io_access(&b, intrin, addr_format);
2176 progress = true;
2177 }
2178 break;
2179 }
2180
2181 case nir_intrinsic_deref_buffer_array_length: {
2182 nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]);
2183 if (nir_deref_mode_is_in_set(deref, modes)) {
2184 lower_explicit_io_array_length(&b, intrin, addr_format);
2185 progress = true;
2186 }
2187 break;
2188 }
2189
2190 case nir_intrinsic_deref_mode_is: {
2191 nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]);
2192 if (nir_deref_mode_is_in_set(deref, modes)) {
2193 lower_explicit_io_mode_check(&b, intrin, addr_format);
2194 progress = true;
2195 }
2196 break;
2197 }
2198
2199 default:
2200 break;
2201 }
2202 break;
2203 }
2204
2205 default:
2206 /* Nothing to do */
2207 break;
2208 }
2209 }
2210 }
2211
2212 if (progress) {
2213 nir_metadata_preserve(impl, nir_metadata_block_index |
2214 nir_metadata_dominance);
2215 } else {
2216 nir_metadata_preserve(impl, nir_metadata_all);
2217 }
2218
2219 return progress;
2220 }
2221
2222 /** Lower explicitly laid out I/O access to byte offset/address intrinsics
2223 *
2224 * This pass is intended to be used for any I/O which touches memory external
2225 * to the shader or which is directly visible to the client. It requires that
2226 * all data types in the given modes have a explicit stride/offset decorations
2227 * to tell it exactly how to calculate the offset/address for the given load,
2228 * store, or atomic operation. If the offset/stride information does not come
2229 * from the client explicitly (as with shared variables in GL or Vulkan),
2230 * nir_lower_vars_to_explicit_types() can be used to add them.
2231 *
2232 * Unlike nir_lower_io, this pass is fully capable of handling incomplete
2233 * pointer chains which may contain cast derefs. It does so by walking the
2234 * deref chain backwards and simply replacing each deref, one at a time, with
2235 * the appropriate address calculation. The pass takes a nir_address_format
2236 * parameter which describes how the offset or address is to be represented
2237 * during calculations. By ensuring that the address is always in a
2238 * consistent format, pointers can safely be conjured from thin air by the
2239 * driver, stored to variables, passed through phis, etc.
2240 *
2241 * The one exception to the simple algorithm described above is for handling
2242 * row-major matrices in which case we may look down one additional level of
2243 * the deref chain.
2244 *
2245 * This pass is also capable of handling OpenCL generic pointers. If the
2246 * address mode is global, it will lower any ambiguous (more than one mode)
2247 * access to global and pass through the deref_mode_is run-time checks as
2248 * addr_mode_is. This assumes the driver has somehow mapped shared and
2249 * scratch memory to the global address space. For other modes such as
2250 * 62bit_generic, there is an enum embedded in the address and we lower
2251 * ambiguous access to an if-ladder and deref_mode_is to a check against the
2252 * embedded enum. If nir_lower_explicit_io is called on any shader that
2253 * contains generic pointers, it must either be used on all of the generic
2254 * modes or none.
2255 */
2256 bool
nir_lower_explicit_io(nir_shader * shader,nir_variable_mode modes,nir_address_format addr_format)2257 nir_lower_explicit_io(nir_shader *shader, nir_variable_mode modes,
2258 nir_address_format addr_format)
2259 {
2260 bool progress = false;
2261
2262 nir_foreach_function(function, shader) {
2263 if (function->impl &&
2264 nir_lower_explicit_io_impl(function->impl, modes, addr_format))
2265 progress = true;
2266 }
2267
2268 return progress;
2269 }
2270
2271 static bool
nir_lower_vars_to_explicit_types_impl(nir_function_impl * impl,nir_variable_mode modes,glsl_type_size_align_func type_info)2272 nir_lower_vars_to_explicit_types_impl(nir_function_impl *impl,
2273 nir_variable_mode modes,
2274 glsl_type_size_align_func type_info)
2275 {
2276 bool progress = false;
2277
2278 nir_foreach_block(block, impl) {
2279 nir_foreach_instr(instr, block) {
2280 if (instr->type != nir_instr_type_deref)
2281 continue;
2282
2283 nir_deref_instr *deref = nir_instr_as_deref(instr);
2284 if (!nir_deref_mode_is_in_set(deref, modes))
2285 continue;
2286
2287 unsigned size, alignment;
2288 const struct glsl_type *new_type =
2289 glsl_get_explicit_type_for_size_align(deref->type, type_info, &size, &alignment);
2290 if (new_type != deref->type) {
2291 progress = true;
2292 deref->type = new_type;
2293 }
2294 if (deref->deref_type == nir_deref_type_cast) {
2295 /* See also glsl_type::get_explicit_type_for_size_align() */
2296 unsigned new_stride = align(size, alignment);
2297 if (new_stride != deref->cast.ptr_stride) {
2298 deref->cast.ptr_stride = new_stride;
2299 progress = true;
2300 }
2301 }
2302 }
2303 }
2304
2305 if (progress) {
2306 nir_metadata_preserve(impl, nir_metadata_block_index |
2307 nir_metadata_dominance |
2308 nir_metadata_live_ssa_defs |
2309 nir_metadata_loop_analysis);
2310 } else {
2311 nir_metadata_preserve(impl, nir_metadata_all);
2312 }
2313
2314 return progress;
2315 }
2316
2317 static bool
lower_vars_to_explicit(nir_shader * shader,struct exec_list * vars,nir_variable_mode mode,glsl_type_size_align_func type_info)2318 lower_vars_to_explicit(nir_shader *shader,
2319 struct exec_list *vars, nir_variable_mode mode,
2320 glsl_type_size_align_func type_info)
2321 {
2322 bool progress = false;
2323 unsigned offset;
2324 switch (mode) {
2325 case nir_var_uniform:
2326 assert(shader->info.stage == MESA_SHADER_KERNEL);
2327 offset = 0;
2328 break;
2329 case nir_var_function_temp:
2330 case nir_var_shader_temp:
2331 offset = shader->scratch_size;
2332 break;
2333 case nir_var_mem_shared:
2334 offset = shader->info.shared_size;
2335 break;
2336 case nir_var_mem_task_payload:
2337 offset = shader->info.task_payload_size;
2338 break;
2339 case nir_var_mem_global:
2340 offset = shader->global_mem_size;
2341 break;
2342 case nir_var_mem_constant:
2343 offset = shader->constant_data_size;
2344 break;
2345 case nir_var_shader_call_data:
2346 case nir_var_ray_hit_attrib:
2347 offset = 0;
2348 break;
2349 default:
2350 unreachable("Unsupported mode");
2351 }
2352 nir_foreach_variable_in_list(var, vars) {
2353 if (var->data.mode != mode)
2354 continue;
2355
2356 unsigned size, align;
2357 const struct glsl_type *explicit_type =
2358 glsl_get_explicit_type_for_size_align(var->type, type_info, &size, &align);
2359
2360 if (explicit_type != var->type)
2361 var->type = explicit_type;
2362
2363 UNUSED bool is_empty_struct =
2364 glsl_type_is_struct_or_ifc(explicit_type) &&
2365 glsl_get_length(explicit_type) == 0;
2366
2367 assert(util_is_power_of_two_nonzero(align) || is_empty_struct);
2368 var->data.driver_location = ALIGN_POT(offset, align);
2369 offset = var->data.driver_location + size;
2370 progress = true;
2371 }
2372
2373 switch (mode) {
2374 case nir_var_uniform:
2375 assert(shader->info.stage == MESA_SHADER_KERNEL);
2376 shader->num_uniforms = offset;
2377 break;
2378 case nir_var_shader_temp:
2379 case nir_var_function_temp:
2380 shader->scratch_size = offset;
2381 break;
2382 case nir_var_mem_shared:
2383 shader->info.shared_size = offset;
2384 break;
2385 case nir_var_mem_task_payload:
2386 shader->info.task_payload_size = offset;
2387 break;
2388 case nir_var_mem_global:
2389 shader->global_mem_size = offset;
2390 break;
2391 case nir_var_mem_constant:
2392 shader->constant_data_size = offset;
2393 break;
2394 case nir_var_shader_call_data:
2395 case nir_var_ray_hit_attrib:
2396 break;
2397 default:
2398 unreachable("Unsupported mode");
2399 }
2400
2401 return progress;
2402 }
2403
2404 /* If nir_lower_vars_to_explicit_types is called on any shader that contains
2405 * generic pointers, it must either be used on all of the generic modes or
2406 * none.
2407 */
2408 bool
nir_lower_vars_to_explicit_types(nir_shader * shader,nir_variable_mode modes,glsl_type_size_align_func type_info)2409 nir_lower_vars_to_explicit_types(nir_shader *shader,
2410 nir_variable_mode modes,
2411 glsl_type_size_align_func type_info)
2412 {
2413 /* TODO: Situations which need to be handled to support more modes:
2414 * - row-major matrices
2415 * - compact shader inputs/outputs
2416 * - interface types
2417 */
2418 ASSERTED nir_variable_mode supported =
2419 nir_var_mem_shared | nir_var_mem_global | nir_var_mem_constant |
2420 nir_var_shader_temp | nir_var_function_temp | nir_var_uniform |
2421 nir_var_shader_call_data | nir_var_ray_hit_attrib |
2422 nir_var_mem_task_payload;
2423 assert(!(modes & ~supported) && "unsupported");
2424
2425 bool progress = false;
2426
2427 if (modes & nir_var_uniform)
2428 progress |= lower_vars_to_explicit(shader, &shader->variables, nir_var_uniform, type_info);
2429 if (modes & nir_var_mem_global)
2430 progress |= lower_vars_to_explicit(shader, &shader->variables, nir_var_mem_global, type_info);
2431
2432 if (modes & nir_var_mem_shared) {
2433 assert(!shader->info.shared_memory_explicit_layout);
2434 progress |= lower_vars_to_explicit(shader, &shader->variables, nir_var_mem_shared, type_info);
2435 }
2436
2437 if (modes & nir_var_shader_temp)
2438 progress |= lower_vars_to_explicit(shader, &shader->variables, nir_var_shader_temp, type_info);
2439 if (modes & nir_var_mem_constant)
2440 progress |= lower_vars_to_explicit(shader, &shader->variables, nir_var_mem_constant, type_info);
2441 if (modes & nir_var_shader_call_data)
2442 progress |= lower_vars_to_explicit(shader, &shader->variables, nir_var_shader_call_data, type_info);
2443 if (modes & nir_var_ray_hit_attrib)
2444 progress |= lower_vars_to_explicit(shader, &shader->variables, nir_var_ray_hit_attrib, type_info);
2445 if (modes & nir_var_mem_task_payload)
2446 progress |= lower_vars_to_explicit(shader, &shader->variables, nir_var_mem_task_payload, type_info);
2447
2448 nir_foreach_function(function, shader) {
2449 if (function->impl) {
2450 if (modes & nir_var_function_temp)
2451 progress |= lower_vars_to_explicit(shader, &function->impl->locals, nir_var_function_temp, type_info);
2452
2453 progress |= nir_lower_vars_to_explicit_types_impl(function->impl, modes, type_info);
2454 }
2455 }
2456
2457 return progress;
2458 }
2459
2460 static void
write_constant(void * dst,size_t dst_size,const nir_constant * c,const struct glsl_type * type)2461 write_constant(void *dst, size_t dst_size,
2462 const nir_constant *c, const struct glsl_type *type)
2463 {
2464 if (glsl_type_is_vector_or_scalar(type)) {
2465 const unsigned num_components = glsl_get_vector_elements(type);
2466 const unsigned bit_size = glsl_get_bit_size(type);
2467 if (bit_size == 1) {
2468 /* Booleans are special-cased to be 32-bit
2469 *
2470 * TODO: Make the native bool bit_size an option.
2471 */
2472 assert(num_components * 4 <= dst_size);
2473 for (unsigned i = 0; i < num_components; i++) {
2474 int32_t b32 = -(int)c->values[i].b;
2475 memcpy((char *)dst + i * 4, &b32, 4);
2476 }
2477 } else {
2478 assert(bit_size >= 8 && bit_size % 8 == 0);
2479 const unsigned byte_size = bit_size / 8;
2480 assert(num_components * byte_size <= dst_size);
2481 for (unsigned i = 0; i < num_components; i++) {
2482 /* Annoyingly, thanks to packed structs, we can't make any
2483 * assumptions about the alignment of dst. To avoid any strange
2484 * issues with unaligned writes, we always use memcpy.
2485 */
2486 memcpy((char *)dst + i * byte_size, &c->values[i], byte_size);
2487 }
2488 }
2489 } else if (glsl_type_is_array_or_matrix(type)) {
2490 const unsigned array_len = glsl_get_length(type);
2491 const unsigned stride = glsl_get_explicit_stride(type);
2492 assert(stride > 0);
2493 const struct glsl_type *elem_type = glsl_get_array_element(type);
2494 for (unsigned i = 0; i < array_len; i++) {
2495 unsigned elem_offset = i * stride;
2496 assert(elem_offset < dst_size);
2497 write_constant((char *)dst + elem_offset, dst_size - elem_offset,
2498 c->elements[i], elem_type);
2499 }
2500 } else {
2501 assert(glsl_type_is_struct_or_ifc(type));
2502 const unsigned num_fields = glsl_get_length(type);
2503 for (unsigned i = 0; i < num_fields; i++) {
2504 const int field_offset = glsl_get_struct_field_offset(type, i);
2505 assert(field_offset >= 0 && field_offset < dst_size);
2506 const struct glsl_type *field_type = glsl_get_struct_field(type, i);
2507 write_constant((char *)dst + field_offset, dst_size - field_offset,
2508 c->elements[i], field_type);
2509 }
2510 }
2511 }
2512
2513 void
nir_gather_explicit_io_initializers(nir_shader * shader,void * dst,size_t dst_size,nir_variable_mode mode)2514 nir_gather_explicit_io_initializers(nir_shader *shader,
2515 void *dst, size_t dst_size,
2516 nir_variable_mode mode)
2517 {
2518 /* It doesn't really make sense to gather initializers for more than one
2519 * mode at a time. If this ever becomes well-defined, we can drop the
2520 * assert then.
2521 */
2522 assert(util_bitcount(mode) == 1);
2523
2524 nir_foreach_variable_with_modes(var, shader, mode) {
2525 assert(var->data.driver_location < dst_size);
2526 write_constant((char *)dst + var->data.driver_location,
2527 dst_size - var->data.driver_location,
2528 var->constant_initializer, var->type);
2529 }
2530 }
2531
2532 /**
2533 * Return the offset source for a load/store intrinsic.
2534 */
2535 nir_src *
nir_get_io_offset_src(nir_intrinsic_instr * instr)2536 nir_get_io_offset_src(nir_intrinsic_instr *instr)
2537 {
2538 switch (instr->intrinsic) {
2539 case nir_intrinsic_load_input:
2540 case nir_intrinsic_load_output:
2541 case nir_intrinsic_load_shared:
2542 case nir_intrinsic_load_task_payload:
2543 case nir_intrinsic_load_uniform:
2544 case nir_intrinsic_load_kernel_input:
2545 case nir_intrinsic_load_global:
2546 case nir_intrinsic_load_global_constant:
2547 case nir_intrinsic_load_scratch:
2548 case nir_intrinsic_load_fs_input_interp_deltas:
2549 case nir_intrinsic_shared_atomic_add:
2550 case nir_intrinsic_shared_atomic_and:
2551 case nir_intrinsic_shared_atomic_comp_swap:
2552 case nir_intrinsic_shared_atomic_exchange:
2553 case nir_intrinsic_shared_atomic_fadd:
2554 case nir_intrinsic_shared_atomic_fcomp_swap:
2555 case nir_intrinsic_shared_atomic_fmax:
2556 case nir_intrinsic_shared_atomic_fmin:
2557 case nir_intrinsic_shared_atomic_imax:
2558 case nir_intrinsic_shared_atomic_imin:
2559 case nir_intrinsic_shared_atomic_or:
2560 case nir_intrinsic_shared_atomic_umax:
2561 case nir_intrinsic_shared_atomic_umin:
2562 case nir_intrinsic_shared_atomic_xor:
2563 case nir_intrinsic_global_atomic_add:
2564 case nir_intrinsic_global_atomic_and:
2565 case nir_intrinsic_global_atomic_comp_swap:
2566 case nir_intrinsic_global_atomic_exchange:
2567 case nir_intrinsic_global_atomic_fadd:
2568 case nir_intrinsic_global_atomic_fcomp_swap:
2569 case nir_intrinsic_global_atomic_fmax:
2570 case nir_intrinsic_global_atomic_fmin:
2571 case nir_intrinsic_global_atomic_imax:
2572 case nir_intrinsic_global_atomic_imin:
2573 case nir_intrinsic_global_atomic_or:
2574 case nir_intrinsic_global_atomic_umax:
2575 case nir_intrinsic_global_atomic_umin:
2576 case nir_intrinsic_global_atomic_xor:
2577 return &instr->src[0];
2578 case nir_intrinsic_load_ubo:
2579 case nir_intrinsic_load_ssbo:
2580 case nir_intrinsic_load_input_vertex:
2581 case nir_intrinsic_load_per_vertex_input:
2582 case nir_intrinsic_load_per_vertex_output:
2583 case nir_intrinsic_load_per_primitive_output:
2584 case nir_intrinsic_load_interpolated_input:
2585 case nir_intrinsic_store_output:
2586 case nir_intrinsic_store_shared:
2587 case nir_intrinsic_store_task_payload:
2588 case nir_intrinsic_store_global:
2589 case nir_intrinsic_store_scratch:
2590 case nir_intrinsic_ssbo_atomic_add:
2591 case nir_intrinsic_ssbo_atomic_imin:
2592 case nir_intrinsic_ssbo_atomic_umin:
2593 case nir_intrinsic_ssbo_atomic_imax:
2594 case nir_intrinsic_ssbo_atomic_umax:
2595 case nir_intrinsic_ssbo_atomic_and:
2596 case nir_intrinsic_ssbo_atomic_or:
2597 case nir_intrinsic_ssbo_atomic_xor:
2598 case nir_intrinsic_ssbo_atomic_exchange:
2599 case nir_intrinsic_ssbo_atomic_comp_swap:
2600 case nir_intrinsic_ssbo_atomic_fadd:
2601 case nir_intrinsic_ssbo_atomic_fmin:
2602 case nir_intrinsic_ssbo_atomic_fmax:
2603 case nir_intrinsic_ssbo_atomic_fcomp_swap:
2604 return &instr->src[1];
2605 case nir_intrinsic_store_ssbo:
2606 case nir_intrinsic_store_per_vertex_output:
2607 case nir_intrinsic_store_per_primitive_output:
2608 return &instr->src[2];
2609 default:
2610 return NULL;
2611 }
2612 }
2613
2614 /**
2615 * Return the vertex index source for a load/store per_vertex intrinsic.
2616 */
2617 nir_src *
nir_get_io_arrayed_index_src(nir_intrinsic_instr * instr)2618 nir_get_io_arrayed_index_src(nir_intrinsic_instr *instr)
2619 {
2620 switch (instr->intrinsic) {
2621 case nir_intrinsic_load_per_vertex_input:
2622 case nir_intrinsic_load_per_vertex_output:
2623 case nir_intrinsic_load_per_primitive_output:
2624 return &instr->src[0];
2625 case nir_intrinsic_store_per_vertex_output:
2626 case nir_intrinsic_store_per_primitive_output:
2627 return &instr->src[1];
2628 default:
2629 return NULL;
2630 }
2631 }
2632
2633 /**
2634 * Return the numeric constant that identify a NULL pointer for each address
2635 * format.
2636 */
2637 const nir_const_value *
nir_address_format_null_value(nir_address_format addr_format)2638 nir_address_format_null_value(nir_address_format addr_format)
2639 {
2640 const static nir_const_value null_values[][NIR_MAX_VEC_COMPONENTS] = {
2641 [nir_address_format_32bit_global] = {{0}},
2642 [nir_address_format_64bit_global] = {{0}},
2643 [nir_address_format_64bit_global_32bit_offset] = {{0}},
2644 [nir_address_format_64bit_bounded_global] = {{0}},
2645 [nir_address_format_32bit_index_offset] = {{.u32 = ~0}, {.u32 = ~0}},
2646 [nir_address_format_32bit_index_offset_pack64] = {{.u64 = ~0ull}},
2647 [nir_address_format_vec2_index_32bit_offset] = {{.u32 = ~0}, {.u32 = ~0}, {.u32 = ~0}},
2648 [nir_address_format_32bit_offset] = {{.u32 = ~0}},
2649 [nir_address_format_32bit_offset_as_64bit] = {{.u64 = ~0ull}},
2650 [nir_address_format_62bit_generic] = {{.u64 = 0}},
2651 [nir_address_format_logical] = {{.u32 = ~0}},
2652 };
2653
2654 assert(addr_format < ARRAY_SIZE(null_values));
2655 return null_values[addr_format];
2656 }
2657
2658 nir_ssa_def *
nir_build_addr_ieq(nir_builder * b,nir_ssa_def * addr0,nir_ssa_def * addr1,nir_address_format addr_format)2659 nir_build_addr_ieq(nir_builder *b, nir_ssa_def *addr0, nir_ssa_def *addr1,
2660 nir_address_format addr_format)
2661 {
2662 switch (addr_format) {
2663 case nir_address_format_32bit_global:
2664 case nir_address_format_64bit_global:
2665 case nir_address_format_64bit_bounded_global:
2666 case nir_address_format_32bit_index_offset:
2667 case nir_address_format_vec2_index_32bit_offset:
2668 case nir_address_format_32bit_offset:
2669 case nir_address_format_62bit_generic:
2670 return nir_ball_iequal(b, addr0, addr1);
2671
2672 case nir_address_format_64bit_global_32bit_offset:
2673 return nir_ball_iequal(b, nir_channels(b, addr0, 0xb),
2674 nir_channels(b, addr1, 0xb));
2675
2676 case nir_address_format_32bit_offset_as_64bit:
2677 assert(addr0->num_components == 1 && addr1->num_components == 1);
2678 return nir_ieq(b, nir_u2u32(b, addr0), nir_u2u32(b, addr1));
2679
2680 case nir_address_format_32bit_index_offset_pack64:
2681 assert(addr0->num_components == 1 && addr1->num_components == 1);
2682 return nir_ball_iequal(b, nir_unpack_64_2x32(b, addr0), nir_unpack_64_2x32(b, addr1));
2683
2684 case nir_address_format_logical:
2685 unreachable("Unsupported address format");
2686 }
2687
2688 unreachable("Invalid address format");
2689 }
2690
2691 nir_ssa_def *
nir_build_addr_isub(nir_builder * b,nir_ssa_def * addr0,nir_ssa_def * addr1,nir_address_format addr_format)2692 nir_build_addr_isub(nir_builder *b, nir_ssa_def *addr0, nir_ssa_def *addr1,
2693 nir_address_format addr_format)
2694 {
2695 switch (addr_format) {
2696 case nir_address_format_32bit_global:
2697 case nir_address_format_64bit_global:
2698 case nir_address_format_32bit_offset:
2699 case nir_address_format_32bit_index_offset_pack64:
2700 case nir_address_format_62bit_generic:
2701 assert(addr0->num_components == 1);
2702 assert(addr1->num_components == 1);
2703 return nir_isub(b, addr0, addr1);
2704
2705 case nir_address_format_32bit_offset_as_64bit:
2706 assert(addr0->num_components == 1);
2707 assert(addr1->num_components == 1);
2708 return nir_u2u64(b, nir_isub(b, nir_u2u32(b, addr0), nir_u2u32(b, addr1)));
2709
2710 case nir_address_format_64bit_global_32bit_offset:
2711 case nir_address_format_64bit_bounded_global:
2712 return nir_isub(b, addr_to_global(b, addr0, addr_format),
2713 addr_to_global(b, addr1, addr_format));
2714
2715 case nir_address_format_32bit_index_offset:
2716 assert(addr0->num_components == 2);
2717 assert(addr1->num_components == 2);
2718 /* Assume the same buffer index. */
2719 return nir_isub(b, nir_channel(b, addr0, 1), nir_channel(b, addr1, 1));
2720
2721 case nir_address_format_vec2_index_32bit_offset:
2722 assert(addr0->num_components == 3);
2723 assert(addr1->num_components == 3);
2724 /* Assume the same buffer index. */
2725 return nir_isub(b, nir_channel(b, addr0, 2), nir_channel(b, addr1, 2));
2726
2727 case nir_address_format_logical:
2728 unreachable("Unsupported address format");
2729 }
2730
2731 unreachable("Invalid address format");
2732 }
2733
2734 static bool
is_input(nir_intrinsic_instr * intrin)2735 is_input(nir_intrinsic_instr *intrin)
2736 {
2737 return intrin->intrinsic == nir_intrinsic_load_input ||
2738 intrin->intrinsic == nir_intrinsic_load_per_vertex_input ||
2739 intrin->intrinsic == nir_intrinsic_load_interpolated_input ||
2740 intrin->intrinsic == nir_intrinsic_load_fs_input_interp_deltas;
2741 }
2742
2743 static bool
is_output(nir_intrinsic_instr * intrin)2744 is_output(nir_intrinsic_instr *intrin)
2745 {
2746 return intrin->intrinsic == nir_intrinsic_load_output ||
2747 intrin->intrinsic == nir_intrinsic_load_per_vertex_output ||
2748 intrin->intrinsic == nir_intrinsic_load_per_primitive_output ||
2749 intrin->intrinsic == nir_intrinsic_store_output ||
2750 intrin->intrinsic == nir_intrinsic_store_per_vertex_output ||
2751 intrin->intrinsic == nir_intrinsic_store_per_primitive_output;
2752 }
2753
is_dual_slot(nir_intrinsic_instr * intrin)2754 static bool is_dual_slot(nir_intrinsic_instr *intrin)
2755 {
2756 if (intrin->intrinsic == nir_intrinsic_store_output ||
2757 intrin->intrinsic == nir_intrinsic_store_per_vertex_output ||
2758 intrin->intrinsic == nir_intrinsic_store_per_primitive_output) {
2759 return nir_src_bit_size(intrin->src[0]) == 64 &&
2760 nir_src_num_components(intrin->src[0]) >= 3;
2761 }
2762
2763 return nir_dest_bit_size(intrin->dest) == 64 &&
2764 nir_dest_num_components(intrin->dest) >= 3;
2765 }
2766
2767 /**
2768 * This pass adds constant offsets to instr->const_index[0] for input/output
2769 * intrinsics, and resets the offset source to 0. Non-constant offsets remain
2770 * unchanged - since we don't know what part of a compound variable is
2771 * accessed, we allocate storage for the entire thing. For drivers that use
2772 * nir_lower_io_to_temporaries() before nir_lower_io(), this guarantees that
2773 * the offset source will be 0, so that they don't have to add it in manually.
2774 */
2775
2776 static bool
add_const_offset_to_base_block(nir_block * block,nir_builder * b,nir_variable_mode modes)2777 add_const_offset_to_base_block(nir_block *block, nir_builder *b,
2778 nir_variable_mode modes)
2779 {
2780 bool progress = false;
2781 nir_foreach_instr_safe(instr, block) {
2782 if (instr->type != nir_instr_type_intrinsic)
2783 continue;
2784
2785 nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
2786
2787 if (((modes & nir_var_shader_in) && is_input(intrin)) ||
2788 ((modes & nir_var_shader_out) && is_output(intrin))) {
2789 nir_io_semantics sem = nir_intrinsic_io_semantics(intrin);
2790
2791 /* NV_mesh_shader: ignore MS primitive indices. */
2792 if (b->shader->info.stage == MESA_SHADER_MESH &&
2793 sem.location == VARYING_SLOT_PRIMITIVE_INDICES &&
2794 !(b->shader->info.per_primitive_outputs &
2795 BITFIELD64_BIT(VARYING_SLOT_PRIMITIVE_INDICES)))
2796 continue;
2797
2798 nir_src *offset = nir_get_io_offset_src(intrin);
2799
2800 /* TODO: Better handling of per-view variables here */
2801 if (nir_src_is_const(*offset) &&
2802 !nir_intrinsic_io_semantics(intrin).per_view) {
2803 unsigned off = nir_src_as_uint(*offset);
2804
2805 nir_intrinsic_set_base(intrin, nir_intrinsic_base(intrin) + off);
2806
2807 sem.location += off;
2808 /* non-indirect indexing should reduce num_slots */
2809 sem.num_slots = is_dual_slot(intrin) ? 2 : 1;
2810 nir_intrinsic_set_io_semantics(intrin, sem);
2811
2812 b->cursor = nir_before_instr(&intrin->instr);
2813 nir_instr_rewrite_src(&intrin->instr, offset,
2814 nir_src_for_ssa(nir_imm_int(b, 0)));
2815 progress = true;
2816 }
2817 }
2818 }
2819
2820 return progress;
2821 }
2822
2823 bool
nir_io_add_const_offset_to_base(nir_shader * nir,nir_variable_mode modes)2824 nir_io_add_const_offset_to_base(nir_shader *nir, nir_variable_mode modes)
2825 {
2826 bool progress = false;
2827
2828 nir_foreach_function(f, nir) {
2829 if (f->impl) {
2830 nir_builder b;
2831 nir_builder_init(&b, f->impl);
2832 nir_foreach_block(block, f->impl) {
2833 progress |= add_const_offset_to_base_block(block, &b, modes);
2834 }
2835 }
2836 }
2837
2838 return progress;
2839 }
2840
2841 static bool
nir_lower_color_inputs(nir_shader * nir)2842 nir_lower_color_inputs(nir_shader *nir)
2843 {
2844 nir_function_impl *impl = nir_shader_get_entrypoint(nir);
2845 bool progress = false;
2846
2847 nir_builder b;
2848 nir_builder_init(&b, impl);
2849
2850 nir_foreach_block (block, impl) {
2851 nir_foreach_instr_safe (instr, block) {
2852 if (instr->type != nir_instr_type_intrinsic)
2853 continue;
2854
2855 nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
2856
2857 if (intrin->intrinsic != nir_intrinsic_load_deref)
2858 continue;
2859
2860 nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]);
2861 if (!nir_deref_mode_is(deref, nir_var_shader_in))
2862 continue;
2863
2864 b.cursor = nir_before_instr(instr);
2865 nir_variable *var = nir_deref_instr_get_variable(deref);
2866 nir_ssa_def *def;
2867
2868 if (var->data.location == VARYING_SLOT_COL0) {
2869 def = nir_load_color0(&b);
2870 nir->info.fs.color0_interp = var->data.interpolation;
2871 nir->info.fs.color0_sample = var->data.sample;
2872 nir->info.fs.color0_centroid = var->data.centroid;
2873 } else if (var->data.location == VARYING_SLOT_COL1) {
2874 def = nir_load_color1(&b);
2875 nir->info.fs.color1_interp = var->data.interpolation;
2876 nir->info.fs.color1_sample = var->data.sample;
2877 nir->info.fs.color1_centroid = var->data.centroid;
2878 } else {
2879 continue;
2880 }
2881
2882 nir_ssa_def_rewrite_uses(&intrin->dest.ssa, def);
2883 nir_instr_remove(instr);
2884 progress = true;
2885 }
2886 }
2887
2888 if (progress) {
2889 nir_metadata_preserve(impl, nir_metadata_dominance |
2890 nir_metadata_block_index);
2891 } else {
2892 nir_metadata_preserve(impl, nir_metadata_all);
2893 }
2894 return progress;
2895 }
2896
2897 static bool
nir_add_xfb_info(nir_shader * nir,nir_xfb_info * info)2898 nir_add_xfb_info(nir_shader *nir, nir_xfb_info *info)
2899 {
2900 nir_function_impl *impl = nir_shader_get_entrypoint(nir);
2901 bool progress = false;
2902
2903 for (unsigned i = 0; i < NIR_MAX_XFB_BUFFERS; i++)
2904 nir->info.xfb_stride[i] = info->buffers[i].stride;
2905
2906 nir_foreach_block (block, impl) {
2907 nir_foreach_instr_safe (instr, block) {
2908 if (instr->type != nir_instr_type_intrinsic)
2909 continue;
2910
2911 nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
2912
2913 if (!nir_intrinsic_has_io_xfb(intr))
2914 continue;
2915
2916 /* No indirect indexing allowed. The index is implied to be 0. */
2917 ASSERTED nir_src offset = *nir_get_io_offset_src(intr);
2918 assert(nir_src_is_const(offset) && nir_src_as_uint(offset) == 0);
2919
2920 /* Calling this pass for the second time shouldn't do anything. */
2921 if (nir_intrinsic_io_xfb(intr).out[0].num_components ||
2922 nir_intrinsic_io_xfb(intr).out[1].num_components ||
2923 nir_intrinsic_io_xfb2(intr).out[0].num_components ||
2924 nir_intrinsic_io_xfb2(intr).out[1].num_components)
2925 continue;
2926
2927 nir_io_semantics sem = nir_intrinsic_io_semantics(intr);
2928 unsigned writemask = nir_intrinsic_write_mask(intr) <<
2929 nir_intrinsic_component(intr);
2930
2931 nir_io_xfb xfb[2];
2932 memset(xfb, 0, sizeof(xfb));
2933
2934 for (unsigned i = 0; i < info->output_count; i++) {
2935 if (info->outputs[i].location == sem.location) {
2936 nir_xfb_output_info *out = &info->outputs[i];
2937 unsigned xfb_mask = writemask & out->component_mask;
2938
2939 /*fprintf(stdout, "output%u: buffer=%u, offset=%u, location=%u, "
2940 "component_offset=%u, component_mask=0x%x, xfb_mask=0x%x, slots=%u\n",
2941 i, out->buffer,
2942 out->offset,
2943 out->location,
2944 out->component_offset,
2945 out->component_mask,
2946 xfb_mask, sem.num_slots);*/
2947
2948 while (xfb_mask) {
2949 int start, count;
2950 u_bit_scan_consecutive_range(&xfb_mask, &start, &count);
2951
2952 xfb[start / 2].out[start % 2].num_components = count;
2953 xfb[start / 2].out[start % 2].buffer = out->buffer;
2954 /* out->offset is relative to the first stored xfb component */
2955 /* start is relative to component 0 */
2956 xfb[start / 2].out[start % 2].offset =
2957 out->offset / 4 - out->component_offset + start;
2958
2959 progress = true;
2960 }
2961 }
2962 }
2963
2964 nir_intrinsic_set_io_xfb(intr, xfb[0]);
2965 nir_intrinsic_set_io_xfb2(intr, xfb[1]);
2966 }
2967 }
2968
2969 nir_metadata_preserve(impl, nir_metadata_all);
2970 return progress;
2971 }
2972
2973 static int
type_size_vec4(const struct glsl_type * type,bool bindless)2974 type_size_vec4(const struct glsl_type *type, bool bindless)
2975 {
2976 return glsl_count_attribute_slots(type, false);
2977 }
2978
2979 void
nir_lower_io_passes(nir_shader * nir,nir_xfb_info * xfb)2980 nir_lower_io_passes(nir_shader *nir, nir_xfb_info *xfb)
2981 {
2982 if (!nir->options->lower_io_variables)
2983 return;
2984
2985 /* Ignore transform feedback for stages that can't have it. */
2986 if (nir->info.stage != MESA_SHADER_VERTEX &&
2987 nir->info.stage != MESA_SHADER_TESS_EVAL &&
2988 nir->info.stage != MESA_SHADER_GEOMETRY)
2989 xfb = NULL;
2990
2991 bool has_indirect_inputs =
2992 (nir->options->support_indirect_inputs >> nir->info.stage) & 0x1;
2993
2994 /* Transform feedback requires that indirect outputs are lowered. */
2995 bool has_indirect_outputs =
2996 (nir->options->support_indirect_outputs >> nir->info.stage) & 0x1 && !xfb;
2997
2998 if (!has_indirect_inputs || !has_indirect_outputs) {
2999 NIR_PASS_V(nir, nir_lower_io_to_temporaries,
3000 nir_shader_get_entrypoint(nir), !has_indirect_outputs,
3001 !has_indirect_inputs);
3002
3003 /* We need to lower all the copy_deref's introduced by lower_io_to-
3004 * _temporaries before calling nir_lower_io.
3005 */
3006 NIR_PASS_V(nir, nir_split_var_copies);
3007 NIR_PASS_V(nir, nir_lower_var_copies);
3008 NIR_PASS_V(nir, nir_lower_global_vars_to_local);
3009 }
3010
3011 if (nir->info.stage == MESA_SHADER_FRAGMENT &&
3012 nir->options->lower_fs_color_inputs)
3013 NIR_PASS_V(nir, nir_lower_color_inputs);
3014
3015 NIR_PASS_V(nir, nir_lower_io, nir_var_shader_out | nir_var_shader_in,
3016 type_size_vec4, nir_lower_io_lower_64bit_to_32);
3017
3018 /* nir_io_add_const_offset_to_base needs actual constants. */
3019 NIR_PASS_V(nir, nir_opt_constant_folding);
3020 NIR_PASS_V(nir, nir_io_add_const_offset_to_base, nir_var_shader_in |
3021 nir_var_shader_out);
3022
3023 /* Lower and remove dead derefs and variables to clean up the IR. */
3024 NIR_PASS_V(nir, nir_lower_vars_to_ssa);
3025 NIR_PASS_V(nir, nir_opt_dce);
3026 NIR_PASS_V(nir, nir_remove_dead_variables, nir_var_function_temp |
3027 nir_var_shader_in | nir_var_shader_out, NULL);
3028
3029 if (xfb)
3030 NIR_PASS_V(nir, nir_add_xfb_info, xfb);
3031
3032 nir->info.io_lowered = true;
3033 }
3034