1 /*
2  * Copyright © 2014 Intel Corporation
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21  * IN THE SOFTWARE.
22  *
23  * Authors:
24  *    Connor Abbott (cwabbott0@gmail.com)
25  *    Jason Ekstrand (jason@jlekstrand.net)
26  *
27  */
28 
29 /*
30  * This lowering pass replaces loads and stores of input/output variables
31  * with the corresponding input/output intrinsics.
32  */
33 
34 #include "nir.h"
35 #include "nir_builder.h"
36 #include "nir_deref.h"
37 #include "nir_xfb_info.h"
38 
39 #include "util/u_math.h"
40 
41 struct lower_io_state {
42    void *dead_ctx;
43    nir_builder builder;
44    int (*type_size)(const struct glsl_type *type, bool);
45    nir_variable_mode modes;
46    nir_lower_io_options options;
47 };
48 
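/* The three helpers below map deref-based atomic intrinsics onto their
 * SSBO, global-memory and shared-memory counterparts, e.g.
 * nir_intrinsic_deref_atomic_add -> nir_intrinsic_ssbo_atomic_add.
 * They are pure one-to-one lookups; anything that is not a known atomic
 * is a caller bug and hits the unreachable().
 */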
49 static nir_intrinsic_op
50 ssbo_atomic_for_deref(nir_intrinsic_op deref_op)
51 {
52    switch (deref_op) {
53 #define OP(O) case nir_intrinsic_deref_##O: return nir_intrinsic_ssbo_##O;
54    OP(atomic_exchange)
55    OP(atomic_comp_swap)
56    OP(atomic_add)
57    OP(atomic_imin)
58    OP(atomic_umin)
59    OP(atomic_imax)
60    OP(atomic_umax)
61    OP(atomic_and)
62    OP(atomic_or)
63    OP(atomic_xor)
64    OP(atomic_fadd)
65    OP(atomic_fmin)
66    OP(atomic_fmax)
67    OP(atomic_fcomp_swap)
68 #undef OP
69    default:
70       unreachable("Invalid SSBO atomic");
71    }
72 }
73 
74 static nir_intrinsic_op
75 global_atomic_for_deref(nir_intrinsic_op deref_op)
76 {
77    switch (deref_op) {
78 #define OP(O) case nir_intrinsic_deref_##O: return nir_intrinsic_global_##O;
79    OP(atomic_exchange)
80    OP(atomic_comp_swap)
81    OP(atomic_add)
82    OP(atomic_imin)
83    OP(atomic_umin)
84    OP(atomic_imax)
85    OP(atomic_umax)
86    OP(atomic_and)
87    OP(atomic_or)
88    OP(atomic_xor)
89    OP(atomic_fadd)
90    OP(atomic_fmin)
91    OP(atomic_fmax)
92    OP(atomic_fcomp_swap)
93 #undef OP
94    default:
95       unreachable("Invalid global atomic");
96    }
97 }
98 
99 static nir_intrinsic_op
100 shared_atomic_for_deref(nir_intrinsic_op deref_op)
101 {
102    switch (deref_op) {
103 #define OP(O) case nir_intrinsic_deref_##O: return nir_intrinsic_shared_##O;
104    OP(atomic_exchange)
105    OP(atomic_comp_swap)
106    OP(atomic_add)
107    OP(atomic_imin)
108    OP(atomic_umin)
109    OP(atomic_imax)
110    OP(atomic_umax)
111    OP(atomic_and)
112    OP(atomic_or)
113    OP(atomic_xor)
114    OP(atomic_fadd)
115    OP(atomic_fmin)
116    OP(atomic_fmax)
117    OP(atomic_fcomp_swap)
118 #undef OP
119    default:
120       unreachable("Invalid shared atomic");
121    }
122 }
123 
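/* Assign consecutive driver_locations to every variable of the given mode,
 * using the driver-provided type_size callback as the unit of measure, and
 * return the total size through *size.  A minimal usage sketch (the
 * "type_size_vec4" callback name is hypothetical; each driver supplies its
 * own size metric):
 *
 *    unsigned num_outputs = 0;
 *    nir_assign_var_locations(shader, nir_var_shader_out, &num_outputs,
 *                             type_size_vec4);
 */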
124 void
125 nir_assign_var_locations(nir_shader *shader, nir_variable_mode mode,
126                          unsigned *size,
127                          int (*type_size)(const struct glsl_type *, bool))
128 {
129    unsigned location = 0;
130 
131    nir_foreach_variable_with_modes(var, shader, mode) {
132       var->data.driver_location = location;
133       bool bindless_type_size = var->data.mode == nir_var_shader_in ||
134                                 var->data.mode == nir_var_shader_out ||
135                                 var->data.bindless;
136       location += type_size(var->type, bindless_type_size);
137    }
138 
139    *size = location;
140 }
141 
142 /**
143  * Some inputs and outputs are arrayed, meaning that there is an extra level
144  * of array indexing to handle mismatches between the shader interface and the
145  * dispatch pattern of the shader.  For instance, geometry shaders are
146  * executed per-primitive while their inputs are specified per-vertex,
147  * so each such input has to be additionally indexed with the vertex
148  * index within the primitive.
149  */
150 bool
151 nir_is_arrayed_io(const nir_variable *var, gl_shader_stage stage)
152 {
153    if (var->data.patch || !glsl_type_is_array(var->type))
154       return false;
155 
156    if (stage == MESA_SHADER_MESH) {
157       /* NV_mesh_shader: this is a flat array for the whole workgroup. */
158       if (var->data.location == VARYING_SLOT_PRIMITIVE_INDICES)
159          return var->data.per_primitive;
160    }
161 
162    if (var->data.mode == nir_var_shader_in)
163       return stage == MESA_SHADER_GEOMETRY ||
164              stage == MESA_SHADER_TESS_CTRL ||
165              stage == MESA_SHADER_TESS_EVAL;
166 
167    if (var->data.mode == nir_var_shader_out)
168       return stage == MESA_SHADER_TESS_CTRL ||
169              stage == MESA_SHADER_MESH;
170 
171    return false;
172 }
173 
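/* Number of type_size units a variable occupies for the purpose of
 * nir_io_semantics::num_slots.  For arrayed I/O the outermost per-vertex
 * (or per-primitive) array dimension is stripped first, and the
 * NV_mesh_shader PRIMITIVE_INDICES special case is pinned to one slot.
 */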
174 static unsigned get_number_of_slots(struct lower_io_state *state,
175                                     const nir_variable *var)
176 {
177    const struct glsl_type *type = var->type;
178 
179    if (nir_is_arrayed_io(var, state->builder.shader->info.stage)) {
180       assert(glsl_type_is_array(type));
181       type = glsl_get_array_element(type);
182    }
183 
184    /* NV_mesh_shader:
185     * PRIMITIVE_INDICES is a flat array, not a proper arrayed output,
186     * as opposed to D3D-style mesh shaders where it's addressed by
187     * the primitive index.
188     * Prevent assigning several slots to primitive indices:
189     * the whole flat array is treated as a single slot.
190     */
191    if (state->builder.shader->info.stage == MESA_SHADER_MESH &&
192        var->data.location == VARYING_SLOT_PRIMITIVE_INDICES &&
193        !nir_is_arrayed_io(var, state->builder.shader->info.stage))
194       return 1;
195 
196    return state->type_size(type, var->data.bindless);
197 }
198 
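/* Walk the deref chain and turn it into a flat offset in type_size units.
 * For arrayed I/O the outermost array index is returned separately through
 * *array_index.  Compact variables (e.g. gl_ClipDistance) fold their
 * constant index into a slot offset plus *component.  For everything else
 * we simply accumulate, e.g. for "in vec4 a[3][2]" accessed as a[i][1]:
 *
 *    offset = i * type_size(vec4[2]) + 1 * type_size(vec4)
 *
 * and let constant folding clean up whatever is constant.
 */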
199 static nir_ssa_def *
200 get_io_offset(nir_builder *b, nir_deref_instr *deref,
201               nir_ssa_def **array_index,
202               int (*type_size)(const struct glsl_type *, bool),
203               unsigned *component, bool bts)
204 {
205    nir_deref_path path;
206    nir_deref_path_init(&path, deref, NULL);
207 
208    assert(path.path[0]->deref_type == nir_deref_type_var);
209    nir_deref_instr **p = &path.path[1];
210 
211    /* For arrayed I/O (e.g., per-vertex geometry shader inputs), pull out
212     * the outermost array index and process the rest of the chain normally.
213     */
214    if (array_index != NULL) {
215       assert((*p)->deref_type == nir_deref_type_array);
216       *array_index = nir_ssa_for_src(b, (*p)->arr.index, 1);
217       p++;
218    }
219 
220    if (path.path[0]->var->data.compact) {
221       assert((*p)->deref_type == nir_deref_type_array);
222       assert(glsl_type_is_scalar((*p)->type));
223 
224       /* We always lower indirect dereferences for "compact" array vars. */
225       const unsigned index = nir_src_as_uint((*p)->arr.index);
226       const unsigned total_offset = *component + index;
227       const unsigned slot_offset = total_offset / 4;
228       *component = total_offset % 4;
229       return nir_imm_int(b, type_size(glsl_vec4_type(), bts) * slot_offset);
230    }
231 
232    /* Just emit code and let constant-folding go to town */
233    nir_ssa_def *offset = nir_imm_int(b, 0);
234 
235    for (; *p; p++) {
236       if ((*p)->deref_type == nir_deref_type_array) {
237          unsigned size = type_size((*p)->type, bts);
238 
239          nir_ssa_def *mul =
240             nir_amul_imm(b, nir_ssa_for_src(b, (*p)->arr.index, 1), size);
241 
242          offset = nir_iadd(b, offset, mul);
243       } else if ((*p)->deref_type == nir_deref_type_struct) {
244          /* p starts at path[1], so this is safe */
245          nir_deref_instr *parent = *(p - 1);
246 
247          unsigned field_offset = 0;
248          for (unsigned i = 0; i < (*p)->strct.index; i++) {
249             field_offset += type_size(glsl_get_struct_field(parent->type, i), bts);
250          }
251          offset = nir_iadd_imm(b, offset, field_offset);
252       } else {
253          unreachable("Unsupported deref type");
254       }
255    }
256 
257    nir_deref_path_finish(&path);
258 
259    return offset;
260 }
261 
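/* Emit a single lowered load intrinsic.  The opcode depends on the variable
 * mode: FS inputs may become load_interpolated_input (with an accompanying
 * load_barycentric_* instruction) or load_input_vertex, arrayed inputs and
 * outputs become the per-vertex/per-primitive variants, and uniforms become
 * load_uniform with an explicit range.
 */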
262 static nir_ssa_def *
263 emit_load(struct lower_io_state *state,
264           nir_ssa_def *array_index, nir_variable *var, nir_ssa_def *offset,
265           unsigned component, unsigned num_components, unsigned bit_size,
266           nir_alu_type dest_type)
267 {
268    nir_builder *b = &state->builder;
269    const nir_shader *nir = b->shader;
270    nir_variable_mode mode = var->data.mode;
271    nir_ssa_def *barycentric = NULL;
272 
273    nir_intrinsic_op op;
274    switch (mode) {
275    case nir_var_shader_in:
276       if (nir->info.stage == MESA_SHADER_FRAGMENT &&
277           nir->options->use_interpolated_input_intrinsics &&
278           var->data.interpolation != INTERP_MODE_FLAT &&
279           !var->data.per_primitive) {
280          if (var->data.interpolation == INTERP_MODE_EXPLICIT) {
281             assert(array_index != NULL);
282             op = nir_intrinsic_load_input_vertex;
283          } else {
284             assert(array_index == NULL);
285 
286             nir_intrinsic_op bary_op;
287             if (var->data.sample ||
288                 (state->options & nir_lower_io_force_sample_interpolation))
289                bary_op = nir_intrinsic_load_barycentric_sample;
290             else if (var->data.centroid)
291                bary_op = nir_intrinsic_load_barycentric_centroid;
292             else
293                bary_op = nir_intrinsic_load_barycentric_pixel;
294 
295             barycentric = nir_load_barycentric(&state->builder, bary_op,
296                                                var->data.interpolation);
297             op = nir_intrinsic_load_interpolated_input;
298          }
299       } else {
300          op = array_index ? nir_intrinsic_load_per_vertex_input :
301                             nir_intrinsic_load_input;
302       }
303       break;
304    case nir_var_shader_out:
305       op = !array_index            ? nir_intrinsic_load_output :
306            var->data.per_primitive ? nir_intrinsic_load_per_primitive_output :
307                                      nir_intrinsic_load_per_vertex_output;
308       break;
309    case nir_var_uniform:
310       op = nir_intrinsic_load_uniform;
311       break;
312    default:
313       unreachable("Unknown variable mode");
314    }
315 
316    nir_intrinsic_instr *load =
317       nir_intrinsic_instr_create(state->builder.shader, op);
318    load->num_components = num_components;
319 
320    nir_intrinsic_set_base(load, var->data.driver_location);
321    if (mode == nir_var_shader_in || mode == nir_var_shader_out)
322       nir_intrinsic_set_component(load, component);
323 
324    if (load->intrinsic == nir_intrinsic_load_uniform)
325       nir_intrinsic_set_range(load,
326                               state->type_size(var->type, var->data.bindless));
327 
328    if (nir_intrinsic_has_access(load))
329       nir_intrinsic_set_access(load, var->data.access);
330 
331    nir_intrinsic_set_dest_type(load, dest_type);
332 
333    if (load->intrinsic != nir_intrinsic_load_uniform) {
334       nir_io_semantics semantics = {0};
335       semantics.location = var->data.location;
336       semantics.num_slots = get_number_of_slots(state, var);
337       semantics.fb_fetch_output = var->data.fb_fetch_output;
338       semantics.medium_precision =
339          var->data.precision == GLSL_PRECISION_MEDIUM ||
340          var->data.precision == GLSL_PRECISION_LOW;
341       nir_intrinsic_set_io_semantics(load, semantics);
342    }
343 
344    if (array_index) {
345       load->src[0] = nir_src_for_ssa(array_index);
346       load->src[1] = nir_src_for_ssa(offset);
347    } else if (barycentric) {
348       load->src[0] = nir_src_for_ssa(barycentric);
349       load->src[1] = nir_src_for_ssa(offset);
350    } else {
351       load->src[0] = nir_src_for_ssa(offset);
352    }
353 
354    nir_ssa_dest_init(&load->instr, &load->dest,
355                      num_components, bit_size, NULL);
356    nir_builder_instr_insert(b, &load->instr);
357 
358    return &load->dest.ssa;
359 }
360 
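/* Lower one load_deref.  With nir_lower_io_lower_64bit_to_32, 64-bit loads
 * are split into 32-bit loads of at most four components each and repacked
 * with nir_pack_64_2x32; e.g. a dvec3 starting at component 0 becomes a
 * 4-component 32-bit load in the first slot plus a 2-component load in the
 * next one.  1-bit booleans are loaded as 32-bit and narrowed with b2b1.
 */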
361 static nir_ssa_def *
362 lower_load(nir_intrinsic_instr *intrin, struct lower_io_state *state,
363            nir_ssa_def *array_index, nir_variable *var, nir_ssa_def *offset,
364            unsigned component, const struct glsl_type *type)
365 {
366    assert(intrin->dest.is_ssa);
367    if (intrin->dest.ssa.bit_size == 64 &&
368        (state->options & nir_lower_io_lower_64bit_to_32)) {
369       nir_builder *b = &state->builder;
370 
371       const unsigned slot_size = state->type_size(glsl_dvec_type(2), false);
372 
373       nir_ssa_def *comp64[4];
374       assert(component == 0 || component == 2);
375       unsigned dest_comp = 0;
376       while (dest_comp < intrin->dest.ssa.num_components) {
377          const unsigned num_comps =
378             MIN2(intrin->dest.ssa.num_components - dest_comp,
379                  (4 - component) / 2);
380 
381          nir_ssa_def *data32 =
382             emit_load(state, array_index, var, offset, component,
383                       num_comps * 2, 32, nir_type_uint32);
384          for (unsigned i = 0; i < num_comps; i++) {
385             comp64[dest_comp + i] =
386                nir_pack_64_2x32(b, nir_channels(b, data32, 3 << (i * 2)));
387          }
388 
389          /* Only the first load has a component offset */
390          component = 0;
391          dest_comp += num_comps;
392          offset = nir_iadd_imm(b, offset, slot_size);
393       }
394 
395       return nir_vec(b, comp64, intrin->dest.ssa.num_components);
396    } else if (intrin->dest.ssa.bit_size == 1) {
397       /* Booleans are 32-bit */
398       assert(glsl_type_is_boolean(type));
399       return nir_b2b1(&state->builder,
400                       emit_load(state, array_index, var, offset, component,
401                                 intrin->dest.ssa.num_components, 32,
402                                 nir_type_bool32));
403    } else {
404       return emit_load(state, array_index, var, offset, component,
405                        intrin->dest.ssa.num_components,
406                        intrin->dest.ssa.bit_size,
407                        nir_get_nir_type_for_glsl_type(type));
408    }
409 }
410 
411 static void
412 emit_store(struct lower_io_state *state, nir_ssa_def *data,
413            nir_ssa_def *array_index, nir_variable *var, nir_ssa_def *offset,
414            unsigned component, unsigned num_components,
415            nir_component_mask_t write_mask, nir_alu_type src_type)
416 {
417    nir_builder *b = &state->builder;
418 
419    assert(var->data.mode == nir_var_shader_out);
420    nir_intrinsic_op op =
421       !array_index            ? nir_intrinsic_store_output :
422       var->data.per_primitive ? nir_intrinsic_store_per_primitive_output :
423                                 nir_intrinsic_store_per_vertex_output;
424 
425    nir_intrinsic_instr *store =
426       nir_intrinsic_instr_create(state->builder.shader, op);
427    store->num_components = num_components;
428 
429    store->src[0] = nir_src_for_ssa(data);
430 
431    nir_intrinsic_set_base(store, var->data.driver_location);
432    nir_intrinsic_set_component(store, component);
433    nir_intrinsic_set_src_type(store, src_type);
434 
435    nir_intrinsic_set_write_mask(store, write_mask);
436 
437    if (nir_intrinsic_has_access(store))
438       nir_intrinsic_set_access(store, var->data.access);
439 
440    if (array_index)
441       store->src[1] = nir_src_for_ssa(array_index);
442 
443    store->src[array_index ? 2 : 1] = nir_src_for_ssa(offset);
444 
445    unsigned gs_streams = 0;
446    if (state->builder.shader->info.stage == MESA_SHADER_GEOMETRY) {
447       if (var->data.stream & NIR_STREAM_PACKED) {
448          gs_streams = var->data.stream & ~NIR_STREAM_PACKED;
449       } else {
450          assert(var->data.stream < 4);
451          gs_streams = 0;
452          for (unsigned i = 0; i < num_components; ++i)
453             gs_streams |= var->data.stream << (2 * i);
454       }
455    }
456 
457    nir_io_semantics semantics = {0};
458    semantics.location = var->data.location;
459    semantics.num_slots = get_number_of_slots(state, var);
460    semantics.dual_source_blend_index = var->data.index;
461    semantics.gs_streams = gs_streams;
462    semantics.medium_precision =
463       var->data.precision == GLSL_PRECISION_MEDIUM ||
464       var->data.precision == GLSL_PRECISION_LOW;
465    semantics.per_view = var->data.per_view;
466    semantics.invariant = var->data.invariant;
467 
468    nir_intrinsic_set_io_semantics(store, semantics);
469 
470    nir_builder_instr_insert(b, &store->instr);
471 }
472 
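/* Lower one store_deref.  Mirrors lower_load: 64-bit sources are split into
 * 32-bit stores per slot, with each 64-bit write-mask bit expanded to two
 * 32-bit bits, and 1-bit booleans are widened to 32-bit with b2b32 before
 * being stored.
 */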
473 static void
474 lower_store(nir_intrinsic_instr *intrin, struct lower_io_state *state,
475             nir_ssa_def *array_index, nir_variable *var, nir_ssa_def *offset,
476             unsigned component, const struct glsl_type *type)
477 {
478    assert(intrin->src[1].is_ssa);
479    if (intrin->src[1].ssa->bit_size == 64 &&
480        (state->options & nir_lower_io_lower_64bit_to_32)) {
481       nir_builder *b = &state->builder;
482 
483       const unsigned slot_size = state->type_size(glsl_dvec_type(2), false);
484 
485       assert(component == 0 || component == 2);
486       unsigned src_comp = 0;
487       nir_component_mask_t write_mask = nir_intrinsic_write_mask(intrin);
488       while (src_comp < intrin->num_components) {
489          const unsigned num_comps =
490             MIN2(intrin->num_components - src_comp,
491                  (4 - component) / 2);
492 
493          if (write_mask & BITFIELD_MASK(num_comps)) {
494             nir_ssa_def *data =
495                nir_channels(b, intrin->src[1].ssa,
496                             BITFIELD_RANGE(src_comp, num_comps));
497             nir_ssa_def *data32 = nir_bitcast_vector(b, data, 32);
498 
499             nir_component_mask_t write_mask32 = 0;
500             for (unsigned i = 0; i < num_comps; i++) {
501                if (write_mask & BITFIELD_MASK(num_comps) & (1 << i))
502                   write_mask32 |= 3 << (i * 2);
503             }
504 
505             emit_store(state, data32, array_index, var, offset,
506                        component, data32->num_components, write_mask32,
507                        nir_type_uint32);
508          }
509 
510          /* Only the first store has a component offset */
511          component = 0;
512          src_comp += num_comps;
513          write_mask >>= num_comps;
514          offset = nir_iadd_imm(b, offset, slot_size);
515       }
516    } else if (intrin->src[1].ssa->bit_size == 1) {
517       /* Booleans are 32-bit */
518       assert(glsl_type_is_boolean(type));
519       nir_ssa_def *b32_val = nir_b2b32(&state->builder, intrin->src[1].ssa);
520       emit_store(state, b32_val, array_index, var, offset,
521                  component, intrin->num_components,
522                  nir_intrinsic_write_mask(intrin),
523                  nir_type_bool32);
524    } else {
525       emit_store(state, intrin->src[1].ssa, array_index, var, offset,
526                  component, intrin->num_components,
527                  nir_intrinsic_write_mask(intrin),
528                  nir_get_nir_type_for_glsl_type(type));
529    }
530 }
531 
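/* Lower the interpolateAt*() deref intrinsics.  Flat and explicit inputs
 * degenerate to plain loads (interpolateAtVertex keeps its vertex index);
 * everything else becomes a load_barycentric_* / load_barycentric_at_*
 * instruction feeding a load_interpolated_input.
 */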
532 static nir_ssa_def *
533 lower_interpolate_at(nir_intrinsic_instr *intrin, struct lower_io_state *state,
534                      nir_variable *var, nir_ssa_def *offset, unsigned component,
535                      const struct glsl_type *type)
536 {
537    nir_builder *b = &state->builder;
538    assert(var->data.mode == nir_var_shader_in);
539 
540    /* Ignore interpolateAt() for flat variables - flat is flat. Lower
541     * interpolateAtVertex() for explicit variables.
542     */
543    if (var->data.interpolation == INTERP_MODE_FLAT ||
544        var->data.interpolation == INTERP_MODE_EXPLICIT) {
545       nir_ssa_def *vertex_index = NULL;
546 
547       if (var->data.interpolation == INTERP_MODE_EXPLICIT) {
548          assert(intrin->intrinsic == nir_intrinsic_interp_deref_at_vertex);
549          vertex_index = intrin->src[1].ssa;
550       }
551 
552       return lower_load(intrin, state, vertex_index, var, offset, component, type);
553    }
554 
555    /* None of the supported APIs allow interpolation on 64-bit values */
556    assert(intrin->dest.is_ssa && intrin->dest.ssa.bit_size <= 32);
557 
558    nir_intrinsic_op bary_op;
559    switch (intrin->intrinsic) {
560    case nir_intrinsic_interp_deref_at_centroid:
561       bary_op = (state->options & nir_lower_io_force_sample_interpolation) ?
562                 nir_intrinsic_load_barycentric_sample :
563                 nir_intrinsic_load_barycentric_centroid;
564       break;
565    case nir_intrinsic_interp_deref_at_sample:
566       bary_op = nir_intrinsic_load_barycentric_at_sample;
567       break;
568    case nir_intrinsic_interp_deref_at_offset:
569       bary_op = nir_intrinsic_load_barycentric_at_offset;
570       break;
571    default:
572       unreachable("Bogus interpolateAt() intrinsic.");
573    }
574 
575    nir_intrinsic_instr *bary_setup =
576       nir_intrinsic_instr_create(state->builder.shader, bary_op);
577 
578    nir_ssa_dest_init(&bary_setup->instr, &bary_setup->dest, 2, 32, NULL);
579    nir_intrinsic_set_interp_mode(bary_setup, var->data.interpolation);
580 
581    if (intrin->intrinsic == nir_intrinsic_interp_deref_at_sample ||
582        intrin->intrinsic == nir_intrinsic_interp_deref_at_offset ||
583        intrin->intrinsic == nir_intrinsic_interp_deref_at_vertex)
584       nir_src_copy(&bary_setup->src[0], &intrin->src[1]);
585 
586    nir_builder_instr_insert(b, &bary_setup->instr);
587 
588    nir_io_semantics semantics = {0};
589    semantics.location = var->data.location;
590    semantics.num_slots = get_number_of_slots(state, var);
591    semantics.medium_precision =
592       var->data.precision == GLSL_PRECISION_MEDIUM ||
593       var->data.precision == GLSL_PRECISION_LOW;
594 
595    assert(intrin->dest.is_ssa);
596    nir_ssa_def *load =
597       nir_load_interpolated_input(&state->builder,
598                                   intrin->dest.ssa.num_components,
599                                   intrin->dest.ssa.bit_size,
600                                   &bary_setup->dest.ssa,
601                                   offset,
602                                   .base = var->data.driver_location,
603                                   .component = component,
604                                   .io_semantics = semantics);
605 
606    return load;
607 }
608 
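/* Per-block worker: find load_deref/store_deref (and, optionally, the
 * interpolateAt*() intrinsics) whose deref points at one of the requested
 * modes, compute the flat offset with get_io_offset(), and replace the
 * instruction with the lowered form.  Statically out-of-bounds accesses are
 * dropped entirely, with reads replaced by zero.
 */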
609 static bool
610 nir_lower_io_block(nir_block *block,
611                    struct lower_io_state *state)
612 {
613    nir_builder *b = &state->builder;
614    const nir_shader_compiler_options *options = b->shader->options;
615    bool progress = false;
616 
617    nir_foreach_instr_safe(instr, block) {
618       if (instr->type != nir_instr_type_intrinsic)
619          continue;
620 
621       nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
622 
623       switch (intrin->intrinsic) {
624       case nir_intrinsic_load_deref:
625       case nir_intrinsic_store_deref:
626          /* We can lower the io for this nir intrinsic */
627          break;
628       case nir_intrinsic_interp_deref_at_centroid:
629       case nir_intrinsic_interp_deref_at_sample:
630       case nir_intrinsic_interp_deref_at_offset:
631       case nir_intrinsic_interp_deref_at_vertex:
632          /* We can optionally lower these to load_interpolated_input */
633          if (options->use_interpolated_input_intrinsics ||
634              options->lower_interpolate_at)
635             break;
636          FALLTHROUGH;
637       default:
638          /* We can't lower the io for this nir intrinsic, so skip it */
639          continue;
640       }
641 
642       nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]);
643       if (!nir_deref_mode_is_one_of(deref, state->modes))
644          continue;
645 
646       nir_variable *var = nir_deref_instr_get_variable(deref);
647 
648       b->cursor = nir_before_instr(instr);
649 
650       const bool is_arrayed = nir_is_arrayed_io(var, b->shader->info.stage);
651 
652       nir_ssa_def *offset;
653       nir_ssa_def *array_index = NULL;
654       unsigned component_offset = var->data.location_frac;
655       bool bindless_type_size = var->data.mode == nir_var_shader_in ||
656                                 var->data.mode == nir_var_shader_out ||
657                                 var->data.bindless;
658 
659       if (nir_deref_instr_is_known_out_of_bounds(deref)) {
660          /* Section 5.11 (Out-of-Bounds Accesses) of the GLSL 4.60 spec says:
661           *
662           *    In the subsections described above for array, vector, matrix and
663           *    structure accesses, any out-of-bounds access produced undefined
664           *    behavior....
665           *    Out-of-bounds reads return undefined values, which
666           *    include values from other variables of the active program or zero.
667           *    Out-of-bounds writes may be discarded or overwrite
668           *    other variables of the active program.
669           *
670           * GL_KHR_robustness and GL_ARB_robustness encourage us to return zero
671           * for reads.
672           *
673           * Otherwise, get_io_offset() would return an out-of-bounds offset,
674           * which may result in out-of-bounds loads/stores of inputs/outputs
675           * and cause issues in drivers further down the line.
676           */
677          if (intrin->intrinsic != nir_intrinsic_store_deref) {
678             nir_ssa_def *zero =
679                nir_imm_zero(b, intrin->dest.ssa.num_components,
680                              intrin->dest.ssa.bit_size);
681             nir_ssa_def_rewrite_uses(&intrin->dest.ssa,
682                                   zero);
683          }
684 
685          nir_instr_remove(&intrin->instr);
686          progress = true;
687          continue;
688       }
689 
690       offset = get_io_offset(b, deref, is_arrayed ? &array_index : NULL,
691                              state->type_size, &component_offset,
692                              bindless_type_size);
693 
694       nir_ssa_def *replacement = NULL;
695 
696       switch (intrin->intrinsic) {
697       case nir_intrinsic_load_deref:
698          replacement = lower_load(intrin, state, array_index, var, offset,
699                                   component_offset, deref->type);
700          break;
701 
702       case nir_intrinsic_store_deref:
703          lower_store(intrin, state, array_index, var, offset,
704                      component_offset, deref->type);
705          break;
706 
707       case nir_intrinsic_interp_deref_at_centroid:
708       case nir_intrinsic_interp_deref_at_sample:
709       case nir_intrinsic_interp_deref_at_offset:
710       case nir_intrinsic_interp_deref_at_vertex:
711          assert(array_index == NULL);
712          replacement = lower_interpolate_at(intrin, state, var, offset,
713                                             component_offset, deref->type);
714          break;
715 
716       default:
717          continue;
718       }
719 
720       if (replacement) {
721          nir_ssa_def_rewrite_uses(&intrin->dest.ssa,
722                                   replacement);
723       }
724       nir_instr_remove(&intrin->instr);
725       progress = true;
726    }
727 
728    return progress;
729 }
730 
731 static bool
732 nir_lower_io_impl(nir_function_impl *impl,
733                   nir_variable_mode modes,
734                   int (*type_size)(const struct glsl_type *, bool),
735                   nir_lower_io_options options)
736 {
737    struct lower_io_state state;
738    bool progress = false;
739 
740    nir_builder_init(&state.builder, impl);
741    state.dead_ctx = ralloc_context(NULL);
742    state.modes = modes;
743    state.type_size = type_size;
744    state.options = options;
745 
746    ASSERTED nir_variable_mode supported_modes =
747       nir_var_shader_in | nir_var_shader_out | nir_var_uniform;
748    assert(!(modes & ~supported_modes));
749 
750    nir_foreach_block(block, impl) {
751       progress |= nir_lower_io_block(block, &state);
752    }
753 
754    ralloc_free(state.dead_ctx);
755 
756    nir_metadata_preserve(impl, nir_metadata_none);
757 
758    return progress;
759 }
760 
761 /** Lower load/store_deref intrinsics on I/O variables to offset-based intrinsics
762  *
763  * This pass is intended to be used for cross-stage shader I/O and driver-
764  * managed uniforms to turn deref-based access into a simpler model using
765  * locations or offsets.  For fragment shader inputs, it can optionally turn
766  * load_deref into an explicit interpolation using barycentrics coming from
767  * one of the load_barycentric_* intrinsics.  This pass requires that all
768  * deref chains are complete and contain no casts.
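 *
 * A typical invocation from a driver might look like the sketch below (the
 * "type_size_vec4" callback name is hypothetical; each driver supplies its
 * own size metric):
 *
 *    NIR_PASS(progress, nir, nir_lower_io,
 *             nir_var_shader_in | nir_var_shader_out,
 *             type_size_vec4, (nir_lower_io_options)0);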
769  */
770 bool
771 nir_lower_io(nir_shader *shader, nir_variable_mode modes,
772              int (*type_size)(const struct glsl_type *, bool),
773              nir_lower_io_options options)
774 {
775    bool progress = false;
776 
777    nir_foreach_function(function, shader) {
778       if (function->impl) {
779          progress |= nir_lower_io_impl(function->impl, modes,
780                                        type_size, options);
781       }
782    }
783 
784    return progress;
785 }
786 
787 static unsigned
788 type_scalar_size_bytes(const struct glsl_type *type)
789 {
790    assert(glsl_type_is_vector_or_scalar(type) ||
791           glsl_type_is_matrix(type));
792    return glsl_type_is_boolean(type) ? 4 : glsl_get_bit_size(type) / 8;
793 }
794 
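/* From here on we deal with explicit-I/O lowering, where a pointer is
 * represented as a small vector whose layout depends on nir_address_format.
 * For example, nir_address_format_64bit_bounded_global is a uvec4 with the
 * 64-bit base address in .xy, the bound in .z and a 32-bit offset in .w,
 * while nir_address_format_32bit_index_offset is a (binding index, offset)
 * pair.  build_addr_iadd() adds a scalar offset to such an address in the
 * format-appropriate component.
 */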
795 static nir_ssa_def *
796 build_addr_iadd(nir_builder *b, nir_ssa_def *addr,
797                 nir_address_format addr_format,
798                 nir_variable_mode modes,
799                 nir_ssa_def *offset)
800 {
801    assert(offset->num_components == 1);
802 
803    switch (addr_format) {
804    case nir_address_format_32bit_global:
805    case nir_address_format_64bit_global:
806    case nir_address_format_32bit_offset:
807       assert(addr->bit_size == offset->bit_size);
808       assert(addr->num_components == 1);
809       return nir_iadd(b, addr, offset);
810 
811    case nir_address_format_32bit_offset_as_64bit:
812       assert(addr->num_components == 1);
813       assert(offset->bit_size == 32);
814       return nir_u2u64(b, nir_iadd(b, nir_u2u32(b, addr), offset));
815 
816    case nir_address_format_64bit_global_32bit_offset:
817    case nir_address_format_64bit_bounded_global:
818       assert(addr->num_components == 4);
819       assert(addr->bit_size == offset->bit_size);
820       return nir_vector_insert_imm(b, addr, nir_iadd(b, nir_channel(b, addr, 3), offset), 3);
821 
822    case nir_address_format_32bit_index_offset:
823       assert(addr->num_components == 2);
824       assert(addr->bit_size == offset->bit_size);
825       return nir_vector_insert_imm(b, addr, nir_iadd(b, nir_channel(b, addr, 1), offset), 1);
826 
827    case nir_address_format_32bit_index_offset_pack64:
828       assert(addr->num_components == 1);
829       assert(offset->bit_size == 32);
830       return nir_pack_64_2x32_split(b,
831                                     nir_iadd(b, nir_unpack_64_2x32_split_x(b, addr), offset),
832                                     nir_unpack_64_2x32_split_y(b, addr));
833 
834    case nir_address_format_vec2_index_32bit_offset:
835       assert(addr->num_components == 3);
836       assert(offset->bit_size == 32);
837       return nir_vector_insert_imm(b, addr, nir_iadd(b, nir_channel(b, addr, 2), offset), 2);
838 
839    case nir_address_format_62bit_generic:
840       assert(addr->num_components == 1);
841       assert(addr->bit_size == 64);
842       assert(offset->bit_size == 64);
843       if (!(modes & ~(nir_var_function_temp |
844                       nir_var_shader_temp |
845                       nir_var_mem_shared))) {
846          /* If we're sure it's one of these modes, we can do an easy 32-bit
847           * addition and don't need to bother with 64-bit math.
848           */
849          nir_ssa_def *addr32 = nir_unpack_64_2x32_split_x(b, addr);
850          nir_ssa_def *type = nir_unpack_64_2x32_split_y(b, addr);
851          addr32 = nir_iadd(b, addr32, nir_u2u32(b, offset));
852          return nir_pack_64_2x32_split(b, addr32, type);
853       } else {
854          return nir_iadd(b, addr, offset);
855       }
856 
857    case nir_address_format_logical:
858       unreachable("Unsupported address format");
859    }
860    unreachable("Invalid address format");
861 }
862 
863 static unsigned
864 addr_get_offset_bit_size(nir_ssa_def *addr, nir_address_format addr_format)
865 {
866    if (addr_format == nir_address_format_32bit_offset_as_64bit ||
867        addr_format == nir_address_format_32bit_index_offset_pack64)
868       return 32;
869    return addr->bit_size;
870 }
871 
872 static nir_ssa_def *
873 build_addr_iadd_imm(nir_builder *b, nir_ssa_def *addr,
874                     nir_address_format addr_format,
875                     nir_variable_mode modes,
876                     int64_t offset)
877 {
878    return build_addr_iadd(b, addr, addr_format, modes,
879                              nir_imm_intN_t(b, offset,
880                                             addr_get_offset_bit_size(addr, addr_format)));
881 }
882 
883 static nir_ssa_def *
884 build_addr_for_var(nir_builder *b, nir_variable *var,
885                    nir_address_format addr_format)
886 {
887    assert(var->data.mode & (nir_var_uniform | nir_var_mem_shared |
888                             nir_var_mem_task_payload |
889                             nir_var_mem_global |
890                             nir_var_shader_temp | nir_var_function_temp |
891                             nir_var_mem_push_const | nir_var_mem_constant));
892 
893    const unsigned num_comps = nir_address_format_num_components(addr_format);
894    const unsigned bit_size = nir_address_format_bit_size(addr_format);
895 
896    switch (addr_format) {
897    case nir_address_format_32bit_global:
898    case nir_address_format_64bit_global: {
899       nir_ssa_def *base_addr;
900       switch (var->data.mode) {
901       case nir_var_shader_temp:
902          base_addr = nir_load_scratch_base_ptr(b, num_comps, bit_size, 0);
903          break;
904 
905       case nir_var_function_temp:
906          base_addr = nir_load_scratch_base_ptr(b, num_comps, bit_size, 1);
907          break;
908 
909       case nir_var_mem_constant:
910          base_addr = nir_load_constant_base_ptr(b, num_comps, bit_size);
911          break;
912 
913       case nir_var_mem_shared:
914          base_addr = nir_load_shared_base_ptr(b, num_comps, bit_size);
915          break;
916 
917       case nir_var_mem_global:
918          base_addr = nir_load_global_base_ptr(b, num_comps, bit_size);
919          break;
920 
921       default:
922          unreachable("Unsupported variable mode");
923       }
924 
925       return build_addr_iadd_imm(b, base_addr, addr_format, var->data.mode,
926                                     var->data.driver_location);
927    }
928 
929    case nir_address_format_32bit_offset:
930       assert(var->data.driver_location <= UINT32_MAX);
931       return nir_imm_int(b, var->data.driver_location);
932 
933    case nir_address_format_32bit_offset_as_64bit:
934       assert(var->data.driver_location <= UINT32_MAX);
935       return nir_imm_int64(b, var->data.driver_location);
936 
937    case nir_address_format_62bit_generic:
938       switch (var->data.mode) {
939       case nir_var_shader_temp:
940       case nir_var_function_temp:
941          assert(var->data.driver_location <= UINT32_MAX);
942          return nir_imm_intN_t(b, var->data.driver_location | 2ull << 62, 64);
943 
944       case nir_var_mem_shared:
945          assert(var->data.driver_location <= UINT32_MAX);
946          return nir_imm_intN_t(b, var->data.driver_location | 1ull << 62, 64);
947 
948       case nir_var_mem_global:
949          return nir_iadd_imm(b, nir_load_global_base_ptr(b, num_comps, bit_size),
950                                 var->data.driver_location);
951 
952       default:
953          unreachable("Unsupported variable mode");
954       }
955 
956    default:
957       unreachable("Unsupported address format");
958    }
959 }
960 
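/* For nir_address_format_62bit_generic the top two bits of the 64-bit
 * pointer encode the variable mode (as set up in build_addr_for_var):
 * 0x2 = shader/function temp, 0x1 = shared, 0x0 or 0x3 = global.  When the
 * mode cannot be proven at compile time, this emits the corresponding
 * run-time comparison on those bits.
 */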
961 static nir_ssa_def *
962 build_runtime_addr_mode_check(nir_builder *b, nir_ssa_def *addr,
963                               nir_address_format addr_format,
964                               nir_variable_mode mode)
965 {
966    /* The compile-time check failed; do a run-time check */
967    switch (addr_format) {
968    case nir_address_format_62bit_generic: {
969       assert(addr->num_components == 1);
970       assert(addr->bit_size == 64);
971       nir_ssa_def *mode_enum = nir_ushr(b, addr, nir_imm_int(b, 62));
972       switch (mode) {
973       case nir_var_function_temp:
974       case nir_var_shader_temp:
975          return nir_ieq_imm(b, mode_enum, 0x2);
976 
977       case nir_var_mem_shared:
978          return nir_ieq_imm(b, mode_enum, 0x1);
979 
980       case nir_var_mem_global:
981          return nir_ior(b, nir_ieq_imm(b, mode_enum, 0x0),
982                            nir_ieq_imm(b, mode_enum, 0x3));
983 
984       default:
985          unreachable("Invalid mode check intrinsic");
986       }
987    }
988 
989    default:
990       unreachable("Unsupported address mode");
991    }
992 }
993 
994 unsigned
995 nir_address_format_bit_size(nir_address_format addr_format)
996 {
997    switch (addr_format) {
998    case nir_address_format_32bit_global:              return 32;
999    case nir_address_format_64bit_global:              return 64;
1000    case nir_address_format_64bit_global_32bit_offset: return 32;
1001    case nir_address_format_64bit_bounded_global:      return 32;
1002    case nir_address_format_32bit_index_offset:        return 32;
1003    case nir_address_format_32bit_index_offset_pack64: return 64;
1004    case nir_address_format_vec2_index_32bit_offset:   return 32;
1005    case nir_address_format_62bit_generic:             return 64;
1006    case nir_address_format_32bit_offset:              return 32;
1007    case nir_address_format_32bit_offset_as_64bit:     return 64;
1008    case nir_address_format_logical:                   return 32;
1009    }
1010    unreachable("Invalid address format");
1011 }
1012 
1013 unsigned
1014 nir_address_format_num_components(nir_address_format addr_format)
1015 {
1016    switch (addr_format) {
1017    case nir_address_format_32bit_global:              return 1;
1018    case nir_address_format_64bit_global:              return 1;
1019    case nir_address_format_64bit_global_32bit_offset: return 4;
1020    case nir_address_format_64bit_bounded_global:      return 4;
1021    case nir_address_format_32bit_index_offset:        return 2;
1022    case nir_address_format_32bit_index_offset_pack64: return 1;
1023    case nir_address_format_vec2_index_32bit_offset:   return 3;
1024    case nir_address_format_62bit_generic:             return 1;
1025    case nir_address_format_32bit_offset:              return 1;
1026    case nir_address_format_32bit_offset_as_64bit:     return 1;
1027    case nir_address_format_logical:                   return 1;
1028    }
1029    unreachable("Invalid address format");
1030 }
1031 
1032 static nir_ssa_def *
1033 addr_to_index(nir_builder *b, nir_ssa_def *addr,
1034               nir_address_format addr_format)
1035 {
1036    switch (addr_format) {
1037    case nir_address_format_32bit_index_offset:
1038       assert(addr->num_components == 2);
1039       return nir_channel(b, addr, 0);
1040    case nir_address_format_32bit_index_offset_pack64:
1041       return nir_unpack_64_2x32_split_y(b, addr);
1042    case nir_address_format_vec2_index_32bit_offset:
1043       assert(addr->num_components == 3);
1044       return nir_channels(b, addr, 0x3);
1045    default: unreachable("Invalid address format");
1046    }
1047 }
1048 
1049 static nir_ssa_def *
1050 addr_to_offset(nir_builder *b, nir_ssa_def *addr,
1051                nir_address_format addr_format)
1052 {
1053    switch (addr_format) {
1054    case nir_address_format_32bit_index_offset:
1055       assert(addr->num_components == 2);
1056       return nir_channel(b, addr, 1);
1057    case nir_address_format_32bit_index_offset_pack64:
1058       return nir_unpack_64_2x32_split_x(b, addr);
1059    case nir_address_format_vec2_index_32bit_offset:
1060       assert(addr->num_components == 3);
1061       return nir_channel(b, addr, 2);
1062    case nir_address_format_32bit_offset:
1063       return addr;
1064    case nir_address_format_32bit_offset_as_64bit:
1065    case nir_address_format_62bit_generic:
1066       return nir_u2u32(b, addr);
1067    default:
1068       unreachable("Invalid address format");
1069    }
1070 }
1071 
1072 /** Returns true if the given address format resolves to a global address */
1073 static bool
1074 addr_format_is_global(nir_address_format addr_format,
1075                       nir_variable_mode mode)
1076 {
1077    if (addr_format == nir_address_format_62bit_generic)
1078       return mode == nir_var_mem_global;
1079 
1080    return addr_format == nir_address_format_32bit_global ||
1081           addr_format == nir_address_format_64bit_global ||
1082           addr_format == nir_address_format_64bit_global_32bit_offset ||
1083           addr_format == nir_address_format_64bit_bounded_global;
1084 }
1085 
1086 static bool
1087 addr_format_is_offset(nir_address_format addr_format,
1088                       nir_variable_mode mode)
1089 {
1090    if (addr_format == nir_address_format_62bit_generic)
1091       return mode != nir_var_mem_global;
1092 
1093    return addr_format == nir_address_format_32bit_offset ||
1094           addr_format == nir_address_format_32bit_offset_as_64bit;
1095 }
1096 
1097 static nir_ssa_def *
1098 addr_to_global(nir_builder *b, nir_ssa_def *addr,
1099                nir_address_format addr_format)
1100 {
1101    switch (addr_format) {
1102    case nir_address_format_32bit_global:
1103    case nir_address_format_64bit_global:
1104    case nir_address_format_62bit_generic:
1105       assert(addr->num_components == 1);
1106       return addr;
1107 
1108    case nir_address_format_64bit_global_32bit_offset:
1109    case nir_address_format_64bit_bounded_global:
1110       assert(addr->num_components == 4);
1111       return nir_iadd(b, nir_pack_64_2x32(b, nir_channels(b, addr, 0x3)),
1112                          nir_u2u64(b, nir_channel(b, addr, 3)));
1113 
1114    case nir_address_format_32bit_index_offset:
1115    case nir_address_format_32bit_index_offset_pack64:
1116    case nir_address_format_vec2_index_32bit_offset:
1117    case nir_address_format_32bit_offset:
1118    case nir_address_format_32bit_offset_as_64bit:
1119    case nir_address_format_logical:
1120       unreachable("Cannot get a 64-bit address with this address format");
1121    }
1122 
1123    unreachable("Invalid address format");
1124 }
1125 
1126 static bool
1127 addr_format_needs_bounds_check(nir_address_format addr_format)
1128 {
1129    return addr_format == nir_address_format_64bit_bounded_global;
1130 }
1131 
1132 static nir_ssa_def *
1133 addr_is_in_bounds(nir_builder *b, nir_ssa_def *addr,
1134                   nir_address_format addr_format, unsigned size)
1135 {
1136    assert(addr_format == nir_address_format_64bit_bounded_global);
1137    assert(addr->num_components == 4);
1138    return nir_ige(b, nir_channel(b, addr, 2),
1139                      nir_iadd_imm(b, nir_channel(b, addr, 3), size));
1140 }
1141 
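/* Compute a conservative [base, base + range) byte interval that the given
 * deref may access, by walking up the deref chain and adding constant array
 * and struct offsets (non-constant array indices widen the range to cover
 * the whole array).  If anything cannot be reasoned about, fall back to
 * base = 0 and an unbounded range.
 */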
1142 static void
1143 nir_get_explicit_deref_range(nir_deref_instr *deref,
1144                              nir_address_format addr_format,
1145                              uint32_t *out_base,
1146                              uint32_t *out_range)
1147 {
1148    uint32_t base = 0;
1149    uint32_t range = glsl_get_explicit_size(deref->type, false);
1150 
1151    while (true) {
1152       nir_deref_instr *parent = nir_deref_instr_parent(deref);
1153 
1154       switch (deref->deref_type) {
1155       case nir_deref_type_array:
1156       case nir_deref_type_array_wildcard:
1157       case nir_deref_type_ptr_as_array: {
1158          const unsigned stride = nir_deref_instr_array_stride(deref);
1159          if (stride == 0)
1160             goto fail;
1161 
1162          if (!parent)
1163             goto fail;
1164 
1165          if (deref->deref_type != nir_deref_type_array_wildcard &&
1166              nir_src_is_const(deref->arr.index)) {
1167             base += stride * nir_src_as_uint(deref->arr.index);
1168          } else {
1169             if (glsl_get_length(parent->type) == 0)
1170                goto fail;
1171             range += stride * (glsl_get_length(parent->type) - 1);
1172          }
1173          break;
1174       }
1175 
1176       case nir_deref_type_struct: {
1177          if (!parent)
1178             goto fail;
1179 
1180          base += glsl_get_struct_field_offset(parent->type, deref->strct.index);
1181          break;
1182       }
1183 
1184       case nir_deref_type_cast: {
1185          nir_instr *parent_instr = deref->parent.ssa->parent_instr;
1186 
1187          switch (parent_instr->type) {
1188          case nir_instr_type_load_const: {
1189             nir_load_const_instr *load = nir_instr_as_load_const(parent_instr);
1190 
1191             switch (addr_format) {
1192             case nir_address_format_32bit_offset:
1193                base += load->value[1].u32;
1194                break;
1195             case nir_address_format_32bit_index_offset:
1196                base += load->value[1].u32;
1197                break;
1198             case nir_address_format_vec2_index_32bit_offset:
1199                base += load->value[2].u32;
1200                break;
1201             default:
1202                goto fail;
1203             }
1204 
1205             *out_base = base;
1206             *out_range = range;
1207             return;
1208          }
1209 
1210          case nir_instr_type_intrinsic: {
1211             nir_intrinsic_instr *intr = nir_instr_as_intrinsic(parent_instr);
1212             switch (intr->intrinsic) {
1213             case nir_intrinsic_load_vulkan_descriptor:
1214                /* Assume that a load_vulkan_descriptor won't contribute to an
1215                 * offset within the resource.
1216                 */
1217                break;
1218             default:
1219                goto fail;
1220             }
1221 
1222             *out_base = base;
1223             *out_range = range;
1224             return;
1225          }
1226 
1227          default:
1228             goto fail;
1229          }
1230       }
1231 
1232       default:
1233          goto fail;
1234       }
1235 
1236       deref = parent;
1237    }
1238 
1239 fail:
1240    *out_base = 0;
1241    *out_range = ~0;
1242 }
1243 
1244 static nir_variable_mode
1245 canonicalize_generic_modes(nir_variable_mode modes)
1246 {
1247    assert(modes != 0);
1248    if (util_bitcount(modes) == 1)
1249       return modes;
1250 
1251    assert(!(modes & ~(nir_var_function_temp | nir_var_shader_temp |
1252                       nir_var_mem_shared | nir_var_mem_global)));
1253 
1254    /* Canonicalize by converting shader_temp to function_temp */
1255    if (modes & nir_var_shader_temp) {
1256       modes &= ~nir_var_shader_temp;
1257       modes |= nir_var_function_temp;
1258    }
1259 
1260    return modes;
1261 }
1262 
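/* Emit the lowered load for an explicit-I/O deref.  For generic pointers
 * covering several modes this recurses, wrapping the per-mode loads in
 * run-time mode checks, so a shared|global pointer roughly becomes
 *
 *    if (mode_bits(addr) == shared) res = load_shared(...);
 *    else                           res = load_global(...);
 *    ... = phi(res, res);
 *
 * (illustrative NIR pseudocode).  For a single mode it just picks the right
 * intrinsic, e.g. load_ubo, load_ssbo, load_global, load_scratch or
 * load_push_constant, and fills in its sources from the address.
 */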
1263 static nir_ssa_def *
1264 build_explicit_io_load(nir_builder *b, nir_intrinsic_instr *intrin,
1265                        nir_ssa_def *addr, nir_address_format addr_format,
1266                        nir_variable_mode modes,
1267                        uint32_t align_mul, uint32_t align_offset,
1268                        unsigned num_components)
1269 {
1270    nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]);
1271    modes = canonicalize_generic_modes(modes);
1272 
1273    if (util_bitcount(modes) > 1) {
1274       if (addr_format_is_global(addr_format, modes)) {
1275          return build_explicit_io_load(b, intrin, addr, addr_format,
1276                                        nir_var_mem_global,
1277                                        align_mul, align_offset,
1278                                        num_components);
1279       } else if (modes & nir_var_function_temp) {
1280          nir_push_if(b, build_runtime_addr_mode_check(b, addr, addr_format,
1281                                                       nir_var_function_temp));
1282          nir_ssa_def *res1 =
1283             build_explicit_io_load(b, intrin, addr, addr_format,
1284                                    nir_var_function_temp,
1285                                    align_mul, align_offset,
1286                                    num_components);
1287          nir_push_else(b, NULL);
1288          nir_ssa_def *res2 =
1289             build_explicit_io_load(b, intrin, addr, addr_format,
1290                                    modes & ~nir_var_function_temp,
1291                                    align_mul, align_offset,
1292                                    num_components);
1293          nir_pop_if(b, NULL);
1294          return nir_if_phi(b, res1, res2);
1295       } else {
1296          nir_push_if(b, build_runtime_addr_mode_check(b, addr, addr_format,
1297                                                       nir_var_mem_shared));
1298          assert(modes & nir_var_mem_shared);
1299          nir_ssa_def *res1 =
1300             build_explicit_io_load(b, intrin, addr, addr_format,
1301                                    nir_var_mem_shared,
1302                                    align_mul, align_offset,
1303                                    num_components);
1304          nir_push_else(b, NULL);
1305          assert(modes & nir_var_mem_global);
1306          nir_ssa_def *res2 =
1307             build_explicit_io_load(b, intrin, addr, addr_format,
1308                                    nir_var_mem_global,
1309                                    align_mul, align_offset,
1310                                    num_components);
1311          nir_pop_if(b, NULL);
1312          return nir_if_phi(b, res1, res2);
1313       }
1314    }
1315 
1316    assert(util_bitcount(modes) == 1);
1317    const nir_variable_mode mode = modes;
1318 
1319    nir_intrinsic_op op;
1320    switch (intrin->intrinsic) {
1321    case nir_intrinsic_load_deref:
1322       switch (mode) {
1323       case nir_var_mem_ubo:
1324          if (addr_format == nir_address_format_64bit_global_32bit_offset)
1325             op = nir_intrinsic_load_global_constant_offset;
1326          else if (addr_format == nir_address_format_64bit_bounded_global)
1327             op = nir_intrinsic_load_global_constant_bounded;
1328          else if (addr_format_is_global(addr_format, mode))
1329             op = nir_intrinsic_load_global_constant;
1330          else
1331             op = nir_intrinsic_load_ubo;
1332          break;
1333       case nir_var_mem_ssbo:
1334          if (addr_format_is_global(addr_format, mode))
1335             op = nir_intrinsic_load_global;
1336          else
1337             op = nir_intrinsic_load_ssbo;
1338          break;
1339       case nir_var_mem_global:
1340          assert(addr_format_is_global(addr_format, mode));
1341          op = nir_intrinsic_load_global;
1342          break;
1343       case nir_var_uniform:
1344          assert(addr_format_is_offset(addr_format, mode));
1345          assert(b->shader->info.stage == MESA_SHADER_KERNEL);
1346          op = nir_intrinsic_load_kernel_input;
1347          break;
1348       case nir_var_mem_shared:
1349          assert(addr_format_is_offset(addr_format, mode));
1350          op = nir_intrinsic_load_shared;
1351          break;
1352       case nir_var_mem_task_payload:
1353          assert(addr_format_is_offset(addr_format, mode));
1354          op = nir_intrinsic_load_task_payload;
1355          break;
1356       case nir_var_shader_temp:
1357       case nir_var_function_temp:
1358          if (addr_format_is_offset(addr_format, mode)) {
1359             op = nir_intrinsic_load_scratch;
1360          } else {
1361             assert(addr_format_is_global(addr_format, mode));
1362             op = nir_intrinsic_load_global;
1363          }
1364          break;
1365       case nir_var_mem_push_const:
1366          assert(addr_format == nir_address_format_32bit_offset);
1367          op = nir_intrinsic_load_push_constant;
1368          break;
1369       case nir_var_mem_constant:
1370          if (addr_format_is_offset(addr_format, mode)) {
1371             op = nir_intrinsic_load_constant;
1372          } else {
1373             assert(addr_format_is_global(addr_format, mode));
1374             op = nir_intrinsic_load_global_constant;
1375          }
1376          break;
1377       default:
1378          unreachable("Unsupported explicit IO variable mode");
1379       }
1380       break;
1381 
1382    case nir_intrinsic_load_deref_block_intel:
1383       switch (mode) {
1384       case nir_var_mem_ssbo:
1385          if (addr_format_is_global(addr_format, mode))
1386             op = nir_intrinsic_load_global_block_intel;
1387          else
1388             op = nir_intrinsic_load_ssbo_block_intel;
1389          break;
1390       case nir_var_mem_global:
1391          op = nir_intrinsic_load_global_block_intel;
1392          break;
1393       case nir_var_mem_shared:
1394          op = nir_intrinsic_load_shared_block_intel;
1395          break;
1396       default:
1397          unreachable("Unsupported explicit IO variable mode");
1398       }
1399       break;
1400 
1401    default:
1402       unreachable("Invalid intrinsic");
1403    }
1404 
1405    nir_intrinsic_instr *load = nir_intrinsic_instr_create(b->shader, op);
1406 
1407    if (op == nir_intrinsic_load_global_constant_offset) {
1408       assert(addr_format == nir_address_format_64bit_global_32bit_offset);
1409       load->src[0] = nir_src_for_ssa(
1410          nir_pack_64_2x32(b, nir_channels(b, addr, 0x3)));
1411       load->src[1] = nir_src_for_ssa(nir_channel(b, addr, 3));
1412    } else if (op == nir_intrinsic_load_global_constant_bounded) {
1413       assert(addr_format == nir_address_format_64bit_bounded_global);
1414       load->src[0] = nir_src_for_ssa(
1415          nir_pack_64_2x32(b, nir_channels(b, addr, 0x3)));
1416       load->src[1] = nir_src_for_ssa(nir_channel(b, addr, 3));
1417       load->src[2] = nir_src_for_ssa(nir_channel(b, addr, 2));
1418    } else if (addr_format_is_global(addr_format, mode)) {
1419       load->src[0] = nir_src_for_ssa(addr_to_global(b, addr, addr_format));
1420    } else if (addr_format_is_offset(addr_format, mode)) {
1421       assert(addr->num_components == 1);
1422       load->src[0] = nir_src_for_ssa(addr_to_offset(b, addr, addr_format));
1423    } else {
1424       load->src[0] = nir_src_for_ssa(addr_to_index(b, addr, addr_format));
1425       load->src[1] = nir_src_for_ssa(addr_to_offset(b, addr, addr_format));
1426    }
1427 
1428    if (nir_intrinsic_has_access(load))
1429       nir_intrinsic_set_access(load, nir_intrinsic_access(intrin));
1430 
1431    if (op == nir_intrinsic_load_constant) {
1432       nir_intrinsic_set_base(load, 0);
1433       nir_intrinsic_set_range(load, b->shader->constant_data_size);
1434    } else if (mode == nir_var_mem_push_const) {
1435       /* Push constant accesses must be chaseable back to the variable so
1436        * that we can provide a base/range.
1437        */
1438       nir_variable *var = nir_deref_instr_get_variable(deref);
1439       nir_intrinsic_set_base(load, 0);
1440       nir_intrinsic_set_range(load, glsl_get_explicit_size(var->type, false));
1441    }
1442 
1443    unsigned bit_size = intrin->dest.ssa.bit_size;
1444    if (bit_size == 1) {
1445       /* TODO: Make the native bool bit_size an option. */
1446       bit_size = 32;
1447    }
1448 
1449    if (nir_intrinsic_has_align(load))
1450       nir_intrinsic_set_align(load, align_mul, align_offset);
1451 
1452    if (nir_intrinsic_has_range_base(load)) {
1453       unsigned base, range;
1454       nir_get_explicit_deref_range(deref, addr_format, &base, &range);
1455       nir_intrinsic_set_range_base(load, base);
1456       nir_intrinsic_set_range(load, range);
1457    }
1458 
1459    assert(intrin->dest.is_ssa);
1460    load->num_components = num_components;
1461    nir_ssa_dest_init(&load->instr, &load->dest, num_components,
1462                      bit_size, NULL);
1463 
1464    assert(bit_size % 8 == 0);
1465 
1466    nir_ssa_def *result;
1467    if (addr_format_needs_bounds_check(addr_format) &&
1468        op != nir_intrinsic_load_global_constant_bounded) {
1469       /* We don't need to bounds-check global_constant_bounded because bounds
1470        * checking is handled by the intrinsic itself.
1471        *
1472        * The Vulkan spec for robustBufferAccess gives us quite a few options
1473        * as to what we can do with an OOB read.  Unfortunately, returning
1474        * undefined values isn't one of them, so we return an actual zero.
1475        */
1476       nir_ssa_def *zero = nir_imm_zero(b, load->num_components, bit_size);
1477 
1478       /* TODO: Better handle block_intel. */
1479       const unsigned load_size = (bit_size / 8) * load->num_components;
1480       nir_push_if(b, addr_is_in_bounds(b, addr, addr_format, load_size));
1481 
1482       nir_builder_instr_insert(b, &load->instr);
1483 
1484       nir_pop_if(b, NULL);
1485 
1486       result = nir_if_phi(b, &load->dest.ssa, zero);
1487    } else {
1488       nir_builder_instr_insert(b, &load->instr);
1489       result = &load->dest.ssa;
1490    }
1491 
1492    if (intrin->dest.ssa.bit_size == 1) {
1493       /* For shared, we can go ahead and use NIR's and/or the back-end's
1494        * standard encoding for booleans rather than forcing a 0/1 boolean.
1495        * This should save an instruction or two.
1496        */
1497       if (mode == nir_var_mem_shared ||
1498           mode == nir_var_shader_temp ||
1499           mode == nir_var_function_temp)
1500          result = nir_b2b1(b, result);
1501       else
1502          result = nir_i2b(b, result);
1503    }
1504 
1505    return result;
1506 }
1507 
1508 static void
1509 build_explicit_io_store(nir_builder *b, nir_intrinsic_instr *intrin,
1510                         nir_ssa_def *addr, nir_address_format addr_format,
1511                         nir_variable_mode modes,
1512                         uint32_t align_mul, uint32_t align_offset,
1513                         nir_ssa_def *value, nir_component_mask_t write_mask)
1514 {
1515    modes = canonicalize_generic_modes(modes);
1516 
1517    if (util_bitcount(modes) > 1) {
1518       if (addr_format_is_global(addr_format, modes)) {
1519          build_explicit_io_store(b, intrin, addr, addr_format,
1520                                  nir_var_mem_global,
1521                                  align_mul, align_offset,
1522                                  value, write_mask);
1523       } else if (modes & nir_var_function_temp) {
1524          nir_push_if(b, build_runtime_addr_mode_check(b, addr, addr_format,
1525                                                       nir_var_function_temp));
1526          build_explicit_io_store(b, intrin, addr, addr_format,
1527                                  nir_var_function_temp,
1528                                  align_mul, align_offset,
1529                                  value, write_mask);
1530          nir_push_else(b, NULL);
1531          build_explicit_io_store(b, intrin, addr, addr_format,
1532                                  modes & ~nir_var_function_temp,
1533                                  align_mul, align_offset,
1534                                  value, write_mask);
1535          nir_pop_if(b, NULL);
1536       } else {
1537          nir_push_if(b, build_runtime_addr_mode_check(b, addr, addr_format,
1538                                                       nir_var_mem_shared));
1539          assert(modes & nir_var_mem_shared);
1540          build_explicit_io_store(b, intrin, addr, addr_format,
1541                                  nir_var_mem_shared,
1542                                  align_mul, align_offset,
1543                                  value, write_mask);
1544          nir_push_else(b, NULL);
1545          assert(modes & nir_var_mem_global);
1546          build_explicit_io_store(b, intrin, addr, addr_format,
1547                                  nir_var_mem_global,
1548                                  align_mul, align_offset,
1549                                  value, write_mask);
1550          nir_pop_if(b, NULL);
1551       }
1552       return;
1553    }
1554 
1555    assert(util_bitcount(modes) == 1);
1556    const nir_variable_mode mode = modes;
1557 
1558    nir_intrinsic_op op;
1559    switch (intrin->intrinsic) {
1560    case nir_intrinsic_store_deref:
1561       assert(write_mask != 0);
1562 
1563       switch (mode) {
1564       case nir_var_mem_ssbo:
1565          if (addr_format_is_global(addr_format, mode))
1566             op = nir_intrinsic_store_global;
1567          else
1568             op = nir_intrinsic_store_ssbo;
1569          break;
1570       case nir_var_mem_global:
1571          assert(addr_format_is_global(addr_format, mode));
1572          op = nir_intrinsic_store_global;
1573          break;
1574       case nir_var_mem_shared:
1575          assert(addr_format_is_offset(addr_format, mode));
1576          op = nir_intrinsic_store_shared;
1577          break;
1578       case nir_var_mem_task_payload:
1579          assert(addr_format_is_offset(addr_format, mode));
1580          op = nir_intrinsic_store_task_payload;
1581          break;
1582       case nir_var_shader_temp:
1583       case nir_var_function_temp:
1584          if (addr_format_is_offset(addr_format, mode)) {
1585             op = nir_intrinsic_store_scratch;
1586          } else {
1587             assert(addr_format_is_global(addr_format, mode));
1588             op = nir_intrinsic_store_global;
1589          }
1590          break;
1591       default:
1592          unreachable("Unsupported explicit IO variable mode");
1593       }
1594       break;
1595 
1596    case nir_intrinsic_store_deref_block_intel:
1597       assert(write_mask == 0);
1598 
1599       switch (mode) {
1600       case nir_var_mem_ssbo:
1601          if (addr_format_is_global(addr_format, mode))
1602             op = nir_intrinsic_store_global_block_intel;
1603          else
1604             op = nir_intrinsic_store_ssbo_block_intel;
1605          break;
1606       case nir_var_mem_global:
1607          op = nir_intrinsic_store_global_block_intel;
1608          break;
1609       case nir_var_mem_shared:
1610          op = nir_intrinsic_store_shared_block_intel;
1611          break;
1612       default:
1613          unreachable("Unsupported explicit IO variable mode");
1614       }
1615       break;
1616 
1617    default:
1618       unreachable("Invalid intrinsic");
1619    }
1620 
1621    nir_intrinsic_instr *store = nir_intrinsic_instr_create(b->shader, op);
1622 
1623    if (value->bit_size == 1) {
1624       /* For shared, we can go ahead and use NIR's and/or the back-end's
1625        * standard encoding for booleans rather than forcing a 0/1 boolean.
1626        * This should save an instruction or two.
1627        *
1628        * TODO: Make the native bool bit_size an option.
1629        */
1630       if (mode == nir_var_mem_shared ||
1631           mode == nir_var_shader_temp ||
1632           mode == nir_var_function_temp)
1633          value = nir_b2b32(b, value);
1634       else
1635          value = nir_b2i(b, value, 32);
1636    }
1637 
1638    store->src[0] = nir_src_for_ssa(value);
1639    if (addr_format_is_global(addr_format, mode)) {
1640       store->src[1] = nir_src_for_ssa(addr_to_global(b, addr, addr_format));
1641    } else if (addr_format_is_offset(addr_format, mode)) {
1642       assert(addr->num_components == 1);
1643       store->src[1] = nir_src_for_ssa(addr_to_offset(b, addr, addr_format));
1644    } else {
1645       store->src[1] = nir_src_for_ssa(addr_to_index(b, addr, addr_format));
1646       store->src[2] = nir_src_for_ssa(addr_to_offset(b, addr, addr_format));
1647    }
1648 
1649    nir_intrinsic_set_write_mask(store, write_mask);
1650 
1651    if (nir_intrinsic_has_access(store))
1652       nir_intrinsic_set_access(store, nir_intrinsic_access(intrin));
1653 
1654    nir_intrinsic_set_align(store, align_mul, align_offset);
1655 
1656    assert(value->num_components == 1 ||
1657           value->num_components == intrin->num_components);
1658    store->num_components = value->num_components;
1659 
1660    assert(value->bit_size % 8 == 0);
1661 
1662    if (addr_format_needs_bounds_check(addr_format)) {
1663       /* TODO: Better handle block_intel. */
1664       const unsigned store_size = (value->bit_size / 8) * store->num_components;
1665       nir_push_if(b, addr_is_in_bounds(b, addr, addr_format, store_size));
1666 
1667       nir_builder_instr_insert(b, &store->instr);
1668 
1669       nir_pop_if(b, NULL);
1670    } else {
1671       nir_builder_instr_insert(b, &store->instr);
1672    }
1673 }
1674 
1675 static nir_ssa_def *
1676 build_explicit_io_atomic(nir_builder *b, nir_intrinsic_instr *intrin,
1677                          nir_ssa_def *addr, nir_address_format addr_format,
1678                          nir_variable_mode modes)
1679 {
1680    modes = canonicalize_generic_modes(modes);
1681 
1682    if (util_bitcount(modes) > 1) {
1683       if (addr_format_is_global(addr_format, modes)) {
1684          return build_explicit_io_atomic(b, intrin, addr, addr_format,
1685                                          nir_var_mem_global);
1686       } else if (modes & nir_var_function_temp) {
1687          nir_push_if(b, build_runtime_addr_mode_check(b, addr, addr_format,
1688                                                       nir_var_function_temp));
1689          nir_ssa_def *res1 =
1690             build_explicit_io_atomic(b, intrin, addr, addr_format,
1691                                      nir_var_function_temp);
1692          nir_push_else(b, NULL);
1693          nir_ssa_def *res2 =
1694             build_explicit_io_atomic(b, intrin, addr, addr_format,
1695                                      modes & ~nir_var_function_temp);
1696          nir_pop_if(b, NULL);
1697          return nir_if_phi(b, res1, res2);
1698       } else {
1699          nir_push_if(b, build_runtime_addr_mode_check(b, addr, addr_format,
1700                                                       nir_var_mem_shared));
1701          assert(modes & nir_var_mem_shared);
1702          nir_ssa_def *res1 =
1703             build_explicit_io_atomic(b, intrin, addr, addr_format,
1704                                      nir_var_mem_shared);
1705          nir_push_else(b, NULL);
1706          assert(modes & nir_var_mem_global);
1707          nir_ssa_def *res2 =
1708             build_explicit_io_atomic(b, intrin, addr, addr_format,
1709                                      nir_var_mem_global);
1710          nir_pop_if(b, NULL);
1711          return nir_if_phi(b, res1, res2);
1712       }
1713    }
1714 
1715    assert(util_bitcount(modes) == 1);
1716    const nir_variable_mode mode = modes;
1717 
1718    const unsigned num_data_srcs =
1719       nir_intrinsic_infos[intrin->intrinsic].num_srcs - 1;
1720 
1721    nir_intrinsic_op op;
1722    switch (mode) {
1723    case nir_var_mem_ssbo:
1724       if (addr_format_is_global(addr_format, mode))
1725          op = global_atomic_for_deref(intrin->intrinsic);
1726       else
1727          op = ssbo_atomic_for_deref(intrin->intrinsic);
1728       break;
1729    case nir_var_mem_global:
1730       assert(addr_format_is_global(addr_format, mode));
1731       op = global_atomic_for_deref(intrin->intrinsic);
1732       break;
1733    case nir_var_mem_shared:
1734       assert(addr_format_is_offset(addr_format, mode));
1735       op = shared_atomic_for_deref(intrin->intrinsic);
1736       break;
1737    default:
1738       unreachable("Unsupported explicit IO variable mode");
1739    }
1740 
1741    nir_intrinsic_instr *atomic = nir_intrinsic_instr_create(b->shader, op);
1742 
1743    unsigned src = 0;
1744    if (addr_format_is_global(addr_format, mode)) {
1745       atomic->src[src++] = nir_src_for_ssa(addr_to_global(b, addr, addr_format));
1746    } else if (addr_format_is_offset(addr_format, mode)) {
1747       assert(addr->num_components == 1);
1748       atomic->src[src++] = nir_src_for_ssa(addr_to_offset(b, addr, addr_format));
1749    } else {
1750       atomic->src[src++] = nir_src_for_ssa(addr_to_index(b, addr, addr_format));
1751       atomic->src[src++] = nir_src_for_ssa(addr_to_offset(b, addr, addr_format));
1752    }
1753    for (unsigned i = 0; i < num_data_srcs; i++) {
1754       atomic->src[src++] = nir_src_for_ssa(intrin->src[1 + i].ssa);
1755    }
1756 
1757    /* Global atomics don't have access flags because they assume that the
1758     * address may be non-uniform.
1759     */
1760    if (nir_intrinsic_has_access(atomic))
1761       nir_intrinsic_set_access(atomic, nir_intrinsic_access(intrin));
1762 
1763    assert(intrin->dest.ssa.num_components == 1);
1764    nir_ssa_dest_init(&atomic->instr, &atomic->dest,
1765                      1, intrin->dest.ssa.bit_size, NULL);
1766 
1767    assert(atomic->dest.ssa.bit_size % 8 == 0);
1768 
1769    if (addr_format_needs_bounds_check(addr_format)) {
1770       const unsigned atomic_size = atomic->dest.ssa.bit_size / 8;
1771       nir_push_if(b, addr_is_in_bounds(b, addr, addr_format, atomic_size));
1772 
1773       nir_builder_instr_insert(b, &atomic->instr);
1774 
1775       nir_pop_if(b, NULL);
1776       return nir_if_phi(b, &atomic->dest.ssa,
1777                            nir_ssa_undef(b, 1, atomic->dest.ssa.bit_size));
1778    } else {
1779       nir_builder_instr_insert(b, &atomic->instr);
1780       return &atomic->dest.ssa;
1781    }
1782 }
1783 
1784 nir_ssa_def *
1785 nir_explicit_io_address_from_deref(nir_builder *b, nir_deref_instr *deref,
1786                                    nir_ssa_def *base_addr,
1787                                    nir_address_format addr_format)
1788 {
1789    assert(deref->dest.is_ssa);
1790    switch (deref->deref_type) {
1791    case nir_deref_type_var:
1792       return build_addr_for_var(b, deref->var, addr_format);
1793 
1794    case nir_deref_type_array: {
1795       unsigned stride = nir_deref_instr_array_stride(deref);
1796       assert(stride > 0);
1797 
1798       nir_ssa_def *index = nir_ssa_for_src(b, deref->arr.index, 1);
1799       index = nir_i2i(b, index, addr_get_offset_bit_size(base_addr, addr_format));
1800       return build_addr_iadd(b, base_addr, addr_format, deref->modes,
1801                                 nir_amul_imm(b, index, stride));
1802    }
1803 
1804    case nir_deref_type_ptr_as_array: {
1805       nir_ssa_def *index = nir_ssa_for_src(b, deref->arr.index, 1);
1806       index = nir_i2i(b, index, addr_get_offset_bit_size(base_addr, addr_format));
1807       unsigned stride = nir_deref_instr_array_stride(deref);
1808       return build_addr_iadd(b, base_addr, addr_format, deref->modes,
1809                                 nir_amul_imm(b, index, stride));
1810    }
1811 
1812    case nir_deref_type_array_wildcard:
1813       unreachable("Wildcards should be lowered by now");
1814       break;
1815 
1816    case nir_deref_type_struct: {
1817       nir_deref_instr *parent = nir_deref_instr_parent(deref);
1818       int offset = glsl_get_struct_field_offset(parent->type,
1819                                                 deref->strct.index);
1820       assert(offset >= 0);
1821       return build_addr_iadd_imm(b, base_addr, addr_format,
1822                                  deref->modes, offset);
1823    }
1824 
1825    case nir_deref_type_cast:
1826       /* Nothing to do here */
1827       return base_addr;
1828    }
1829 
1830    unreachable("Invalid NIR deref type");
1831 }
1832 
1833 void
1834 nir_lower_explicit_io_instr(nir_builder *b,
1835                             nir_intrinsic_instr *intrin,
1836                             nir_ssa_def *addr,
1837                             nir_address_format addr_format)
1838 {
1839    b->cursor = nir_after_instr(&intrin->instr);
1840 
1841    nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]);
1842    unsigned vec_stride = glsl_get_explicit_stride(deref->type);
1843    unsigned scalar_size = type_scalar_size_bytes(deref->type);
1844    assert(vec_stride == 0 || glsl_type_is_vector(deref->type));
1845    assert(vec_stride == 0 || vec_stride >= scalar_size);
1846 
1847    uint32_t align_mul, align_offset;
1848    if (!nir_get_explicit_deref_align(deref, true, &align_mul, &align_offset)) {
1849       /* If we don't have an alignment from the deref, assume scalar */
1850       align_mul = scalar_size;
1851       align_offset = 0;
1852    }
1853 
1854    switch (intrin->intrinsic) {
1855    case nir_intrinsic_load_deref: {
1856       nir_ssa_def *value;
1857       if (vec_stride > scalar_size) {
1858          nir_ssa_def *comps[NIR_MAX_VEC_COMPONENTS] = { NULL, };
1859          for (unsigned i = 0; i < intrin->num_components; i++) {
1860             unsigned comp_offset = i * vec_stride;
1861             nir_ssa_def *comp_addr = build_addr_iadd_imm(b, addr, addr_format,
1862                                                          deref->modes,
1863                                                          comp_offset);
1864             comps[i] = build_explicit_io_load(b, intrin, comp_addr,
1865                                               addr_format, deref->modes,
1866                                               align_mul,
1867                                               (align_offset + comp_offset) %
1868                                                  align_mul,
1869                                               1);
1870          }
1871          value = nir_vec(b, comps, intrin->num_components);
1872       } else {
1873          value = build_explicit_io_load(b, intrin, addr, addr_format,
1874                                         deref->modes, align_mul, align_offset,
1875                                         intrin->num_components);
1876       }
1877       nir_ssa_def_rewrite_uses(&intrin->dest.ssa, value);
1878       break;
1879    }
1880 
1881    case nir_intrinsic_store_deref: {
1882       assert(intrin->src[1].is_ssa);
1883       nir_ssa_def *value = intrin->src[1].ssa;
1884       nir_component_mask_t write_mask = nir_intrinsic_write_mask(intrin);
1885       if (vec_stride > scalar_size) {
1886          for (unsigned i = 0; i < intrin->num_components; i++) {
1887             if (!(write_mask & (1 << i)))
1888                continue;
1889 
1890             unsigned comp_offset = i * vec_stride;
1891             nir_ssa_def *comp_addr = build_addr_iadd_imm(b, addr, addr_format,
1892                                                          deref->modes,
1893                                                          comp_offset);
1894             build_explicit_io_store(b, intrin, comp_addr, addr_format,
1895                                     deref->modes, align_mul,
1896                                     (align_offset + comp_offset) % align_mul,
1897                                     nir_channel(b, value, i), 1);
1898          }
1899       } else {
1900          build_explicit_io_store(b, intrin, addr, addr_format,
1901                                  deref->modes, align_mul, align_offset,
1902                                  value, write_mask);
1903       }
1904       break;
1905    }
1906 
1907    case nir_intrinsic_load_deref_block_intel: {
1908       nir_ssa_def *value = build_explicit_io_load(b, intrin, addr, addr_format,
1909                                                   deref->modes,
1910                                                   align_mul, align_offset,
1911                                                   intrin->num_components);
1912       nir_ssa_def_rewrite_uses(&intrin->dest.ssa, value);
1913       break;
1914    }
1915 
1916    case nir_intrinsic_store_deref_block_intel: {
1917       assert(intrin->src[1].is_ssa);
1918       nir_ssa_def *value = intrin->src[1].ssa;
1919       const nir_component_mask_t write_mask = 0;
1920       build_explicit_io_store(b, intrin, addr, addr_format,
1921                               deref->modes, align_mul, align_offset,
1922                               value, write_mask);
1923       break;
1924    }
1925 
1926    default: {
1927       nir_ssa_def *value =
1928          build_explicit_io_atomic(b, intrin, addr, addr_format, deref->modes);
1929       nir_ssa_def_rewrite_uses(&intrin->dest.ssa, value);
1930       break;
1931    }
1932    }
1933 
1934    nir_instr_remove(&intrin->instr);
1935 }
1936 
1937 bool
1938 nir_get_explicit_deref_align(nir_deref_instr *deref,
1939                              bool default_to_type_align,
1940                              uint32_t *align_mul,
1941                              uint32_t *align_offset)
1942 {
1943    if (deref->deref_type == nir_deref_type_var) {
1944       /* If we see a variable, align_mul is effectively infinite because we
1945        * know the offset exactly (up to the offset of the base pointer for the
1946        * given variable mode).  We have to pick something, so we choose 256B
1947        * as an arbitrary alignment that seems high enough for any reasonable
1948        * wide-load use-case.  Back-ends should clamp alignments down if 256B
1949        * is too large for some reason.
1950        */
1951       *align_mul = 256;
1952       *align_offset = deref->var->data.driver_location % 256;
1953       return true;
1954    }
1955 
1956    /* If we're a cast deref that has an alignment, use that. */
1957    if (deref->deref_type == nir_deref_type_cast && deref->cast.align_mul > 0) {
1958       *align_mul = deref->cast.align_mul;
1959       *align_offset = deref->cast.align_offset;
1960       return true;
1961    }
1962 
1963    /* Otherwise, we need to compute the alignment based on the parent */
1964    nir_deref_instr *parent = nir_deref_instr_parent(deref);
1965    if (parent == NULL) {
1966       assert(deref->deref_type == nir_deref_type_cast);
1967       if (default_to_type_align) {
1968          /* If we don't have a parent, assume the type's alignment, if any. */
1969          unsigned type_align = glsl_get_explicit_alignment(deref->type);
1970          if (type_align == 0)
1971             return false;
1972 
1973          *align_mul = type_align;
1974          *align_offset = 0;
1975          return true;
1976       } else {
1977          return false;
1978       }
1979    }
1980 
1981    uint32_t parent_mul, parent_offset;
1982    if (!nir_get_explicit_deref_align(parent, default_to_type_align,
1983                                      &parent_mul, &parent_offset))
1984       return false;
1985 
1986    switch (deref->deref_type) {
1987    case nir_deref_type_var:
1988       unreachable("Handled above");
1989 
1990    case nir_deref_type_array:
1991    case nir_deref_type_array_wildcard:
1992    case nir_deref_type_ptr_as_array: {
1993       const unsigned stride = nir_deref_instr_array_stride(deref);
1994       if (stride == 0)
1995          return false;
1996 
1997       if (deref->deref_type != nir_deref_type_array_wildcard &&
1998           nir_src_is_const(deref->arr.index)) {
1999          unsigned offset = nir_src_as_uint(deref->arr.index) * stride;
2000          *align_mul = parent_mul;
2001          *align_offset = (parent_offset + offset) % parent_mul;
2002       } else {
2003          /* If this is a wildcard or an indirect deref, we have to go with the
2004           * power-of-two gcd.
2005           */
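         /* For example (purely illustrative numbers): with parent_mul = 16 and
          * stride = 12, the largest power of two dividing the stride is 4, so
          * align_mul = MIN2(16, 4) = 4 and align_offset = parent_offset % 4.
          */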
2006          *align_mul = MIN2(parent_mul, 1 << (ffs(stride) - 1));
2007          *align_offset = parent_offset % *align_mul;
2008       }
2009       return true;
2010    }
2011 
2012    case nir_deref_type_struct: {
2013       const int offset = glsl_get_struct_field_offset(parent->type,
2014                                                       deref->strct.index);
2015       if (offset < 0)
2016          return false;
2017 
2018       *align_mul = parent_mul;
2019       *align_offset = (parent_offset + offset) % parent_mul;
2020       return true;
2021    }
2022 
2023    case nir_deref_type_cast:
2024       /* We handled the explicit alignment case above. */
2025       assert(deref->cast.align_mul == 0);
2026       *align_mul = parent_mul;
2027       *align_offset = parent_offset;
2028       return true;
2029    }
2030 
2031    unreachable("Invalid deref_instr_type");
2032 }
2033 
2034 static void
2035 lower_explicit_io_deref(nir_builder *b, nir_deref_instr *deref,
2036                         nir_address_format addr_format)
2037 {
2038    /* Just delete the deref if it's not used.  We can't use
2039     * nir_deref_instr_remove_if_unused here because it may remove more than
2040     * one deref which could break our list walking since we walk the list
2041     * backwards.
2042     */
2043    assert(list_is_empty(&deref->dest.ssa.if_uses));
2044    if (list_is_empty(&deref->dest.ssa.uses)) {
2045       nir_instr_remove(&deref->instr);
2046       return;
2047    }
2048 
2049    b->cursor = nir_after_instr(&deref->instr);
2050 
2051    nir_ssa_def *base_addr = NULL;
2052    if (deref->deref_type != nir_deref_type_var) {
2053       assert(deref->parent.is_ssa);
2054       base_addr = deref->parent.ssa;
2055    }
2056 
2057    nir_ssa_def *addr = nir_explicit_io_address_from_deref(b, deref, base_addr,
2058                                                           addr_format);
2059    assert(addr->bit_size == deref->dest.ssa.bit_size);
2060    assert(addr->num_components == deref->dest.ssa.num_components);
2061 
2062    nir_instr_remove(&deref->instr);
2063    nir_ssa_def_rewrite_uses(&deref->dest.ssa, addr);
2064 }
2065 
2066 static void
2067 lower_explicit_io_access(nir_builder *b, nir_intrinsic_instr *intrin,
2068                          nir_address_format addr_format)
2069 {
2070    assert(intrin->src[0].is_ssa);
2071    nir_lower_explicit_io_instr(b, intrin, intrin->src[0].ssa, addr_format);
2072 }
2073 
2074 static void
2075 lower_explicit_io_array_length(nir_builder *b, nir_intrinsic_instr *intrin,
2076                                nir_address_format addr_format)
2077 {
2078    b->cursor = nir_after_instr(&intrin->instr);
2079 
2080    nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]);
2081 
2082    assert(glsl_type_is_array(deref->type));
2083    assert(glsl_get_length(deref->type) == 0);
2084    assert(nir_deref_mode_is(deref, nir_var_mem_ssbo));
2085    unsigned stride = glsl_get_explicit_stride(deref->type);
2086    assert(stride > 0);
2087 
2088    nir_ssa_def *addr = &deref->dest.ssa;
2089    nir_ssa_def *index = addr_to_index(b, addr, addr_format);
2090    nir_ssa_def *offset = addr_to_offset(b, addr, addr_format);
2091    unsigned access = nir_intrinsic_access(intrin);
2092 
2093    nir_ssa_def *arr_size = nir_get_ssbo_size(b, index, .access=access);
2094    arr_size = nir_imax(b, nir_isub(b, arr_size, offset), nir_imm_int(b, 0u));
2095    arr_size = nir_idiv(b, arr_size, nir_imm_int(b, stride));
2096 
2097    nir_ssa_def_rewrite_uses(&intrin->dest.ssa, arr_size);
2098    nir_instr_remove(&intrin->instr);
2099 }
2100 
2101 static void
2102 lower_explicit_io_mode_check(nir_builder *b, nir_intrinsic_instr *intrin,
2103                              nir_address_format addr_format)
2104 {
2105    if (addr_format_is_global(addr_format, 0)) {
2106       /* If the address format is always global, then the driver can use
2107        * global addresses regardless of the mode.  In that case, don't create
2108        * a check, just whack the intrinsic to addr_mode_is and delegate to the
2109        * driver lowering.
2110        */
2111       intrin->intrinsic = nir_intrinsic_addr_mode_is;
2112       return;
2113    }
2114 
2115    assert(intrin->src[0].is_ssa);
2116    nir_ssa_def *addr = intrin->src[0].ssa;
2117 
2118    b->cursor = nir_instr_remove(&intrin->instr);
2119 
2120    nir_ssa_def *is_mode =
2121       build_runtime_addr_mode_check(b, addr, addr_format,
2122                                     nir_intrinsic_memory_modes(intrin));
2123 
2124    nir_ssa_def_rewrite_uses(&intrin->dest.ssa, is_mode);
2125 }
2126 
2127 static bool
2128 nir_lower_explicit_io_impl(nir_function_impl *impl, nir_variable_mode modes,
2129                            nir_address_format addr_format)
2130 {
2131    bool progress = false;
2132 
2133    nir_builder b;
2134    nir_builder_init(&b, impl);
2135 
2136    /* Walk in reverse order so that we can see the full deref chain when we
2137     * lower the access operations.  We lower them assuming that the derefs
2138     * will be turned into address calculations later.
2139     */
2140    nir_foreach_block_reverse(block, impl) {
2141       nir_foreach_instr_reverse_safe(instr, block) {
2142          switch (instr->type) {
2143          case nir_instr_type_deref: {
2144             nir_deref_instr *deref = nir_instr_as_deref(instr);
2145             if (nir_deref_mode_is_in_set(deref, modes)) {
2146                lower_explicit_io_deref(&b, deref, addr_format);
2147                progress = true;
2148             }
2149             break;
2150          }
2151 
2152          case nir_instr_type_intrinsic: {
2153             nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
2154             switch (intrin->intrinsic) {
2155             case nir_intrinsic_load_deref:
2156             case nir_intrinsic_store_deref:
2157             case nir_intrinsic_load_deref_block_intel:
2158             case nir_intrinsic_store_deref_block_intel:
2159             case nir_intrinsic_deref_atomic_add:
2160             case nir_intrinsic_deref_atomic_imin:
2161             case nir_intrinsic_deref_atomic_umin:
2162             case nir_intrinsic_deref_atomic_imax:
2163             case nir_intrinsic_deref_atomic_umax:
2164             case nir_intrinsic_deref_atomic_and:
2165             case nir_intrinsic_deref_atomic_or:
2166             case nir_intrinsic_deref_atomic_xor:
2167             case nir_intrinsic_deref_atomic_exchange:
2168             case nir_intrinsic_deref_atomic_comp_swap:
2169             case nir_intrinsic_deref_atomic_fadd:
2170             case nir_intrinsic_deref_atomic_fmin:
2171             case nir_intrinsic_deref_atomic_fmax:
2172             case nir_intrinsic_deref_atomic_fcomp_swap: {
2173                nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]);
2174                if (nir_deref_mode_is_in_set(deref, modes)) {
2175                   lower_explicit_io_access(&b, intrin, addr_format);
2176                   progress = true;
2177                }
2178                break;
2179             }
2180 
2181             case nir_intrinsic_deref_buffer_array_length: {
2182                nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]);
2183                if (nir_deref_mode_is_in_set(deref, modes)) {
2184                   lower_explicit_io_array_length(&b, intrin, addr_format);
2185                   progress = true;
2186                }
2187                break;
2188             }
2189 
2190             case nir_intrinsic_deref_mode_is: {
2191                nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]);
2192                if (nir_deref_mode_is_in_set(deref, modes)) {
2193                   lower_explicit_io_mode_check(&b, intrin, addr_format);
2194                   progress = true;
2195                }
2196                break;
2197             }
2198 
2199             default:
2200                break;
2201             }
2202             break;
2203          }
2204 
2205          default:
2206             /* Nothing to do */
2207             break;
2208          }
2209       }
2210    }
2211 
2212    if (progress) {
2213       nir_metadata_preserve(impl, nir_metadata_block_index |
2214                                   nir_metadata_dominance);
2215    } else {
2216       nir_metadata_preserve(impl, nir_metadata_all);
2217    }
2218 
2219    return progress;
2220 }
2221 
2222 /** Lower explicitly laid out I/O access to byte offset/address intrinsics
2223  *
2224  * This pass is intended to be used for any I/O which touches memory external
2225  * to the shader or which is directly visible to the client.  It requires that
2226  * all data types in the given modes have a explicit stride/offset decorations
2227  * to tell it exactly how to calculate the offset/address for the given load,
2228  * store, or atomic operation.  If the offset/stride information does not come
2229  * from the client explicitly (as with shared variables in GL or Vulkan),
2230  * nir_lower_vars_to_explicit_types() can be used to add them.
2231  *
2232  * Unlike nir_lower_io, this pass is fully capable of handling incomplete
2233  * pointer chains which may contain cast derefs.  It does so by walking the
2234  * deref chain backwards and simply replacing each deref, one at a time, with
2235  * the appropriate address calculation.  The pass takes a nir_address_format
2236  * parameter which describes how the offset or address is to be represented
2237  * during calculations.  By ensuring that the address is always in a
2238  * consistent format, pointers can safely be conjured from thin air by the
2239  * driver, stored to variables, passed through phis, etc.
2240  *
2241  * The one exception to the simple algorithm described above is for handling
2242  * row-major matrices, in which case we may look down one additional level of
2243  * the deref chain.
2244  *
2245  * This pass is also capable of handling OpenCL generic pointers.  If the
2246  * address format is global, it will lower any ambiguous (more than one mode)
2247  * access to global and pass through the deref_mode_is run-time checks as
2248  * addr_mode_is.  This assumes the driver has somehow mapped shared and
2249  * scratch memory to the global address space.  For other formats such as
2250  * 62bit_generic, there is an enum embedded in the address and we lower
2251  * ambiguous access to an if-ladder and deref_mode_is to a check against the
2252  * embedded enum.  If nir_lower_explicit_io is called on any shader that
2253  * contains generic pointers, it must either be used on all of the generic
2254  * modes or none.
2255  */
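/* A minimal usage sketch (hypothetical driver code, not from any particular
 * driver): lower UBO and SSBO derefs to index+offset intrinsics via the
 * NIR_PASS helper.  The chosen modes and address format are assumptions for
 * illustration only:
 *
 *    NIR_PASS(progress, shader, nir_lower_explicit_io,
 *             nir_var_mem_ubo | nir_var_mem_ssbo,
 *             nir_address_format_32bit_index_offset);
 */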
2256 bool
2257 nir_lower_explicit_io(nir_shader *shader, nir_variable_mode modes,
2258                       nir_address_format addr_format)
2259 {
2260    bool progress = false;
2261 
2262    nir_foreach_function(function, shader) {
2263       if (function->impl &&
2264           nir_lower_explicit_io_impl(function->impl, modes, addr_format))
2265          progress = true;
2266    }
2267 
2268    return progress;
2269 }
2270 
2271 static bool
2272 nir_lower_vars_to_explicit_types_impl(nir_function_impl *impl,
2273                                       nir_variable_mode modes,
2274                                       glsl_type_size_align_func type_info)
2275 {
2276    bool progress = false;
2277 
2278    nir_foreach_block(block, impl) {
2279       nir_foreach_instr(instr, block) {
2280          if (instr->type != nir_instr_type_deref)
2281             continue;
2282 
2283          nir_deref_instr *deref = nir_instr_as_deref(instr);
2284          if (!nir_deref_mode_is_in_set(deref, modes))
2285             continue;
2286 
2287          unsigned size, alignment;
2288          const struct glsl_type *new_type =
2289             glsl_get_explicit_type_for_size_align(deref->type, type_info, &size, &alignment);
2290          if (new_type != deref->type) {
2291             progress = true;
2292             deref->type = new_type;
2293          }
2294          if (deref->deref_type == nir_deref_type_cast) {
2295             /* See also glsl_type::get_explicit_type_for_size_align() */
2296             unsigned new_stride = align(size, alignment);
2297             if (new_stride != deref->cast.ptr_stride) {
2298                deref->cast.ptr_stride = new_stride;
2299                progress = true;
2300             }
2301          }
2302       }
2303    }
2304 
2305    if (progress) {
2306       nir_metadata_preserve(impl, nir_metadata_block_index |
2307                                   nir_metadata_dominance |
2308                                   nir_metadata_live_ssa_defs |
2309                                   nir_metadata_loop_analysis);
2310    } else {
2311       nir_metadata_preserve(impl, nir_metadata_all);
2312    }
2313 
2314    return progress;
2315 }
2316 
2317 static bool
2318 lower_vars_to_explicit(nir_shader *shader,
2319                        struct exec_list *vars, nir_variable_mode mode,
2320                        glsl_type_size_align_func type_info)
2321 {
2322    bool progress = false;
2323    unsigned offset;
2324    switch (mode) {
2325    case nir_var_uniform:
2326       assert(shader->info.stage == MESA_SHADER_KERNEL);
2327       offset = 0;
2328       break;
2329    case nir_var_function_temp:
2330    case nir_var_shader_temp:
2331       offset = shader->scratch_size;
2332       break;
2333    case nir_var_mem_shared:
2334       offset = shader->info.shared_size;
2335       break;
2336    case nir_var_mem_task_payload:
2337       offset = shader->info.task_payload_size;
2338       break;
2339    case nir_var_mem_global:
2340       offset = shader->global_mem_size;
2341       break;
2342    case nir_var_mem_constant:
2343       offset = shader->constant_data_size;
2344       break;
2345    case nir_var_shader_call_data:
2346    case nir_var_ray_hit_attrib:
2347       offset = 0;
2348       break;
2349    default:
2350       unreachable("Unsupported mode");
2351    }
2352    nir_foreach_variable_in_list(var, vars) {
2353       if (var->data.mode != mode)
2354          continue;
2355 
2356       unsigned size, align;
2357       const struct glsl_type *explicit_type =
2358          glsl_get_explicit_type_for_size_align(var->type, type_info, &size, &align);
2359 
2360       if (explicit_type != var->type)
2361          var->type = explicit_type;
2362 
2363       UNUSED bool is_empty_struct =
2364          glsl_type_is_struct_or_ifc(explicit_type) &&
2365          glsl_get_length(explicit_type) == 0;
2366 
2367       assert(util_is_power_of_two_nonzero(align) || is_empty_struct);
2368       var->data.driver_location = ALIGN_POT(offset, align);
2369       offset = var->data.driver_location + size;
2370       progress = true;
2371    }
2372 
2373    switch (mode) {
2374    case nir_var_uniform:
2375       assert(shader->info.stage == MESA_SHADER_KERNEL);
2376       shader->num_uniforms = offset;
2377       break;
2378    case nir_var_shader_temp:
2379    case nir_var_function_temp:
2380       shader->scratch_size = offset;
2381       break;
2382    case nir_var_mem_shared:
2383       shader->info.shared_size = offset;
2384       break;
2385    case nir_var_mem_task_payload:
2386       shader->info.task_payload_size = offset;
2387       break;
2388    case nir_var_mem_global:
2389       shader->global_mem_size = offset;
2390       break;
2391    case nir_var_mem_constant:
2392       shader->constant_data_size = offset;
2393       break;
2394    case nir_var_shader_call_data:
2395    case nir_var_ray_hit_attrib:
2396       break;
2397    default:
2398       unreachable("Unsupported mode");
2399    }
2400 
2401    return progress;
2402 }
2403 
2404 /* If nir_lower_vars_to_explicit_types is called on any shader that contains
2405  * generic pointers, it must either be used on all of the generic modes or
2406  * none.
2407  */
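/* A minimal usage sketch (hypothetical; assumes the natural size/align rules
 * of glsl_get_natural_size_align_bytes are acceptable for the target): assign
 * explicit layouts to shared variables before lowering them with
 * nir_lower_explicit_io:
 *
 *    NIR_PASS(progress, shader, nir_lower_vars_to_explicit_types,
 *             nir_var_mem_shared, glsl_get_natural_size_align_bytes);
 */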
2408 bool
2409 nir_lower_vars_to_explicit_types(nir_shader *shader,
2410                                  nir_variable_mode modes,
2411                                  glsl_type_size_align_func type_info)
2412 {
2413    /* TODO: Situations which need to be handled to support more modes:
2414     * - row-major matrices
2415     * - compact shader inputs/outputs
2416     * - interface types
2417     */
2418    ASSERTED nir_variable_mode supported =
2419       nir_var_mem_shared | nir_var_mem_global | nir_var_mem_constant |
2420       nir_var_shader_temp | nir_var_function_temp | nir_var_uniform |
2421       nir_var_shader_call_data | nir_var_ray_hit_attrib |
2422       nir_var_mem_task_payload;
2423    assert(!(modes & ~supported) && "unsupported");
2424 
2425    bool progress = false;
2426 
2427    if (modes & nir_var_uniform)
2428       progress |= lower_vars_to_explicit(shader, &shader->variables, nir_var_uniform, type_info);
2429    if (modes & nir_var_mem_global)
2430       progress |= lower_vars_to_explicit(shader, &shader->variables, nir_var_mem_global, type_info);
2431 
2432    if (modes & nir_var_mem_shared) {
2433       assert(!shader->info.shared_memory_explicit_layout);
2434       progress |= lower_vars_to_explicit(shader, &shader->variables, nir_var_mem_shared, type_info);
2435    }
2436 
2437    if (modes & nir_var_shader_temp)
2438       progress |= lower_vars_to_explicit(shader, &shader->variables, nir_var_shader_temp, type_info);
2439    if (modes & nir_var_mem_constant)
2440       progress |= lower_vars_to_explicit(shader, &shader->variables, nir_var_mem_constant, type_info);
2441    if (modes & nir_var_shader_call_data)
2442       progress |= lower_vars_to_explicit(shader, &shader->variables, nir_var_shader_call_data, type_info);
2443    if (modes & nir_var_ray_hit_attrib)
2444       progress |= lower_vars_to_explicit(shader, &shader->variables, nir_var_ray_hit_attrib, type_info);
2445    if (modes & nir_var_mem_task_payload)
2446       progress |= lower_vars_to_explicit(shader, &shader->variables, nir_var_mem_task_payload, type_info);
2447 
2448    nir_foreach_function(function, shader) {
2449       if (function->impl) {
2450          if (modes & nir_var_function_temp)
2451             progress |= lower_vars_to_explicit(shader, &function->impl->locals, nir_var_function_temp, type_info);
2452 
2453          progress |= nir_lower_vars_to_explicit_types_impl(function->impl, modes, type_info);
2454       }
2455    }
2456 
2457    return progress;
2458 }
2459 
2460 static void
2461 write_constant(void *dst, size_t dst_size,
2462                const nir_constant *c, const struct glsl_type *type)
2463 {
2464    if (glsl_type_is_vector_or_scalar(type)) {
2465       const unsigned num_components = glsl_get_vector_elements(type);
2466       const unsigned bit_size = glsl_get_bit_size(type);
2467       if (bit_size == 1) {
2468          /* Booleans are special-cased to be 32-bit
2469           *
2470           * TODO: Make the native bool bit_size an option.
2471           */
2472          assert(num_components * 4 <= dst_size);
2473          for (unsigned i = 0; i < num_components; i++) {
2474             int32_t b32 = -(int)c->values[i].b;
2475             memcpy((char *)dst + i * 4, &b32, 4);
2476          }
2477       } else {
2478          assert(bit_size >= 8 && bit_size % 8 == 0);
2479          const unsigned byte_size = bit_size / 8;
2480          assert(num_components * byte_size <= dst_size);
2481          for (unsigned i = 0; i < num_components; i++) {
2482             /* Annoyingly, thanks to packed structs, we can't make any
2483              * assumptions about the alignment of dst.  To avoid any strange
2484              * issues with unaligned writes, we always use memcpy.
2485              */
2486             memcpy((char *)dst + i * byte_size, &c->values[i], byte_size);
2487          }
2488       }
2489    } else if (glsl_type_is_array_or_matrix(type)) {
2490       const unsigned array_len = glsl_get_length(type);
2491       const unsigned stride = glsl_get_explicit_stride(type);
2492       assert(stride > 0);
2493       const struct glsl_type *elem_type = glsl_get_array_element(type);
2494       for (unsigned i = 0; i < array_len; i++) {
2495          unsigned elem_offset = i * stride;
2496          assert(elem_offset < dst_size);
2497          write_constant((char *)dst + elem_offset, dst_size - elem_offset,
2498                         c->elements[i], elem_type);
2499       }
2500    } else {
2501       assert(glsl_type_is_struct_or_ifc(type));
2502       const unsigned num_fields = glsl_get_length(type);
2503       for (unsigned i = 0; i < num_fields; i++) {
2504          const int field_offset = glsl_get_struct_field_offset(type, i);
2505          assert(field_offset >= 0 && field_offset < dst_size);
2506          const struct glsl_type *field_type = glsl_get_struct_field(type, i);
2507          write_constant((char *)dst + field_offset, dst_size - field_offset,
2508                         c->elements[i], field_type);
2509       }
2510    }
2511 }
2512 
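/* A usage sketch (hypothetical; assumes nir_var_mem_constant variables have
 * already been laid out by nir_lower_vars_to_explicit_types so that
 * constant_data_size and each variable's driver_location are valid):
 *
 *    shader->constant_data = rzalloc_size(shader, shader->constant_data_size);
 *    nir_gather_explicit_io_initializers(shader, shader->constant_data,
 *                                        shader->constant_data_size,
 *                                        nir_var_mem_constant);
 */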
2513 void
2514 nir_gather_explicit_io_initializers(nir_shader *shader,
2515                                     void *dst, size_t dst_size,
2516                                     nir_variable_mode mode)
2517 {
2518    /* It doesn't really make sense to gather initializers for more than one
2519     * mode at a time.  If this ever becomes well-defined, we can drop the
2520     * assert then.
2521     */
2522    assert(util_bitcount(mode) == 1);
2523 
2524    nir_foreach_variable_with_modes(var, shader, mode) {
2525       assert(var->data.driver_location < dst_size);
2526       write_constant((char *)dst + var->data.driver_location,
2527                      dst_size - var->data.driver_location,
2528                      var->constant_initializer, var->type);
2529    }
2530 }
2531 
2532 /**
2533  * Return the offset source for a load/store intrinsic.
2534  */
2535 nir_src *
2536 nir_get_io_offset_src(nir_intrinsic_instr *instr)
2537 {
2538    switch (instr->intrinsic) {
2539    case nir_intrinsic_load_input:
2540    case nir_intrinsic_load_output:
2541    case nir_intrinsic_load_shared:
2542    case nir_intrinsic_load_task_payload:
2543    case nir_intrinsic_load_uniform:
2544    case nir_intrinsic_load_kernel_input:
2545    case nir_intrinsic_load_global:
2546    case nir_intrinsic_load_global_constant:
2547    case nir_intrinsic_load_scratch:
2548    case nir_intrinsic_load_fs_input_interp_deltas:
2549    case nir_intrinsic_shared_atomic_add:
2550    case nir_intrinsic_shared_atomic_and:
2551    case nir_intrinsic_shared_atomic_comp_swap:
2552    case nir_intrinsic_shared_atomic_exchange:
2553    case nir_intrinsic_shared_atomic_fadd:
2554    case nir_intrinsic_shared_atomic_fcomp_swap:
2555    case nir_intrinsic_shared_atomic_fmax:
2556    case nir_intrinsic_shared_atomic_fmin:
2557    case nir_intrinsic_shared_atomic_imax:
2558    case nir_intrinsic_shared_atomic_imin:
2559    case nir_intrinsic_shared_atomic_or:
2560    case nir_intrinsic_shared_atomic_umax:
2561    case nir_intrinsic_shared_atomic_umin:
2562    case nir_intrinsic_shared_atomic_xor:
2563    case nir_intrinsic_global_atomic_add:
2564    case nir_intrinsic_global_atomic_and:
2565    case nir_intrinsic_global_atomic_comp_swap:
2566    case nir_intrinsic_global_atomic_exchange:
2567    case nir_intrinsic_global_atomic_fadd:
2568    case nir_intrinsic_global_atomic_fcomp_swap:
2569    case nir_intrinsic_global_atomic_fmax:
2570    case nir_intrinsic_global_atomic_fmin:
2571    case nir_intrinsic_global_atomic_imax:
2572    case nir_intrinsic_global_atomic_imin:
2573    case nir_intrinsic_global_atomic_or:
2574    case nir_intrinsic_global_atomic_umax:
2575    case nir_intrinsic_global_atomic_umin:
2576    case nir_intrinsic_global_atomic_xor:
2577       return &instr->src[0];
2578    case nir_intrinsic_load_ubo:
2579    case nir_intrinsic_load_ssbo:
2580    case nir_intrinsic_load_input_vertex:
2581    case nir_intrinsic_load_per_vertex_input:
2582    case nir_intrinsic_load_per_vertex_output:
2583    case nir_intrinsic_load_per_primitive_output:
2584    case nir_intrinsic_load_interpolated_input:
2585    case nir_intrinsic_store_output:
2586    case nir_intrinsic_store_shared:
2587    case nir_intrinsic_store_task_payload:
2588    case nir_intrinsic_store_global:
2589    case nir_intrinsic_store_scratch:
2590    case nir_intrinsic_ssbo_atomic_add:
2591    case nir_intrinsic_ssbo_atomic_imin:
2592    case nir_intrinsic_ssbo_atomic_umin:
2593    case nir_intrinsic_ssbo_atomic_imax:
2594    case nir_intrinsic_ssbo_atomic_umax:
2595    case nir_intrinsic_ssbo_atomic_and:
2596    case nir_intrinsic_ssbo_atomic_or:
2597    case nir_intrinsic_ssbo_atomic_xor:
2598    case nir_intrinsic_ssbo_atomic_exchange:
2599    case nir_intrinsic_ssbo_atomic_comp_swap:
2600    case nir_intrinsic_ssbo_atomic_fadd:
2601    case nir_intrinsic_ssbo_atomic_fmin:
2602    case nir_intrinsic_ssbo_atomic_fmax:
2603    case nir_intrinsic_ssbo_atomic_fcomp_swap:
2604       return &instr->src[1];
2605    case nir_intrinsic_store_ssbo:
2606    case nir_intrinsic_store_per_vertex_output:
2607    case nir_intrinsic_store_per_primitive_output:
2608       return &instr->src[2];
2609    default:
2610       return NULL;
2611    }
2612 }
2613 
2614 /**
2615  * Return the vertex/primitive index source for an arrayed load/store intrinsic.
2616  */
2617 nir_src *
2618 nir_get_io_arrayed_index_src(nir_intrinsic_instr *instr)
2619 {
2620    switch (instr->intrinsic) {
2621    case nir_intrinsic_load_per_vertex_input:
2622    case nir_intrinsic_load_per_vertex_output:
2623    case nir_intrinsic_load_per_primitive_output:
2624       return &instr->src[0];
2625    case nir_intrinsic_store_per_vertex_output:
2626    case nir_intrinsic_store_per_primitive_output:
2627       return &instr->src[1];
2628    default:
2629       return NULL;
2630    }
2631 }
2632 
2633 /**
2634  * Return the numeric constant that identifies a NULL pointer for each address
2635  * format.
2636  */
2637 const nir_const_value *
2638 nir_address_format_null_value(nir_address_format addr_format)
2639 {
2640    static const nir_const_value null_values[][NIR_MAX_VEC_COMPONENTS] = {
2641       [nir_address_format_32bit_global] = {{0}},
2642       [nir_address_format_64bit_global] = {{0}},
2643       [nir_address_format_64bit_global_32bit_offset] = {{0}},
2644       [nir_address_format_64bit_bounded_global] = {{0}},
2645       [nir_address_format_32bit_index_offset] = {{.u32 = ~0}, {.u32 = ~0}},
2646       [nir_address_format_32bit_index_offset_pack64] = {{.u64 = ~0ull}},
2647       [nir_address_format_vec2_index_32bit_offset] = {{.u32 = ~0}, {.u32 = ~0}, {.u32 = ~0}},
2648       [nir_address_format_32bit_offset] = {{.u32 = ~0}},
2649       [nir_address_format_32bit_offset_as_64bit] = {{.u64 = ~0ull}},
2650       [nir_address_format_62bit_generic] = {{.u64 = 0}},
2651       [nir_address_format_logical] = {{.u32 = ~0}},
2652    };
2653 
2654    assert(addr_format < ARRAY_SIZE(null_values));
2655    return null_values[addr_format];
2656 }
2657 
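/*
 * Build a 1-bit boolean SSA value that is true when addr0 and addr1 compare
 * equal in the given address format.  For 64bit_global_32bit_offset, only
 * the base and offset components (mask 0xb) take part in the comparison.
 *
 * Example (hypothetical caller): test an address against the NULL value of
 * its format, assuming addr, addr_format and the builder b come from the
 * caller:
 *
 *    nir_ssa_def *null_addr =
 *       nir_build_imm(b, addr->num_components, addr->bit_size,
 *                     nir_address_format_null_value(addr_format));
 *    nir_ssa_def *is_null =
 *       nir_build_addr_ieq(b, addr, null_addr, addr_format);
 */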
2658 nir_ssa_def *
2659 nir_build_addr_ieq(nir_builder *b, nir_ssa_def *addr0, nir_ssa_def *addr1,
2660                    nir_address_format addr_format)
2661 {
2662    switch (addr_format) {
2663    case nir_address_format_32bit_global:
2664    case nir_address_format_64bit_global:
2665    case nir_address_format_64bit_bounded_global:
2666    case nir_address_format_32bit_index_offset:
2667    case nir_address_format_vec2_index_32bit_offset:
2668    case nir_address_format_32bit_offset:
2669    case nir_address_format_62bit_generic:
2670       return nir_ball_iequal(b, addr0, addr1);
2671 
2672    case nir_address_format_64bit_global_32bit_offset:
2673       return nir_ball_iequal(b, nir_channels(b, addr0, 0xb),
2674                                 nir_channels(b, addr1, 0xb));
2675 
2676    case nir_address_format_32bit_offset_as_64bit:
2677       assert(addr0->num_components == 1 && addr1->num_components == 1);
2678       return nir_ieq(b, nir_u2u32(b, addr0), nir_u2u32(b, addr1));
2679 
2680    case nir_address_format_32bit_index_offset_pack64:
2681       assert(addr0->num_components == 1 && addr1->num_components == 1);
2682       return nir_ball_iequal(b, nir_unpack_64_2x32(b, addr0), nir_unpack_64_2x32(b, addr1));
2683 
2684    case nir_address_format_logical:
2685       unreachable("Unsupported address format");
2686    }
2687 
2688    unreachable("Invalid address format");
2689 }
2690 
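/*
 * Build the integer difference between two addresses in the given format.
 * Index+offset formats assume both addresses point into the same buffer and
 * subtract only the offset component; the global+offset and bounded-global
 * formats are first flattened to a 64-bit global address.
 */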
2691 nir_ssa_def *
2692 nir_build_addr_isub(nir_builder *b, nir_ssa_def *addr0, nir_ssa_def *addr1,
2693                     nir_address_format addr_format)
2694 {
2695    switch (addr_format) {
2696    case nir_address_format_32bit_global:
2697    case nir_address_format_64bit_global:
2698    case nir_address_format_32bit_offset:
2699    case nir_address_format_32bit_index_offset_pack64:
2700    case nir_address_format_62bit_generic:
2701       assert(addr0->num_components == 1);
2702       assert(addr1->num_components == 1);
2703       return nir_isub(b, addr0, addr1);
2704 
2705    case nir_address_format_32bit_offset_as_64bit:
2706       assert(addr0->num_components == 1);
2707       assert(addr1->num_components == 1);
2708       return nir_u2u64(b, nir_isub(b, nir_u2u32(b, addr0), nir_u2u32(b, addr1)));
2709 
2710    case nir_address_format_64bit_global_32bit_offset:
2711    case nir_address_format_64bit_bounded_global:
2712       return nir_isub(b, addr_to_global(b, addr0, addr_format),
2713                          addr_to_global(b, addr1, addr_format));
2714 
2715    case nir_address_format_32bit_index_offset:
2716       assert(addr0->num_components == 2);
2717       assert(addr1->num_components == 2);
2718       /* Assume the same buffer index. */
2719       return nir_isub(b, nir_channel(b, addr0, 1), nir_channel(b, addr1, 1));
2720 
2721    case nir_address_format_vec2_index_32bit_offset:
2722       assert(addr0->num_components == 3);
2723       assert(addr1->num_components == 3);
2724       /* Assume the same buffer index. */
2725       return nir_isub(b, nir_channel(b, addr0, 2), nir_channel(b, addr1, 2));
2726 
2727    case nir_address_format_logical:
2728       unreachable("Unsupported address format");
2729    }
2730 
2731    unreachable("Invalid address format");
2732 }
2733 
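/* The helpers below classify already-lowered I/O intrinsics: is_input()
 * matches loads of shader inputs and is_output() matches loads and stores of
 * shader outputs.
 */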
2734 static bool
2735 is_input(nir_intrinsic_instr *intrin)
2736 {
2737    return intrin->intrinsic == nir_intrinsic_load_input ||
2738           intrin->intrinsic == nir_intrinsic_load_per_vertex_input ||
2739           intrin->intrinsic == nir_intrinsic_load_interpolated_input ||
2740           intrin->intrinsic == nir_intrinsic_load_fs_input_interp_deltas;
2741 }
2742 
2743 static bool
2744 is_output(nir_intrinsic_instr *intrin)
2745 {
2746    return intrin->intrinsic == nir_intrinsic_load_output ||
2747           intrin->intrinsic == nir_intrinsic_load_per_vertex_output ||
2748           intrin->intrinsic == nir_intrinsic_load_per_primitive_output ||
2749           intrin->intrinsic == nir_intrinsic_store_output ||
2750           intrin->intrinsic == nir_intrinsic_store_per_vertex_output ||
2751           intrin->intrinsic == nir_intrinsic_store_per_primitive_output;
2752 }
2753 
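/* Returns true when the accessed value is 64-bit with at least 3 components,
 * i.e. it occupies two vec4 slots.
 */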
2754 static bool is_dual_slot(nir_intrinsic_instr *intrin)
2755 {
2756    if (intrin->intrinsic == nir_intrinsic_store_output ||
2757        intrin->intrinsic == nir_intrinsic_store_per_vertex_output ||
2758        intrin->intrinsic == nir_intrinsic_store_per_primitive_output) {
2759       return nir_src_bit_size(intrin->src[0]) == 64 &&
2760              nir_src_num_components(intrin->src[0]) >= 3;
2761    }
2762 
2763    return nir_dest_bit_size(intrin->dest) == 64 &&
2764           nir_dest_num_components(intrin->dest) >= 3;
2765 }
2766 
2767 /**
2768  * This pass adds constant offsets to instr->const_index[0] for input/output
2769  * intrinsics, and resets the offset source to 0.  Non-constant offsets remain
2770  * unchanged - since we don't know what part of a compound variable is
2771  * accessed, we allocate storage for the entire thing. For drivers that use
2772  * nir_lower_io_to_temporaries() before nir_lower_io(), this guarantees that
2773  * the offset source will be 0, so that they don't have to add it in manually.
2774  */
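/* Illustrative example (values made up): a store_output with base=16 and a
 * constant offset source of 2 becomes base=18, the offset source is rewritten
 * to an immediate 0, and sem.location advances by 2 slots.
 */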
2775 
2776 static bool
2777 add_const_offset_to_base_block(nir_block *block, nir_builder *b,
2778                                nir_variable_mode modes)
2779 {
2780    bool progress = false;
2781    nir_foreach_instr_safe(instr, block) {
2782       if (instr->type != nir_instr_type_intrinsic)
2783          continue;
2784 
2785       nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
2786 
2787       if (((modes & nir_var_shader_in) && is_input(intrin)) ||
2788           ((modes & nir_var_shader_out) && is_output(intrin))) {
2789          nir_io_semantics sem = nir_intrinsic_io_semantics(intrin);
2790 
2791          /* NV_mesh_shader: ignore MS primitive indices. */
2792          if (b->shader->info.stage == MESA_SHADER_MESH &&
2793              sem.location == VARYING_SLOT_PRIMITIVE_INDICES &&
2794              !(b->shader->info.per_primitive_outputs &
2795                BITFIELD64_BIT(VARYING_SLOT_PRIMITIVE_INDICES)))
2796             continue;
2797 
2798          nir_src *offset = nir_get_io_offset_src(intrin);
2799 
2800          /* TODO: Better handling of per-view variables here */
2801          if (nir_src_is_const(*offset) &&
2802              !nir_intrinsic_io_semantics(intrin).per_view) {
2803             unsigned off = nir_src_as_uint(*offset);
2804 
2805             nir_intrinsic_set_base(intrin, nir_intrinsic_base(intrin) + off);
2806 
2807             sem.location += off;
2808             /* non-indirect indexing should reduce num_slots */
2809             sem.num_slots = is_dual_slot(intrin) ? 2 : 1;
2810             nir_intrinsic_set_io_semantics(intrin, sem);
2811 
2812             b->cursor = nir_before_instr(&intrin->instr);
2813             nir_instr_rewrite_src(&intrin->instr, offset,
2814                                   nir_src_for_ssa(nir_imm_int(b, 0)));
2815             progress = true;
2816          }
2817       }
2818    }
2819 
2820    return progress;
2821 }
2822 
2823 bool
2824 nir_io_add_const_offset_to_base(nir_shader *nir, nir_variable_mode modes)
2825 {
2826    bool progress = false;
2827 
2828    nir_foreach_function(f, nir) {
2829       if (f->impl) {
2830          nir_builder b;
2831          nir_builder_init(&b, f->impl);
2832          nir_foreach_block(block, f->impl) {
2833             progress |= add_const_offset_to_base_block(block, &b, modes);
2834          }
2835       }
2836    }
2837 
2838    return progress;
2839 }
2840 
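/* Replace loads of the COL0/COL1 fragment shader inputs with load_color0 /
 * load_color1 intrinsics and record the interpolation, sample and centroid
 * qualifiers in shader_info.
 */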
2841 static bool
2842 nir_lower_color_inputs(nir_shader *nir)
2843 {
2844    nir_function_impl *impl = nir_shader_get_entrypoint(nir);
2845    bool progress = false;
2846 
2847    nir_builder b;
2848    nir_builder_init(&b, impl);
2849 
2850    nir_foreach_block (block, impl) {
2851       nir_foreach_instr_safe (instr, block) {
2852          if (instr->type != nir_instr_type_intrinsic)
2853             continue;
2854 
2855          nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
2856 
2857          if (intrin->intrinsic != nir_intrinsic_load_deref)
2858             continue;
2859 
2860          nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]);
2861          if (!nir_deref_mode_is(deref, nir_var_shader_in))
2862             continue;
2863 
2864          b.cursor = nir_before_instr(instr);
2865          nir_variable *var = nir_deref_instr_get_variable(deref);
2866          nir_ssa_def *def;
2867 
2868          if (var->data.location == VARYING_SLOT_COL0) {
2869             def = nir_load_color0(&b);
2870             nir->info.fs.color0_interp = var->data.interpolation;
2871             nir->info.fs.color0_sample = var->data.sample;
2872             nir->info.fs.color0_centroid = var->data.centroid;
2873          } else if (var->data.location == VARYING_SLOT_COL1) {
2874             def = nir_load_color1(&b);
2875             nir->info.fs.color1_interp = var->data.interpolation;
2876             nir->info.fs.color1_sample = var->data.sample;
2877             nir->info.fs.color1_centroid = var->data.centroid;
2878          } else {
2879             continue;
2880          }
2881 
2882          nir_ssa_def_rewrite_uses(&intrin->dest.ssa, def);
2883          nir_instr_remove(instr);
2884          progress = true;
2885       }
2886    }
2887 
2888    if (progress) {
2889       nir_metadata_preserve(impl, nir_metadata_dominance |
2890                                   nir_metadata_block_index);
2891    } else {
2892       nir_metadata_preserve(impl, nir_metadata_all);
2893    }
2894    return progress;
2895 }
2896 
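/* Copy the per-buffer strides from the given nir_xfb_info into shader_info
 * and annotate output-store intrinsics with the matching nir_io_xfb
 * information (buffer, offset and number of components).
 */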
2897 static bool
2898 nir_add_xfb_info(nir_shader *nir, nir_xfb_info *info)
2899 {
2900    nir_function_impl *impl = nir_shader_get_entrypoint(nir);
2901    bool progress = false;
2902 
2903    for (unsigned i = 0; i < NIR_MAX_XFB_BUFFERS; i++)
2904       nir->info.xfb_stride[i] = info->buffers[i].stride;
2905 
2906    nir_foreach_block (block, impl) {
2907       nir_foreach_instr_safe (instr, block) {
2908          if (instr->type != nir_instr_type_intrinsic)
2909             continue;
2910 
2911          nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
2912 
2913          if (!nir_intrinsic_has_io_xfb(intr))
2914             continue;
2915 
2916          /* No indirect indexing allowed. The index is implied to be 0. */
2917          ASSERTED nir_src offset = *nir_get_io_offset_src(intr);
2918          assert(nir_src_is_const(offset) && nir_src_as_uint(offset) == 0);
2919 
2920          /* Calling this pass for the second time shouldn't do anything. */
2921          if (nir_intrinsic_io_xfb(intr).out[0].num_components ||
2922              nir_intrinsic_io_xfb(intr).out[1].num_components ||
2923              nir_intrinsic_io_xfb2(intr).out[0].num_components ||
2924              nir_intrinsic_io_xfb2(intr).out[1].num_components)
2925             continue;
2926 
2927          nir_io_semantics sem = nir_intrinsic_io_semantics(intr);
2928          unsigned writemask = nir_intrinsic_write_mask(intr) <<
2929                             nir_intrinsic_component(intr);
2930 
2931          nir_io_xfb xfb[2];
2932          memset(xfb, 0, sizeof(xfb));
2933 
2934          for (unsigned i = 0; i < info->output_count; i++) {
2935             if (info->outputs[i].location == sem.location) {
2936                nir_xfb_output_info *out = &info->outputs[i];
2937                unsigned xfb_mask = writemask & out->component_mask;
2938 
2939                /*fprintf(stdout, "output%u: buffer=%u, offset=%u, location=%u, "
2940                            "component_offset=%u, component_mask=0x%x, xfb_mask=0x%x, slots=%u\n",
2941                        i, out->buffer,
2942                        out->offset,
2943                        out->location,
2944                        out->component_offset,
2945                        out->component_mask,
2946                        xfb_mask, sem.num_slots);*/
2947 
2948                while (xfb_mask) {
2949                   int start, count;
2950                   u_bit_scan_consecutive_range(&xfb_mask, &start, &count);
2951 
2952                   xfb[start / 2].out[start % 2].num_components = count;
2953                   xfb[start / 2].out[start % 2].buffer = out->buffer;
2954                   /* out->offset is relative to the first stored xfb component */
2955                   /* start is relative to component 0 */
2956                   xfb[start / 2].out[start % 2].offset =
2957                      out->offset / 4 - out->component_offset + start;
2958 
2959                   progress = true;
2960                }
2961             }
2962          }
2963 
2964          nir_intrinsic_set_io_xfb(intr, xfb[0]);
2965          nir_intrinsic_set_io_xfb2(intr, xfb[1]);
2966       }
2967    }
2968 
2969    nir_metadata_preserve(impl, nir_metadata_all);
2970    return progress;
2971 }
2972 
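/* type_size callback for nir_lower_io: one unit per vec4 attribute slot. */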
2973 static int
2974 type_size_vec4(const struct glsl_type *type, bool bindless)
2975 {
2976    return glsl_count_attribute_slots(type, false);
2977 }
2978 
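/*
 * Run the standard sequence that turns I/O variables into lowered I/O
 * intrinsics: indirect inputs/outputs the driver can't handle are lowered to
 * temporaries first, FS COL0/COL1 inputs are optionally lowered to
 * load_color* intrinsics, nir_lower_io runs with vec4 slot sizes and 64-bit
 * lowering, constant offsets are folded into the intrinsic base, dead
 * derefs/variables are cleaned up, and transform feedback info is attached
 * to the output stores.
 */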
2979 void
2980 nir_lower_io_passes(nir_shader *nir, nir_xfb_info *xfb)
2981 {
2982    if (!nir->options->lower_io_variables)
2983       return;
2984 
2985    /* Ignore transform feedback for stages that can't have it. */
2986    if (nir->info.stage != MESA_SHADER_VERTEX &&
2987        nir->info.stage != MESA_SHADER_TESS_EVAL &&
2988        nir->info.stage != MESA_SHADER_GEOMETRY)
2989       xfb = NULL;
2990 
2991    bool has_indirect_inputs =
2992       (nir->options->support_indirect_inputs >> nir->info.stage) & 0x1;
2993 
2994    /* Transform feedback requires that indirect outputs are lowered. */
2995    bool has_indirect_outputs =
2996       (nir->options->support_indirect_outputs >> nir->info.stage) & 0x1 && !xfb;
2997 
2998    if (!has_indirect_inputs || !has_indirect_outputs) {
2999       NIR_PASS_V(nir, nir_lower_io_to_temporaries,
3000                  nir_shader_get_entrypoint(nir), !has_indirect_outputs,
3001                  !has_indirect_inputs);
3002 
3003       /* We need to lower all the copy_deref's introduced by
3004        * nir_lower_io_to_temporaries before calling nir_lower_io.
3005        */
3006       NIR_PASS_V(nir, nir_split_var_copies);
3007       NIR_PASS_V(nir, nir_lower_var_copies);
3008       NIR_PASS_V(nir, nir_lower_global_vars_to_local);
3009    }
3010 
3011    if (nir->info.stage == MESA_SHADER_FRAGMENT &&
3012        nir->options->lower_fs_color_inputs)
3013       NIR_PASS_V(nir, nir_lower_color_inputs);
3014 
3015    NIR_PASS_V(nir, nir_lower_io, nir_var_shader_out | nir_var_shader_in,
3016               type_size_vec4, nir_lower_io_lower_64bit_to_32);
3017 
3018    /* nir_io_add_const_offset_to_base needs actual constants. */
3019    NIR_PASS_V(nir, nir_opt_constant_folding);
3020    NIR_PASS_V(nir, nir_io_add_const_offset_to_base, nir_var_shader_in |
3021                                                     nir_var_shader_out);
3022 
3023    /* Lower and remove dead derefs and variables to clean up the IR. */
3024    NIR_PASS_V(nir, nir_lower_vars_to_ssa);
3025    NIR_PASS_V(nir, nir_opt_dce);
3026    NIR_PASS_V(nir, nir_remove_dead_variables, nir_var_function_temp |
3027               nir_var_shader_in | nir_var_shader_out, NULL);
3028 
3029    if (xfb)
3030       NIR_PASS_V(nir, nir_add_xfb_info, xfb);
3031 
3032    nir->info.io_lowered = true;
3033 }
3034