1 /*
2  * Copyright © 2014 Intel Corporation
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21  * IN THE SOFTWARE.
22  *
23  * Authors:
24  *    Connor Abbott (cwabbott0@gmail.com)
25  *    Jason Ekstrand (jason@jlekstrand.net)
26  *
27  */
28 
/*
 * This lowering pass converts loads of and stores to input/output variables
 * into the corresponding input/output intrinsics.
 */
33 
34 #include "nir.h"
35 #include "nir_builder.h"
36 #include "nir_deref.h"
37 
38 #include "util/u_math.h"
39 
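/* State shared by the lowering helpers below: a builder for emitting
 * replacement instructions, the caller-provided type_size() callback that
 * maps a GLSL type to a number of driver locations, the set of variable
 * modes being lowered, and the option flags controlling the lowering.
 */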
40 struct lower_io_state {
41    void *dead_ctx;
42    nir_builder builder;
43    int (*type_size)(const struct glsl_type *type, bool);
44    nir_variable_mode modes;
45    nir_lower_io_options options;
46 };
47 
48 static nir_intrinsic_op
ssbo_atomic_for_deref(nir_intrinsic_op deref_op)
50 {
51    switch (deref_op) {
52 #define OP(O) case nir_intrinsic_deref_##O: return nir_intrinsic_ssbo_##O;
53    OP(atomic_exchange)
54    OP(atomic_comp_swap)
55    OP(atomic_add)
56    OP(atomic_imin)
57    OP(atomic_umin)
58    OP(atomic_imax)
59    OP(atomic_umax)
60    OP(atomic_and)
61    OP(atomic_or)
62    OP(atomic_xor)
63    OP(atomic_fadd)
64    OP(atomic_fmin)
65    OP(atomic_fmax)
66    OP(atomic_fcomp_swap)
67 #undef OP
68    default:
69       unreachable("Invalid SSBO atomic");
70    }
71 }
72 
73 static nir_intrinsic_op
global_atomic_for_deref(nir_intrinsic_op deref_op)
75 {
76    switch (deref_op) {
77 #define OP(O) case nir_intrinsic_deref_##O: return nir_intrinsic_global_##O;
78    OP(atomic_exchange)
79    OP(atomic_comp_swap)
80    OP(atomic_add)
81    OP(atomic_imin)
82    OP(atomic_umin)
83    OP(atomic_imax)
84    OP(atomic_umax)
85    OP(atomic_and)
86    OP(atomic_or)
87    OP(atomic_xor)
88    OP(atomic_fadd)
89    OP(atomic_fmin)
90    OP(atomic_fmax)
91    OP(atomic_fcomp_swap)
92 #undef OP
93    default:
      unreachable("Invalid global atomic");
95    }
96 }
97 
98 static nir_intrinsic_op
shared_atomic_for_deref(nir_intrinsic_op deref_op)
100 {
101    switch (deref_op) {
102 #define OP(O) case nir_intrinsic_deref_##O: return nir_intrinsic_shared_##O;
103    OP(atomic_exchange)
104    OP(atomic_comp_swap)
105    OP(atomic_add)
106    OP(atomic_imin)
107    OP(atomic_umin)
108    OP(atomic_imax)
109    OP(atomic_umax)
110    OP(atomic_and)
111    OP(atomic_or)
112    OP(atomic_xor)
113    OP(atomic_fadd)
114    OP(atomic_fmin)
115    OP(atomic_fmax)
116    OP(atomic_fcomp_swap)
117 #undef OP
118    default:
119       unreachable("Invalid shared atomic");
120    }
121 }
122 
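/* Assign consecutive driver_location values to every variable of the given
 * mode and report the total number of locations used through *size.  The
 * caller's type_size() callback decides how many locations each type
 * occupies.  As a rough sketch, a vec4-slot-based driver might pass a
 * (hypothetical) callback along these lines:
 *
 *    static int
 *    my_type_size_vec4(const struct glsl_type *type, bool bindless)
 *    {
 *       return glsl_count_attribute_slots(type, false);
 *    }
 */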
123 void
nir_assign_var_locations(nir_shader *shader, nir_variable_mode mode,
125                          unsigned *size,
126                          int (*type_size)(const struct glsl_type *, bool))
127 {
128    unsigned location = 0;
129 
130    nir_foreach_variable_with_modes(var, shader, mode) {
131       var->data.driver_location = location;
132       bool bindless_type_size = var->data.mode == nir_var_shader_in ||
133                                 var->data.mode == nir_var_shader_out ||
134                                 var->data.bindless;
135       location += type_size(var->type, bindless_type_size);
136    }
137 
138    *size = location;
139 }
140 
141 /**
142  * Some inputs and outputs are arrayed, meaning that there is an extra level
143  * of array indexing to handle mismatches between the shader interface and the
144  * dispatch pattern of the shader.  For instance, geometry shaders are
145  * executed per-primitive while their inputs and outputs are specified
146  * per-vertex so all inputs and outputs have to be additionally indexed with
147  * the vertex index within the primitive.
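 * Tessellation control shader per-vertex outputs are a similar case: writes
 * are indexed by gl_InvocationID in addition to any indexing in the shader.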
148  */
149 bool
nir_is_arrayed_io(const nir_variable *var, gl_shader_stage stage)
151 {
152    if (var->data.patch || !glsl_type_is_array(var->type))
153       return false;
154 
155    if (var->data.mode == nir_var_shader_in)
156       return stage == MESA_SHADER_GEOMETRY ||
157              stage == MESA_SHADER_TESS_CTRL ||
158              stage == MESA_SHADER_TESS_EVAL;
159 
160    if (var->data.mode == nir_var_shader_out)
161       return stage == MESA_SHADER_TESS_CTRL ||
162              stage == MESA_SHADER_MESH;
163 
164    return false;
165 }
166 
static unsigned get_number_of_slots(struct lower_io_state *state,
168                                     const nir_variable *var)
169 {
170    const struct glsl_type *type = var->type;
171 
172    if (nir_is_arrayed_io(var, state->builder.shader->info.stage)) {
173       assert(glsl_type_is_array(type));
174       type = glsl_get_array_element(type);
175    }
176 
177    return state->type_size(type, var->data.bindless);
178 }
179 
180 static nir_ssa_def *
get_io_offset(nir_builder *b, nir_deref_instr *deref,
182               nir_ssa_def **array_index,
183               int (*type_size)(const struct glsl_type *, bool),
184               unsigned *component, bool bts)
185 {
186    nir_deref_path path;
187    nir_deref_path_init(&path, deref, NULL);
188 
189    assert(path.path[0]->deref_type == nir_deref_type_var);
190    nir_deref_instr **p = &path.path[1];
191 
192    /* For arrayed I/O (e.g., per-vertex input arrays in geometry shader
193     * inputs), skip the outermost array index.  Process the rest normally.
194     */
195    if (array_index != NULL) {
196       assert((*p)->deref_type == nir_deref_type_array);
197       *array_index = nir_ssa_for_src(b, (*p)->arr.index, 1);
198       p++;
199    }
200 
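   /* "Compact" variables (e.g. gl_ClipDistance or gl_TessLevelOuter) are
    * scalar arrays packed into vec4 slots, so the (always constant) array
    * index folds directly into a slot offset plus a component.
    */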
201    if (path.path[0]->var->data.compact) {
202       assert((*p)->deref_type == nir_deref_type_array);
203       assert(glsl_type_is_scalar((*p)->type));
204 
205       /* We always lower indirect dereferences for "compact" array vars. */
206       const unsigned index = nir_src_as_uint((*p)->arr.index);
207       const unsigned total_offset = *component + index;
208       const unsigned slot_offset = total_offset / 4;
209       *component = total_offset % 4;
210       return nir_imm_int(b, type_size(glsl_vec4_type(), bts) * slot_offset);
211    }
212 
213    /* Just emit code and let constant-folding go to town */
214    nir_ssa_def *offset = nir_imm_int(b, 0);
215 
216    for (; *p; p++) {
217       if ((*p)->deref_type == nir_deref_type_array) {
218          unsigned size = type_size((*p)->type, bts);
219 
220          nir_ssa_def *mul =
221             nir_amul_imm(b, nir_ssa_for_src(b, (*p)->arr.index, 1), size);
222 
223          offset = nir_iadd(b, offset, mul);
224       } else if ((*p)->deref_type == nir_deref_type_struct) {
225          /* p starts at path[1], so this is safe */
226          nir_deref_instr *parent = *(p - 1);
227 
228          unsigned field_offset = 0;
229          for (unsigned i = 0; i < (*p)->strct.index; i++) {
230             field_offset += type_size(glsl_get_struct_field(parent->type, i), bts);
231          }
232          offset = nir_iadd_imm(b, offset, field_offset);
233       } else {
234          unreachable("Unsupported deref type");
235       }
236    }
237 
238    nir_deref_path_finish(&path);
239 
240    return offset;
241 }
242 
243 static nir_ssa_def *
emit_load(struct lower_io_state *state,
245           nir_ssa_def *array_index, nir_variable *var, nir_ssa_def *offset,
246           unsigned component, unsigned num_components, unsigned bit_size,
247           nir_alu_type dest_type)
248 {
249    nir_builder *b = &state->builder;
250    const nir_shader *nir = b->shader;
251    nir_variable_mode mode = var->data.mode;
252    nir_ssa_def *barycentric = NULL;
253 
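   /* Pick the load intrinsic: fragment inputs may turn into interpolated
    * loads fed by an explicit barycentric, arrayed I/O gets the per-vertex
    * (or per-primitive) variants, and uniforms become load_uniform.
    */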
254    nir_intrinsic_op op;
255    switch (mode) {
256    case nir_var_shader_in:
257       if (nir->info.stage == MESA_SHADER_FRAGMENT &&
258           nir->options->use_interpolated_input_intrinsics &&
259           var->data.interpolation != INTERP_MODE_FLAT &&
260           !var->data.per_primitive) {
261          if (var->data.interpolation == INTERP_MODE_EXPLICIT) {
262             assert(array_index != NULL);
263             op = nir_intrinsic_load_input_vertex;
264          } else {
265             assert(array_index == NULL);
266 
267             nir_intrinsic_op bary_op;
268             if (var->data.sample ||
269                 (state->options & nir_lower_io_force_sample_interpolation))
270                bary_op = nir_intrinsic_load_barycentric_sample;
271             else if (var->data.centroid)
272                bary_op = nir_intrinsic_load_barycentric_centroid;
273             else
274                bary_op = nir_intrinsic_load_barycentric_pixel;
275 
276             barycentric = nir_load_barycentric(&state->builder, bary_op,
277                                                var->data.interpolation);
278             op = nir_intrinsic_load_interpolated_input;
279          }
280       } else {
281          op = array_index ? nir_intrinsic_load_per_vertex_input :
282                             nir_intrinsic_load_input;
283       }
284       break;
285    case nir_var_shader_out:
286       op = !array_index            ? nir_intrinsic_load_output :
287            var->data.per_primitive ? nir_intrinsic_load_per_primitive_output :
288                                      nir_intrinsic_load_per_vertex_output;
289       break;
290    case nir_var_uniform:
291       op = nir_intrinsic_load_uniform;
292       break;
293    default:
294       unreachable("Unknown variable mode");
295    }
296 
297    nir_intrinsic_instr *load =
298       nir_intrinsic_instr_create(state->builder.shader, op);
299    load->num_components = num_components;
300 
301    nir_intrinsic_set_base(load, var->data.driver_location);
302    if (mode == nir_var_shader_in || mode == nir_var_shader_out)
303       nir_intrinsic_set_component(load, component);
304 
305    if (load->intrinsic == nir_intrinsic_load_uniform)
306       nir_intrinsic_set_range(load,
307                               state->type_size(var->type, var->data.bindless));
308 
309    if (nir_intrinsic_has_access(load))
310       nir_intrinsic_set_access(load, var->data.access);
311 
312    nir_intrinsic_set_dest_type(load, dest_type);
313 
314    if (load->intrinsic != nir_intrinsic_load_uniform) {
315       nir_io_semantics semantics = {0};
316       semantics.location = var->data.location;
317       semantics.num_slots = get_number_of_slots(state, var);
318       semantics.fb_fetch_output = var->data.fb_fetch_output;
319       semantics.medium_precision =
320          var->data.precision == GLSL_PRECISION_MEDIUM ||
321          var->data.precision == GLSL_PRECISION_LOW;
322       nir_intrinsic_set_io_semantics(load, semantics);
323    }
324 
325    if (array_index) {
326       load->src[0] = nir_src_for_ssa(array_index);
327       load->src[1] = nir_src_for_ssa(offset);
328    } else if (barycentric) {
329       load->src[0] = nir_src_for_ssa(barycentric);
330       load->src[1] = nir_src_for_ssa(offset);
331    } else {
332       load->src[0] = nir_src_for_ssa(offset);
333    }
334 
335    nir_ssa_dest_init(&load->instr, &load->dest,
336                      num_components, bit_size, NULL);
337    nir_builder_instr_insert(b, &load->instr);
338 
339    return &load->dest.ssa;
340 }
341 
342 static nir_ssa_def *
lower_load(nir_intrinsic_instr *intrin, struct lower_io_state *state,
344            nir_ssa_def *array_index, nir_variable *var, nir_ssa_def *offset,
345            unsigned component, const struct glsl_type *type)
346 {
347    assert(intrin->dest.is_ssa);
348    if (intrin->dest.ssa.bit_size == 64 &&
349        (state->options & nir_lower_io_lower_64bit_to_32)) {
350       nir_builder *b = &state->builder;
351 
352       const unsigned slot_size = state->type_size(glsl_dvec_type(2), false);
353 
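      /* Split the 64-bit load: each double is fetched as two 32-bit channels
       * and repacked, and a dvec3/dvec4 spans two vec4 slots, so the loop
       * advances the offset by one dvec2-sized slot per iteration.
       */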
354       nir_ssa_def *comp64[4];
355       assert(component == 0 || component == 2);
356       unsigned dest_comp = 0;
357       while (dest_comp < intrin->dest.ssa.num_components) {
358          const unsigned num_comps =
359             MIN2(intrin->dest.ssa.num_components - dest_comp,
360                  (4 - component) / 2);
361 
362          nir_ssa_def *data32 =
363             emit_load(state, array_index, var, offset, component,
364                       num_comps * 2, 32, nir_type_uint32);
365          for (unsigned i = 0; i < num_comps; i++) {
366             comp64[dest_comp + i] =
367                nir_pack_64_2x32(b, nir_channels(b, data32, 3 << (i * 2)));
368          }
369 
         /* Only the first load has a component offset */
371          component = 0;
372          dest_comp += num_comps;
373          offset = nir_iadd_imm(b, offset, slot_size);
374       }
375 
376       return nir_vec(b, comp64, intrin->dest.ssa.num_components);
377    } else if (intrin->dest.ssa.bit_size == 1) {
378       /* Booleans are 32-bit */
379       assert(glsl_type_is_boolean(type));
380       return nir_b2b1(&state->builder,
381                       emit_load(state, array_index, var, offset, component,
382                                 intrin->dest.ssa.num_components, 32,
383                                 nir_type_bool32));
384    } else {
385       return emit_load(state, array_index, var, offset, component,
386                        intrin->dest.ssa.num_components,
387                        intrin->dest.ssa.bit_size,
388                        nir_get_nir_type_for_glsl_type(type));
389    }
390 }
391 
392 static void
emit_store(struct lower_io_state *state, nir_ssa_def *data,
394            nir_ssa_def *array_index, nir_variable *var, nir_ssa_def *offset,
395            unsigned component, unsigned num_components,
396            nir_component_mask_t write_mask, nir_alu_type src_type)
397 {
398    nir_builder *b = &state->builder;
399 
400    assert(var->data.mode == nir_var_shader_out);
401    nir_intrinsic_op op =
402       !array_index            ? nir_intrinsic_store_output :
403       var->data.per_primitive ? nir_intrinsic_store_per_primitive_output :
404                                 nir_intrinsic_store_per_vertex_output;
405 
406    nir_intrinsic_instr *store =
407       nir_intrinsic_instr_create(state->builder.shader, op);
408    store->num_components = num_components;
409 
410    store->src[0] = nir_src_for_ssa(data);
411 
412    nir_intrinsic_set_base(store, var->data.driver_location);
413    nir_intrinsic_set_component(store, component);
414    nir_intrinsic_set_src_type(store, src_type);
415 
416    nir_intrinsic_set_write_mask(store, write_mask);
417 
418    if (nir_intrinsic_has_access(store))
419       nir_intrinsic_set_access(store, var->data.access);
420 
421    if (array_index)
422       store->src[1] = nir_src_for_ssa(array_index);
423 
424    store->src[array_index ? 2 : 1] = nir_src_for_ssa(offset);
425 
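   /* Geometry shader outputs carry a 2-bit stream ID per component.  With
    * NIR_STREAM_PACKED the per-component IDs are already packed into
    * var->data.stream; otherwise the single stream is replicated to every
    * component being written.
    */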
426    unsigned gs_streams = 0;
427    if (state->builder.shader->info.stage == MESA_SHADER_GEOMETRY) {
428       if (var->data.stream & NIR_STREAM_PACKED) {
429          gs_streams = var->data.stream & ~NIR_STREAM_PACKED;
430       } else {
431          assert(var->data.stream < 4);
432          gs_streams = 0;
433          for (unsigned i = 0; i < num_components; ++i)
434             gs_streams |= var->data.stream << (2 * i);
435       }
436    }
437 
438    nir_io_semantics semantics = {0};
439    semantics.location = var->data.location;
440    semantics.num_slots = get_number_of_slots(state, var);
441    semantics.dual_source_blend_index = var->data.index;
442    semantics.gs_streams = gs_streams;
443    semantics.medium_precision =
444       var->data.precision == GLSL_PRECISION_MEDIUM ||
445       var->data.precision == GLSL_PRECISION_LOW;
446    semantics.per_view = var->data.per_view;
447    nir_intrinsic_set_io_semantics(store, semantics);
448 
449    nir_builder_instr_insert(b, &store->instr);
450 }
451 
452 static void
lower_store(nir_intrinsic_instr *intrin, struct lower_io_state *state,
454             nir_ssa_def *array_index, nir_variable *var, nir_ssa_def *offset,
455             unsigned component, const struct glsl_type *type)
456 {
457    assert(intrin->src[1].is_ssa);
458    if (intrin->src[1].ssa->bit_size == 64 &&
459        (state->options & nir_lower_io_lower_64bit_to_32)) {
460       nir_builder *b = &state->builder;
461 
462       const unsigned slot_size = state->type_size(glsl_dvec_type(2), false);
463 
464       assert(component == 0 || component == 2);
465       unsigned src_comp = 0;
466       nir_component_mask_t write_mask = nir_intrinsic_write_mask(intrin);
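      /* Mirror of the 64-bit load path: split the value into dvec2-sized
       * slots and expand each 64-bit write-mask bit into two 32-bit bits.
       */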
467       while (src_comp < intrin->num_components) {
468          const unsigned num_comps =
469             MIN2(intrin->num_components - src_comp,
470                  (4 - component) / 2);
471 
472          if (write_mask & BITFIELD_MASK(num_comps)) {
473             nir_ssa_def *data =
474                nir_channels(b, intrin->src[1].ssa,
475                             BITFIELD_RANGE(src_comp, num_comps));
476             nir_ssa_def *data32 = nir_bitcast_vector(b, data, 32);
477 
478             nir_component_mask_t write_mask32 = 0;
479             for (unsigned i = 0; i < num_comps; i++) {
480                if (write_mask & BITFIELD_MASK(num_comps) & (1 << i))
481                   write_mask32 |= 3 << (i * 2);
482             }
483 
484             emit_store(state, data32, array_index, var, offset,
485                        component, data32->num_components, write_mask32,
486                        nir_type_uint32);
487          }
488 
489          /* Only the first store has a component offset */
490          component = 0;
491          src_comp += num_comps;
492          write_mask >>= num_comps;
493          offset = nir_iadd_imm(b, offset, slot_size);
494       }
   } else if (intrin->src[1].ssa->bit_size == 1) {
496       /* Booleans are 32-bit */
497       assert(glsl_type_is_boolean(type));
498       nir_ssa_def *b32_val = nir_b2b32(&state->builder, intrin->src[1].ssa);
499       emit_store(state, b32_val, array_index, var, offset,
500                  component, intrin->num_components,
501                  nir_intrinsic_write_mask(intrin),
502                  nir_type_bool32);
503    } else {
504       emit_store(state, intrin->src[1].ssa, array_index, var, offset,
505                  component, intrin->num_components,
506                  nir_intrinsic_write_mask(intrin),
507                  nir_get_nir_type_for_glsl_type(type));
508    }
509 }
510 
511 static nir_ssa_def *
lower_interpolate_at(nir_intrinsic_instr *intrin, struct lower_io_state *state,
513                      nir_variable *var, nir_ssa_def *offset, unsigned component,
514                      const struct glsl_type *type)
515 {
516    nir_builder *b = &state->builder;
517    assert(var->data.mode == nir_var_shader_in);
518 
519    /* Ignore interpolateAt() for flat variables - flat is flat. Lower
520     * interpolateAtVertex() for explicit variables.
521     */
522    if (var->data.interpolation == INTERP_MODE_FLAT ||
523        var->data.interpolation == INTERP_MODE_EXPLICIT) {
524       nir_ssa_def *vertex_index = NULL;
525 
526       if (var->data.interpolation == INTERP_MODE_EXPLICIT) {
527          assert(intrin->intrinsic == nir_intrinsic_interp_deref_at_vertex);
528          vertex_index = intrin->src[1].ssa;
529       }
530 
531       return lower_load(intrin, state, vertex_index, var, offset, component, type);
532    }
533 
534    /* None of the supported APIs allow interpolation on 64-bit things */
535    assert(intrin->dest.is_ssa && intrin->dest.ssa.bit_size <= 32);
536 
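   /* interpolateAt*() becomes an explicit barycentric setup intrinsic whose
    * result feeds a load_interpolated_input below.
    */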
537    nir_intrinsic_op bary_op;
538    switch (intrin->intrinsic) {
539    case nir_intrinsic_interp_deref_at_centroid:
540       bary_op = (state->options & nir_lower_io_force_sample_interpolation) ?
541                 nir_intrinsic_load_barycentric_sample :
542                 nir_intrinsic_load_barycentric_centroid;
543       break;
544    case nir_intrinsic_interp_deref_at_sample:
545       bary_op = nir_intrinsic_load_barycentric_at_sample;
546       break;
547    case nir_intrinsic_interp_deref_at_offset:
548       bary_op = nir_intrinsic_load_barycentric_at_offset;
549       break;
550    default:
551       unreachable("Bogus interpolateAt() intrinsic.");
552    }
553 
554    nir_intrinsic_instr *bary_setup =
555       nir_intrinsic_instr_create(state->builder.shader, bary_op);
556 
557    nir_ssa_dest_init(&bary_setup->instr, &bary_setup->dest, 2, 32, NULL);
558    nir_intrinsic_set_interp_mode(bary_setup, var->data.interpolation);
559 
560    if (intrin->intrinsic == nir_intrinsic_interp_deref_at_sample ||
561        intrin->intrinsic == nir_intrinsic_interp_deref_at_offset ||
562        intrin->intrinsic == nir_intrinsic_interp_deref_at_vertex)
563       nir_src_copy(&bary_setup->src[0], &intrin->src[1]);
564 
565    nir_builder_instr_insert(b, &bary_setup->instr);
566 
567    nir_io_semantics semantics = {0};
568    semantics.location = var->data.location;
569    semantics.num_slots = get_number_of_slots(state, var);
570    semantics.medium_precision =
571       var->data.precision == GLSL_PRECISION_MEDIUM ||
572       var->data.precision == GLSL_PRECISION_LOW;
573 
574    assert(intrin->dest.is_ssa);
575    nir_ssa_def *load =
576       nir_load_interpolated_input(&state->builder,
577                                   intrin->dest.ssa.num_components,
578                                   intrin->dest.ssa.bit_size,
579                                   &bary_setup->dest.ssa,
580                                   offset,
581                                   .base = var->data.driver_location,
582                                   .component = component,
583                                   .io_semantics = semantics);
584 
585    return load;
586 }
587 
588 static bool
nir_lower_io_block(nir_block *block,
590                    struct lower_io_state *state)
591 {
592    nir_builder *b = &state->builder;
593    const nir_shader_compiler_options *options = b->shader->options;
594    bool progress = false;
595 
596    nir_foreach_instr_safe(instr, block) {
597       if (instr->type != nir_instr_type_intrinsic)
598          continue;
599 
600       nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
601 
602       switch (intrin->intrinsic) {
603       case nir_intrinsic_load_deref:
604       case nir_intrinsic_store_deref:
         /* We can lower the I/O for this NIR intrinsic */
606          break;
607       case nir_intrinsic_interp_deref_at_centroid:
608       case nir_intrinsic_interp_deref_at_sample:
609       case nir_intrinsic_interp_deref_at_offset:
610       case nir_intrinsic_interp_deref_at_vertex:
611          /* We can optionally lower these to load_interpolated_input */
612          if (options->use_interpolated_input_intrinsics ||
613              options->lower_interpolate_at)
614             break;
615          FALLTHROUGH;
616       default:
         /* We can't lower the I/O for this NIR intrinsic, so skip it */
618          continue;
619       }
620 
621       nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]);
622       if (!nir_deref_mode_is_one_of(deref, state->modes))
623          continue;
624 
625       nir_variable *var = nir_deref_instr_get_variable(deref);
626 
627       b->cursor = nir_before_instr(instr);
628 
629       const bool is_arrayed = nir_is_arrayed_io(var, b->shader->info.stage);
630 
631       nir_ssa_def *offset;
632       nir_ssa_def *array_index = NULL;
633       unsigned component_offset = var->data.location_frac;
634       bool bindless_type_size = var->data.mode == nir_var_shader_in ||
635                                 var->data.mode == nir_var_shader_out ||
636                                 var->data.bindless;
637 
      if (nir_deref_instr_is_known_out_of_bounds(deref)) {
         /* Section 5.11 (Out-of-Bounds Accesses) of the GLSL 4.60 spec says:
          *
          *    In the subsections described above for array, vector, matrix and
          *    structure accesses, any out-of-bounds access produced undefined
          *    behavior....
          *    Out-of-bounds reads return undefined values, which
          *    include values from other variables of the active program or zero.
          *    Out-of-bounds writes may be discarded or overwrite
          *    other variables of the active program.
          *
          * GL_KHR_robustness and GL_ARB_robustness encourage us to return zero
          * for reads.
          *
          * Otherwise, get_io_offset() would return an out-of-bounds offset,
          * which could lead to out-of-bounds loads/stores of inputs/outputs
          * and cause issues in drivers down the line.
          */
         if (intrin->intrinsic != nir_intrinsic_store_deref) {
            nir_ssa_def *zero =
               nir_imm_zero(b, intrin->dest.ssa.num_components,
                            intrin->dest.ssa.bit_size);
            nir_ssa_def_rewrite_uses(&intrin->dest.ssa, zero);
         }
663 
664          nir_instr_remove(&intrin->instr);
665          progress = true;
666          continue;
667       }
668 
669       offset = get_io_offset(b, deref, is_arrayed ? &array_index : NULL,
670                              state->type_size, &component_offset,
671                              bindless_type_size);
672 
673       nir_ssa_def *replacement = NULL;
674 
675       switch (intrin->intrinsic) {
676       case nir_intrinsic_load_deref:
677          replacement = lower_load(intrin, state, array_index, var, offset,
678                                   component_offset, deref->type);
679          break;
680 
681       case nir_intrinsic_store_deref:
682          lower_store(intrin, state, array_index, var, offset,
683                      component_offset, deref->type);
684          break;
685 
686       case nir_intrinsic_interp_deref_at_centroid:
687       case nir_intrinsic_interp_deref_at_sample:
688       case nir_intrinsic_interp_deref_at_offset:
689       case nir_intrinsic_interp_deref_at_vertex:
690          assert(array_index == NULL);
691          replacement = lower_interpolate_at(intrin, state, var, offset,
692                                             component_offset, deref->type);
693          break;
694 
695       default:
696          continue;
697       }
698 
699       if (replacement) {
700          nir_ssa_def_rewrite_uses(&intrin->dest.ssa,
701                                   replacement);
702       }
703       nir_instr_remove(&intrin->instr);
704       progress = true;
705    }
706 
707    return progress;
708 }
709 
710 static bool
nir_lower_io_impl(nir_function_impl *impl,
712                   nir_variable_mode modes,
713                   int (*type_size)(const struct glsl_type *, bool),
714                   nir_lower_io_options options)
715 {
716    struct lower_io_state state;
717    bool progress = false;
718 
719    nir_builder_init(&state.builder, impl);
720    state.dead_ctx = ralloc_context(NULL);
721    state.modes = modes;
722    state.type_size = type_size;
723    state.options = options;
724 
725    ASSERTED nir_variable_mode supported_modes =
726       nir_var_shader_in | nir_var_shader_out | nir_var_uniform;
727    assert(!(modes & ~supported_modes));
728 
729    nir_foreach_block(block, impl) {
730       progress |= nir_lower_io_block(block, &state);
731    }
732 
733    ralloc_free(state.dead_ctx);
734 
735    nir_metadata_preserve(impl, nir_metadata_none);
736 
737    return progress;
738 }
739 
740 /** Lower load/store_deref intrinsics on I/O variables to offset-based intrinsics
741  *
742  * This pass is intended to be used for cross-stage shader I/O and driver-
743  * managed uniforms to turn deref-based access into a simpler model using
744  * locations or offsets.  For fragment shader inputs, it can optionally turn
745  * load_deref into an explicit interpolation using barycentrics coming from
746  * one of the load_barycentric_* intrinsics.  This pass requires that all
747  * deref chains are complete and contain no casts.
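 *
 * As a rough illustration (the type-size callback name here is hypothetical),
 * a driver might run the pass as:
 *
 *    NIR_PASS(progress, nir, nir_lower_io,
 *             nir_var_shader_in | nir_var_shader_out,
 *             my_type_size_vec4, (nir_lower_io_options)0);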
748  */
749 bool
nir_lower_io(nir_shader *shader, nir_variable_mode modes,
751              int (*type_size)(const struct glsl_type *, bool),
752              nir_lower_io_options options)
753 {
754    bool progress = false;
755 
756    nir_foreach_function(function, shader) {
757       if (function->impl) {
758          progress |= nir_lower_io_impl(function->impl, modes,
759                                        type_size, options);
760       }
761    }
762 
763    return progress;
764 }
765 
766 static unsigned
type_scalar_size_bytes(const struct glsl_type *type)
768 {
769    assert(glsl_type_is_vector_or_scalar(type) ||
770           glsl_type_is_matrix(type));
771    return glsl_type_is_boolean(type) ? 4 : glsl_get_bit_size(type) / 8;
772 }
773 
774 static nir_ssa_def *
build_addr_iadd(nir_builder *b, nir_ssa_def *addr,
776                 nir_address_format addr_format,
777                 nir_variable_mode modes,
778                 nir_ssa_def *offset)
779 {
780    assert(offset->num_components == 1);
781 
782    switch (addr_format) {
783    case nir_address_format_32bit_global:
784    case nir_address_format_64bit_global:
785    case nir_address_format_32bit_offset:
786       assert(addr->bit_size == offset->bit_size);
787       assert(addr->num_components == 1);
788       return nir_iadd(b, addr, offset);
789 
790    case nir_address_format_32bit_offset_as_64bit:
791       assert(addr->num_components == 1);
792       assert(offset->bit_size == 32);
793       return nir_u2u64(b, nir_iadd(b, nir_u2u32(b, addr), offset));
794 
795    case nir_address_format_64bit_global_32bit_offset:
796    case nir_address_format_64bit_bounded_global:
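      /* vec4 address: .xy hold the 64-bit base and .w the 32-bit offset
       * (.z is the bound for the bounded variant), so only the offset
       * component needs to change.
       */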
797       assert(addr->num_components == 4);
798       assert(addr->bit_size == offset->bit_size);
799       return nir_vector_insert_imm(b, addr, nir_iadd(b, nir_channel(b, addr, 3), offset), 3);
800 
801    case nir_address_format_32bit_index_offset:
802       assert(addr->num_components == 2);
803       assert(addr->bit_size == offset->bit_size);
804       return nir_vector_insert_imm(b, addr, nir_iadd(b, nir_channel(b, addr, 1), offset), 1);
805 
806    case nir_address_format_32bit_index_offset_pack64:
807       assert(addr->num_components == 1);
808       assert(offset->bit_size == 32);
809       return nir_pack_64_2x32_split(b,
810                                     nir_iadd(b, nir_unpack_64_2x32_split_x(b, addr), offset),
811                                     nir_unpack_64_2x32_split_y(b, addr));
812 
813    case nir_address_format_vec2_index_32bit_offset:
814       assert(addr->num_components == 3);
815       assert(offset->bit_size == 32);
816       return nir_vector_insert_imm(b, addr, nir_iadd(b, nir_channel(b, addr, 2), offset), 2);
817 
818    case nir_address_format_62bit_generic:
819       assert(addr->num_components == 1);
820       assert(addr->bit_size == 64);
821       assert(offset->bit_size == 64);
822       if (!(modes & ~(nir_var_function_temp |
823                       nir_var_shader_temp |
824                       nir_var_mem_shared))) {
825          /* If we're sure it's one of these modes, we can do an easy 32-bit
826           * addition and don't need to bother with 64-bit math.
827           */
828          nir_ssa_def *addr32 = nir_unpack_64_2x32_split_x(b, addr);
829          nir_ssa_def *type = nir_unpack_64_2x32_split_y(b, addr);
830          addr32 = nir_iadd(b, addr32, nir_u2u32(b, offset));
831          return nir_pack_64_2x32_split(b, addr32, type);
832       } else {
833          return nir_iadd(b, addr, offset);
834       }
835 
836    case nir_address_format_logical:
837       unreachable("Unsupported address format");
838    }
839    unreachable("Invalid address format");
840 }
841 
842 static unsigned
addr_get_offset_bit_size(nir_ssa_def *addr, nir_address_format addr_format)
844 {
845    if (addr_format == nir_address_format_32bit_offset_as_64bit ||
846        addr_format == nir_address_format_32bit_index_offset_pack64)
847       return 32;
848    return addr->bit_size;
849 }
850 
851 static nir_ssa_def *
build_addr_iadd_imm(nir_builder *b, nir_ssa_def *addr,
853                     nir_address_format addr_format,
854                     nir_variable_mode modes,
855                     int64_t offset)
856 {
857    return build_addr_iadd(b, addr, addr_format, modes,
858                              nir_imm_intN_t(b, offset,
859                                             addr_get_offset_bit_size(addr, addr_format)));
860 }
861 
862 static nir_ssa_def *
build_addr_for_var(nir_builder *b, nir_variable *var,
864                    nir_address_format addr_format)
865 {
866    assert(var->data.mode & (nir_var_uniform | nir_var_mem_shared |
867                             nir_var_shader_temp | nir_var_function_temp |
868                             nir_var_mem_push_const | nir_var_mem_constant));
869 
870    const unsigned num_comps = nir_address_format_num_components(addr_format);
871    const unsigned bit_size = nir_address_format_bit_size(addr_format);
872 
873    switch (addr_format) {
874    case nir_address_format_32bit_global:
875    case nir_address_format_64bit_global: {
876       nir_ssa_def *base_addr;
877       switch (var->data.mode) {
878       case nir_var_shader_temp:
879          base_addr = nir_load_scratch_base_ptr(b, num_comps, bit_size, 0);
880          break;
881 
882       case nir_var_function_temp:
883          base_addr = nir_load_scratch_base_ptr(b, num_comps, bit_size, 1);
884          break;
885 
886       case nir_var_mem_constant:
887          base_addr = nir_load_constant_base_ptr(b, num_comps, bit_size);
888          break;
889 
890       case nir_var_mem_shared:
891          base_addr = nir_load_shared_base_ptr(b, num_comps, bit_size);
892          break;
893 
894       default:
895          unreachable("Unsupported variable mode");
896       }
897 
898       return build_addr_iadd_imm(b, base_addr, addr_format, var->data.mode,
899                                     var->data.driver_location);
900    }
901 
902    case nir_address_format_32bit_offset:
903       assert(var->data.driver_location <= UINT32_MAX);
904       return nir_imm_int(b, var->data.driver_location);
905 
906    case nir_address_format_32bit_offset_as_64bit:
907       assert(var->data.driver_location <= UINT32_MAX);
908       return nir_imm_int64(b, var->data.driver_location);
909 
910    case nir_address_format_62bit_generic:
911       switch (var->data.mode) {
912       case nir_var_shader_temp:
913       case nir_var_function_temp:
914          assert(var->data.driver_location <= UINT32_MAX);
915          return nir_imm_intN_t(b, var->data.driver_location | 2ull << 62, 64);
916 
917       case nir_var_mem_shared:
918          assert(var->data.driver_location <= UINT32_MAX);
919          return nir_imm_intN_t(b, var->data.driver_location | 1ull << 62, 64);
920 
921       default:
922          unreachable("Unsupported variable mode");
923       }
924 
925    default:
926       unreachable("Unsupported address format");
927    }
928 }
929 
930 static nir_ssa_def *
build_runtime_addr_mode_check(nir_builder *b, nir_ssa_def *addr,
932                               nir_address_format addr_format,
933                               nir_variable_mode mode)
934 {
935    /* The compile-time check failed; do a run-time check */
936    switch (addr_format) {
937    case nir_address_format_62bit_generic: {
938       assert(addr->num_components == 1);
939       assert(addr->bit_size == 64);
940       nir_ssa_def *mode_enum = nir_ushr(b, addr, nir_imm_int(b, 62));
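      /* The top two bits of a 62-bit generic address encode the mode:
       * 0x0 and 0x3 mean global, 0x1 means shared, 0x2 means temp/scratch
       * (matching the constants used in build_addr_for_var()).
       */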
941       switch (mode) {
942       case nir_var_function_temp:
943       case nir_var_shader_temp:
944          return nir_ieq_imm(b, mode_enum, 0x2);
945 
946       case nir_var_mem_shared:
947          return nir_ieq_imm(b, mode_enum, 0x1);
948 
949       case nir_var_mem_global:
950          return nir_ior(b, nir_ieq_imm(b, mode_enum, 0x0),
951                            nir_ieq_imm(b, mode_enum, 0x3));
952 
953       default:
954          unreachable("Invalid mode check intrinsic");
955       }
956    }
957 
958    default:
959       unreachable("Unsupported address mode");
960    }
961 }
962 
963 static nir_ssa_def *
addr_to_index(nir_builder *b, nir_ssa_def *addr,
965               nir_address_format addr_format)
966 {
967    switch (addr_format) {
968    case nir_address_format_32bit_index_offset:
969       assert(addr->num_components == 2);
970       return nir_channel(b, addr, 0);
971    case nir_address_format_32bit_index_offset_pack64:
972       return nir_unpack_64_2x32_split_y(b, addr);
973    case nir_address_format_vec2_index_32bit_offset:
974       assert(addr->num_components == 3);
975       return nir_channels(b, addr, 0x3);
976    default: unreachable("Invalid address format");
977    }
978 }
979 
980 static nir_ssa_def *
addr_to_offset(nir_builder *b, nir_ssa_def *addr,
982                nir_address_format addr_format)
983 {
984    switch (addr_format) {
985    case nir_address_format_32bit_index_offset:
986       assert(addr->num_components == 2);
987       return nir_channel(b, addr, 1);
988    case nir_address_format_32bit_index_offset_pack64:
989       return nir_unpack_64_2x32_split_x(b, addr);
990    case nir_address_format_vec2_index_32bit_offset:
991       assert(addr->num_components == 3);
992       return nir_channel(b, addr, 2);
993    case nir_address_format_32bit_offset:
994       return addr;
995    case nir_address_format_32bit_offset_as_64bit:
996    case nir_address_format_62bit_generic:
997       return nir_u2u32(b, addr);
998    default:
999       unreachable("Invalid address format");
1000    }
1001 }
1002 
1003 /** Returns true if the given address format resolves to a global address */
1004 static bool
addr_format_is_global(nir_address_format addr_format,
1006                       nir_variable_mode mode)
1007 {
1008    if (addr_format == nir_address_format_62bit_generic)
1009       return mode == nir_var_mem_global;
1010 
1011    return addr_format == nir_address_format_32bit_global ||
1012           addr_format == nir_address_format_64bit_global ||
1013           addr_format == nir_address_format_64bit_global_32bit_offset ||
1014           addr_format == nir_address_format_64bit_bounded_global;
1015 }
1016 
1017 static bool
addr_format_is_offset(nir_address_format addr_format,
1019                       nir_variable_mode mode)
1020 {
1021    if (addr_format == nir_address_format_62bit_generic)
1022       return mode != nir_var_mem_global;
1023 
1024    return addr_format == nir_address_format_32bit_offset ||
1025           addr_format == nir_address_format_32bit_offset_as_64bit;
1026 }
1027 
1028 static nir_ssa_def *
addr_to_global(nir_builder *b, nir_ssa_def *addr,
1030                nir_address_format addr_format)
1031 {
1032    switch (addr_format) {
1033    case nir_address_format_32bit_global:
1034    case nir_address_format_64bit_global:
1035    case nir_address_format_62bit_generic:
1036       assert(addr->num_components == 1);
1037       return addr;
1038 
1039    case nir_address_format_64bit_global_32bit_offset:
1040    case nir_address_format_64bit_bounded_global:
1041       assert(addr->num_components == 4);
1042       return nir_iadd(b, nir_pack_64_2x32(b, nir_channels(b, addr, 0x3)),
1043                          nir_u2u64(b, nir_channel(b, addr, 3)));
1044 
1045    case nir_address_format_32bit_index_offset:
1046    case nir_address_format_32bit_index_offset_pack64:
1047    case nir_address_format_vec2_index_32bit_offset:
1048    case nir_address_format_32bit_offset:
1049    case nir_address_format_32bit_offset_as_64bit:
1050    case nir_address_format_logical:
1051       unreachable("Cannot get a 64-bit address with this address format");
1052    }
1053 
1054    unreachable("Invalid address format");
1055 }
1056 
1057 static bool
addr_format_needs_bounds_check(nir_address_format addr_format)
1059 {
1060    return addr_format == nir_address_format_64bit_bounded_global;
1061 }
1062 
1063 static nir_ssa_def *
addr_is_in_bounds(nir_builder *b, nir_ssa_def *addr,
1065                   nir_address_format addr_format, unsigned size)
1066 {
1067    assert(addr_format == nir_address_format_64bit_bounded_global);
1068    assert(addr->num_components == 4);
1069    return nir_ige(b, nir_channel(b, addr, 2),
1070                      nir_iadd_imm(b, nir_channel(b, addr, 3), size));
1071 }
1072 
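/* Compute a conservative [base, base + range) byte interval, relative to the
 * start of the underlying resource, that the given deref may access.  Falls
 * back to base = 0 and range = ~0 when the chain cannot be resolved.
 */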
1073 static void
nir_get_explicit_deref_range(nir_deref_instr *deref,
1075                              nir_address_format addr_format,
1076                              uint32_t *out_base,
1077                              uint32_t *out_range)
1078 {
1079    uint32_t base = 0;
1080    uint32_t range = glsl_get_explicit_size(deref->type, false);
1081 
1082    while (true) {
1083       nir_deref_instr *parent = nir_deref_instr_parent(deref);
1084 
1085       switch (deref->deref_type) {
1086       case nir_deref_type_array:
1087       case nir_deref_type_array_wildcard:
1088       case nir_deref_type_ptr_as_array: {
1089          const unsigned stride = nir_deref_instr_array_stride(deref);
1090          if (stride == 0)
1091             goto fail;
1092 
1093          if (!parent)
1094             goto fail;
1095 
1096          if (deref->deref_type != nir_deref_type_array_wildcard &&
1097              nir_src_is_const(deref->arr.index)) {
1098             base += stride * nir_src_as_uint(deref->arr.index);
1099          } else {
1100             if (glsl_get_length(parent->type) == 0)
1101                goto fail;
1102             range += stride * (glsl_get_length(parent->type) - 1);
1103          }
1104          break;
1105       }
1106 
1107       case nir_deref_type_struct: {
1108          if (!parent)
1109             goto fail;
1110 
1111          base += glsl_get_struct_field_offset(parent->type, deref->strct.index);
1112          break;
1113       }
1114 
1115       case nir_deref_type_cast: {
1116          nir_instr *parent_instr = deref->parent.ssa->parent_instr;
1117 
1118          switch (parent_instr->type) {
1119          case nir_instr_type_load_const: {
1120             nir_load_const_instr *load = nir_instr_as_load_const(parent_instr);
1121 
1122             switch (addr_format) {
1123             case nir_address_format_32bit_offset:
1124                base += load->value[1].u32;
1125                break;
1126             case nir_address_format_32bit_index_offset:
1127                base += load->value[1].u32;
1128                break;
1129             case nir_address_format_vec2_index_32bit_offset:
1130                base += load->value[2].u32;
1131                break;
1132             default:
1133                goto fail;
1134             }
1135 
1136             *out_base = base;
1137             *out_range = range;
1138             return;
1139          }
1140 
1141          case nir_instr_type_intrinsic: {
1142             nir_intrinsic_instr *intr = nir_instr_as_intrinsic(parent_instr);
1143             switch (intr->intrinsic) {
1144             case nir_intrinsic_load_vulkan_descriptor:
1145                /* Assume that a load_vulkan_descriptor won't contribute to an
1146                 * offset within the resource.
1147                 */
1148                break;
1149             default:
1150                goto fail;
1151             }
1152 
1153             *out_base = base;
1154             *out_range = range;
1155             return;
1156          }
1157 
1158          default:
1159             goto fail;
1160          }
1161       }
1162 
1163       default:
1164          goto fail;
1165       }
1166 
1167       deref = parent;
1168    }
1169 
1170 fail:
1171    *out_base = 0;
1172    *out_range = ~0;
1173 }
1174 
1175 static nir_variable_mode
canonicalize_generic_modes(nir_variable_mode modes)
1177 {
1178    assert(modes != 0);
1179    if (util_bitcount(modes) == 1)
1180       return modes;
1181 
1182    assert(!(modes & ~(nir_var_function_temp | nir_var_shader_temp |
1183                       nir_var_mem_shared | nir_var_mem_global)));
1184 
1185    /* Canonicalize by converting shader_temp to function_temp */
1186    if (modes & nir_var_shader_temp) {
1187       modes &= ~nir_var_shader_temp;
1188       modes |= nir_var_function_temp;
1189    }
1190 
1191    return modes;
1192 }
1193 
1194 static nir_ssa_def *
build_explicit_io_load(nir_builder *b, nir_intrinsic_instr *intrin,
1196                        nir_ssa_def *addr, nir_address_format addr_format,
1197                        nir_variable_mode modes,
1198                        uint32_t align_mul, uint32_t align_offset,
1199                        unsigned num_components)
1200 {
1201    nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]);
1202    modes = canonicalize_generic_modes(modes);
1203 
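   /* If more than one mode is still possible here, either they all resolve
    * to a plain global address or we emit a run-time mode check, recurse
    * with a single mode on each side of the branch, and join the results
    * with a phi.
    */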
1204    if (util_bitcount(modes) > 1) {
1205       if (addr_format_is_global(addr_format, modes)) {
1206          return build_explicit_io_load(b, intrin, addr, addr_format,
1207                                        nir_var_mem_global,
1208                                        align_mul, align_offset,
1209                                        num_components);
1210       } else if (modes & nir_var_function_temp) {
1211          nir_push_if(b, build_runtime_addr_mode_check(b, addr, addr_format,
1212                                                       nir_var_function_temp));
1213          nir_ssa_def *res1 =
1214             build_explicit_io_load(b, intrin, addr, addr_format,
1215                                    nir_var_function_temp,
1216                                    align_mul, align_offset,
1217                                    num_components);
1218          nir_push_else(b, NULL);
1219          nir_ssa_def *res2 =
1220             build_explicit_io_load(b, intrin, addr, addr_format,
1221                                    modes & ~nir_var_function_temp,
1222                                    align_mul, align_offset,
1223                                    num_components);
1224          nir_pop_if(b, NULL);
1225          return nir_if_phi(b, res1, res2);
1226       } else {
1227          nir_push_if(b, build_runtime_addr_mode_check(b, addr, addr_format,
1228                                                       nir_var_mem_shared));
1229          assert(modes & nir_var_mem_shared);
1230          nir_ssa_def *res1 =
1231             build_explicit_io_load(b, intrin, addr, addr_format,
1232                                    nir_var_mem_shared,
1233                                    align_mul, align_offset,
1234                                    num_components);
1235          nir_push_else(b, NULL);
1236          assert(modes & nir_var_mem_global);
1237          nir_ssa_def *res2 =
1238             build_explicit_io_load(b, intrin, addr, addr_format,
1239                                    nir_var_mem_global,
1240                                    align_mul, align_offset,
1241                                    num_components);
1242          nir_pop_if(b, NULL);
1243          return nir_if_phi(b, res1, res2);
1244       }
1245    }
1246 
1247    assert(util_bitcount(modes) == 1);
1248    const nir_variable_mode mode = modes;
1249 
1250    nir_intrinsic_op op;
1251    switch (intrin->intrinsic) {
1252    case nir_intrinsic_load_deref:
1253       switch (mode) {
1254       case nir_var_mem_ubo:
1255          if (addr_format == nir_address_format_64bit_global_32bit_offset)
1256             op = nir_intrinsic_load_global_constant_offset;
1257          else if (addr_format == nir_address_format_64bit_bounded_global)
1258             op = nir_intrinsic_load_global_constant_bounded;
1259          else if (addr_format_is_global(addr_format, mode))
1260             op = nir_intrinsic_load_global_constant;
1261          else
1262             op = nir_intrinsic_load_ubo;
1263          break;
1264       case nir_var_mem_ssbo:
1265          if (addr_format_is_global(addr_format, mode))
1266             op = nir_intrinsic_load_global;
1267          else
1268             op = nir_intrinsic_load_ssbo;
1269          break;
1270       case nir_var_mem_global:
1271          assert(addr_format_is_global(addr_format, mode));
1272          op = nir_intrinsic_load_global;
1273          break;
1274       case nir_var_uniform:
1275          assert(addr_format_is_offset(addr_format, mode));
1276          assert(b->shader->info.stage == MESA_SHADER_KERNEL);
1277          op = nir_intrinsic_load_kernel_input;
1278          break;
1279       case nir_var_mem_shared:
1280          assert(addr_format_is_offset(addr_format, mode));
1281          op = nir_intrinsic_load_shared;
1282          break;
1283       case nir_var_shader_temp:
1284       case nir_var_function_temp:
1285          if (addr_format_is_offset(addr_format, mode)) {
1286             op = nir_intrinsic_load_scratch;
1287          } else {
1288             assert(addr_format_is_global(addr_format, mode));
1289             op = nir_intrinsic_load_global;
1290          }
1291          break;
1292       case nir_var_mem_push_const:
1293          assert(addr_format == nir_address_format_32bit_offset);
1294          op = nir_intrinsic_load_push_constant;
1295          break;
1296       case nir_var_mem_constant:
1297          if (addr_format_is_offset(addr_format, mode)) {
1298             op = nir_intrinsic_load_constant;
1299          } else {
1300             assert(addr_format_is_global(addr_format, mode));
1301             op = nir_intrinsic_load_global_constant;
1302          }
1303          break;
1304       default:
1305          unreachable("Unsupported explicit IO variable mode");
1306       }
1307       break;
1308 
1309    case nir_intrinsic_load_deref_block_intel:
1310       switch (mode) {
1311       case nir_var_mem_ssbo:
1312          if (addr_format_is_global(addr_format, mode))
1313             op = nir_intrinsic_load_global_block_intel;
1314          else
1315             op = nir_intrinsic_load_ssbo_block_intel;
1316          break;
1317       case nir_var_mem_global:
1318          op = nir_intrinsic_load_global_block_intel;
1319          break;
1320       case nir_var_mem_shared:
1321          op = nir_intrinsic_load_shared_block_intel;
1322          break;
1323       default:
1324          unreachable("Unsupported explicit IO variable mode");
1325       }
1326       break;
1327 
1328    default:
1329       unreachable("Invalid intrinsic");
1330    }
1331 
1332    nir_intrinsic_instr *load = nir_intrinsic_instr_create(b->shader, op);
1333 
1334    if (op == nir_intrinsic_load_global_constant_offset) {
1335       assert(addr_format == nir_address_format_64bit_global_32bit_offset);
1336       load->src[0] = nir_src_for_ssa(
1337          nir_pack_64_2x32(b, nir_channels(b, addr, 0x3)));
1338       load->src[1] = nir_src_for_ssa(nir_channel(b, addr, 3));
1339    } else if (op == nir_intrinsic_load_global_constant_bounded) {
1340       assert(addr_format == nir_address_format_64bit_bounded_global);
1341       load->src[0] = nir_src_for_ssa(
1342          nir_pack_64_2x32(b, nir_channels(b, addr, 0x3)));
1343       load->src[1] = nir_src_for_ssa(nir_channel(b, addr, 3));
1344       load->src[2] = nir_src_for_ssa(nir_channel(b, addr, 2));
1345    } else if (addr_format_is_global(addr_format, mode)) {
1346       load->src[0] = nir_src_for_ssa(addr_to_global(b, addr, addr_format));
1347    } else if (addr_format_is_offset(addr_format, mode)) {
1348       assert(addr->num_components == 1);
1349       load->src[0] = nir_src_for_ssa(addr_to_offset(b, addr, addr_format));
1350    } else {
1351       load->src[0] = nir_src_for_ssa(addr_to_index(b, addr, addr_format));
1352       load->src[1] = nir_src_for_ssa(addr_to_offset(b, addr, addr_format));
1353    }
1354 
1355    if (nir_intrinsic_has_access(load))
1356       nir_intrinsic_set_access(load, nir_intrinsic_access(intrin));
1357 
1358    if (op == nir_intrinsic_load_constant) {
1359       nir_intrinsic_set_base(load, 0);
1360       nir_intrinsic_set_range(load, b->shader->constant_data_size);
1361    } else if (mode == nir_var_mem_push_const) {
1362       /* Push constants are required to be able to be chased back to the
1363        * variable so we can provide a base/range.
1364        */
1365       nir_variable *var = nir_deref_instr_get_variable(deref);
1366       nir_intrinsic_set_base(load, 0);
1367       nir_intrinsic_set_range(load, glsl_get_explicit_size(var->type, false));
1368    }
1369 
1370    unsigned bit_size = intrin->dest.ssa.bit_size;
1371    if (bit_size == 1) {
1372       /* TODO: Make the native bool bit_size an option. */
1373       bit_size = 32;
1374    }
1375 
1376    if (nir_intrinsic_has_align(load))
1377       nir_intrinsic_set_align(load, align_mul, align_offset);
1378 
1379    if (nir_intrinsic_has_range_base(load)) {
1380       unsigned base, range;
1381       nir_get_explicit_deref_range(deref, addr_format, &base, &range);
1382       nir_intrinsic_set_range_base(load, base);
1383       nir_intrinsic_set_range(load, range);
1384    }
1385 
1386    assert(intrin->dest.is_ssa);
1387    load->num_components = num_components;
1388    nir_ssa_dest_init(&load->instr, &load->dest, num_components,
1389                      bit_size, NULL);
1390 
1391    assert(bit_size % 8 == 0);
1392 
1393    nir_ssa_def *result;
1394    if (addr_format_needs_bounds_check(addr_format) &&
1395        op != nir_intrinsic_load_global_constant_bounded) {
1396       /* We don't need to bounds-check global_constant_bounded because bounds
1397        * checking is handled by the intrinsic itself.
1398        *
1399        * The Vulkan spec for robustBufferAccess gives us quite a few options
1400        * as to what we can do with an OOB read.  Unfortunately, returning
1401        * undefined values isn't one of them so we return an actual zero.
1402        */
1403       nir_ssa_def *zero = nir_imm_zero(b, load->num_components, bit_size);
1404 
1405       /* TODO: Better handle block_intel. */
1406       const unsigned load_size = (bit_size / 8) * load->num_components;
1407       nir_push_if(b, addr_is_in_bounds(b, addr, addr_format, load_size));
1408 
1409       nir_builder_instr_insert(b, &load->instr);
1410 
1411       nir_pop_if(b, NULL);
1412 
1413       result = nir_if_phi(b, &load->dest.ssa, zero);
1414    } else {
1415       nir_builder_instr_insert(b, &load->instr);
1416       result = &load->dest.ssa;
1417    }
1418 
1419    if (intrin->dest.ssa.bit_size == 1) {
1420       /* For shared, we can go ahead and use NIR's and/or the back-end's
1421        * standard encoding for booleans rather than forcing a 0/1 boolean.
1422        * This should save an instruction or two.
1423        */
1424       if (mode == nir_var_mem_shared ||
1425           mode == nir_var_shader_temp ||
1426           mode == nir_var_function_temp)
1427          result = nir_b2b1(b, result);
1428       else
1429          result = nir_i2b(b, result);
1430    }
1431 
1432    return result;
1433 }
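
/* For bounds-checked address formats, the lowering above produces control
 * flow roughly like the following (an illustrative sketch, not literal NIR
 * syntax):
 *
 *    if (addr_is_in_bounds(addr, load_size)) {
 *       loaded = load(...);
 *    }
 *    result = phi(loaded, 0);
 *
 * i.e. out-of-bounds reads yield zero, which is one of the behaviors
 * permitted by robustBufferAccess.
 */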
1434 
1435 static void
1436 build_explicit_io_store(nir_builder *b, nir_intrinsic_instr *intrin,
1437                         nir_ssa_def *addr, nir_address_format addr_format,
1438                         nir_variable_mode modes,
1439                         uint32_t align_mul, uint32_t align_offset,
1440                         nir_ssa_def *value, nir_component_mask_t write_mask)
1441 {
1442    modes = canonicalize_generic_modes(modes);
1443 
1444    if (util_bitcount(modes) > 1) {
1445       if (addr_format_is_global(addr_format, modes)) {
1446          build_explicit_io_store(b, intrin, addr, addr_format,
1447                                  nir_var_mem_global,
1448                                  align_mul, align_offset,
1449                                  value, write_mask);
1450       } else if (modes & nir_var_function_temp) {
1451          nir_push_if(b, build_runtime_addr_mode_check(b, addr, addr_format,
1452                                                       nir_var_function_temp));
1453          build_explicit_io_store(b, intrin, addr, addr_format,
1454                                  nir_var_function_temp,
1455                                  align_mul, align_offset,
1456                                  value, write_mask);
1457          nir_push_else(b, NULL);
1458          build_explicit_io_store(b, intrin, addr, addr_format,
1459                                  modes & ~nir_var_function_temp,
1460                                  align_mul, align_offset,
1461                                  value, write_mask);
1462          nir_pop_if(b, NULL);
1463       } else {
1464          nir_push_if(b, build_runtime_addr_mode_check(b, addr, addr_format,
1465                                                       nir_var_mem_shared));
1466          assert(modes & nir_var_mem_shared);
1467          build_explicit_io_store(b, intrin, addr, addr_format,
1468                                  nir_var_mem_shared,
1469                                  align_mul, align_offset,
1470                                  value, write_mask);
1471          nir_push_else(b, NULL);
1472          assert(modes & nir_var_mem_global);
1473          build_explicit_io_store(b, intrin, addr, addr_format,
1474                                  nir_var_mem_global,
1475                                  align_mul, align_offset,
1476                                  value, write_mask);
1477          nir_pop_if(b, NULL);
1478       }
1479       return;
1480    }
1481 
1482    assert(util_bitcount(modes) == 1);
1483    const nir_variable_mode mode = modes;
1484 
1485    nir_intrinsic_op op;
1486    switch (intrin->intrinsic) {
1487    case nir_intrinsic_store_deref:
1488       assert(write_mask != 0);
1489 
1490       switch (mode) {
1491       case nir_var_mem_ssbo:
1492          if (addr_format_is_global(addr_format, mode))
1493             op = nir_intrinsic_store_global;
1494          else
1495             op = nir_intrinsic_store_ssbo;
1496          break;
1497       case nir_var_mem_global:
1498          assert(addr_format_is_global(addr_format, mode));
1499          op = nir_intrinsic_store_global;
1500          break;
1501       case nir_var_mem_shared:
1502          assert(addr_format_is_offset(addr_format, mode));
1503          op = nir_intrinsic_store_shared;
1504          break;
1505       case nir_var_shader_temp:
1506       case nir_var_function_temp:
1507          if (addr_format_is_offset(addr_format, mode)) {
1508             op = nir_intrinsic_store_scratch;
1509          } else {
1510             assert(addr_format_is_global(addr_format, mode));
1511             op = nir_intrinsic_store_global;
1512          }
1513          break;
1514       default:
1515          unreachable("Unsupported explicit IO variable mode");
1516       }
1517       break;
1518 
1519    case nir_intrinsic_store_deref_block_intel:
1520       assert(write_mask == 0);
1521 
1522       switch (mode) {
1523       case nir_var_mem_ssbo:
1524          if (addr_format_is_global(addr_format, mode))
1525             op = nir_intrinsic_store_global_block_intel;
1526          else
1527             op = nir_intrinsic_store_ssbo_block_intel;
1528          break;
1529       case nir_var_mem_global:
1530          op = nir_intrinsic_store_global_block_intel;
1531          break;
1532       case nir_var_mem_shared:
1533          op = nir_intrinsic_store_shared_block_intel;
1534          break;
1535       default:
1536          unreachable("Unsupported explicit IO variable mode");
1537       }
1538       break;
1539 
1540    default:
1541       unreachable("Invalid intrinsic");
1542    }
1543 
1544    nir_intrinsic_instr *store = nir_intrinsic_instr_create(b->shader, op);
1545 
1546    if (value->bit_size == 1) {
1547       /* For shared, we can go ahead and use NIR's and/or the back-end's
1548        * standard encoding for booleans rather than forcing a 0/1 boolean.
1549        * This should save an instruction or two.
1550        *
1551        * TODO: Make the native bool bit_size an option.
1552        */
1553       if (mode == nir_var_mem_shared ||
1554           mode == nir_var_shader_temp ||
1555           mode == nir_var_function_temp)
1556          value = nir_b2b32(b, value);
1557       else
1558          value = nir_b2i(b, value, 32);
1559    }
1560 
1561    store->src[0] = nir_src_for_ssa(value);
1562    if (addr_format_is_global(addr_format, mode)) {
1563       store->src[1] = nir_src_for_ssa(addr_to_global(b, addr, addr_format));
1564    } else if (addr_format_is_offset(addr_format, mode)) {
1565       assert(addr->num_components == 1);
1566       store->src[1] = nir_src_for_ssa(addr_to_offset(b, addr, addr_format));
1567    } else {
1568       store->src[1] = nir_src_for_ssa(addr_to_index(b, addr, addr_format));
1569       store->src[2] = nir_src_for_ssa(addr_to_offset(b, addr, addr_format));
1570    }
1571 
1572    nir_intrinsic_set_write_mask(store, write_mask);
1573 
1574    if (nir_intrinsic_has_access(store))
1575       nir_intrinsic_set_access(store, nir_intrinsic_access(intrin));
1576 
1577    nir_intrinsic_set_align(store, align_mul, align_offset);
1578 
1579    assert(value->num_components == 1 ||
1580           value->num_components == intrin->num_components);
1581    store->num_components = value->num_components;
1582 
1583    assert(value->bit_size % 8 == 0);
1584 
1585    if (addr_format_needs_bounds_check(addr_format)) {
1586       /* TODO: Better handle block_intel. */
1587       const unsigned store_size = (value->bit_size / 8) * store->num_components;
1588       nir_push_if(b, addr_is_in_bounds(b, addr, addr_format, store_size));
1589 
1590       nir_builder_instr_insert(b, &store->instr);
1591 
1592       nir_pop_if(b, NULL);
1593    } else {
1594       nir_builder_instr_insert(b, &store->instr);
1595    }
1596 }
1597 
1598 static nir_ssa_def *
1599 build_explicit_io_atomic(nir_builder *b, nir_intrinsic_instr *intrin,
1600                          nir_ssa_def *addr, nir_address_format addr_format,
1601                          nir_variable_mode modes)
1602 {
1603    modes = canonicalize_generic_modes(modes);
1604 
1605    if (util_bitcount(modes) > 1) {
1606       if (addr_format_is_global(addr_format, modes)) {
1607          return build_explicit_io_atomic(b, intrin, addr, addr_format,
1608                                          nir_var_mem_global);
1609       } else if (modes & nir_var_function_temp) {
1610          nir_push_if(b, build_runtime_addr_mode_check(b, addr, addr_format,
1611                                                       nir_var_function_temp));
1612          nir_ssa_def *res1 =
1613             build_explicit_io_atomic(b, intrin, addr, addr_format,
1614                                      nir_var_function_temp);
1615          nir_push_else(b, NULL);
1616          nir_ssa_def *res2 =
1617             build_explicit_io_atomic(b, intrin, addr, addr_format,
1618                                      modes & ~nir_var_function_temp);
1619          nir_pop_if(b, NULL);
1620          return nir_if_phi(b, res1, res2);
1621       } else {
1622          nir_push_if(b, build_runtime_addr_mode_check(b, addr, addr_format,
1623                                                       nir_var_mem_shared));
1624          assert(modes & nir_var_mem_shared);
1625          nir_ssa_def *res1 =
1626             build_explicit_io_atomic(b, intrin, addr, addr_format,
1627                                      nir_var_mem_shared);
1628          nir_push_else(b, NULL);
1629          assert(modes & nir_var_mem_global);
1630          nir_ssa_def *res2 =
1631             build_explicit_io_atomic(b, intrin, addr, addr_format,
1632                                      nir_var_mem_global);
1633          nir_pop_if(b, NULL);
1634          return nir_if_phi(b, res1, res2);
1635       }
1636    }
1637 
1638    assert(util_bitcount(modes) == 1);
1639    const nir_variable_mode mode = modes;
1640 
1641    const unsigned num_data_srcs =
1642       nir_intrinsic_infos[intrin->intrinsic].num_srcs - 1;
1643 
1644    nir_intrinsic_op op;
1645    switch (mode) {
1646    case nir_var_mem_ssbo:
1647       if (addr_format_is_global(addr_format, mode))
1648          op = global_atomic_for_deref(intrin->intrinsic);
1649       else
1650          op = ssbo_atomic_for_deref(intrin->intrinsic);
1651       break;
1652    case nir_var_mem_global:
1653       assert(addr_format_is_global(addr_format, mode));
1654       op = global_atomic_for_deref(intrin->intrinsic);
1655       break;
1656    case nir_var_mem_shared:
1657       assert(addr_format_is_offset(addr_format, mode));
1658       op = shared_atomic_for_deref(intrin->intrinsic);
1659       break;
1660    default:
1661       unreachable("Unsupported explicit IO variable mode");
1662    }
1663 
1664    nir_intrinsic_instr *atomic = nir_intrinsic_instr_create(b->shader, op);
1665 
1666    unsigned src = 0;
1667    if (addr_format_is_global(addr_format, mode)) {
1668       atomic->src[src++] = nir_src_for_ssa(addr_to_global(b, addr, addr_format));
1669    } else if (addr_format_is_offset(addr_format, mode)) {
1670       assert(addr->num_components == 1);
1671       atomic->src[src++] = nir_src_for_ssa(addr_to_offset(b, addr, addr_format));
1672    } else {
1673       atomic->src[src++] = nir_src_for_ssa(addr_to_index(b, addr, addr_format));
1674       atomic->src[src++] = nir_src_for_ssa(addr_to_offset(b, addr, addr_format));
1675    }
1676    for (unsigned i = 0; i < num_data_srcs; i++) {
1677       atomic->src[src++] = nir_src_for_ssa(intrin->src[1 + i].ssa);
1678    }
1679 
1680    /* Global atomics don't have access flags because they assume that the
1681     * address may be non-uniform.
1682     */
1683    if (nir_intrinsic_has_access(atomic))
1684       nir_intrinsic_set_access(atomic, nir_intrinsic_access(intrin));
1685 
1686    assert(intrin->dest.ssa.num_components == 1);
1687    nir_ssa_dest_init(&atomic->instr, &atomic->dest,
1688                      1, intrin->dest.ssa.bit_size, NULL);
1689 
1690    assert(atomic->dest.ssa.bit_size % 8 == 0);
1691 
1692    if (addr_format_needs_bounds_check(addr_format)) {
1693       const unsigned atomic_size = atomic->dest.ssa.bit_size / 8;
1694       nir_push_if(b, addr_is_in_bounds(b, addr, addr_format, atomic_size));
1695 
1696       nir_builder_instr_insert(b, &atomic->instr);
1697 
1698       nir_pop_if(b, NULL);
1699       return nir_if_phi(b, &atomic->dest.ssa,
1700                            nir_ssa_undef(b, 1, atomic->dest.ssa.bit_size));
1701    } else {
1702       nir_builder_instr_insert(b, &atomic->instr);
1703       return &atomic->dest.ssa;
1704    }
1705 }
1706 
1707 nir_ssa_def *
1708 nir_explicit_io_address_from_deref(nir_builder *b, nir_deref_instr *deref,
1709                                    nir_ssa_def *base_addr,
1710                                    nir_address_format addr_format)
1711 {
1712    assert(deref->dest.is_ssa);
1713    switch (deref->deref_type) {
1714    case nir_deref_type_var:
1715       return build_addr_for_var(b, deref->var, addr_format);
1716 
1717    case nir_deref_type_array: {
1718       unsigned stride = nir_deref_instr_array_stride(deref);
1719       assert(stride > 0);
1720 
1721       nir_ssa_def *index = nir_ssa_for_src(b, deref->arr.index, 1);
1722       index = nir_i2i(b, index, addr_get_offset_bit_size(base_addr, addr_format));
1723       return build_addr_iadd(b, base_addr, addr_format, deref->modes,
1724                                 nir_amul_imm(b, index, stride));
1725    }
1726 
1727    case nir_deref_type_ptr_as_array: {
1728       nir_ssa_def *index = nir_ssa_for_src(b, deref->arr.index, 1);
1729       index = nir_i2i(b, index, addr_get_offset_bit_size(base_addr, addr_format));
1730       unsigned stride = nir_deref_instr_array_stride(deref);
1731       return build_addr_iadd(b, base_addr, addr_format, deref->modes,
1732                                 nir_amul_imm(b, index, stride));
1733    }
1734 
1735    case nir_deref_type_array_wildcard:
1736       unreachable("Wildcards should be lowered by now");
1737       break;
1738 
1739    case nir_deref_type_struct: {
1740       nir_deref_instr *parent = nir_deref_instr_parent(deref);
1741       int offset = glsl_get_struct_field_offset(parent->type,
1742                                                 deref->strct.index);
1743       assert(offset >= 0);
1744       return build_addr_iadd_imm(b, base_addr, addr_format,
1745                                  deref->modes, offset);
1746    }
1747 
1748    case nir_deref_type_cast:
1749       /* Nothing to do here */
1750       return base_addr;
1751    }
1752 
1753    unreachable("Invalid NIR deref type");
1754 }
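
/* Worked example (illustrative): for a deref chain of the form
 * var -> struct field at byte offset 16 -> array element with stride 12 and
 * index i, the address built above is
 *
 *    addr = build_addr_for_var(var) + 16 + i * 12
 *
 * with each deref in the chain contributing one build_addr_iadd(_imm) step.
 */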
1755 
1756 void
1757 nir_lower_explicit_io_instr(nir_builder *b,
1758                             nir_intrinsic_instr *intrin,
1759                             nir_ssa_def *addr,
1760                             nir_address_format addr_format)
1761 {
1762    b->cursor = nir_after_instr(&intrin->instr);
1763 
1764    nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]);
1765    unsigned vec_stride = glsl_get_explicit_stride(deref->type);
1766    unsigned scalar_size = type_scalar_size_bytes(deref->type);
1767    assert(vec_stride == 0 || glsl_type_is_vector(deref->type));
1768    assert(vec_stride == 0 || vec_stride >= scalar_size);
1769 
1770    uint32_t align_mul, align_offset;
1771    if (!nir_get_explicit_deref_align(deref, true, &align_mul, &align_offset)) {
1772       /* If we don't have an alignment from the deref, assume scalar */
1773       align_mul = scalar_size;
1774       align_offset = 0;
1775    }
1776 
1777    switch (intrin->intrinsic) {
1778    case nir_intrinsic_load_deref: {
1779       nir_ssa_def *value;
1780       if (vec_stride > scalar_size) {
1781          nir_ssa_def *comps[NIR_MAX_VEC_COMPONENTS] = { NULL, };
1782          for (unsigned i = 0; i < intrin->num_components; i++) {
1783             unsigned comp_offset = i * vec_stride;
1784             nir_ssa_def *comp_addr = build_addr_iadd_imm(b, addr, addr_format,
1785                                                          deref->modes,
1786                                                          comp_offset);
1787             comps[i] = build_explicit_io_load(b, intrin, comp_addr,
1788                                               addr_format, deref->modes,
1789                                               align_mul,
1790                                               (align_offset + comp_offset) %
1791                                                  align_mul,
1792                                               1);
1793          }
1794          value = nir_vec(b, comps, intrin->num_components);
1795       } else {
1796          value = build_explicit_io_load(b, intrin, addr, addr_format,
1797                                         deref->modes, align_mul, align_offset,
1798                                         intrin->num_components);
1799       }
1800       nir_ssa_def_rewrite_uses(&intrin->dest.ssa, value);
1801       break;
1802    }
1803 
1804    case nir_intrinsic_store_deref: {
1805       assert(intrin->src[1].is_ssa);
1806       nir_ssa_def *value = intrin->src[1].ssa;
1807       nir_component_mask_t write_mask = nir_intrinsic_write_mask(intrin);
1808       if (vec_stride > scalar_size) {
1809          for (unsigned i = 0; i < intrin->num_components; i++) {
1810             if (!(write_mask & (1 << i)))
1811                continue;
1812 
1813             unsigned comp_offset = i * vec_stride;
1814             nir_ssa_def *comp_addr = build_addr_iadd_imm(b, addr, addr_format,
1815                                                          deref->modes,
1816                                                          comp_offset);
1817             build_explicit_io_store(b, intrin, comp_addr, addr_format,
1818                                     deref->modes, align_mul,
1819                                     (align_offset + comp_offset) % align_mul,
1820                                     nir_channel(b, value, i), 1);
1821          }
1822       } else {
1823          build_explicit_io_store(b, intrin, addr, addr_format,
1824                                  deref->modes, align_mul, align_offset,
1825                                  value, write_mask);
1826       }
1827       break;
1828    }
1829 
1830    case nir_intrinsic_load_deref_block_intel: {
1831       nir_ssa_def *value = build_explicit_io_load(b, intrin, addr, addr_format,
1832                                                   deref->modes,
1833                                                   align_mul, align_offset,
1834                                                   intrin->num_components);
1835       nir_ssa_def_rewrite_uses(&intrin->dest.ssa, value);
1836       break;
1837    }
1838 
1839    case nir_intrinsic_store_deref_block_intel: {
1840       assert(intrin->src[1].is_ssa);
1841       nir_ssa_def *value = intrin->src[1].ssa;
1842       const nir_component_mask_t write_mask = 0;
1843       build_explicit_io_store(b, intrin, addr, addr_format,
1844                               deref->modes, align_mul, align_offset,
1845                               value, write_mask);
1846       break;
1847    }
1848 
1849    default: {
1850       nir_ssa_def *value =
1851          build_explicit_io_atomic(b, intrin, addr, addr_format, deref->modes);
1852       nir_ssa_def_rewrite_uses(&intrin->dest.ssa, value);
1853       break;
1854    }
1855    }
1856 
1857    nir_instr_remove(&intrin->instr);
1858 }
1859 
1860 bool
1861 nir_get_explicit_deref_align(nir_deref_instr *deref,
1862                              bool default_to_type_align,
1863                              uint32_t *align_mul,
1864                              uint32_t *align_offset)
1865 {
1866    if (deref->deref_type == nir_deref_type_var) {
1867       /* If we see a variable, align_mul is effectively infinite because we
1868        * know the offset exactly (up to the offset of the base pointer for the
1869        * given variable mode).  We have to pick something, so we choose 256B
1870        * as an arbitrary alignment that seems high enough for any reasonable
1871        * wide-load use-case.  Back-ends should clamp alignments down if 256B
1872        * is too large for some reason.
1873        */
1874       *align_mul = 256;
1875       *align_offset = deref->var->data.driver_location % 256;
1876       return true;
1877    }
1878 
1879    /* If we're a cast deref that has an alignment, use that. */
1880    if (deref->deref_type == nir_deref_type_cast && deref->cast.align_mul > 0) {
1881       *align_mul = deref->cast.align_mul;
1882       *align_offset = deref->cast.align_offset;
1883       return true;
1884    }
1885 
1886    /* Otherwise, we need to compute the alignment based on the parent */
1887    nir_deref_instr *parent = nir_deref_instr_parent(deref);
1888    if (parent == NULL) {
1889       assert(deref->deref_type == nir_deref_type_cast);
1890       if (default_to_type_align) {
1891          /* If we don't have a parent, assume the type's alignment, if any. */
1892          unsigned type_align = glsl_get_explicit_alignment(deref->type);
1893          if (type_align == 0)
1894             return false;
1895 
1896          *align_mul = type_align;
1897          *align_offset = 0;
1898          return true;
1899       } else {
1900          return false;
1901       }
1902    }
1903 
1904    uint32_t parent_mul, parent_offset;
1905    if (!nir_get_explicit_deref_align(parent, default_to_type_align,
1906                                      &parent_mul, &parent_offset))
1907       return false;
1908 
1909    switch (deref->deref_type) {
1910    case nir_deref_type_var:
1911       unreachable("Handled above");
1912 
1913    case nir_deref_type_array:
1914    case nir_deref_type_array_wildcard:
1915    case nir_deref_type_ptr_as_array: {
1916       const unsigned stride = nir_deref_instr_array_stride(deref);
1917       if (stride == 0)
1918          return false;
1919 
1920       if (deref->deref_type != nir_deref_type_array_wildcard &&
1921           nir_src_is_const(deref->arr.index)) {
1922          unsigned offset = nir_src_as_uint(deref->arr.index) * stride;
1923          *align_mul = parent_mul;
1924          *align_offset = (parent_offset + offset) % parent_mul;
1925       } else {
1926          /* If this is a wildcard or an indirect deref, we have to go with the
1927           * power-of-two gcd, i.e. the largest power of two that divides the
1928           * stride (e.g. a 12-byte stride only guarantees 4-byte alignment). */
1929          *align_mul = MIN2(parent_mul, 1 << (ffs(stride) - 1));
1930          *align_offset = parent_offset % *align_mul;
1931       }
1932       return true;
1933    }
1934 
1935    case nir_deref_type_struct: {
1936       const int offset = glsl_get_struct_field_offset(parent->type,
1937                                                       deref->strct.index);
1938       if (offset < 0)
1939          return false;
1940 
1941       *align_mul = parent_mul;
1942       *align_offset = (parent_offset + offset) % parent_mul;
1943       return true;
1944    }
1945 
1946    case nir_deref_type_cast:
1947       /* We handled the explicit alignment case above. */
1948       assert(deref->cast.align_mul == 0);
1949       *align_mul = parent_mul;
1950       *align_offset = parent_offset;
1951       return true;
1952    }
1953 
1954    unreachable("Invalid deref_instr_type");
1955 }
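
/* Worked example (illustrative): suppose a cast deref carries align_mul=16,
 * align_offset=0 and is indexed as an array with a 12-byte stride.  Then:
 *
 *    constant index 2: align_mul = 16, align_offset = (0 + 2*12) % 16 = 8
 *    indirect index:   align_mul = MIN2(16, 4) = 4, align_offset = 0
 *
 * since 4 is the largest power of two dividing the 12-byte stride.
 */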
1956 
1957 static void
1958 lower_explicit_io_deref(nir_builder *b, nir_deref_instr *deref,
1959                         nir_address_format addr_format)
1960 {
1961    /* Just delete the deref if it's not used.  We can't use
1962     * nir_deref_instr_remove_if_unused here because it may remove more than
1963     * one deref which could break our list walking since we walk the list
1964     * backwards.
1965     */
1966    assert(list_is_empty(&deref->dest.ssa.if_uses));
1967    if (list_is_empty(&deref->dest.ssa.uses)) {
1968       nir_instr_remove(&deref->instr);
1969       return;
1970    }
1971 
1972    b->cursor = nir_after_instr(&deref->instr);
1973 
1974    nir_ssa_def *base_addr = NULL;
1975    if (deref->deref_type != nir_deref_type_var) {
1976       assert(deref->parent.is_ssa);
1977       base_addr = deref->parent.ssa;
1978    }
1979 
1980    nir_ssa_def *addr = nir_explicit_io_address_from_deref(b, deref, base_addr,
1981                                                           addr_format);
1982    assert(addr->bit_size == deref->dest.ssa.bit_size);
1983    assert(addr->num_components == deref->dest.ssa.num_components);
1984 
1985    nir_instr_remove(&deref->instr);
1986    nir_ssa_def_rewrite_uses(&deref->dest.ssa, addr);
1987 }
1988 
1989 static void
1990 lower_explicit_io_access(nir_builder *b, nir_intrinsic_instr *intrin,
1991                          nir_address_format addr_format)
1992 {
1993    assert(intrin->src[0].is_ssa);
1994    nir_lower_explicit_io_instr(b, intrin, intrin->src[0].ssa, addr_format);
1995 }
1996 
1997 static void
1998 lower_explicit_io_array_length(nir_builder *b, nir_intrinsic_instr *intrin,
1999                                nir_address_format addr_format)
2000 {
2001    b->cursor = nir_after_instr(&intrin->instr);
2002 
2003    nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]);
2004 
2005    assert(glsl_type_is_array(deref->type));
2006    assert(glsl_get_length(deref->type) == 0);
2007    assert(nir_deref_mode_is(deref, nir_var_mem_ssbo));
2008    unsigned stride = glsl_get_explicit_stride(deref->type);
2009    assert(stride > 0);
2010 
2011    nir_ssa_def *addr = &deref->dest.ssa;
2012    nir_ssa_def *index = addr_to_index(b, addr, addr_format);
2013    nir_ssa_def *offset = addr_to_offset(b, addr, addr_format);
2014    unsigned access = nir_intrinsic_access(intrin);
2015 
2016    nir_ssa_def *arr_size = nir_get_ssbo_size(b, index, .access=access);
2017    arr_size = nir_imax(b, nir_isub(b, arr_size, offset), nir_imm_int(b, 0u));
2018    arr_size = nir_idiv(b, arr_size, nir_imm_int(b, stride));
2019 
2020    nir_ssa_def_rewrite_uses(&intrin->dest.ssa, arr_size);
2021    nir_instr_remove(&intrin->instr);
2022 }
2023 
2024 static void
2025 lower_explicit_io_mode_check(nir_builder *b, nir_intrinsic_instr *intrin,
2026                              nir_address_format addr_format)
2027 {
2028    if (addr_format_is_global(addr_format, 0)) {
2029       /* If the address format is always global, then the driver can use
2030        * global addresses regardless of the mode.  In that case, don't create
2031        * a check, just whack the intrinsic to addr_mode_is and delegate to the
2032        * driver lowering.
2033        */
2034       intrin->intrinsic = nir_intrinsic_addr_mode_is;
2035       return;
2036    }
2037 
2038    assert(intrin->src[0].is_ssa);
2039    nir_ssa_def *addr = intrin->src[0].ssa;
2040 
2041    b->cursor = nir_instr_remove(&intrin->instr);
2042 
2043    nir_ssa_def *is_mode =
2044       build_runtime_addr_mode_check(b, addr, addr_format,
2045                                     nir_intrinsic_memory_modes(intrin));
2046 
2047    nir_ssa_def_rewrite_uses(&intrin->dest.ssa, is_mode);
2048 }
2049 
2050 static bool
2051 nir_lower_explicit_io_impl(nir_function_impl *impl, nir_variable_mode modes,
2052                            nir_address_format addr_format)
2053 {
2054    bool progress = false;
2055 
2056    nir_builder b;
2057    nir_builder_init(&b, impl);
2058 
2059    /* Walk in reverse order so that we can see the full deref chain when we
2060     * lower the access operations.  We lower them assuming that the derefs
2061     * will be turned into address calculations later.
2062     */
2063    nir_foreach_block_reverse(block, impl) {
2064       nir_foreach_instr_reverse_safe(instr, block) {
2065          switch (instr->type) {
2066          case nir_instr_type_deref: {
2067             nir_deref_instr *deref = nir_instr_as_deref(instr);
2068             if (nir_deref_mode_is_in_set(deref, modes)) {
2069                lower_explicit_io_deref(&b, deref, addr_format);
2070                progress = true;
2071             }
2072             break;
2073          }
2074 
2075          case nir_instr_type_intrinsic: {
2076             nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
2077             switch (intrin->intrinsic) {
2078             case nir_intrinsic_load_deref:
2079             case nir_intrinsic_store_deref:
2080             case nir_intrinsic_load_deref_block_intel:
2081             case nir_intrinsic_store_deref_block_intel:
2082             case nir_intrinsic_deref_atomic_add:
2083             case nir_intrinsic_deref_atomic_imin:
2084             case nir_intrinsic_deref_atomic_umin:
2085             case nir_intrinsic_deref_atomic_imax:
2086             case nir_intrinsic_deref_atomic_umax:
2087             case nir_intrinsic_deref_atomic_and:
2088             case nir_intrinsic_deref_atomic_or:
2089             case nir_intrinsic_deref_atomic_xor:
2090             case nir_intrinsic_deref_atomic_exchange:
2091             case nir_intrinsic_deref_atomic_comp_swap:
2092             case nir_intrinsic_deref_atomic_fadd:
2093             case nir_intrinsic_deref_atomic_fmin:
2094             case nir_intrinsic_deref_atomic_fmax:
2095             case nir_intrinsic_deref_atomic_fcomp_swap: {
2096                nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]);
2097                if (nir_deref_mode_is_in_set(deref, modes)) {
2098                   lower_explicit_io_access(&b, intrin, addr_format);
2099                   progress = true;
2100                }
2101                break;
2102             }
2103 
2104             case nir_intrinsic_deref_buffer_array_length: {
2105                nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]);
2106                if (nir_deref_mode_is_in_set(deref, modes)) {
2107                   lower_explicit_io_array_length(&b, intrin, addr_format);
2108                   progress = true;
2109                }
2110                break;
2111             }
2112 
2113             case nir_intrinsic_deref_mode_is: {
2114                nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]);
2115                if (nir_deref_mode_is_in_set(deref, modes)) {
2116                   lower_explicit_io_mode_check(&b, intrin, addr_format);
2117                   progress = true;
2118                }
2119                break;
2120             }
2121 
2122             default:
2123                break;
2124             }
2125             break;
2126          }
2127 
2128          default:
2129             /* Nothing to do */
2130             break;
2131          }
2132       }
2133    }
2134 
2135    if (progress) {
2136       nir_metadata_preserve(impl, nir_metadata_block_index |
2137                                   nir_metadata_dominance);
2138    } else {
2139       nir_metadata_preserve(impl, nir_metadata_all);
2140    }
2141 
2142    return progress;
2143 }
2144 
2145 /** Lower explicitly laid out I/O access to byte offset/address intrinsics
2146  *
2147  * This pass is intended to be used for any I/O which touches memory external
2148  * to the shader or which is directly visible to the client.  It requires that
2149  * all data types in the given modes have explicit stride/offset decorations
2150  * to tell it exactly how to calculate the offset/address for the given load,
2151  * store, or atomic operation.  If the offset/stride information does not come
2152  * from the client explicitly (as with shared variables in GL or Vulkan),
2153  * nir_lower_vars_to_explicit_types() can be used to add them.
2154  *
2155  * Unlike nir_lower_io, this pass is fully capable of handling incomplete
2156  * pointer chains which may contain cast derefs.  It does so by walking the
2157  * deref chain backwards and simply replacing each deref, one at a time, with
2158  * the appropriate address calculation.  The pass takes a nir_address_format
2159  * parameter which describes how the offset or address is to be represented
2160  * during calculations.  By ensuring that the address is always in a
2161  * consistent format, pointers can safely be conjured from thin air by the
2162  * driver, stored to variables, passed through phis, etc.
2163  *
2164  * The one exception to the simple algorithm described above is for handling
2165  * row-major matrices in which case we may look down one additional level of
2166  * the deref chain.
2167  *
2168  * This pass is also capable of handling OpenCL generic pointers.  If the
2169  * address mode is global, it will lower any ambiguous (more than one mode)
2170  * access to global and pass through the deref_mode_is run-time checks as
2171  * addr_mode_is.  This assumes the driver has somehow mapped shared and
2172  * scratch memory to the global address space.  For other modes such as
2173  * 62bit_generic, there is an enum embedded in the address and we lower
2174  * ambiguous access to an if-ladder and deref_mode_is to a check against the
2175  * embedded enum.  If nir_lower_explicit_io is called on any shader that
2176  * contains generic pointers, it must either be used on all of the generic
2177  * modes or none.
2178  */
2179 bool
2180 nir_lower_explicit_io(nir_shader *shader, nir_variable_mode modes,
2181                       nir_address_format addr_format)
2182 {
2183    bool progress = false;
2184 
2185    nir_foreach_function(function, shader) {
2186       if (function->impl &&
2187           nir_lower_explicit_io_impl(function->impl, modes, addr_format))
2188          progress = true;
2189    }
2190 
2191    return progress;
2192 }
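
/* Example usage (a sketch, not prescriptive): a typical driver runs this
 * pass once per mode/address-format pairing it uses, e.g.
 *
 *    NIR_PASS_V(nir, nir_lower_explicit_io, nir_var_mem_ssbo,
 *               nir_address_format_64bit_bounded_global);
 *    NIR_PASS_V(nir, nir_lower_explicit_io, nir_var_mem_shared,
 *               nir_address_format_32bit_offset);
 *
 * The exact pairings are driver policy; the only hard rule is the one above
 * about generic pointer modes being lowered together.
 */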
2193 
2194 static bool
2195 nir_lower_vars_to_explicit_types_impl(nir_function_impl *impl,
2196                                       nir_variable_mode modes,
2197                                       glsl_type_size_align_func type_info)
2198 {
2199    bool progress = false;
2200 
2201    nir_foreach_block(block, impl) {
2202       nir_foreach_instr(instr, block) {
2203          if (instr->type != nir_instr_type_deref)
2204             continue;
2205 
2206          nir_deref_instr *deref = nir_instr_as_deref(instr);
2207          if (!nir_deref_mode_is_in_set(deref, modes))
2208             continue;
2209 
2210          unsigned size, alignment;
2211          const struct glsl_type *new_type =
2212             glsl_get_explicit_type_for_size_align(deref->type, type_info, &size, &alignment);
2213          if (new_type != deref->type) {
2214             progress = true;
2215             deref->type = new_type;
2216          }
2217          if (deref->deref_type == nir_deref_type_cast) {
2218             /* See also glsl_type::get_explicit_type_for_size_align() */
2219             unsigned new_stride = align(size, alignment);
2220             if (new_stride != deref->cast.ptr_stride) {
2221                deref->cast.ptr_stride = new_stride;
2222                progress = true;
2223             }
2224          }
2225       }
2226    }
2227 
2228    if (progress) {
2229       nir_metadata_preserve(impl, nir_metadata_block_index |
2230                                   nir_metadata_dominance |
2231                                   nir_metadata_live_ssa_defs |
2232                                   nir_metadata_loop_analysis);
2233    } else {
2234       nir_metadata_preserve(impl, nir_metadata_all);
2235    }
2236 
2237    return progress;
2238 }
2239 
2240 static bool
2241 lower_vars_to_explicit(nir_shader *shader,
2242                        struct exec_list *vars, nir_variable_mode mode,
2243                        glsl_type_size_align_func type_info)
2244 {
2245    bool progress = false;
2246    unsigned offset;
2247    switch (mode) {
2248    case nir_var_uniform:
2249       assert(shader->info.stage == MESA_SHADER_KERNEL);
2250       offset = 0;
2251       break;
2252    case nir_var_function_temp:
2253    case nir_var_shader_temp:
2254       offset = shader->scratch_size;
2255       break;
2256    case nir_var_mem_shared:
2257       offset = shader->info.shared_size;
2258       break;
2259    case nir_var_mem_constant:
2260       offset = shader->constant_data_size;
2261       break;
2262    case nir_var_shader_call_data:
2263    case nir_var_ray_hit_attrib:
2264       offset = 0;
2265       break;
2266    default:
2267       unreachable("Unsupported mode");
2268    }
2269    nir_foreach_variable_in_list(var, vars) {
2270       if (var->data.mode != mode)
2271          continue;
2272 
2273       unsigned size, align;
2274       const struct glsl_type *explicit_type =
2275          glsl_get_explicit_type_for_size_align(var->type, type_info, &size, &align);
2276 
2277       if (explicit_type != var->type)
2278          var->type = explicit_type;
2279 
2280       UNUSED bool is_empty_struct =
2281          glsl_type_is_struct_or_ifc(explicit_type) &&
2282          glsl_get_length(explicit_type) == 0;
2283 
2284       assert(util_is_power_of_two_nonzero(align) || is_empty_struct);
2285       var->data.driver_location = ALIGN_POT(offset, align);
2286       offset = var->data.driver_location + size;
2287       progress = true;
2288    }
2289 
2290    switch (mode) {
2291    case nir_var_uniform:
2292       assert(shader->info.stage == MESA_SHADER_KERNEL);
2293       shader->num_uniforms = offset;
2294       break;
2295    case nir_var_shader_temp:
2296    case nir_var_function_temp:
2297       shader->scratch_size = offset;
2298       break;
2299    case nir_var_mem_shared:
2300       shader->info.shared_size = offset;
2301       break;
2302    case nir_var_mem_constant:
2303       shader->constant_data_size = offset;
2304       break;
2305    case nir_var_shader_call_data:
2306    case nir_var_ray_hit_attrib:
2307       break;
2308    default:
2309       unreachable("Unsupported mode");
2310    }
2311 
2312    return progress;
2313 }
2314 
2315 /* If nir_lower_vars_to_explicit_types is called on any shader that contains
2316  * generic pointers, it must either be used on all of the generic modes or
2317  * none.
2318  */
2319 bool
2320 nir_lower_vars_to_explicit_types(nir_shader *shader,
2321                                  nir_variable_mode modes,
2322                                  glsl_type_size_align_func type_info)
2323 {
2324    /* TODO: Situations which need to be handled to support more modes:
2325     * - row-major matrices
2326     * - compact shader inputs/outputs
2327     * - interface types
2328     */
2329    ASSERTED nir_variable_mode supported =
2330       nir_var_mem_shared | nir_var_mem_global | nir_var_mem_constant |
2331       nir_var_shader_temp | nir_var_function_temp | nir_var_uniform |
2332       nir_var_shader_call_data | nir_var_ray_hit_attrib;
2333    assert(!(modes & ~supported) && "unsupported");
2334 
2335    bool progress = false;
2336 
2337    if (modes & nir_var_uniform)
2338       progress |= lower_vars_to_explicit(shader, &shader->variables, nir_var_uniform, type_info);
2339 
2340    if (modes & nir_var_mem_shared) {
2341       assert(!shader->info.shared_memory_explicit_layout);
2342       progress |= lower_vars_to_explicit(shader, &shader->variables, nir_var_mem_shared, type_info);
2343    }
2344 
2345    if (modes & nir_var_shader_temp)
2346       progress |= lower_vars_to_explicit(shader, &shader->variables, nir_var_shader_temp, type_info);
2347    if (modes & nir_var_mem_constant)
2348       progress |= lower_vars_to_explicit(shader, &shader->variables, nir_var_mem_constant, type_info);
2349    if (modes & nir_var_shader_call_data)
2350       progress |= lower_vars_to_explicit(shader, &shader->variables, nir_var_shader_call_data, type_info);
2351    if (modes & nir_var_ray_hit_attrib)
2352       progress |= lower_vars_to_explicit(shader, &shader->variables, nir_var_ray_hit_attrib, type_info);
2353 
2354    nir_foreach_function(function, shader) {
2355       if (function->impl) {
2356          if (modes & nir_var_function_temp)
2357             progress |= lower_vars_to_explicit(shader, &function->impl->locals, nir_var_function_temp, type_info);
2358 
2359          progress |= nir_lower_vars_to_explicit_types_impl(function->impl, modes, type_info);
2360       }
2361    }
2362 
2363    return progress;
2364 }
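
/* Example usage (a sketch, assuming the driver wants natural C-like packing):
 * lay out shared and scratch variables before lowering the corresponding
 * accesses, e.g.
 *
 *    nir_lower_vars_to_explicit_types(nir,
 *                                     nir_var_mem_shared |
 *                                     nir_var_function_temp,
 *                                     glsl_get_natural_size_align_bytes);
 *
 * Any glsl_type_size_align_func with the same contract works here;
 * glsl_get_natural_size_align_bytes is just one common choice.
 */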
2365 
2366 static void
2367 write_constant(void *dst, size_t dst_size,
2368                const nir_constant *c, const struct glsl_type *type)
2369 {
2370    if (glsl_type_is_vector_or_scalar(type)) {
2371       const unsigned num_components = glsl_get_vector_elements(type);
2372       const unsigned bit_size = glsl_get_bit_size(type);
2373       if (bit_size == 1) {
2374          /* Booleans are special-cased to be 32-bit
2375           *
2376           * TODO: Make the native bool bit_size an option.
2377           */
2378          assert(num_components * 4 <= dst_size);
2379          for (unsigned i = 0; i < num_components; i++) {
2380             int32_t b32 = -(int)c->values[i].b;
2381             memcpy((char *)dst + i * 4, &b32, 4);
2382          }
2383       } else {
2384          assert(bit_size >= 8 && bit_size % 8 == 0);
2385          const unsigned byte_size = bit_size / 8;
2386          assert(num_components * byte_size <= dst_size);
2387          for (unsigned i = 0; i < num_components; i++) {
2388             /* Annoyingly, thanks to packed structs, we can't make any
2389              * assumptions about the alignment of dst.  To avoid any strange
2390              * issues with unaligned writes, we always use memcpy.
2391              */
2392             memcpy((char *)dst + i * byte_size, &c->values[i], byte_size);
2393          }
2394       }
2395    } else if (glsl_type_is_array_or_matrix(type)) {
2396       const unsigned array_len = glsl_get_length(type);
2397       const unsigned stride = glsl_get_explicit_stride(type);
2398       assert(stride > 0);
2399       const struct glsl_type *elem_type = glsl_get_array_element(type);
2400       for (unsigned i = 0; i < array_len; i++) {
2401          unsigned elem_offset = i * stride;
2402          assert(elem_offset < dst_size);
2403          write_constant((char *)dst + elem_offset, dst_size - elem_offset,
2404                         c->elements[i], elem_type);
2405       }
2406    } else {
2407       assert(glsl_type_is_struct_or_ifc(type));
2408       const unsigned num_fields = glsl_get_length(type);
2409       for (unsigned i = 0; i < num_fields; i++) {
2410          const int field_offset = glsl_get_struct_field_offset(type, i);
2411          assert(field_offset >= 0 && field_offset < dst_size);
2412          const struct glsl_type *field_type = glsl_get_struct_field(type, i);
2413          write_constant((char *)dst + field_offset, dst_size - field_offset,
2414                         c->elements[i], field_type);
2415       }
2416    }
2417 }
2418 
2419 void
2420 nir_gather_explicit_io_initializers(nir_shader *shader,
2421                                     void *dst, size_t dst_size,
2422                                     nir_variable_mode mode)
2423 {
2424    /* It doesn't really make sense to gather initializers for more than one
2425     * mode at a time.  If this ever becomes well-defined, we can drop the
2426     * assert then.
2427     */
2428    assert(util_bitcount(mode) == 1);
2429 
2430    nir_foreach_variable_with_modes(var, shader, mode) {
2431       assert(var->data.driver_location < dst_size);
2432       write_constant((char *)dst + var->data.driver_location,
2433                      dst_size - var->data.driver_location,
2434                      var->constant_initializer, var->type);
2435    }
2436 }
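
/* Example usage (illustrative): once constant variables have been laid out
 * (e.g. by nir_lower_vars_to_explicit_types with nir_var_mem_constant), a
 * driver could materialize the backing storage along these lines:
 *
 *    void *data = malloc(shader->constant_data_size);
 *    nir_gather_explicit_io_initializers(shader, data,
 *                                        shader->constant_data_size,
 *                                        nir_var_mem_constant);
 *
 * and then upload "data" wherever load_constant / load_global_constant will
 * read it from.
 */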
2437 
2438 /**
2439  * Return the offset source for a load/store intrinsic.
2440  */
2441 nir_src *
2442 nir_get_io_offset_src(nir_intrinsic_instr *instr)
2443 {
2444    switch (instr->intrinsic) {
2445    case nir_intrinsic_load_input:
2446    case nir_intrinsic_load_output:
2447    case nir_intrinsic_load_shared:
2448    case nir_intrinsic_load_uniform:
2449    case nir_intrinsic_load_kernel_input:
2450    case nir_intrinsic_load_global:
2451    case nir_intrinsic_load_global_constant:
2452    case nir_intrinsic_load_scratch:
2453    case nir_intrinsic_load_fs_input_interp_deltas:
2454    case nir_intrinsic_shared_atomic_add:
2455    case nir_intrinsic_shared_atomic_and:
2456    case nir_intrinsic_shared_atomic_comp_swap:
2457    case nir_intrinsic_shared_atomic_exchange:
2458    case nir_intrinsic_shared_atomic_fadd:
2459    case nir_intrinsic_shared_atomic_fcomp_swap:
2460    case nir_intrinsic_shared_atomic_fmax:
2461    case nir_intrinsic_shared_atomic_fmin:
2462    case nir_intrinsic_shared_atomic_imax:
2463    case nir_intrinsic_shared_atomic_imin:
2464    case nir_intrinsic_shared_atomic_or:
2465    case nir_intrinsic_shared_atomic_umax:
2466    case nir_intrinsic_shared_atomic_umin:
2467    case nir_intrinsic_shared_atomic_xor:
2468    case nir_intrinsic_global_atomic_add:
2469    case nir_intrinsic_global_atomic_and:
2470    case nir_intrinsic_global_atomic_comp_swap:
2471    case nir_intrinsic_global_atomic_exchange:
2472    case nir_intrinsic_global_atomic_fadd:
2473    case nir_intrinsic_global_atomic_fcomp_swap:
2474    case nir_intrinsic_global_atomic_fmax:
2475    case nir_intrinsic_global_atomic_fmin:
2476    case nir_intrinsic_global_atomic_imax:
2477    case nir_intrinsic_global_atomic_imin:
2478    case nir_intrinsic_global_atomic_or:
2479    case nir_intrinsic_global_atomic_umax:
2480    case nir_intrinsic_global_atomic_umin:
2481    case nir_intrinsic_global_atomic_xor:
2482       return &instr->src[0];
2483    case nir_intrinsic_load_ubo:
2484    case nir_intrinsic_load_ssbo:
2485    case nir_intrinsic_load_input_vertex:
2486    case nir_intrinsic_load_per_vertex_input:
2487    case nir_intrinsic_load_per_vertex_output:
2488    case nir_intrinsic_load_per_primitive_output:
2489    case nir_intrinsic_load_interpolated_input:
2490    case nir_intrinsic_store_output:
2491    case nir_intrinsic_store_shared:
2492    case nir_intrinsic_store_global:
2493    case nir_intrinsic_store_scratch:
2494    case nir_intrinsic_ssbo_atomic_add:
2495    case nir_intrinsic_ssbo_atomic_imin:
2496    case nir_intrinsic_ssbo_atomic_umin:
2497    case nir_intrinsic_ssbo_atomic_imax:
2498    case nir_intrinsic_ssbo_atomic_umax:
2499    case nir_intrinsic_ssbo_atomic_and:
2500    case nir_intrinsic_ssbo_atomic_or:
2501    case nir_intrinsic_ssbo_atomic_xor:
2502    case nir_intrinsic_ssbo_atomic_exchange:
2503    case nir_intrinsic_ssbo_atomic_comp_swap:
2504    case nir_intrinsic_ssbo_atomic_fadd:
2505    case nir_intrinsic_ssbo_atomic_fmin:
2506    case nir_intrinsic_ssbo_atomic_fmax:
2507    case nir_intrinsic_ssbo_atomic_fcomp_swap:
2508       return &instr->src[1];
2509    case nir_intrinsic_store_ssbo:
2510    case nir_intrinsic_store_per_vertex_output:
2511    case nir_intrinsic_store_per_primitive_output:
2512       return &instr->src[2];
2513    default:
2514       return NULL;
2515    }
2516 }
2517 
2518 /**
2519  * Return the vertex index source for a load/store per_vertex intrinsic.
2520  */
2521 nir_src *
2522 nir_get_io_vertex_index_src(nir_intrinsic_instr *instr)
2523 {
2524    switch (instr->intrinsic) {
2525    case nir_intrinsic_load_per_vertex_input:
2526    case nir_intrinsic_load_per_vertex_output:
2527       return &instr->src[0];
2528    case nir_intrinsic_store_per_vertex_output:
2529       return &instr->src[1];
2530    default:
2531       return NULL;
2532    }
2533 }
2534 
2535 /**
2536  * Return the numeric constant that identifies a NULL pointer for each address
2537  * format.
2538  */
2539 const nir_const_value *
2540 nir_address_format_null_value(nir_address_format addr_format)
2541 {
2542    const static nir_const_value null_values[][NIR_MAX_VEC_COMPONENTS] = {
2543       [nir_address_format_32bit_global] = {{0}},
2544       [nir_address_format_64bit_global] = {{0}},
2545       [nir_address_format_64bit_global_32bit_offset] = {{0}},
2546       [nir_address_format_64bit_bounded_global] = {{0}},
2547       [nir_address_format_32bit_index_offset] = {{.u32 = ~0}, {.u32 = ~0}},
2548       [nir_address_format_32bit_index_offset_pack64] = {{.u64 = ~0ull}},
2549       [nir_address_format_vec2_index_32bit_offset] = {{.u32 = ~0}, {.u32 = ~0}, {.u32 = ~0}},
2550       [nir_address_format_32bit_offset] = {{.u32 = ~0}},
2551       [nir_address_format_32bit_offset_as_64bit] = {{.u64 = ~0ull}},
2552       [nir_address_format_62bit_generic] = {{.u64 = 0}},
2553       [nir_address_format_logical] = {{.u32 = ~0}},
2554    };
2555 
2556    assert(addr_format < ARRAY_SIZE(null_values));
2557    return null_values[addr_format];
2558 }
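
/* Example (illustrative): given a nir_builder *b, the null value can be
 * turned into an SSA constant with the builder.  This assumes the usual
 * nir_address_format_num_components() / nir_address_format_bit_size()
 * helpers are available:
 *
 *    nir_ssa_def *null_ptr =
 *       nir_build_imm(b, nir_address_format_num_components(addr_format),
 *                     nir_address_format_bit_size(addr_format),
 *                     nir_address_format_null_value(addr_format));
 */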
2559 
2560 nir_ssa_def *
2561 nir_build_addr_ieq(nir_builder *b, nir_ssa_def *addr0, nir_ssa_def *addr1,
2562                    nir_address_format addr_format)
2563 {
2564    switch (addr_format) {
2565    case nir_address_format_32bit_global:
2566    case nir_address_format_64bit_global:
2567    case nir_address_format_64bit_bounded_global:
2568    case nir_address_format_32bit_index_offset:
2569    case nir_address_format_vec2_index_32bit_offset:
2570    case nir_address_format_32bit_offset:
2571    case nir_address_format_62bit_generic:
2572       return nir_ball_iequal(b, addr0, addr1);
2573 
2574    case nir_address_format_64bit_global_32bit_offset:
2575       return nir_ball_iequal(b, nir_channels(b, addr0, 0xb),
2576                                 nir_channels(b, addr1, 0xb));
2577 
2578    case nir_address_format_32bit_offset_as_64bit:
2579       assert(addr0->num_components == 1 && addr1->num_components == 1);
2580       return nir_ieq(b, nir_u2u32(b, addr0), nir_u2u32(b, addr1));
2581 
2582    case nir_address_format_32bit_index_offset_pack64:
2583       assert(addr0->num_components == 1 && addr1->num_components == 1);
2584       return nir_ball_iequal(b, nir_unpack_64_2x32(b, addr0), nir_unpack_64_2x32(b, addr1));
2585 
2586    case nir_address_format_logical:
2587       unreachable("Unsupported address format");
2588    }
2589 
2590    unreachable("Invalid address format");
2591 }
2592 
2593 nir_ssa_def *
2594 nir_build_addr_isub(nir_builder *b, nir_ssa_def *addr0, nir_ssa_def *addr1,
2595                     nir_address_format addr_format)
2596 {
2597    switch (addr_format) {
2598    case nir_address_format_32bit_global:
2599    case nir_address_format_64bit_global:
2600    case nir_address_format_32bit_offset:
2601    case nir_address_format_32bit_index_offset_pack64:
2602    case nir_address_format_62bit_generic:
2603       assert(addr0->num_components == 1);
2604       assert(addr1->num_components == 1);
2605       return nir_isub(b, addr0, addr1);
2606 
2607    case nir_address_format_32bit_offset_as_64bit:
2608       assert(addr0->num_components == 1);
2609       assert(addr1->num_components == 1);
2610       return nir_u2u64(b, nir_isub(b, nir_u2u32(b, addr0), nir_u2u32(b, addr1)));
2611 
2612    case nir_address_format_64bit_global_32bit_offset:
2613    case nir_address_format_64bit_bounded_global:
2614       return nir_isub(b, addr_to_global(b, addr0, addr_format),
2615                          addr_to_global(b, addr1, addr_format));
2616 
2617    case nir_address_format_32bit_index_offset:
2618       assert(addr0->num_components == 2);
2619       assert(addr1->num_components == 2);
2620       /* Assume the same buffer index. */
2621       return nir_isub(b, nir_channel(b, addr0, 1), nir_channel(b, addr1, 1));
2622 
2623    case nir_address_format_vec2_index_32bit_offset:
2624       assert(addr0->num_components == 3);
2625       assert(addr1->num_components == 3);
2626       /* Assume the same buffer index. */
2627       return nir_isub(b, nir_channel(b, addr0, 2), nir_channel(b, addr1, 2));
2628 
2629    case nir_address_format_logical:
2630       unreachable("Unsupported address format");
2631    }
2632 
2633    unreachable("Invalid address format");
2634 }
2635 
2636 static bool
2637 is_input(nir_intrinsic_instr *intrin)
2638 {
2639    return intrin->intrinsic == nir_intrinsic_load_input ||
2640           intrin->intrinsic == nir_intrinsic_load_per_vertex_input ||
2641           intrin->intrinsic == nir_intrinsic_load_interpolated_input ||
2642           intrin->intrinsic == nir_intrinsic_load_fs_input_interp_deltas;
2643 }
2644 
2645 static bool
2646 is_output(nir_intrinsic_instr *intrin)
2647 {
2648    return intrin->intrinsic == nir_intrinsic_load_output ||
2649           intrin->intrinsic == nir_intrinsic_load_per_vertex_output ||
2650           intrin->intrinsic == nir_intrinsic_load_per_primitive_output ||
2651           intrin->intrinsic == nir_intrinsic_store_output ||
2652           intrin->intrinsic == nir_intrinsic_store_per_vertex_output ||
2653           intrin->intrinsic == nir_intrinsic_store_per_primitive_output;
2654 }
2655 
2656 static bool is_dual_slot(nir_intrinsic_instr *intrin)
2657 {
2658    if (intrin->intrinsic == nir_intrinsic_store_output ||
2659        intrin->intrinsic == nir_intrinsic_store_per_vertex_output) {
2660       return nir_src_bit_size(intrin->src[0]) == 64 &&
2661              nir_src_num_components(intrin->src[0]) >= 3;
2662    }
2663 
2664    return nir_dest_bit_size(intrin->dest) == 64 &&
2665           nir_dest_num_components(intrin->dest) >= 3;
2666 }
2667 
2668 /**
2669  * This pass adds constant offsets to instr->const_index[0] for input/output
2670  * intrinsics, and resets the offset source to 0.  Non-constant offsets remain
2671  * unchanged - since we don't know what part of a compound variable is
2672  * accessed, we allocate storage for the entire thing. For drivers that use
2673  * nir_lower_io_to_temporaries() before nir_lower_io(), this guarantees that
2674  * the offset source will be 0, so that they don't have to add it in manually.
2675  */
2676 
2677 static bool
2678 add_const_offset_to_base_block(nir_block *block, nir_builder *b,
2679                                nir_variable_mode modes)
2680 {
2681    bool progress = false;
2682    nir_foreach_instr_safe(instr, block) {
2683       if (instr->type != nir_instr_type_intrinsic)
2684          continue;
2685 
2686       nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
2687 
2688       if (((modes & nir_var_shader_in) && is_input(intrin)) ||
2689           ((modes & nir_var_shader_out) && is_output(intrin))) {
2690          nir_src *offset = nir_get_io_offset_src(intrin);
2691 
2692          /* TODO: Better handling of per-view variables here */
2693          if (nir_src_is_const(*offset) &&
2694              !nir_intrinsic_io_semantics(intrin).per_view) {
2695             unsigned off = nir_src_as_uint(*offset);
2696 
2697             nir_intrinsic_set_base(intrin, nir_intrinsic_base(intrin) + off);
2698 
2699             nir_io_semantics sem = nir_intrinsic_io_semantics(intrin);
2700             sem.location += off;
2701             /* non-indirect indexing should reduce num_slots */
2702             sem.num_slots = is_dual_slot(intrin) ? 2 : 1;
2703             nir_intrinsic_set_io_semantics(intrin, sem);
2704 
2705             b->cursor = nir_before_instr(&intrin->instr);
2706             nir_instr_rewrite_src(&intrin->instr, offset,
2707                                   nir_src_for_ssa(nir_imm_int(b, 0)));
2708             progress = true;
2709          }
2710       }
2711    }
2712 
2713    return progress;
2714 }
2715 
2716 bool
2717 nir_io_add_const_offset_to_base(nir_shader *nir, nir_variable_mode modes)
2718 {
2719    bool progress = false;
2720 
2721    nir_foreach_function(f, nir) {
2722       if (f->impl) {
2723          nir_builder b;
2724          nir_builder_init(&b, f->impl);
2725          nir_foreach_block(block, f->impl) {
2726             progress |= add_const_offset_to_base_block(block, &b, modes);
2727          }
2728       }
2729    }
2730 
2731    return progress;
2732 }
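
/* Example of the transformation (illustrative): with modes including
 * nir_var_shader_in, a load_input with base 1 and a constant offset source
 * of 2 is rewritten to base 3 with a zero offset source, and its
 * io_semantics.location is bumped by 2 to match.
 */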
2733 
2734