/*
 * Copyright © 2015 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include "compiler/brw_nir.h"
#include "compiler/glsl/ir_uniform.h"
#include "compiler/nir/nir_builder.h"
#include "brw_program.h"

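/* Walks the state slots of a built-in GLSL uniform (a gl_* state variable)
 * and records one BRW_PARAM_PARAMETER entry per unique swizzle component,
 * so the params match the layout of the array/matrix/struct being filled.
 */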
static void
brw_nir_setup_glsl_builtin_uniform(nir_variable *var,
                                   const struct gl_program *prog,
                                   struct brw_stage_prog_data *stage_prog_data,
                                   bool is_scalar)
{
   assert(var->state_slots != NULL);
   const nir_state_slot *const slots = var->state_slots;

   unsigned uniform_index = var->data.driver_location / 4;
   for (unsigned int i = 0; i < var->num_state_slots; i++) {
      /* This state reference has already been set up by ir_to_mesa, but
       * we'll get the same index back here.
       */
      int index = _mesa_add_state_reference(prog->Parameters,
                                            slots[i].tokens);

      /* Add each of the unique swizzles of the element as a parameter.
       * This'll end up matching the expected layout of the
       * array/matrix/structure we're trying to fill in.
       */
      int last_swiz = -1;
      for (unsigned j = 0; j < 4; j++) {
         int swiz = GET_SWZ(slots[i].swizzle, j);

         /* If we hit a pair of identical swizzles, this means we've hit the
          * end of the builtin variable.  In scalar mode, we should just quit
          * and move on to the next one.  In vec4, we need to continue and pad
          * it out to 4 components.
          */
         if (swiz == last_swiz && is_scalar)
            break;

         last_swiz = swiz;

         stage_prog_data->param[uniform_index++] =
            BRW_PARAM_PARAMETER(index, swiz);
      }
   }
}

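/* Fills a four-entry param vector: the first n entries reference
 * consecutive dwords of image idx's brw_image_param struct starting at
 * byte offset, and the remainder are zero-padded.
 */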
static void
setup_vec4_image_param(uint32_t *params, uint32_t idx,
                       unsigned offset, unsigned n)
{
   assert(offset % sizeof(uint32_t) == 0);
   for (unsigned i = 0; i < n; ++i)
      params[i] = BRW_PARAM_IMAGE(idx, offset / sizeof(uint32_t) + i);

   for (unsigned i = n; i < 4; ++i)
      params[i] = BRW_PARAM_BUILTIN_ZERO;
}

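/* Sets up the params for every image bound to var so the uploaded data
 * follows the brw_image_param layout given by the BRW_IMAGE_PARAM_*_OFFSET
 * defines.
 */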
static void
brw_setup_image_uniform_values(nir_variable *var,
                               struct brw_stage_prog_data *prog_data)
{
   unsigned param_start_index = var->data.driver_location / 4;
   uint32_t *param = &prog_data->param[param_start_index];
   unsigned num_images = MAX2(1, var->type->arrays_of_arrays_size());

   for (unsigned i = 0; i < num_images; i++) {
      const unsigned image_idx = var->data.binding + i;

      /* Upload the brw_image_param structure.  The order is expected to match
       * the BRW_IMAGE_PARAM_*_OFFSET defines.
       */
      setup_vec4_image_param(param + BRW_IMAGE_PARAM_OFFSET_OFFSET,
                             image_idx,
                             offsetof(brw_image_param, offset), 2);
      setup_vec4_image_param(param + BRW_IMAGE_PARAM_SIZE_OFFSET,
                             image_idx,
                             offsetof(brw_image_param, size), 3);
      setup_vec4_image_param(param + BRW_IMAGE_PARAM_STRIDE_OFFSET,
                             image_idx,
                             offsetof(brw_image_param, stride), 4);
      setup_vec4_image_param(param + BRW_IMAGE_PARAM_TILING_OFFSET,
                             image_idx,
                             offsetof(brw_image_param, tiling), 3);
      setup_vec4_image_param(param + BRW_IMAGE_PARAM_SWIZZLING_OFFSET,
                             image_idx,
                             offsetof(brw_image_param, swizzling), 2);
      param += BRW_IMAGE_PARAM_SIZE;
   }
}

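/* Returns the number of gl_uniform_storage slots occupied by a type.
 * gl_uniform_storage copes with one level of array itself, so only structs
 * and arrays of composite elements need recursive counting.
 */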
static unsigned
count_uniform_storage_slots(const struct glsl_type *type)
{
   /* gl_uniform_storage can cope with one level of array, so if the
    * type is a composite type or an array where each element occupies
    * more than one slot, then we need to recursively process it.
    */
   if (glsl_type_is_struct_or_ifc(type)) {
      unsigned location_count = 0;

      for (unsigned i = 0; i < glsl_get_length(type); i++) {
         const struct glsl_type *field_type = glsl_get_struct_field(type, i);

         location_count += count_uniform_storage_slots(field_type);
      }

      return location_count;
   }

   if (glsl_type_is_array(type)) {
      const struct glsl_type *element_type = glsl_get_array_element(type);

      if (glsl_type_is_array(element_type) ||
          glsl_type_is_struct_or_ifc(element_type)) {
         unsigned element_count = count_uniform_storage_slots(element_type);
         return element_count * glsl_get_length(type);
      }
   }

   return 1;
}

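/* Maps a regular (non-builtin) GLSL uniform onto BRW_PARAM_UNIFORM entries
 * by walking its gl_uniform_storage slots in declaration order.  Samplers
 * are skipped and images go through brw_setup_image_uniform_values().
 */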
static void
brw_nir_setup_glsl_uniform(gl_shader_stage stage, nir_variable *var,
                           const struct gl_program *prog,
                           struct brw_stage_prog_data *stage_prog_data,
                           bool is_scalar)
{
   if (var->type->without_array()->is_sampler())
      return;

   if (var->type->without_array()->is_image()) {
      brw_setup_image_uniform_values(var, stage_prog_data);
      return;
   }

   /* The data for our (non-builtin) uniforms is stored in a series of
    * gl_uniform_storage structs for each subcomponent that
    * glGetUniformLocation() could name.  We know it's been set up in the same
    * order we'd walk the type, so walk the list of storage that matches the
    * range of slots covered by this variable.
    */
   unsigned uniform_index = var->data.driver_location / 4;
   unsigned num_slots = count_uniform_storage_slots(var->type);
   for (unsigned u = 0; u < num_slots; u++) {
      struct gl_uniform_storage *storage =
         &prog->sh.data->UniformStorage[var->data.location + u];

      /* We already handled samplers and images via the separate top-level
       * variables created by gl_nir_lower_samplers_as_deref(), but they're
       * still part of the structure's storage, and so we'll see them while
       * walking it to set up the other regular fields.  Just skip over them.
       */
      if (storage->builtin ||
          storage->type->is_sampler() ||
          storage->type->is_image())
         continue;

      gl_constant_value *components = storage->storage;
      unsigned vector_count = (MAX2(storage->array_elements, 1) *
                               storage->type->matrix_columns);
      unsigned vector_size = storage->type->vector_elements;
      unsigned max_vector_size = 4;
      if (storage->type->base_type == GLSL_TYPE_DOUBLE ||
          storage->type->base_type == GLSL_TYPE_UINT64 ||
          storage->type->base_type == GLSL_TYPE_INT64) {
         vector_size *= 2;
         if (vector_size > 4)
            max_vector_size = 8;
      }

      for (unsigned s = 0; s < vector_count; s++) {
         unsigned i;
         for (i = 0; i < vector_size; i++) {
            uint32_t idx = components - prog->sh.data->UniformDataSlots;
            stage_prog_data->param[uniform_index++] = BRW_PARAM_UNIFORM(idx);
            components++;
         }

         if (!is_scalar) {
            /* Pad out with zeros if needed (only needed for vec4) */
            for (; i < max_vector_size; i++) {
               stage_prog_data->param[uniform_index++] =
                  BRW_PARAM_BUILTIN_ZERO;
            }
         }
      }
   }
}

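/* Allocates and fills the param array for a GLSL program, dispatching each
 * uniform variable to the builtin or regular path above.
 */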
void
brw_nir_setup_glsl_uniforms(void *mem_ctx, nir_shader *shader,
                            const struct gl_program *prog,
                            struct brw_stage_prog_data *stage_prog_data,
                            bool is_scalar)
{
   unsigned nr_params = shader->num_uniforms / 4;
   stage_prog_data->nr_params = nr_params;
   stage_prog_data->param = rzalloc_array(mem_ctx, uint32_t, nr_params);

   nir_foreach_uniform_variable(var, shader) {
      /* UBOs, atomics, and samplers don't take up space in the uniform
       * file.
       */
      if (var->interface_type != NULL || var->type->contains_atomic())
         continue;

      if (var->num_state_slots > 0) {
         brw_nir_setup_glsl_builtin_uniform(var, prog, stage_prog_data,
                                            is_scalar);
      } else {
         brw_nir_setup_glsl_uniform(shader->info.stage, var, prog,
                                    stage_prog_data, is_scalar);
      }
   }
}

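/* ARB assembly programs get one vec4 param slot per entry in the program's
 * parameter list; components beyond an entry's size are zero-padded.
 */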
void
brw_nir_setup_arb_uniforms(void *mem_ctx, nir_shader *shader,
                           struct gl_program *prog,
                           struct brw_stage_prog_data *stage_prog_data)
{
   struct gl_program_parameter_list *plist = prog->Parameters;

   unsigned nr_params = plist->NumParameters * 4;
   stage_prog_data->nr_params = nr_params;
   stage_prog_data->param = rzalloc_array(mem_ctx, uint32_t, nr_params);

   /* For ARB programs, prog_to_nir generates a single "parameters" variable
    * for all uniform data.  There may be additional sampler variables, and
    * an extra uniform from nir_lower_wpos_ytransform.
    */

   for (unsigned p = 0; p < plist->NumParameters; p++) {
      /* Parameters should be either vec4 uniforms or single component
       * constants; matrices and other larger types should have been broken
       * down earlier.
       */
      assert(plist->Parameters[p].Size <= 4);

      unsigned i;
      for (i = 0; i < plist->Parameters[p].Size; i++)
         stage_prog_data->param[4 * p + i] = BRW_PARAM_PARAMETER(p, i);
      for (; i < 4; i++)
         stage_prog_data->param[4 * p + i] = BRW_PARAM_BUILTIN_ZERO;
   }
}

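/* Computes the flattened offset of an array-of-arrays deref in units of
 * elem_size, clamped to the last valid element so an out-of-bounds index
 * cannot select an invalid surface.
 */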
static nir_ssa_def *
get_aoa_deref_offset(nir_builder *b,
                     nir_deref_instr *deref,
                     unsigned elem_size)
{
   unsigned array_size = elem_size;
   nir_ssa_def *offset = nir_imm_int(b, 0);

   while (deref->deref_type != nir_deref_type_var) {
      assert(deref->deref_type == nir_deref_type_array);
      assert(deref->arr.index.ssa);

      /* This level's element size is the previous level's array size */
      nir_ssa_def *index = nir_ssa_for_src(b, deref->arr.index, 1);
      offset = nir_iadd(b, offset,
                        nir_imul(b, index, nir_imm_int(b, array_size)));

      deref = nir_deref_instr_parent(deref);
      assert(glsl_type_is_array(deref->type));
      array_size *= glsl_get_length(deref->type);
   }

   /* Accessing an invalid surface index with the dataport can result in a
    * hang.  According to the spec "if the index used to select an individual
    * element is negative or greater than or equal to the size of the array,
    * the results of the operation are undefined but may not lead to
    * termination" -- which is one of the possible outcomes of the hang.
    * Clamp the index to prevent access outside of the array bounds.
    */
   return nir_umin(b, offset, nir_imm_int(b, array_size - elem_size));
}

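/* Rewrites image-deref intrinsics to use flat image indices, and lowers
 * image param loads to uniform loads from the brw_image_param data placed
 * after the shader's other uniforms.
 */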
void
brw_nir_lower_gl_images(nir_shader *shader,
                        const struct gl_program *prog)
{
   /* We put image uniforms at the end */
   nir_foreach_uniform_variable(var, shader) {
      if (!var->type->contains_image())
         continue;

      /* GL only allows arrays of arrays of images */
      assert(var->type->without_array()->is_image());
      const unsigned num_images = MAX2(1, var->type->arrays_of_arrays_size());

      var->data.driver_location = shader->num_uniforms;
      shader->num_uniforms += num_images * BRW_IMAGE_PARAM_SIZE * 4;
   }

   nir_function_impl *impl = nir_shader_get_entrypoint(shader);

   nir_builder b;
   nir_builder_init(&b, impl);

   nir_foreach_block(block, impl) {
      nir_foreach_instr_safe(instr, block) {
         if (instr->type != nir_instr_type_intrinsic)
            continue;

         nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
         switch (intrin->intrinsic) {
         case nir_intrinsic_image_deref_load:
         case nir_intrinsic_image_deref_store:
         case nir_intrinsic_image_deref_atomic_add:
         case nir_intrinsic_image_deref_atomic_imin:
         case nir_intrinsic_image_deref_atomic_umin:
         case nir_intrinsic_image_deref_atomic_imax:
         case nir_intrinsic_image_deref_atomic_umax:
         case nir_intrinsic_image_deref_atomic_and:
         case nir_intrinsic_image_deref_atomic_or:
         case nir_intrinsic_image_deref_atomic_xor:
         case nir_intrinsic_image_deref_atomic_exchange:
         case nir_intrinsic_image_deref_atomic_comp_swap:
         case nir_intrinsic_image_deref_size:
         case nir_intrinsic_image_deref_samples:
         case nir_intrinsic_image_deref_load_raw_intel:
         case nir_intrinsic_image_deref_store_raw_intel: {
            nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]);
            nir_variable *var = nir_deref_instr_get_variable(deref);

            struct gl_uniform_storage *storage =
               &prog->sh.data->UniformStorage[var->data.location];
            const unsigned image_var_idx =
               storage->opaque[shader->info.stage].index;

            b.cursor = nir_before_instr(&intrin->instr);
            nir_ssa_def *index = nir_iadd(&b, nir_imm_int(&b, image_var_idx),
                                          get_aoa_deref_offset(&b, deref, 1));
            nir_rewrite_image_intrinsic(intrin, index, false);
            break;
         }

         case nir_intrinsic_image_deref_load_param_intel: {
            nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]);
            nir_variable *var = nir_deref_instr_get_variable(deref);
            const unsigned num_images =
               MAX2(1, var->type->arrays_of_arrays_size());

            b.cursor = nir_instr_remove(&intrin->instr);

            const unsigned param = nir_intrinsic_base(intrin);
            nir_ssa_def *offset =
               get_aoa_deref_offset(&b, deref, BRW_IMAGE_PARAM_SIZE * 4);
            offset = nir_iadd(&b, offset, nir_imm_int(&b, param * 16));

            nir_intrinsic_instr *load =
               nir_intrinsic_instr_create(b.shader,
                                          nir_intrinsic_load_uniform);
            nir_intrinsic_set_base(load, var->data.driver_location);
            nir_intrinsic_set_range(load, num_images * BRW_IMAGE_PARAM_SIZE * 4);
            load->src[0] = nir_src_for_ssa(offset);
            load->num_components = intrin->dest.ssa.num_components;
            nir_ssa_dest_init(&load->instr, &load->dest,
                              intrin->dest.ssa.num_components,
                              intrin->dest.ssa.bit_size, NULL);
            nir_builder_instr_insert(&b, &load->instr);

            nir_ssa_def_rewrite_uses(&intrin->dest.ssa,
                                     &load->dest.ssa);
            break;
         }

         default:
            break;
         }
      }
   }
}

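/* Lowers legacy user clip planes: runs nir_lower_clip_vs, appends one vec4
 * of clip-plane builtin params per enabled plane, and turns
 * load_user_clip_plane intrinsics into uniform loads from that range.
 */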
void
brw_nir_lower_legacy_clipping(nir_shader *nir, int nr_userclip_plane_consts,
                              struct brw_stage_prog_data *prog_data)
{
   if (nr_userclip_plane_consts == 0)
      return;

   nir_function_impl *impl = nir_shader_get_entrypoint(nir);

   nir_lower_clip_vs(nir, (1 << nr_userclip_plane_consts) - 1, true, false,
                     NULL);
   nir_lower_io_to_temporaries(nir, impl, true, false);
   nir_lower_global_vars_to_local(nir);
   nir_lower_vars_to_ssa(nir);

   const unsigned clip_plane_base = nir->num_uniforms;

   assert(nir->num_uniforms == prog_data->nr_params * 4);
   const unsigned num_clip_floats = 4 * nr_userclip_plane_consts;
   uint32_t *clip_param =
      brw_stage_prog_data_add_params(prog_data, num_clip_floats);
   nir->num_uniforms += num_clip_floats * sizeof(float);
   assert(nir->num_uniforms == prog_data->nr_params * 4);

   for (unsigned i = 0; i < num_clip_floats; i++)
      clip_param[i] = BRW_PARAM_BUILTIN_CLIP_PLANE(i / 4, i % 4);

   nir_builder b;
   nir_builder_init(&b, impl);
   nir_foreach_block(block, impl) {
      nir_foreach_instr_safe(instr, block) {
         if (instr->type != nir_instr_type_intrinsic)
            continue;

         nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
         if (intrin->intrinsic != nir_intrinsic_load_user_clip_plane)
            continue;

         b.cursor = nir_before_instr(instr);

         nir_intrinsic_instr *load =
            nir_intrinsic_instr_create(nir, nir_intrinsic_load_uniform);
         load->num_components = 4;
         load->src[0] = nir_src_for_ssa(nir_imm_int(&b, 0));
         nir_ssa_dest_init(&load->instr, &load->dest, 4, 32, NULL);
         nir_intrinsic_set_base(load, clip_plane_base + 4 * sizeof(float) *
                                      nir_intrinsic_ucp_id(intrin));
         nir_intrinsic_set_range(load, 4 * sizeof(float));
         nir_builder_instr_insert(&b, &load->instr);

         nir_ssa_def_rewrite_uses(&intrin->dest.ssa,
                                  &load->dest.ssa);
         nir_instr_remove(instr);
      }
   }
}