1 /*
2  * Copyright © 2012 Intel Corporation
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
21  * DEALINGS IN THE SOFTWARE.
22  */
23 
24 /**
25  * \file lower_ubo_reference.cpp
26  *
27  * IR lower pass to replace dereferences of variables in a uniform
28  * buffer object with usage of ir_binop_ubo_load expressions, each of
29  * which can read data up to the size of a vec4.
30  *
31  * This relieves drivers of the responsibility to deal with tricky UBO
32  * layout issues like std140 structures and row_major matrices on
33  * their own.
34  */
35 
36 #include "lower_buffer_access.h"
37 #include "ir_builder.h"
38 #include "main/macros.h"
39 #include "glsl_parser_extras.h"
40 #include "main/mtypes.h"
41 
42 using namespace ir_builder;
43 
44 namespace {
45 class lower_ubo_reference_visitor :
46       public lower_buffer_access::lower_buffer_access {
47 public:
lower_ubo_reference_visitor(struct gl_linked_shader * shader,bool clamp_block_indices,bool use_std430_as_default)48    lower_ubo_reference_visitor(struct gl_linked_shader *shader,
49                                bool clamp_block_indices,
50                                bool use_std430_as_default)
51    : shader(shader), clamp_block_indices(clamp_block_indices),
52      struct_field(NULL), variable(NULL)
53    {
54       this->use_std430_as_default = use_std430_as_default;
55    }
56 
57    void handle_rvalue(ir_rvalue **rvalue);
58    ir_visitor_status visit_enter(ir_assignment *ir);
59 
60    void setup_for_load_or_store(void *mem_ctx,
61                                 ir_variable *var,
62                                 ir_rvalue *deref,
63                                 ir_rvalue **offset,
64                                 unsigned *const_offset,
65                                 bool *row_major,
66                                 const glsl_type **matrix_type,
67                                 enum glsl_interface_packing packing);
68    uint32_t ssbo_access_params();
69    ir_expression *ubo_load(void *mem_ctx, const struct glsl_type *type,
70 			   ir_rvalue *offset);
71    ir_call *ssbo_load(void *mem_ctx, const struct glsl_type *type,
72                       ir_rvalue *offset);
73 
74    bool check_for_buffer_array_copy(ir_assignment *ir);
75    bool check_for_buffer_struct_copy(ir_assignment *ir);
76    void check_for_ssbo_store(ir_assignment *ir);
77    void write_to_memory(void *mem_ctx, ir_dereference *deref, ir_variable *var,
78                         ir_variable *write_var, unsigned write_mask);
79    ir_call *ssbo_store(void *mem_ctx, ir_rvalue *deref, ir_rvalue *offset,
80                        unsigned write_mask);
81 
82    enum {
83       ubo_load_access,
84       ssbo_load_access,
85       ssbo_store_access,
86       ssbo_unsized_array_length_access,
87       ssbo_atomic_access,
88    } buffer_access_type;
89 
90    void insert_buffer_access(void *mem_ctx, ir_dereference *deref,
91                              const glsl_type *type, ir_rvalue *offset,
92                              unsigned mask, int channel);
93 
94    ir_visitor_status visit_enter(class ir_expression *);
95    ir_expression *calculate_ssbo_unsized_array_length(ir_expression *expr);
96    void check_ssbo_unsized_array_length_expression(class ir_expression *);
97    void check_ssbo_unsized_array_length_assignment(ir_assignment *ir);
98 
99    ir_expression *process_ssbo_unsized_array_length(ir_rvalue **,
100                                                     ir_dereference *,
101                                                     ir_variable *);
102    ir_expression *emit_ssbo_get_buffer_size(void *mem_ctx);
103 
104    unsigned calculate_unsized_array_stride(ir_dereference *deref,
105                                            enum glsl_interface_packing packing);
106 
107    ir_call *lower_ssbo_atomic_intrinsic(ir_call *ir);
108    ir_call *check_for_ssbo_atomic_intrinsic(ir_call *ir);
109    ir_visitor_status visit_enter(ir_call *ir);
110    ir_visitor_status visit_enter(ir_texture *ir);
111 
112    struct gl_linked_shader *shader;
113    bool clamp_block_indices;
114    const struct glsl_struct_field *struct_field;
115    ir_variable *variable;
116    ir_rvalue *uniform_block;
117    bool progress;
118 };
119 
120 /**
121  * Determine the name of the interface block field
122  *
123  * This is the name of the specific member as it would appear in the
124  * \c gl_uniform_buffer_variable::Name field in the shader's
125  * \c UniformBlocks array.
126  */
127 static const char *
interface_field_name(void * mem_ctx,char * base_name,ir_rvalue * d,ir_rvalue ** nonconst_block_index)128 interface_field_name(void *mem_ctx, char *base_name, ir_rvalue *d,
129                      ir_rvalue **nonconst_block_index)
130 {
131    *nonconst_block_index = NULL;
132    char *name_copy = NULL;
133    size_t base_length = 0;
134 
135    /* Loop back through the IR until we find the uniform block */
136    ir_rvalue *ir = d;
137    while (ir != NULL) {
138       switch (ir->ir_type) {
139       case ir_type_dereference_variable: {
140          /* Exit loop */
141          ir = NULL;
142          break;
143       }
144 
145       case ir_type_dereference_record: {
146          ir_dereference_record *r = (ir_dereference_record *) ir;
147          ir = r->record->as_dereference();
148 
149          /* If we got here it means any previous array subscripts belong to
150           * block members and not the block itself so skip over them in the
151           * next pass.
152           */
153          d = ir;
154          break;
155       }
156 
157       case ir_type_dereference_array: {
158          ir_dereference_array *a = (ir_dereference_array *) ir;
159          ir = a->array->as_dereference();
160          break;
161       }
162 
163       case ir_type_swizzle: {
164          ir_swizzle *s = (ir_swizzle *) ir;
165          ir = s->val->as_dereference();
166          /* Skip swizzle in the next pass */
167          d = ir;
168          break;
169       }
170 
171       default:
172          assert(!"Should not get here.");
173          break;
174       }
175    }
176 
177    while (d != NULL) {
178       switch (d->ir_type) {
179       case ir_type_dereference_variable: {
180          ir_dereference_variable *v = (ir_dereference_variable *) d;
181          if (name_copy != NULL &&
182              v->var->is_interface_instance() &&
183              v->var->type->is_array()) {
184             return name_copy;
185          } else {
186             *nonconst_block_index = NULL;
187             return base_name;
188          }
189 
190          break;
191       }
192 
193       case ir_type_dereference_array: {
194          ir_dereference_array *a = (ir_dereference_array *) d;
195          size_t new_length;
196 
197          if (name_copy == NULL) {
198             name_copy = ralloc_strdup(mem_ctx, base_name);
199             base_length = strlen(name_copy);
200          }
201 
202          /* For arrays of arrays we start at the innermost array and work our
203           * way out so we need to insert the subscript at the base of the
204           * name string rather than just attaching it to the end.
205           */
206          new_length = base_length;
207          ir_constant *const_index = a->array_index->as_constant();
208          char *end = ralloc_strdup(NULL, &name_copy[new_length]);
209          if (!const_index) {
210             ir_rvalue *array_index = a->array_index;
211             if (array_index->type != glsl_type::uint_type)
212                array_index = i2u(array_index);
213 
214             if (a->array->type->is_array() &&
215                 a->array->type->fields.array->is_array()) {
216                ir_constant *base_size = new(mem_ctx)
217                   ir_constant(a->array->type->fields.array->arrays_of_arrays_size());
218                array_index = mul(array_index, base_size);
219             }
220 
221             if (*nonconst_block_index) {
222                *nonconst_block_index = add(*nonconst_block_index, array_index);
223             } else {
224                *nonconst_block_index = array_index;
225             }
226 
227             ralloc_asprintf_rewrite_tail(&name_copy, &new_length, "[0]%s",
228                                          end);
229          } else {
230             ralloc_asprintf_rewrite_tail(&name_copy, &new_length, "[%d]%s",
231                                          const_index->get_uint_component(0),
232                                          end);
233          }
234          ralloc_free(end);
235 
236          d = a->array->as_dereference();
237 
238          break;
239       }
240 
241       default:
242          assert(!"Should not get here.");
243          break;
244       }
245    }
246 
247    assert(!"Should not get here.");
248    return NULL;
249 }
250 
251 static ir_rvalue *
clamp_to_array_bounds(void * mem_ctx,ir_rvalue * index,const glsl_type * type)252 clamp_to_array_bounds(void *mem_ctx, ir_rvalue *index, const glsl_type *type)
253 {
254    assert(type->is_array());
255 
256    const unsigned array_size = type->arrays_of_arrays_size();
257 
258    ir_constant *max_index = new(mem_ctx) ir_constant(array_size - 1);
259    max_index->type = index->type;
260 
261    ir_constant *zero = new(mem_ctx) ir_constant(0);
262    zero->type = index->type;
263 
264    if (index->type->base_type == GLSL_TYPE_INT)
265       index = max2(index, zero);
266    index = min2(index, max_index);
267 
268    return index;
269 }
270 
271 void
setup_for_load_or_store(void * mem_ctx,ir_variable * var,ir_rvalue * deref,ir_rvalue ** offset,unsigned * const_offset,bool * row_major,const glsl_type ** matrix_type,enum glsl_interface_packing packing)272 lower_ubo_reference_visitor::setup_for_load_or_store(void *mem_ctx,
273                                                      ir_variable *var,
274                                                      ir_rvalue *deref,
275                                                      ir_rvalue **offset,
276                                                      unsigned *const_offset,
277                                                      bool *row_major,
278                                                      const glsl_type **matrix_type,
279                                                      enum glsl_interface_packing packing)
280 {
281    /* Determine the name of the interface block */
282    ir_rvalue *nonconst_block_index;
283    const char *const field_name =
284       interface_field_name(mem_ctx, (char *) var->get_interface_type()->name,
285                            deref, &nonconst_block_index);
286 
287    if (nonconst_block_index && clamp_block_indices) {
288       nonconst_block_index =
289          clamp_to_array_bounds(mem_ctx, nonconst_block_index, var->type);
290    }
291 
292    /* Locate the block by interface name */
293    unsigned num_blocks;
294    struct gl_uniform_block **blocks;
295    if (this->buffer_access_type != ubo_load_access) {
296       num_blocks = shader->Program->info.num_ssbos;
297       blocks = shader->Program->sh.ShaderStorageBlocks;
298    } else {
299       num_blocks = shader->Program->info.num_ubos;
300       blocks = shader->Program->sh.UniformBlocks;
301    }
302    this->uniform_block = NULL;
303    for (unsigned i = 0; i < num_blocks; i++) {
304       if (strcmp(field_name, blocks[i]->Name) == 0) {
305 
306          ir_constant *index = new(mem_ctx) ir_constant(i);
307 
308          if (nonconst_block_index) {
309             this->uniform_block = add(nonconst_block_index, index);
310          } else {
311             this->uniform_block = index;
312          }
313 
314          if (var->is_interface_instance()) {
315             *const_offset = 0;
316          } else {
317             *const_offset = blocks[i]->Uniforms[var->data.location].Offset;
318          }
319 
320          break;
321       }
322    }
323 
324    assert(this->uniform_block);
325 
326    this->struct_field = NULL;
327    setup_buffer_access(mem_ctx, deref, offset, const_offset, row_major,
328                        matrix_type, &this->struct_field, packing);
329 }
330 
331 void
handle_rvalue(ir_rvalue ** rvalue)332 lower_ubo_reference_visitor::handle_rvalue(ir_rvalue **rvalue)
333 {
334    if (!*rvalue)
335       return;
336 
337    ir_dereference *deref = (*rvalue)->as_dereference();
338    if (!deref)
339       return;
340 
341    ir_variable *var = deref->variable_referenced();
342    if (!var || !var->is_in_buffer_block())
343       return;
344 
345    void *mem_ctx = ralloc_parent(shader->ir);
346 
347    ir_rvalue *offset = NULL;
348    unsigned const_offset;
349    bool row_major;
350    const glsl_type *matrix_type;
351 
352    enum glsl_interface_packing packing =
353       var->get_interface_type()->
354          get_internal_ifc_packing(use_std430_as_default);
355 
356    this->buffer_access_type =
357       var->is_in_shader_storage_block() ?
358       ssbo_load_access : ubo_load_access;
359    this->variable = var;
360 
361    /* Compute the offset to the start if the dereference as well as other
362     * information we need to configure the write
363     */
364    setup_for_load_or_store(mem_ctx, var, deref,
365                            &offset, &const_offset,
366                            &row_major, &matrix_type,
367                            packing);
368    assert(offset);
369 
370    /* Now that we've calculated the offset to the start of the
371     * dereference, walk over the type and emit loads into a temporary.
372     */
373    const glsl_type *type = (*rvalue)->type;
374    ir_variable *load_var = new(mem_ctx) ir_variable(type,
375 						    "ubo_load_temp",
376 						    ir_var_temporary);
377    base_ir->insert_before(load_var);
378 
379    ir_variable *load_offset = new(mem_ctx) ir_variable(glsl_type::uint_type,
380 						       "ubo_load_temp_offset",
381 						       ir_var_temporary);
382    base_ir->insert_before(load_offset);
383    base_ir->insert_before(assign(load_offset, offset));
384 
385    deref = new(mem_ctx) ir_dereference_variable(load_var);
386    emit_access(mem_ctx, false, deref, load_offset, const_offset,
387                row_major, matrix_type, packing, 0);
388    *rvalue = deref;
389 
390    progress = true;
391 }
392 
393 ir_expression *
ubo_load(void * mem_ctx,const glsl_type * type,ir_rvalue * offset)394 lower_ubo_reference_visitor::ubo_load(void *mem_ctx,
395                                       const glsl_type *type,
396 				      ir_rvalue *offset)
397 {
398    ir_rvalue *block_ref = this->uniform_block->clone(mem_ctx, NULL);
399    return new(mem_ctx)
400       ir_expression(ir_binop_ubo_load,
401                     type,
402                     block_ref,
403                     offset);
404 
405 }
406 
407 static bool
shader_storage_buffer_object(const _mesa_glsl_parse_state * state)408 shader_storage_buffer_object(const _mesa_glsl_parse_state *state)
409 {
410    return state->has_shader_storage_buffer_objects();
411 }
412 
413 uint32_t
ssbo_access_params()414 lower_ubo_reference_visitor::ssbo_access_params()
415 {
416    assert(variable);
417 
418    if (variable->is_interface_instance()) {
419       assert(struct_field);
420 
421       return ((struct_field->memory_coherent ? ACCESS_COHERENT : 0) |
422               (struct_field->memory_restrict ? ACCESS_RESTRICT : 0) |
423               (struct_field->memory_volatile ? ACCESS_VOLATILE : 0));
424    } else {
425       return ((variable->data.memory_coherent ? ACCESS_COHERENT : 0) |
426               (variable->data.memory_restrict ? ACCESS_RESTRICT : 0) |
427               (variable->data.memory_volatile ? ACCESS_VOLATILE : 0));
428    }
429 }
430 
431 ir_call *
ssbo_store(void * mem_ctx,ir_rvalue * deref,ir_rvalue * offset,unsigned write_mask)432 lower_ubo_reference_visitor::ssbo_store(void *mem_ctx,
433                                         ir_rvalue *deref,
434                                         ir_rvalue *offset,
435                                         unsigned write_mask)
436 {
437    exec_list sig_params;
438 
439    ir_variable *block_ref = new(mem_ctx)
440       ir_variable(glsl_type::uint_type, "block_ref" , ir_var_function_in);
441    sig_params.push_tail(block_ref);
442 
443    ir_variable *offset_ref = new(mem_ctx)
444       ir_variable(glsl_type::uint_type, "offset" , ir_var_function_in);
445    sig_params.push_tail(offset_ref);
446 
447    ir_variable *val_ref = new(mem_ctx)
448       ir_variable(deref->type, "value" , ir_var_function_in);
449    sig_params.push_tail(val_ref);
450 
451    ir_variable *writemask_ref = new(mem_ctx)
452       ir_variable(glsl_type::uint_type, "write_mask" , ir_var_function_in);
453    sig_params.push_tail(writemask_ref);
454 
455    ir_variable *access_ref = new(mem_ctx)
456       ir_variable(glsl_type::uint_type, "access" , ir_var_function_in);
457    sig_params.push_tail(access_ref);
458 
459    ir_function_signature *sig = new(mem_ctx)
460       ir_function_signature(glsl_type::void_type, shader_storage_buffer_object);
461    assert(sig);
462    sig->replace_parameters(&sig_params);
463    sig->intrinsic_id = ir_intrinsic_ssbo_store;
464 
465    ir_function *f = new(mem_ctx) ir_function("__intrinsic_store_ssbo");
466    f->add_signature(sig);
467 
468    exec_list call_params;
469    call_params.push_tail(this->uniform_block->clone(mem_ctx, NULL));
470    call_params.push_tail(offset->clone(mem_ctx, NULL));
471    call_params.push_tail(deref->clone(mem_ctx, NULL));
472    call_params.push_tail(new(mem_ctx) ir_constant(write_mask));
473    call_params.push_tail(new(mem_ctx) ir_constant(ssbo_access_params()));
474    return new(mem_ctx) ir_call(sig, NULL, &call_params);
475 }
476 
477 ir_call *
ssbo_load(void * mem_ctx,const struct glsl_type * type,ir_rvalue * offset)478 lower_ubo_reference_visitor::ssbo_load(void *mem_ctx,
479                                        const struct glsl_type *type,
480                                        ir_rvalue *offset)
481 {
482    exec_list sig_params;
483 
484    ir_variable *block_ref = new(mem_ctx)
485       ir_variable(glsl_type::uint_type, "block_ref" , ir_var_function_in);
486    sig_params.push_tail(block_ref);
487 
488    ir_variable *offset_ref = new(mem_ctx)
489       ir_variable(glsl_type::uint_type, "offset_ref" , ir_var_function_in);
490    sig_params.push_tail(offset_ref);
491 
492    ir_variable *access_ref = new(mem_ctx)
493       ir_variable(glsl_type::uint_type, "access" , ir_var_function_in);
494    sig_params.push_tail(access_ref);
495 
496    ir_function_signature *sig =
497       new(mem_ctx) ir_function_signature(type, shader_storage_buffer_object);
498    assert(sig);
499    sig->replace_parameters(&sig_params);
500    sig->intrinsic_id = ir_intrinsic_ssbo_load;
501 
502    ir_function *f = new(mem_ctx) ir_function("__intrinsic_load_ssbo");
503    f->add_signature(sig);
504 
505    ir_variable *result = new(mem_ctx)
506       ir_variable(type, "ssbo_load_result", ir_var_temporary);
507    base_ir->insert_before(result);
508    ir_dereference_variable *deref_result = new(mem_ctx)
509       ir_dereference_variable(result);
510 
511    exec_list call_params;
512    call_params.push_tail(this->uniform_block->clone(mem_ctx, NULL));
513    call_params.push_tail(offset->clone(mem_ctx, NULL));
514    call_params.push_tail(new(mem_ctx) ir_constant(ssbo_access_params()));
515 
516    return new(mem_ctx) ir_call(sig, deref_result, &call_params);
517 }
518 
519 void
insert_buffer_access(void * mem_ctx,ir_dereference * deref,const glsl_type * type,ir_rvalue * offset,unsigned mask,int channel)520 lower_ubo_reference_visitor::insert_buffer_access(void *mem_ctx,
521                                                   ir_dereference *deref,
522                                                   const glsl_type *type,
523                                                   ir_rvalue *offset,
524                                                   unsigned mask,
525                                                   int channel)
526 {
527    switch (this->buffer_access_type) {
528    case ubo_load_access:
529       base_ir->insert_before(assign(deref->clone(mem_ctx, NULL),
530                                     ubo_load(mem_ctx, type, offset),
531                                     mask));
532       break;
533    case ssbo_load_access: {
534       ir_call *load_ssbo = ssbo_load(mem_ctx, type, offset);
535       base_ir->insert_before(load_ssbo);
536       ir_rvalue *value = load_ssbo->return_deref->as_rvalue()->clone(mem_ctx, NULL);
537       ir_assignment *assignment =
538          assign(deref->clone(mem_ctx, NULL), value, mask);
539       base_ir->insert_before(assignment);
540       break;
541    }
542    case ssbo_store_access:
543       if (channel >= 0) {
544          base_ir->insert_after(ssbo_store(mem_ctx,
545                                           swizzle(deref, channel, 1),
546                                           offset, 1));
547       } else {
548          base_ir->insert_after(ssbo_store(mem_ctx, deref, offset, mask));
549       }
550       break;
551    default:
552       unreachable("invalid buffer_access_type in insert_buffer_access");
553    }
554 }
555 
556 void
write_to_memory(void * mem_ctx,ir_dereference * deref,ir_variable * var,ir_variable * write_var,unsigned write_mask)557 lower_ubo_reference_visitor::write_to_memory(void *mem_ctx,
558                                              ir_dereference *deref,
559                                              ir_variable *var,
560                                              ir_variable *write_var,
561                                              unsigned write_mask)
562 {
563    ir_rvalue *offset = NULL;
564    unsigned const_offset;
565    bool row_major;
566    const glsl_type *matrix_type;
567 
568    enum glsl_interface_packing packing =
569       var->get_interface_type()->
570          get_internal_ifc_packing(use_std430_as_default);
571 
572    this->buffer_access_type = ssbo_store_access;
573    this->variable = var;
574 
575    /* Compute the offset to the start if the dereference as well as other
576     * information we need to configure the write
577     */
578    setup_for_load_or_store(mem_ctx, var, deref,
579                            &offset, &const_offset,
580                            &row_major, &matrix_type,
581                            packing);
582    assert(offset);
583 
584    /* Now emit writes from the temporary to memory */
585    ir_variable *write_offset =
586       new(mem_ctx) ir_variable(glsl_type::uint_type,
587                                "ssbo_store_temp_offset",
588                                ir_var_temporary);
589 
590    base_ir->insert_before(write_offset);
591    base_ir->insert_before(assign(write_offset, offset));
592 
593    deref = new(mem_ctx) ir_dereference_variable(write_var);
594    emit_access(mem_ctx, true, deref, write_offset, const_offset,
595                row_major, matrix_type, packing, write_mask);
596 }
597 
598 ir_visitor_status
visit_enter(ir_expression * ir)599 lower_ubo_reference_visitor::visit_enter(ir_expression *ir)
600 {
601    check_ssbo_unsized_array_length_expression(ir);
602    return rvalue_visit(ir);
603 }
604 
605 ir_expression *
calculate_ssbo_unsized_array_length(ir_expression * expr)606 lower_ubo_reference_visitor::calculate_ssbo_unsized_array_length(ir_expression *expr)
607 {
608    if (expr->operation !=
609        ir_expression_operation(ir_unop_ssbo_unsized_array_length))
610       return NULL;
611 
612    ir_rvalue *rvalue = expr->operands[0]->as_rvalue();
613    if (!rvalue ||
614        !rvalue->type->is_array() || !rvalue->type->is_unsized_array())
615       return NULL;
616 
617    ir_dereference *deref = expr->operands[0]->as_dereference();
618    if (!deref)
619       return NULL;
620 
621    ir_variable *var = expr->operands[0]->variable_referenced();
622    if (!var || !var->is_in_shader_storage_block())
623       return NULL;
624    return process_ssbo_unsized_array_length(&rvalue, deref, var);
625 }
626 
627 void
check_ssbo_unsized_array_length_expression(ir_expression * ir)628 lower_ubo_reference_visitor::check_ssbo_unsized_array_length_expression(ir_expression *ir)
629 {
630    if (ir->operation ==
631        ir_expression_operation(ir_unop_ssbo_unsized_array_length)) {
632          /* Don't replace this unop if it is found alone. It is going to be
633           * removed by the optimization passes or replaced if it is part of
634           * an ir_assignment or another ir_expression.
635           */
636          return;
637    }
638 
639    for (unsigned i = 0; i < ir->num_operands; i++) {
640       if (ir->operands[i]->ir_type != ir_type_expression)
641          continue;
642       ir_expression *expr = (ir_expression *) ir->operands[i];
643       ir_expression *temp = calculate_ssbo_unsized_array_length(expr);
644       if (!temp)
645          continue;
646 
647       delete expr;
648       ir->operands[i] = temp;
649    }
650 }
651 
652 void
check_ssbo_unsized_array_length_assignment(ir_assignment * ir)653 lower_ubo_reference_visitor::check_ssbo_unsized_array_length_assignment(ir_assignment *ir)
654 {
655    if (!ir->rhs || ir->rhs->ir_type != ir_type_expression)
656       return;
657 
658    ir_expression *expr = (ir_expression *) ir->rhs;
659    ir_expression *temp = calculate_ssbo_unsized_array_length(expr);
660    if (!temp)
661       return;
662 
663    delete expr;
664    ir->rhs = temp;
665    return;
666 }
667 
668 ir_expression *
emit_ssbo_get_buffer_size(void * mem_ctx)669 lower_ubo_reference_visitor::emit_ssbo_get_buffer_size(void *mem_ctx)
670 {
671    ir_rvalue *block_ref = this->uniform_block->clone(mem_ctx, NULL);
672    return new(mem_ctx) ir_expression(ir_unop_get_buffer_size,
673                                      glsl_type::int_type,
674                                      block_ref);
675 }
676 
677 unsigned
calculate_unsized_array_stride(ir_dereference * deref,enum glsl_interface_packing packing)678 lower_ubo_reference_visitor::calculate_unsized_array_stride(ir_dereference *deref,
679                                                             enum glsl_interface_packing packing)
680 {
681    unsigned array_stride = 0;
682 
683    switch (deref->ir_type) {
684    case ir_type_dereference_variable:
685    {
686       ir_dereference_variable *deref_var = (ir_dereference_variable *)deref;
687       const struct glsl_type *unsized_array_type = NULL;
688       /* An unsized array can be sized by other lowering passes, so pick
689        * the first field of the array which has the data type of the unsized
690        * array.
691        */
692       unsized_array_type = deref_var->var->type->fields.array;
693 
694       /* Whether or not the field is row-major (because it might be a
695        * bvec2 or something) does not affect the array itself. We need
696        * to know whether an array element in its entirety is row-major.
697        */
698       const bool array_row_major =
699          is_dereferenced_thing_row_major(deref_var);
700 
701       if (packing == GLSL_INTERFACE_PACKING_STD430) {
702          array_stride = unsized_array_type->std430_array_stride(array_row_major);
703       } else {
704          array_stride = unsized_array_type->std140_size(array_row_major);
705          array_stride = glsl_align(array_stride, 16);
706       }
707       break;
708    }
709    case ir_type_dereference_record:
710    {
711       ir_dereference_record *deref_record = (ir_dereference_record *) deref;
712       ir_dereference *interface_deref =
713          deref_record->record->as_dereference();
714       assert(interface_deref != NULL);
715       const struct glsl_type *interface_type = interface_deref->type;
716       unsigned record_length = interface_type->length;
717       /* Unsized array is always the last element of the interface */
718       const struct glsl_type *unsized_array_type =
719          interface_type->fields.structure[record_length - 1].type->fields.array;
720 
721       const bool array_row_major =
722          is_dereferenced_thing_row_major(deref_record);
723 
724       if (packing == GLSL_INTERFACE_PACKING_STD430) {
725          array_stride = unsized_array_type->std430_array_stride(array_row_major);
726       } else {
727          array_stride = unsized_array_type->std140_size(array_row_major);
728          array_stride = glsl_align(array_stride, 16);
729       }
730       break;
731    }
732    default:
733       unreachable("Unsupported dereference type");
734    }
735    return array_stride;
736 }
737 
738 ir_expression *
process_ssbo_unsized_array_length(ir_rvalue ** rvalue,ir_dereference * deref,ir_variable * var)739 lower_ubo_reference_visitor::process_ssbo_unsized_array_length(ir_rvalue **rvalue,
740                                                                ir_dereference *deref,
741                                                                ir_variable *var)
742 {
743    void *mem_ctx = ralloc_parent(*rvalue);
744 
745    ir_rvalue *base_offset = NULL;
746    unsigned const_offset;
747    bool row_major;
748    const glsl_type *matrix_type;
749 
750    enum glsl_interface_packing packing =
751       var->get_interface_type()->
752          get_internal_ifc_packing(use_std430_as_default);
753    int unsized_array_stride =
754       calculate_unsized_array_stride(deref, packing);
755 
756    this->buffer_access_type = ssbo_unsized_array_length_access;
757    this->variable = var;
758 
759    /* Compute the offset to the start if the dereference as well as other
760     * information we need to calculate the length.
761     */
762    setup_for_load_or_store(mem_ctx, var, deref,
763                            &base_offset, &const_offset,
764                            &row_major, &matrix_type,
765                            packing);
766    /* array.length() =
767     *  max((buffer_object_size - offset_of_array) / stride_of_array, 0)
768     */
769    ir_expression *buffer_size = emit_ssbo_get_buffer_size(mem_ctx);
770 
771    ir_expression *offset_of_array = new(mem_ctx)
772       ir_expression(ir_binop_add, base_offset,
773                     new(mem_ctx) ir_constant(const_offset));
774    ir_expression *offset_of_array_int = new(mem_ctx)
775       ir_expression(ir_unop_u2i, offset_of_array);
776 
777    ir_expression *sub = new(mem_ctx)
778       ir_expression(ir_binop_sub, buffer_size, offset_of_array_int);
779    ir_expression *div =  new(mem_ctx)
780       ir_expression(ir_binop_div, sub,
781                     new(mem_ctx) ir_constant(unsized_array_stride));
782    ir_expression *max = new(mem_ctx)
783       ir_expression(ir_binop_max, div, new(mem_ctx) ir_constant(0));
784 
785    return max;
786 }
787 
788 void
check_for_ssbo_store(ir_assignment * ir)789 lower_ubo_reference_visitor::check_for_ssbo_store(ir_assignment *ir)
790 {
791    if (!ir || !ir->lhs)
792       return;
793 
794    ir_rvalue *rvalue = ir->lhs->as_rvalue();
795    if (!rvalue)
796       return;
797 
798    ir_dereference *deref = ir->lhs->as_dereference();
799    if (!deref)
800       return;
801 
802    ir_variable *var = ir->lhs->variable_referenced();
803    if (!var || !var->is_in_shader_storage_block())
804       return;
805 
806    /* We have a write to a buffer variable, so declare a temporary and rewrite
807     * the assignment so that the temporary is the LHS.
808     */
809    void *mem_ctx = ralloc_parent(shader->ir);
810 
811    const glsl_type *type = rvalue->type;
812    ir_variable *write_var = new(mem_ctx) ir_variable(type,
813                                                      "ssbo_store_temp",
814                                                      ir_var_temporary);
815    base_ir->insert_before(write_var);
816    ir->lhs = new(mem_ctx) ir_dereference_variable(write_var);
817 
818    /* Now we have to write the value assigned to the temporary back to memory */
819    write_to_memory(mem_ctx, deref, var, write_var, ir->write_mask);
820    progress = true;
821 }
822 
823 static bool
is_buffer_backed_variable(ir_variable * var)824 is_buffer_backed_variable(ir_variable *var)
825 {
826    return var->is_in_buffer_block() ||
827           var->data.mode == ir_var_shader_shared;
828 }
829 
830 bool
check_for_buffer_array_copy(ir_assignment * ir)831 lower_ubo_reference_visitor::check_for_buffer_array_copy(ir_assignment *ir)
832 {
833    if (!ir || !ir->lhs || !ir->rhs)
834       return false;
835 
836    /* LHS and RHS must be arrays
837     * FIXME: arrays of arrays?
838     */
839    if (!ir->lhs->type->is_array() || !ir->rhs->type->is_array())
840       return false;
841 
842    /* RHS must be a buffer-backed variable. This is what can cause the problem
843     * since it would lead to a series of loads that need to live until we
844     * see the writes to the LHS.
845     */
846    ir_variable *rhs_var = ir->rhs->variable_referenced();
847    if (!rhs_var || !is_buffer_backed_variable(rhs_var))
848       return false;
849 
850    /* Split the array copy into individual element copies to reduce
851     * register pressure
852     */
853    ir_dereference *rhs_deref = ir->rhs->as_dereference();
854    if (!rhs_deref)
855       return false;
856 
857    ir_dereference *lhs_deref = ir->lhs->as_dereference();
858    if (!lhs_deref)
859       return false;
860 
861    assert(lhs_deref->type->length == rhs_deref->type->length);
862    void *mem_ctx = ralloc_parent(shader->ir);
863 
864    for (unsigned i = 0; i < lhs_deref->type->length; i++) {
865       ir_dereference *lhs_i =
866          new(mem_ctx) ir_dereference_array(lhs_deref->clone(mem_ctx, NULL),
867                                            new(mem_ctx) ir_constant(i));
868 
869       ir_dereference *rhs_i =
870          new(mem_ctx) ir_dereference_array(rhs_deref->clone(mem_ctx, NULL),
871                                            new(mem_ctx) ir_constant(i));
872       ir->insert_after(assign(lhs_i, rhs_i));
873    }
874 
875    ir->remove();
876    progress = true;
877    return true;
878 }
879 
880 bool
check_for_buffer_struct_copy(ir_assignment * ir)881 lower_ubo_reference_visitor::check_for_buffer_struct_copy(ir_assignment *ir)
882 {
883    if (!ir || !ir->lhs || !ir->rhs)
884       return false;
885 
886    /* LHS and RHS must be records */
887    if (!ir->lhs->type->is_struct() || !ir->rhs->type->is_struct())
888       return false;
889 
890    /* RHS must be a buffer-backed variable. This is what can cause the problem
891     * since it would lead to a series of loads that need to live until we
892     * see the writes to the LHS.
893     */
894    ir_variable *rhs_var = ir->rhs->variable_referenced();
895    if (!rhs_var || !is_buffer_backed_variable(rhs_var))
896       return false;
897 
898    /* Split the struct copy into individual element copies to reduce
899     * register pressure
900     */
901    ir_dereference *rhs_deref = ir->rhs->as_dereference();
902    if (!rhs_deref)
903       return false;
904 
905    ir_dereference *lhs_deref = ir->lhs->as_dereference();
906    if (!lhs_deref)
907       return false;
908 
909    assert(lhs_deref->type == rhs_deref->type);
910    void *mem_ctx = ralloc_parent(shader->ir);
911 
912    for (unsigned i = 0; i < lhs_deref->type->length; i++) {
913       const char *field_name = lhs_deref->type->fields.structure[i].name;
914       ir_dereference *lhs_field =
915          new(mem_ctx) ir_dereference_record(lhs_deref->clone(mem_ctx, NULL),
916                                             field_name);
917       ir_dereference *rhs_field =
918          new(mem_ctx) ir_dereference_record(rhs_deref->clone(mem_ctx, NULL),
919                                             field_name);
920       ir->insert_after(assign(lhs_field, rhs_field));
921    }
922 
923    ir->remove();
924    progress = true;
925    return true;
926 }
927 
928 ir_visitor_status
visit_enter(ir_assignment * ir)929 lower_ubo_reference_visitor::visit_enter(ir_assignment *ir)
930 {
931    /* Array and struct copies could involve large amounts of load/store
932     * operations. To improve register pressure we want to special-case
933     * these and split them into individual element copies.
934     * This way we avoid emitting all the loads for the RHS first and
935     * all the writes for the LHS second and register usage is more
936     * efficient.
937     */
938    if (check_for_buffer_array_copy(ir))
939       return visit_continue_with_parent;
940 
941    if (check_for_buffer_struct_copy(ir))
942       return visit_continue_with_parent;
943 
944    check_ssbo_unsized_array_length_assignment(ir);
945    check_for_ssbo_store(ir);
946    return rvalue_visit(ir);
947 }
948 
949 /* Lowers the intrinsic call to a new internal intrinsic that swaps the
950  * access to the buffer variable in the first parameter by an offset
951  * and block index. This involves creating the new internal intrinsic
952  * (i.e. the new function signature).
953  */
954 ir_call *
lower_ssbo_atomic_intrinsic(ir_call * ir)955 lower_ubo_reference_visitor::lower_ssbo_atomic_intrinsic(ir_call *ir)
956 {
957    /* SSBO atomics usually have 2 parameters, the buffer variable and an
958     * integer argument. The exception is CompSwap, that has an additional
959     * integer parameter.
960     */
961    int param_count = ir->actual_parameters.length();
962    assert(param_count == 2 || param_count == 3);
963 
964    /* First argument must be a scalar integer buffer variable */
965    exec_node *param = ir->actual_parameters.get_head();
966    ir_instruction *inst = (ir_instruction *) param;
967    assert(inst->ir_type == ir_type_dereference_variable ||
968           inst->ir_type == ir_type_dereference_array ||
969           inst->ir_type == ir_type_dereference_record ||
970           inst->ir_type == ir_type_swizzle);
971 
972    ir_rvalue *deref = (ir_rvalue *) inst;
973    assert(deref->type->is_scalar() &&
974           (deref->type->is_integer_32() || deref->type->is_float()));
975 
976    ir_variable *var = deref->variable_referenced();
977    assert(var);
978 
979    /* Compute the offset to the start if the dereference and the
980     * block index
981     */
982    void *mem_ctx = ralloc_parent(shader->ir);
983 
984    ir_rvalue *offset = NULL;
985    unsigned const_offset;
986    bool row_major;
987    const glsl_type *matrix_type;
988 
989    enum glsl_interface_packing packing =
990       var->get_interface_type()->
991          get_internal_ifc_packing(use_std430_as_default);
992 
993    this->buffer_access_type = ssbo_atomic_access;
994    this->variable = var;
995 
996    setup_for_load_or_store(mem_ctx, var, deref,
997                            &offset, &const_offset,
998                            &row_major, &matrix_type,
999                            packing);
1000    assert(offset);
1001    assert(!row_major);
1002    assert(matrix_type == NULL);
1003 
1004    ir_rvalue *deref_offset =
1005       add(offset, new(mem_ctx) ir_constant(const_offset));
1006    ir_rvalue *block_index = this->uniform_block->clone(mem_ctx, NULL);
1007 
1008    /* Create the new internal function signature that will take a block
1009     * index and offset instead of a buffer variable
1010     */
1011    exec_list sig_params;
1012    ir_variable *sig_param = new(mem_ctx)
1013       ir_variable(glsl_type::uint_type, "block_ref" , ir_var_function_in);
1014    sig_params.push_tail(sig_param);
1015 
1016    sig_param = new(mem_ctx)
1017       ir_variable(glsl_type::uint_type, "offset" , ir_var_function_in);
1018    sig_params.push_tail(sig_param);
1019 
1020    const glsl_type *type = deref->type->get_scalar_type();
1021    sig_param = new(mem_ctx)
1022          ir_variable(type, "data1", ir_var_function_in);
1023    sig_params.push_tail(sig_param);
1024 
1025    if (param_count == 3) {
1026       sig_param = new(mem_ctx)
1027             ir_variable(type, "data2", ir_var_function_in);
1028       sig_params.push_tail(sig_param);
1029    }
1030 
1031    ir_function_signature *sig =
1032       new(mem_ctx) ir_function_signature(deref->type,
1033                                          shader_storage_buffer_object);
1034    assert(sig);
1035    sig->replace_parameters(&sig_params);
1036 
1037    assert(ir->callee->intrinsic_id >= ir_intrinsic_generic_load);
1038    assert(ir->callee->intrinsic_id <= ir_intrinsic_generic_atomic_comp_swap);
1039    sig->intrinsic_id = MAP_INTRINSIC_TO_TYPE(ir->callee->intrinsic_id, ssbo);
1040 
1041    char func_name[64];
1042    sprintf(func_name, "%s_ssbo", ir->callee_name());
1043    ir_function *f = new(mem_ctx) ir_function(func_name);
1044    f->add_signature(sig);
1045 
1046    /* Now, create the call to the internal intrinsic */
1047    exec_list call_params;
1048    call_params.push_tail(block_index);
1049    call_params.push_tail(deref_offset);
1050    param = ir->actual_parameters.get_head()->get_next();
1051    ir_rvalue *param_as_rvalue = ((ir_instruction *) param)->as_rvalue();
1052    call_params.push_tail(param_as_rvalue->clone(mem_ctx, NULL));
1053    if (param_count == 3) {
1054       param = param->get_next();
1055       param_as_rvalue = ((ir_instruction *) param)->as_rvalue();
1056       call_params.push_tail(param_as_rvalue->clone(mem_ctx, NULL));
1057    }
1058    ir_dereference_variable *return_deref =
1059       ir->return_deref->clone(mem_ctx, NULL);
1060    return new(mem_ctx) ir_call(sig, return_deref, &call_params);
1061 }
1062 
1063 ir_call *
check_for_ssbo_atomic_intrinsic(ir_call * ir)1064 lower_ubo_reference_visitor::check_for_ssbo_atomic_intrinsic(ir_call *ir)
1065 {
1066    exec_list& params = ir->actual_parameters;
1067 
1068    if (params.length() < 2 || params.length() > 3)
1069       return ir;
1070 
1071    ir_rvalue *rvalue =
1072       ((ir_instruction *) params.get_head())->as_rvalue();
1073    if (!rvalue)
1074       return ir;
1075 
1076    ir_variable *var = rvalue->variable_referenced();
1077    if (!var || !var->is_in_shader_storage_block())
1078       return ir;
1079 
1080    const enum ir_intrinsic_id id = ir->callee->intrinsic_id;
1081    if (id == ir_intrinsic_generic_atomic_add ||
1082        id == ir_intrinsic_generic_atomic_min ||
1083        id == ir_intrinsic_generic_atomic_max ||
1084        id == ir_intrinsic_generic_atomic_and ||
1085        id == ir_intrinsic_generic_atomic_or ||
1086        id == ir_intrinsic_generic_atomic_xor ||
1087        id == ir_intrinsic_generic_atomic_exchange ||
1088        id == ir_intrinsic_generic_atomic_comp_swap) {
1089       return lower_ssbo_atomic_intrinsic(ir);
1090    }
1091 
1092    return ir;
1093 }
1094 
1095 
1096 ir_visitor_status
visit_enter(ir_call * ir)1097 lower_ubo_reference_visitor::visit_enter(ir_call *ir)
1098 {
1099    ir_call *new_ir = check_for_ssbo_atomic_intrinsic(ir);
1100    if (new_ir != ir) {
1101       progress = true;
1102       base_ir->replace_with(new_ir);
1103       return visit_continue_with_parent;
1104    }
1105 
1106    return rvalue_visit(ir);
1107 }
1108 
1109 
1110 ir_visitor_status
visit_enter(ir_texture * ir)1111 lower_ubo_reference_visitor::visit_enter(ir_texture *ir)
1112 {
1113    ir_dereference *sampler = ir->sampler;
1114 
1115    if (sampler->ir_type == ir_type_dereference_record) {
1116       handle_rvalue((ir_rvalue **)&ir->sampler);
1117       return visit_continue_with_parent;
1118    }
1119 
1120    return rvalue_visit(ir);
1121 }
1122 
1123 
1124 } /* unnamed namespace */
1125 
1126 void
lower_ubo_reference(struct gl_linked_shader * shader,bool clamp_block_indices,bool use_std430_as_default)1127 lower_ubo_reference(struct gl_linked_shader *shader,
1128                     bool clamp_block_indices, bool use_std430_as_default)
1129 {
1130    lower_ubo_reference_visitor v(shader, clamp_block_indices,
1131                                  use_std430_as_default);
1132 
1133    /* Loop over the instructions lowering references, because we take
1134     * a deref of a UBO array using a UBO dereference as the index will
1135     * produce a collection of instructions all of which have cloned
1136     * UBO dereferences for that array index.
1137     */
1138    do {
1139       v.progress = false;
1140       visit_list_elements(&v, shader->ir);
1141    } while (v.progress);
1142 }
1143