1 /*
2  * Copyright © 2012 Intel Corporation
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
21  * DEALINGS IN THE SOFTWARE.
22  */
23 
24 /**
25  * \file lower_ubo_reference.cpp
26  *
27  * IR lower pass to replace dereferences of variables in a uniform
28  * buffer object with usage of ir_binop_ubo_load expressions, each of
29  * which can read data up to the size of a vec4.
30  *
31  * This relieves drivers of the responsibility to deal with tricky UBO
32  * layout issues like std140 structures and row_major matrices on
33  * their own.
34  */
35 
36 #include "ir.h"
37 #include "ir_builder.h"
38 #include "ir_rvalue_visitor.h"
39 #include "main/macros.h"
40 
41 using namespace ir_builder;
42 
43 /**
44  * Determine if a thing being dereferenced is row-major
45  *
46  * There is some trickery here.
47  *
48  * If the thing being dereferenced is a member of uniform block \b without an
49  * instance name, then the name of the \c ir_variable is the field name of an
50  * interface type.  If this field is row-major, then the thing referenced is
51  * row-major.
52  *
53  * If the thing being dereferenced is a member of uniform block \b with an
54  * instance name, then the last dereference in the tree will be an
55  * \c ir_dereference_record.  If that record field is row-major, then the
56  * thing referenced is row-major.
57  */
58 static bool
is_dereferenced_thing_row_major(const ir_dereference * deref)59 is_dereferenced_thing_row_major(const ir_dereference *deref)
60 {
61    bool matrix = false;
62    const ir_rvalue *ir = deref;
63 
64    while (true) {
65       matrix = matrix || ir->type->without_array()->is_matrix();
66 
67       switch (ir->ir_type) {
68       case ir_type_dereference_array: {
69          const ir_dereference_array *const array_deref =
70             (const ir_dereference_array *) ir;
71 
72          ir = array_deref->array;
73          break;
74       }
75 
76       case ir_type_dereference_record: {
77          const ir_dereference_record *const record_deref =
78             (const ir_dereference_record *) ir;
79 
80          ir = record_deref->record;
81 
82          const int idx = ir->type->field_index(record_deref->field);
83          assert(idx >= 0);
84 
85          const enum glsl_matrix_layout matrix_layout =
86             glsl_matrix_layout(ir->type->fields.structure[idx].matrix_layout);
87 
88          switch (matrix_layout) {
89          case GLSL_MATRIX_LAYOUT_INHERITED:
90             break;
91          case GLSL_MATRIX_LAYOUT_COLUMN_MAJOR:
92             return false;
93          case GLSL_MATRIX_LAYOUT_ROW_MAJOR:
94             return matrix || deref->type->without_array()->is_record();
95          }
96 
97          break;
98       }
99 
100       case ir_type_dereference_variable: {
101          const ir_dereference_variable *const var_deref =
102             (const ir_dereference_variable *) ir;
103 
104          const enum glsl_matrix_layout matrix_layout =
105             glsl_matrix_layout(var_deref->var->data.matrix_layout);
106 
107          switch (matrix_layout) {
108          case GLSL_MATRIX_LAYOUT_INHERITED:
109             assert(!matrix);
110             return false;
111          case GLSL_MATRIX_LAYOUT_COLUMN_MAJOR:
112             return false;
113          case GLSL_MATRIX_LAYOUT_ROW_MAJOR:
114             return matrix || deref->type->without_array()->is_record();
115          }
116 
117          unreachable("invalid matrix layout");
118          break;
119       }
120 
121       default:
122          return false;
123       }
124    }
125 
126    /* The tree must have ended with a dereference that wasn't an
127     * ir_dereference_variable.  That is invalid, and it should be impossible.
128     */
129    unreachable("invalid dereference tree");
130    return false;
131 }
132 
133 namespace {
134 class lower_ubo_reference_visitor : public ir_rvalue_enter_visitor {
135 public:
lower_ubo_reference_visitor(struct gl_shader * shader)136    lower_ubo_reference_visitor(struct gl_shader *shader)
137    : shader(shader)
138    {
139    }
140 
141    void handle_rvalue(ir_rvalue **rvalue);
142    void emit_ubo_loads(ir_dereference *deref, ir_variable *base_offset,
143 		       unsigned int deref_offset, bool row_major);
144    ir_expression *ubo_load(const struct glsl_type *type,
145 			   ir_rvalue *offset);
146 
147    void *mem_ctx;
148    struct gl_shader *shader;
149    struct gl_uniform_buffer_variable *ubo_var;
150    ir_rvalue *uniform_block;
151    bool progress;
152 };
153 
154 /**
155  * Determine the name of the interface block field
156  *
157  * This is the name of the specific member as it would appear in the
158  * \c gl_uniform_buffer_variable::Name field in the shader's
159  * \c UniformBlocks array.
160  */
161 static const char *
interface_field_name(void * mem_ctx,char * base_name,ir_dereference * d,ir_rvalue ** nonconst_block_index)162 interface_field_name(void *mem_ctx, char *base_name, ir_dereference *d,
163                      ir_rvalue **nonconst_block_index)
164 {
165    ir_rvalue *previous_index = NULL;
166    *nonconst_block_index = NULL;
167 
168    while (d != NULL) {
169       switch (d->ir_type) {
170       case ir_type_dereference_variable: {
171          ir_dereference_variable *v = (ir_dereference_variable *) d;
172          if (previous_index
173              && v->var->is_interface_instance()
174              && v->var->type->is_array()) {
175 
176             ir_constant *const_index = previous_index->as_constant();
177             if (!const_index) {
178                *nonconst_block_index = previous_index;
179                return ralloc_asprintf(mem_ctx, "%s[0]", base_name);
180             } else {
181                return ralloc_asprintf(mem_ctx,
182                                       "%s[%d]",
183                                       base_name,
184                                       const_index->get_uint_component(0));
185             }
186          } else {
187             return base_name;
188          }
189 
190          break;
191       }
192 
193       case ir_type_dereference_record: {
194          ir_dereference_record *r = (ir_dereference_record *) d;
195 
196          d = r->record->as_dereference();
197          break;
198       }
199 
200       case ir_type_dereference_array: {
201          ir_dereference_array *a = (ir_dereference_array *) d;
202 
203          d = a->array->as_dereference();
204          previous_index = a->array_index;
205 
206          break;
207       }
208 
209       default:
210          assert(!"Should not get here.");
211          break;
212       }
213    }
214 
215    assert(!"Should not get here.");
216    return NULL;
217 }
218 
/**
 * Replace a dereference of a uniform block member with a temporary that is
 * filled by explicit UBO loads.
 *
 * The work happens in three steps:
 *  1. find the uniform block in the shader and build the expression that
 *     selects it (\c uniform_block), remembering the member record
 *     (\c ubo_var);
 *  2. walk the dereference chain and accumulate the byte offset of the
 *     dereferenced region, split into a compile-time part
 *     (\c const_offset) and a run-time expression (\c offset);
 *  3. declare a temporary of the dereference's type, emit loads into it
 *     in front of \c base_ir, and substitute it for \c *rvalue.
 *
 * Rvalues that are NULL, are not dereferences, or do not refer to a variable
 * in a uniform block are left untouched.
 */
void
lower_ubo_reference_visitor::handle_rvalue(ir_rvalue **rvalue)
{
   if (!*rvalue)
      return;

   ir_dereference *deref = (*rvalue)->as_dereference();
   if (!deref)
      return;

   ir_variable *var = deref->variable_referenced();
   if (!var || !var->is_in_uniform_block())
      return;

   /* New IR is allocated in the same ralloc context as the rvalue that is
    * being replaced.
    */
   mem_ctx = ralloc_parent(*rvalue);

   ir_rvalue *nonconst_block_index;
   const char *const field_name =
      interface_field_name(mem_ctx, (char *) var->get_interface_type()->name,
                           deref, &nonconst_block_index);

   /* Look the block up by name.  When the block array index is not a
    * constant, field_name refers to element 0 and the run-time index is
    * added to the position at which that element was found.
    */
   this->uniform_block = NULL;
   for (unsigned i = 0; i < shader->NumUniformBlocks; i++) {
      if (strcmp(field_name, shader->UniformBlocks[i].Name) == 0) {

         ir_constant *index = new(mem_ctx) ir_constant(i);

         if (nonconst_block_index) {
            if (nonconst_block_index->type != glsl_type::uint_type)
               nonconst_block_index = i2u(nonconst_block_index);
            this->uniform_block = add(nonconst_block_index, index);
         } else {
            this->uniform_block = index;
         }

         struct gl_uniform_block *block = &shader->UniformBlocks[i];

         /* For an interface instance the offset starts at the block's first
          * member and the record dereferences below add the rest.
          * Otherwise the variable is itself a block member; presumably
          * data.location holds its index within the block -- TODO confirm.
          */
         this->ubo_var = var->is_interface_instance()
            ? &block->Uniforms[0] : &block->Uniforms[var->data.location];

         break;
      }
   }

   /* NOTE(review): with assertions compiled out, a block that was not found
    * leaves uniform_block NULL and ubo_var at its previous value.
    */
   assert(this->uniform_block);

   /* "offset" is the run-time part of the byte offset and "const_offset"
    * the compile-time part; emit_ubo_loads() adds them together.
    */
   ir_rvalue *offset = new(mem_ctx) ir_constant(0u);
   unsigned const_offset = 0;
   bool row_major = is_dereferenced_thing_row_major(deref);

   /* Calculate the offset to the start of the region of the UBO
    * dereferenced by *rvalue.  This may be a variable offset if an
    * array dereference has a variable index.
    */
   while (deref) {
      switch (deref->ir_type) {
      case ir_type_dereference_variable: {
	 /* Root of the chain: add the member's offset and stop walking. */
	 const_offset += ubo_var->Offset;
	 deref = NULL;
	 break;
      }

      case ir_type_dereference_array: {
	 ir_dereference_array *deref_array = (ir_dereference_array *)deref;
	 unsigned array_stride;
	 if (deref_array->array->type->is_matrix() && row_major) {
	    /* When loading a vector out of a row major matrix, the
	     * step between the columns (vectors) is the size of a
	     * float, while the step between the rows (elements of a
	     * vector) is handled below in emit_ubo_loads.
	     */
	    array_stride = 4;
         } else if (deref_array->type->is_interface()) {
            /* We're processing an array dereference of an interface instance
             * array.  The thing being dereferenced *must* be a variable
             * dereference because interfaces cannot be embedded in other
             * types.  In terms of calculating the offsets for the lowering
             * pass, we don't care about the array index.  All elements of an
             * interface instance array will have the same offsets relative to
             * the base of the block that backs them.
             */
            assert(deref_array->array->as_dereference_variable());
            deref = deref_array->array->as_dereference();
            break;
	 } else {
            /* Whether or not the field is row-major (because it might be a
             * bvec2 or something) does not affect the array itself.  We need
             * to know whether an array element in its entirety is row-major.
             */
            const bool array_row_major =
               is_dereferenced_thing_row_major(deref_array);

	    /* The element size is rounded up to a multiple of 16 bytes to
	     * obtain the stride between elements.
	     */
	    array_stride = deref_array->type->std140_size(array_row_major);
	    array_stride = glsl_align(array_stride, 16);
	 }

         /* Offsets are computed as unsigned values, so a signed index is
          * converted first.
          */
         ir_rvalue *array_index = deref_array->array_index;
         if (array_index->type->base_type == GLSL_TYPE_INT)
            array_index = i2u(array_index);

	 /* A constant index is folded into const_offset; anything else is
	  * multiplied by the stride and added to the run-time expression.
	  */
	 ir_constant *const_index =
            array_index->constant_expression_value(NULL);
	 if (const_index) {
	    const_offset += array_stride * const_index->value.u[0];
	 } else {
	    offset = add(offset,
			 mul(array_index,
			     new(mem_ctx) ir_constant(array_stride)));
	 }
	 deref = deref_array->array->as_dereference();
	 break;
      }

      case ir_type_dereference_record: {
	 ir_dereference_record *deref_record = (ir_dereference_record *)deref;
	 const glsl_type *struct_type = deref_record->record->type;
	 unsigned intra_struct_offset = 0;

         /* glsl_type::std140_base_alignment doesn't grok interfaces.  Use
          * 16-bytes for the alignment because that is the general minimum of
          * std140.
          */
         const unsigned struct_alignment = struct_type->is_interface()
            ? 16
            : struct_type->std140_base_alignment(row_major);


	 /* Lay out the fields in declaration order until the selected field
	  * is reached; intra_struct_offset is then that field's offset.
	  */
	 for (unsigned int i = 0; i < struct_type->length; i++) {
	    const glsl_type *type = struct_type->fields.structure[i].type;

            /* Build a throw-away dereference of field i purely to ask
             * whether that field is row-major, then release it again.
             */
            ir_dereference_record *field_deref =
               new(mem_ctx) ir_dereference_record(deref_record->record,
                                                  struct_type->fields.structure[i].name);
            const bool field_row_major =
               is_dereferenced_thing_row_major(field_deref);

            ralloc_free(field_deref);

            unsigned field_align = type->std140_base_alignment(field_row_major);

	    intra_struct_offset = glsl_align(intra_struct_offset, field_align);

	    /* Stop once the aligned start of the selected field is known. */
	    if (strcmp(struct_type->fields.structure[i].name,
		       deref_record->field) == 0)
	       break;
            intra_struct_offset += type->std140_size(field_row_major);

            /* If the field just examined was itself a structure, apply rule
             * #9:
             *
             *     "The structure may have padding at the end; the base offset
             *     of the member following the sub-structure is rounded up to
             *     the next multiple of the base alignment of the structure."
             *
             * NOTE(review): the rounding below uses the alignment of the
             * enclosing type (struct_alignment), not that of the
             * sub-structure itself -- confirm this is intended.
             */
            if (type->without_array()->is_record()) {
               intra_struct_offset = glsl_align(intra_struct_offset,
                                                struct_alignment);

            }
	 }

	 const_offset += intra_struct_offset;

	 deref = deref_record->record->as_dereference();
	 break;
      }
      default:
	 assert(!"not reached");
	 deref = NULL;
	 break;
      }
   }

   /* Now that we've calculated the offset to the start of the
    * dereference, walk over the type and emit loads into a temporary.
    */
   const glsl_type *type = (*rvalue)->type;
   ir_variable *load_var = new(mem_ctx) ir_variable(type,
						    "ubo_load_temp",
						    ir_var_temporary, (*rvalue)->get_precision());
   base_ir->insert_before(load_var);

   /* The run-time offset is stored in a temporary of its own so that every
    * load emitted below can refer to it.
    */
   ir_variable *load_offset = new(mem_ctx) ir_variable(glsl_type::uint_type,
						       "ubo_load_temp_offset",
						       ir_var_temporary, glsl_precision_undefined);
   base_ir->insert_before(load_offset);
   base_ir->insert_before(assign(load_offset, offset));

   /* Fill the temporary, then make the original expression read from it. */
   deref = new(mem_ctx) ir_dereference_variable(load_var);
   emit_ubo_loads(deref, load_offset, const_offset, row_major);
   *rvalue = deref;

   progress = true;
}
413 
414 ir_expression *
ubo_load(const glsl_type * type,ir_rvalue * offset)415 lower_ubo_reference_visitor::ubo_load(const glsl_type *type,
416 				      ir_rvalue *offset)
417 {
418    ir_rvalue *block_ref = this->uniform_block->clone(mem_ctx, NULL);
419    return new(mem_ctx)
420       ir_expression(ir_binop_ubo_load,
421                     type,
422                     block_ref,
423                     offset);
424 
425 }
426 
427 /**
428  * Takes LHS and emits a series of assignments into its components
429  * from the UBO variable at variable_offset + deref_offset.
430  *
431  * Recursively calls itself to break the deref down to the point that
432  * the ir_binop_ubo_load expressions generated are contiguous scalars
433  * or vectors.
434  */
435 void
emit_ubo_loads(ir_dereference * deref,ir_variable * base_offset,unsigned int deref_offset,bool row_major)436 lower_ubo_reference_visitor::emit_ubo_loads(ir_dereference *deref,
437 					    ir_variable *base_offset,
438                                             unsigned int deref_offset,
439                                             bool row_major)
440 {
441    if (deref->type->is_record()) {
442       unsigned int field_offset = 0;
443 
444       for (unsigned i = 0; i < deref->type->length; i++) {
445 	 const struct glsl_struct_field *field =
446 	    &deref->type->fields.structure[i];
447 	 ir_dereference *field_deref =
448 	    new(mem_ctx) ir_dereference_record(deref->clone(mem_ctx, NULL),
449 					       field->name);
450 
451 	 field_offset =
452 	    glsl_align(field_offset,
453                        field->type->std140_base_alignment(row_major));
454 
455 	 emit_ubo_loads(field_deref, base_offset, deref_offset + field_offset,
456                         row_major);
457 
458 	 field_offset += field->type->std140_size(row_major);
459       }
460       return;
461    }
462 
463    if (deref->type->is_array()) {
464       unsigned array_stride =
465 	 glsl_align(deref->type->fields.array->std140_size(row_major),
466 		    16);
467 
468       for (unsigned i = 0; i < deref->type->length; i++) {
469 	 ir_constant *element = new(mem_ctx) ir_constant(i);
470 	 ir_dereference *element_deref =
471 	    new(mem_ctx) ir_dereference_array(deref->clone(mem_ctx, NULL),
472 					      element);
473 	 emit_ubo_loads(element_deref, base_offset,
474 			deref_offset + i * array_stride,
475                         row_major);
476       }
477       return;
478    }
479 
480    if (deref->type->is_matrix()) {
481       for (unsigned i = 0; i < deref->type->matrix_columns; i++) {
482 	 ir_constant *col = new(mem_ctx) ir_constant(i);
483 	 ir_dereference *col_deref =
484 	    new(mem_ctx) ir_dereference_array(deref->clone(mem_ctx, NULL),
485 					      col);
486 
487          if (row_major) {
488             /* For a row-major matrix, the next column starts at the next
489              * element.
490              */
491             emit_ubo_loads(col_deref, base_offset, deref_offset + i * 4,
492                            row_major);
493          } else {
494             /* std140 always rounds the stride of arrays (and matrices) to a
495              * vec4, so matrices are always 16 between columns/rows.
496              */
497             emit_ubo_loads(col_deref, base_offset, deref_offset + i * 16,
498                            row_major);
499          }
500       }
501       return;
502    }
503 
504    assert(deref->type->is_scalar() ||
505 	  deref->type->is_vector());
506 
507    if (!row_major) {
508       ir_rvalue *offset = add(base_offset,
509 			      new(mem_ctx) ir_constant(deref_offset));
510       base_ir->insert_before(assign(deref->clone(mem_ctx, NULL),
511 				    ubo_load(deref->type, offset)));
512    } else {
513       /* We're dereffing a column out of a row-major matrix, so we
514        * gather the vector from each stored row.
515       */
516       assert(deref->type->base_type == GLSL_TYPE_FLOAT);
517       /* Matrices, row_major or not, are stored as if they were
518        * arrays of vectors of the appropriate size in std140.
519        * Arrays have their strides rounded up to a vec4, so the
520        * matrix stride is always 16.
521        */
522       unsigned matrix_stride = 16;
523 
524       for (unsigned i = 0; i < deref->type->vector_elements; i++) {
525 	 ir_rvalue *chan_offset =
526 	    add(base_offset,
527 		new(mem_ctx) ir_constant(deref_offset + i * matrix_stride));
528 
529 	 base_ir->insert_before(assign(deref->clone(mem_ctx, NULL),
530 				       ubo_load(glsl_type::float_type,
531 						chan_offset),
532 				       (1U << i)));
533       }
534    }
535 }
536 
537 } /* unnamed namespace */
538 
539 void
lower_ubo_reference(struct gl_shader * shader,exec_list * instructions)540 lower_ubo_reference(struct gl_shader *shader, exec_list *instructions)
541 {
542    lower_ubo_reference_visitor v(shader);
543 
544    /* Loop over the instructions lowering references, because we take
545     * a deref of a UBO array using a UBO dereference as the index will
546     * produce a collection of instructions all of which have cloned
547     * UBO dereferences for that array index.
548     */
549    do {
550       v.progress = false;
551       visit_list_elements(&v, instructions);
552    } while (v.progress);
553 }
554