1 /*
2  * Copyright © 2010 Intel Corporation
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
21  * DEALINGS IN THE SOFTWARE.
22  */
23 
24 /**
25  * \file opt_array_splitting.cpp
26  *
27  * If an array is always dereferenced with a constant index, then
28  * split it apart into its elements, making it more amenable to other
29  * optimization passes.
30  *
31  * This skips uniform/varying arrays, which would need careful
32  * handling due to their ir->location fields tying them to the GL API
33  * and other shader stages.
34  */
35 
36 #include "ir.h"
37 #include "ir_visitor.h"
38 #include "ir_rvalue_visitor.h"
39 #include "compiler/glsl_types.h"
40 
41 static bool debug = false;
42 
43 namespace {
44 
45 namespace opt_array_splitting {
46 
47 class variable_entry : public exec_node
48 {
49 public:
variable_entry(ir_variable * var)50    variable_entry(ir_variable *var)
51    {
52       this->var = var;
53       this->split = true;
54       this->declaration = false;
55       this->components = NULL;
56       this->mem_ctx = NULL;
57       if (var->type->is_array())
58          this->size = var->type->length;
59       else
60          this->size = var->type->matrix_columns;
61    }
62 
63    ir_variable *var; /* The key: the variable's pointer. */
64    unsigned size; /* array length or matrix columns */
65 
66    /** Whether this array should be split or not. */
67    bool split;
68 
69    /* If the variable had a decl we can work with in the instruction
70     * stream.  We can't do splitting on function arguments, which
71     * don't get this variable set.
72     */
73    bool declaration;
74 
75    ir_variable **components;
76 
77    /** ralloc_parent(this->var) -- the shader's talloc context. */
78    void *mem_ctx;
79 };
80 
81 } /* namespace */
82 
83 using namespace opt_array_splitting;
84 
85 /**
86  * This class does a walk over the tree, coming up with the set of
87  * variables that could be split by looking to see if they are arrays
88  * that are only ever constant-index dereferenced.
89  */
90 class ir_array_reference_visitor : public ir_hierarchical_visitor {
91 public:
ir_array_reference_visitor(void)92    ir_array_reference_visitor(void)
93    {
94       this->mem_ctx = ralloc_context(NULL);
95       this->variable_list.make_empty();
96       this->in_whole_array_copy = false;
97    }
98 
~ir_array_reference_visitor(void)99    ~ir_array_reference_visitor(void)
100    {
101       ralloc_free(mem_ctx);
102    }
103 
104    bool get_split_list(exec_list *instructions, bool linked);
105 
106    virtual ir_visitor_status visit(ir_variable *);
107    virtual ir_visitor_status visit(ir_dereference_variable *);
108    virtual ir_visitor_status visit_enter(ir_assignment *);
109    virtual ir_visitor_status visit_leave(ir_assignment *);
110    virtual ir_visitor_status visit_enter(ir_dereference_array *);
111    virtual ir_visitor_status visit_enter(ir_function_signature *);
112 
113    variable_entry *get_variable_entry(ir_variable *var);
114 
115    /* List of variable_entry */
116    exec_list variable_list;
117 
118    void *mem_ctx;
119 
120    bool in_whole_array_copy;
121 };
122 
123 } /* namespace */
124 
125 variable_entry *
get_variable_entry(ir_variable * var)126 ir_array_reference_visitor::get_variable_entry(ir_variable *var)
127 {
128    assert(var);
129 
130    if (var->data.mode != ir_var_auto &&
131        var->data.mode != ir_var_temporary)
132       return NULL;
133 
134    if (!(var->type->is_array() || var->type->is_matrix()))
135       return NULL;
136 
137    /* If the array hasn't been sized yet, we can't split it.  After
138     * linking, this should be resolved.
139     */
140    if (var->type->is_unsized_array())
141       return NULL;
142 
143    /* FIXME: arrays of arrays are not handled correctly by this pass so we
144     * skip it for now. While the pass will create functioning code it actually
145     * produces worse code.
146     *
147     * For example the array:
148     *
149     *    int[3][2] a;
150     *
151     * ends up being split up into:
152     *
153     *    int[3][2] a_0;
154     *    int[3][2] a_1;
155     *    int[3][2] a_2;
156     *
157     * And we end up referencing each of these new arrays for example:
158     *
159     *    a[0][1] will be turned into a_0[0][1]
160     *    a[1][0] will be turned into a_1[1][0]
161     *    a[2][0] will be turned into a_2[2][0]
162     */
163    if (var->type->is_array() && var->type->fields.array->is_array())
164       return NULL;
165 
166    foreach_in_list(variable_entry, entry, &this->variable_list) {
167       if (entry->var == var)
168          return entry;
169    }
170 
171    variable_entry *entry = new(mem_ctx) variable_entry(var);
172    this->variable_list.push_tail(entry);
173    return entry;
174 }
175 
176 
177 ir_visitor_status
visit(ir_variable * ir)178 ir_array_reference_visitor::visit(ir_variable *ir)
179 {
180    variable_entry *entry = this->get_variable_entry(ir);
181 
182    if (entry)
183       entry->declaration = true;
184 
185    return visit_continue;
186 }
187 
188 ir_visitor_status
visit_enter(ir_assignment * ir)189 ir_array_reference_visitor::visit_enter(ir_assignment *ir)
190 {
191    in_whole_array_copy =
192       ir->lhs->type->is_array() && ir->whole_variable_written();
193 
194    return visit_continue;
195 }
196 
197 ir_visitor_status
visit_leave(ir_assignment *)198 ir_array_reference_visitor::visit_leave(ir_assignment *)
199 {
200    in_whole_array_copy = false;
201 
202    return visit_continue;
203 }
204 
205 ir_visitor_status
visit(ir_dereference_variable * ir)206 ir_array_reference_visitor::visit(ir_dereference_variable *ir)
207 {
208    variable_entry *entry = this->get_variable_entry(ir->var);
209 
210    /* Allow whole-array assignments on the LHS.  We can split those
211     * by "unrolling" the assignment into component-wise assignments.
212     */
213    if (in_assignee && in_whole_array_copy)
214       return visit_continue;
215 
216    /* If we made it to here without seeing an ir_dereference_array,
217     * then the dereference of this array didn't have a constant index
218     * (see the visit_continue_with_parent below), so we can't split
219     * the variable.
220     */
221    if (entry)
222       entry->split = false;
223 
224    return visit_continue;
225 }
226 
227 ir_visitor_status
visit_enter(ir_dereference_array * ir)228 ir_array_reference_visitor::visit_enter(ir_dereference_array *ir)
229 {
230    ir_dereference_variable *deref = ir->array->as_dereference_variable();
231    if (!deref)
232       return visit_continue;
233 
234    variable_entry *entry = this->get_variable_entry(deref->var);
235 
236    /* If the access to the array has a variable index, we wouldn't
237     * know which split variable this dereference should go to.
238     */
239    if (!ir->array_index->as_constant()) {
240       if (entry)
241          entry->split = false;
242       /* This variable indexing could come from a different array dereference
243        * that also has variable indexing, that is, something like a[b[a[b[0]]]].
244        * If we return visit_continue_with_parent here for the first appearence
245        * of a, then we can miss that b also has indirect indexing (if this is
246        * the only place in the program where such indirect indexing into b
247        * happens), so keep going.
248        */
249       return visit_continue;
250    }
251 
252    /* If the index is also array dereference, visit index. */
253    if (ir->array_index->as_dereference_array())
254       visit_enter(ir->array_index->as_dereference_array());
255 
256    return visit_continue_with_parent;
257 }
258 
259 ir_visitor_status
visit_enter(ir_function_signature * ir)260 ir_array_reference_visitor::visit_enter(ir_function_signature *ir)
261 {
262    /* We don't have logic for array-splitting function arguments,
263     * so just look at the body instructions and not the parameter
264     * declarations.
265     */
266    visit_list_elements(this, &ir->body);
267    return visit_continue_with_parent;
268 }
269 
270 bool
get_split_list(exec_list * instructions,bool linked)271 ir_array_reference_visitor::get_split_list(exec_list *instructions,
272                                            bool linked)
273 {
274    visit_list_elements(this, instructions);
275 
276    /* If the shaders aren't linked yet, we can't mess with global
277     * declarations, which need to be matched by name across shaders.
278     */
279    if (!linked) {
280       foreach_in_list(ir_instruction, node, instructions) {
281          ir_variable *var = node->as_variable();
282          if (var) {
283             variable_entry *entry = get_variable_entry(var);
284             if (entry)
285                entry->remove();
286          }
287       }
288    }
289 
290    /* Trim out variables we found that we can't split. */
291    foreach_in_list_safe(variable_entry, entry, &variable_list) {
292       if (debug) {
293          printf("array %s@%p: decl %d, split %d\n",
294                 entry->var->name, (void *) entry->var, entry->declaration,
295                 entry->split);
296       }
297 
298       if (!(entry->declaration && entry->split)) {
299          entry->remove();
300       }
301    }
302 
303    return !variable_list.is_empty();
304 }
305 
306 /**
307  * This class rewrites the dereferences of arrays that have been split
308  * to use the newly created ir_variables for each component.
309  */
310 class ir_array_splitting_visitor : public ir_rvalue_visitor {
311 public:
ir_array_splitting_visitor(exec_list * vars)312    ir_array_splitting_visitor(exec_list *vars)
313    {
314       this->variable_list = vars;
315    }
316 
~ir_array_splitting_visitor()317    virtual ~ir_array_splitting_visitor()
318    {
319    }
320 
321    virtual ir_visitor_status visit_leave(ir_assignment *);
322 
323    void split_deref(ir_dereference **deref);
324    void handle_rvalue(ir_rvalue **rvalue);
325    variable_entry *get_splitting_entry(ir_variable *var);
326 
327    exec_list *variable_list;
328 };
329 
330 variable_entry *
get_splitting_entry(ir_variable * var)331 ir_array_splitting_visitor::get_splitting_entry(ir_variable *var)
332 {
333    assert(var);
334 
335    foreach_in_list(variable_entry, entry, this->variable_list) {
336       if (entry->var == var) {
337          return entry;
338       }
339    }
340 
341    return NULL;
342 }
343 
344 void
split_deref(ir_dereference ** deref)345 ir_array_splitting_visitor::split_deref(ir_dereference **deref)
346 {
347    ir_dereference_array *deref_array = (*deref)->as_dereference_array();
348    if (!deref_array)
349       return;
350 
351    ir_dereference_variable *deref_var = deref_array->array->as_dereference_variable();
352    if (!deref_var)
353       return;
354    ir_variable *var = deref_var->var;
355 
356    variable_entry *entry = get_splitting_entry(var);
357    if (!entry)
358       return;
359 
360    ir_constant *constant = deref_array->array_index->as_constant();
361    assert(constant);
362 
363    if (constant->value.i[0] >= 0 && constant->value.i[0] < (int)entry->size) {
364       *deref = new(entry->mem_ctx)
365                ir_dereference_variable(entry->components[constant->value.i[0]]);
366    } else {
367       /* There was a constant array access beyond the end of the
368        * array.  This might have happened due to constant folding
369        * after the initial parse.  This produces an undefined value,
370        * but shouldn't crash.  Just give them an uninitialized
371        * variable.
372        */
373       ir_variable *temp = new(entry->mem_ctx) ir_variable(deref_array->type,
374                                                           "undef",
375                                                           ir_var_temporary);
376       entry->components[0]->insert_before(temp);
377       *deref = new(entry->mem_ctx) ir_dereference_variable(temp);
378    }
379 }
380 
381 void
handle_rvalue(ir_rvalue ** rvalue)382 ir_array_splitting_visitor::handle_rvalue(ir_rvalue **rvalue)
383 {
384    if (!*rvalue)
385       return;
386 
387    ir_dereference *deref = (*rvalue)->as_dereference();
388 
389    if (!deref)
390       return;
391 
392    split_deref(&deref);
393    *rvalue = deref;
394 }
395 
396 ir_visitor_status
visit_leave(ir_assignment * ir)397 ir_array_splitting_visitor::visit_leave(ir_assignment *ir)
398 {
399    /* The normal rvalue visitor skips the LHS of assignments, but we
400     * need to process those just the same.
401     */
402    ir_rvalue *lhs = ir->lhs;
403 
404    /* "Unroll" any whole array assignments, creating assignments for
405     * each array element.  Then, do splitting on each new assignment.
406     */
407    if (lhs->type->is_array() && ir->whole_variable_written() &&
408        get_splitting_entry(ir->whole_variable_written())) {
409       void *mem_ctx = ralloc_parent(ir);
410 
411       for (unsigned i = 0; i < lhs->type->length; i++) {
412          ir_rvalue *lhs_i =
413             new(mem_ctx) ir_dereference_array(ir->lhs->clone(mem_ctx, NULL),
414                                               new(mem_ctx) ir_constant(i));
415          ir_rvalue *rhs_i =
416             new(mem_ctx) ir_dereference_array(ir->rhs->clone(mem_ctx, NULL),
417                                               new(mem_ctx) ir_constant(i));
418          ir_rvalue *condition_i =
419             ir->condition ? ir->condition->clone(mem_ctx, NULL) : NULL;
420 
421          ir_assignment *assign_i =
422             new(mem_ctx) ir_assignment(lhs_i, rhs_i, condition_i);
423 
424          ir->insert_before(assign_i);
425          assign_i->accept(this);
426       }
427       ir->remove();
428       return visit_continue;
429    }
430 
431    handle_rvalue(&lhs);
432    ir->lhs = lhs->as_dereference();
433 
434    ir->lhs->accept(this);
435 
436    handle_rvalue(&ir->rhs);
437    ir->rhs->accept(this);
438 
439    if (ir->condition) {
440       handle_rvalue(&ir->condition);
441       ir->condition->accept(this);
442    }
443 
444    return visit_continue;
445 }
446 
447 bool
optimize_split_arrays(exec_list * instructions,bool linked)448 optimize_split_arrays(exec_list *instructions, bool linked)
449 {
450    ir_array_reference_visitor refs;
451    if (!refs.get_split_list(instructions, linked))
452       return false;
453 
454    void *mem_ctx = ralloc_context(NULL);
455 
456    /* Replace the decls of the arrays to be split with their split
457     * components.
458     */
459    foreach_in_list(variable_entry, entry, &refs.variable_list) {
460       const struct glsl_type *type = entry->var->type;
461       const struct glsl_type *subtype;
462 
463       if (type->is_matrix())
464          subtype = type->column_type();
465       else
466          subtype = type->fields.array;
467 
468       entry->mem_ctx = ralloc_parent(entry->var);
469 
470       entry->components = ralloc_array(mem_ctx, ir_variable *, entry->size);
471 
472       for (unsigned int i = 0; i < entry->size; i++) {
473          const char *name = ralloc_asprintf(mem_ctx, "%s_%d",
474                                             entry->var->name, i);
475          ir_variable *new_var =
476             new(entry->mem_ctx) ir_variable(subtype, name, ir_var_temporary);
477 
478          /* Do not lose memory/format qualifiers when arrays of images are
479           * split.
480           */
481          new_var->data.memory_read_only = entry->var->data.memory_read_only;
482          new_var->data.memory_write_only = entry->var->data.memory_write_only;
483          new_var->data.memory_coherent = entry->var->data.memory_coherent;
484          new_var->data.memory_volatile = entry->var->data.memory_volatile;
485          new_var->data.memory_restrict = entry->var->data.memory_restrict;
486          new_var->data.image_format = entry->var->data.image_format;
487 
488          entry->components[i] = new_var;
489          entry->var->insert_before(entry->components[i]);
490       }
491 
492       entry->var->remove();
493    }
494 
495    ir_array_splitting_visitor split(&refs.variable_list);
496    visit_list_elements(&split, instructions);
497 
498    if (debug)
499       _mesa_print_ir(stdout, instructions, NULL);
500 
501    ralloc_free(mem_ctx);
502 
503    return true;
504 
505 }
506