1 /*
2  * Copyright © 2010 Intel Corporation
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
21  * DEALINGS IN THE SOFTWARE.
22  */
23 
24 #include "compiler/glsl_types.h"
25 #include "loop_analysis.h"
26 #include "ir_hierarchical_visitor.h"
27 
28 #include "main/mtypes.h"
29 
30 namespace {
31 
32 class loop_unroll_visitor : public ir_hierarchical_visitor {
33 public:
loop_unroll_visitor(loop_state * state,const struct gl_shader_compiler_options * options)34    loop_unroll_visitor(loop_state *state,
35                        const struct gl_shader_compiler_options *options)
36    {
37       this->state = state;
38       this->progress = false;
39       this->options = options;
40    }
41 
42    virtual ir_visitor_status visit_leave(ir_loop *ir);
43    void simple_unroll(ir_loop *ir, int iterations);
44    void complex_unroll(ir_loop *ir, int iterations,
45                        bool continue_from_then_branch,
46                        bool limiting_term_first,
47                        bool lt_continue_from_then_branch);
48    void splice_post_if_instructions(ir_if *ir_if, exec_list *splice_dest);
49 
50    loop_state *state;
51 
52    bool progress;
53    const struct gl_shader_compiler_options *options;
54 };
55 
56 } /* anonymous namespace */
57 
58 class loop_unroll_count : public ir_hierarchical_visitor {
59 public:
60    int nodes;
61    bool unsupported_variable_indexing;
62    bool array_indexed_by_induction_var_with_exact_iterations;
63    /* If there are nested loops, the node count will be inaccurate. */
64    bool nested_loop;
65 
loop_unroll_count(exec_list * list,loop_variable_state * ls,const struct gl_shader_compiler_options * options)66    loop_unroll_count(exec_list *list, loop_variable_state *ls,
67                      const struct gl_shader_compiler_options *options)
68       : ls(ls), options(options)
69    {
70       nodes = 0;
71       nested_loop = false;
72       unsupported_variable_indexing = false;
73       array_indexed_by_induction_var_with_exact_iterations = false;
74 
75       run(list);
76    }
77 
visit_enter(ir_assignment *)78    virtual ir_visitor_status visit_enter(ir_assignment *)
79    {
80       nodes++;
81       return visit_continue;
82    }
83 
visit_enter(ir_expression *)84    virtual ir_visitor_status visit_enter(ir_expression *)
85    {
86       nodes++;
87       return visit_continue;
88    }
89 
visit_enter(ir_loop *)90    virtual ir_visitor_status visit_enter(ir_loop *)
91    {
92       nested_loop = true;
93       return visit_continue;
94    }
95 
visit_enter(ir_dereference_array * ir)96    virtual ir_visitor_status visit_enter(ir_dereference_array *ir)
97    {
98       /* Force unroll in case of dynamic indexing with sampler arrays
99        * when EmitNoIndirectSampler is set.
100        */
101       if (options->EmitNoIndirectSampler) {
102          if ((ir->array->type->is_array() &&
103               ir->array->type->contains_sampler()) &&
104              !ir->array_index->constant_expression_value(ralloc_parent(ir))) {
105             unsupported_variable_indexing = true;
106             return visit_continue;
107          }
108       }
109 
110       /* Check for arrays variably-indexed by a loop induction variable.
111        * Unrolling the loop may convert that access into constant-indexing.
112        *
113        * Many drivers don't support particular kinds of variable indexing,
114        * and have to resort to using lower_variable_index_to_cond_assign to
115        * handle it.  This results in huge amounts of horrible code, so we'd
116        * like to avoid that if possible.  Here, we just note that it will
117        * happen.
118        */
119       if ((ir->array->type->is_array() || ir->array->type->is_matrix()) &&
120           !ir->array_index->as_constant()) {
121          ir_variable *array = ir->array->variable_referenced();
122          loop_variable *lv = ls->get(ir->array_index->variable_referenced());
123          if (array && lv && lv->is_induction_var()) {
124             /* If an array is indexed by a loop induction variable, and the
125              * array size is exactly the number of loop iterations, this is
126              * probably a simple for-loop trying to access each element in
127              * turn; the application may expect it to be unrolled.
128              */
129             if (int(array->type->length) == ls->limiting_terminator->iterations)
130                array_indexed_by_induction_var_with_exact_iterations = true;
131 
132             switch (array->data.mode) {
133             case ir_var_auto:
134             case ir_var_temporary:
135             case ir_var_const_in:
136             case ir_var_function_in:
137             case ir_var_function_out:
138             case ir_var_function_inout:
139                if (options->EmitNoIndirectTemp)
140                   unsupported_variable_indexing = true;
141                break;
142             case ir_var_uniform:
143             case ir_var_shader_storage:
144                if (options->EmitNoIndirectUniform)
145                   unsupported_variable_indexing = true;
146                break;
147             case ir_var_shader_in:
148                if (options->EmitNoIndirectInput)
149                   unsupported_variable_indexing = true;
150                break;
151             case ir_var_shader_out:
152                if (options->EmitNoIndirectOutput)
153                   unsupported_variable_indexing = true;
154                break;
155             }
156          }
157       }
158       return visit_continue;
159    }
160 
161 private:
162    loop_variable_state *ls;
163    const struct gl_shader_compiler_options *options;
164 };
165 
166 
167 /**
168  * Unroll a loop which does not contain any jumps.  For example, if the input
169  * is:
170  *
171  *     (loop (...) ...instrs...)
172  *
173  * And the iteration count is 3, the output will be:
174  *
175  *     ...instrs... ...instrs... ...instrs...
176  */
177 void
simple_unroll(ir_loop * ir,int iterations)178 loop_unroll_visitor::simple_unroll(ir_loop *ir, int iterations)
179 {
180    void *const mem_ctx = ralloc_parent(ir);
181    loop_variable_state *const ls = this->state->get(ir);
182 
183    /* If there are no terminators, then the loop iteration count must be 1.
184     * This is the 'do { } while (false);' case.
185     */
186    assert(!ls->terminators.is_empty() || iterations == 1);
187 
188    ir_instruction *first_ir =
189       (ir_instruction *) ir->body_instructions.get_head();
190 
191    if (!first_ir) {
192       /* The loop is empty remove it and return */
193       ir->remove();
194       return;
195    }
196 
197    ir_if *limit_if = NULL;
198    bool exit_branch_has_instructions = false;
199    if (ls->limiting_terminator) {
200       limit_if = ls->limiting_terminator->ir;
201       ir_instruction *ir_if_last = (ir_instruction *)
202          limit_if->then_instructions.get_tail();
203 
204       if (is_break(ir_if_last)) {
205          if (ir_if_last != limit_if->then_instructions.get_head())
206             exit_branch_has_instructions = true;
207 
208          splice_post_if_instructions(limit_if, &limit_if->else_instructions);
209          ir_if_last->remove();
210       } else {
211          ir_if_last = (ir_instruction *)
212             limit_if->else_instructions.get_tail();
213          assert(is_break(ir_if_last));
214 
215          if (ir_if_last != limit_if->else_instructions.get_head())
216             exit_branch_has_instructions = true;
217 
218          splice_post_if_instructions(limit_if, &limit_if->then_instructions);
219          ir_if_last->remove();
220       }
221    }
222 
223    /* Because 'iterations' is the number of times we pass over the *entire*
224     * loop body before hitting the first break, we need to bump the number of
225     * iterations if the limiting terminator is not the first instruction in
226     * the loop, or it the exit branch contains instructions. This ensures we
227     * execute any instructions before the terminator or in its exit branch.
228     */
229    if (!ls->terminators.is_empty() &&
230        (limit_if != first_ir->as_if() || exit_branch_has_instructions))
231       iterations++;
232 
233    for (int i = 0; i < iterations; i++) {
234       exec_list copy_list;
235 
236       copy_list.make_empty();
237       clone_ir_list(mem_ctx, &copy_list, &ir->body_instructions);
238 
239       ir->insert_before(&copy_list);
240    }
241 
242    /* The loop has been replaced by the unrolled copies.  Remove the original
243     * loop from the IR sequence.
244     */
245    ir->remove();
246 
247    this->progress = true;
248 }
249 
250 
251 /**
252  * Unroll a loop whose last statement is an ir_if.  If \c
253  * continue_from_then_branch is true, the loop is repeated only when the
254  * "then" branch of the if is taken; otherwise it is repeated only when the
255  * "else" branch of the if is taken.
256  *
257  * For example, if the input is:
258  *
259  *     (loop (...)
260  *      ...body...
261  *      (if (cond)
262  *          (...then_instrs...)
263  *        (...else_instrs...)))
264  *
265  * And the iteration count is 3, and \c continue_from_then_branch is true,
266  * then the output will be:
267  *
268  *     ...body...
269  *     (if (cond)
270  *         (...then_instrs...
271  *          ...body...
272  *          (if (cond)
273  *              (...then_instrs...
274  *               ...body...
275  *               (if (cond)
276  *                   (...then_instrs...)
277  *                 (...else_instrs...)))
278  *            (...else_instrs...)))
279  *       (...else_instrs))
280  */
281 void
complex_unroll(ir_loop * ir,int iterations,bool second_term_then_continue,bool extra_iteration_required,bool first_term_then_continue)282 loop_unroll_visitor::complex_unroll(ir_loop *ir, int iterations,
283                                     bool second_term_then_continue,
284                                     bool extra_iteration_required,
285                                     bool first_term_then_continue)
286 {
287    void *const mem_ctx = ralloc_parent(ir);
288    ir_instruction *ir_to_replace = ir;
289 
290    /* Because 'iterations' is the number of times we pass over the *entire*
291     * loop body before hitting the first break, we need to bump the number of
292     * iterations if the limiting terminator is not the first instruction in
293     * the loop, or it the exit branch contains instructions. This ensures we
294     * execute any instructions before the terminator or in its exit branch.
295     */
296    if (extra_iteration_required)
297       iterations++;
298 
299    for (int i = 0; i < iterations; i++) {
300       exec_list copy_list;
301 
302       copy_list.make_empty();
303       clone_ir_list(mem_ctx, &copy_list, &ir->body_instructions);
304 
305       ir_if *ir_if = ((ir_instruction *) copy_list.get_tail())->as_if();
306       assert(ir_if != NULL);
307 
308       exec_list *const first_list = first_term_then_continue
309          ? &ir_if->then_instructions : &ir_if->else_instructions;
310       ir_if = ((ir_instruction *) first_list->get_tail())->as_if();
311 
312       ir_to_replace->insert_before(&copy_list);
313       ir_to_replace->remove();
314 
315       /* placeholder that will be removed in the next iteration */
316       ir_to_replace =
317          new(mem_ctx) ir_loop_jump(ir_loop_jump::jump_continue);
318 
319       exec_list *const second_term_continue_list = second_term_then_continue
320          ? &ir_if->then_instructions : &ir_if->else_instructions;
321 
322       second_term_continue_list->push_tail(ir_to_replace);
323    }
324 
325    ir_to_replace->remove();
326 
327    this->progress = true;
328 }
329 
330 
331 /**
332  * Move all of the instructions which follow \c ir_if to the end of
333  * \c splice_dest.
334  *
335  * For example, in the code snippet:
336  *
337  *     (if (cond)
338  *         (...then_instructions...
339  *          break)
340  *       (...else_instructions...))
341  *     ...post_if_instructions...
342  *
343  * If \c ir_if points to the "if" instruction, and \c splice_dest points to
344  * (...else_instructions...), the code snippet is transformed into:
345  *
346  *     (if (cond)
347  *         (...then_instructions...
348  *          break)
349  *       (...else_instructions...
350  *        ...post_if_instructions...))
351  */
352 void
splice_post_if_instructions(ir_if * ir_if,exec_list * splice_dest)353 loop_unroll_visitor::splice_post_if_instructions(ir_if *ir_if,
354                                                  exec_list *splice_dest)
355 {
356    while (!ir_if->get_next()->is_tail_sentinel()) {
357       ir_instruction *move_ir = (ir_instruction *) ir_if->get_next();
358 
359       move_ir->remove();
360       splice_dest->push_tail(move_ir);
361    }
362 }
363 
364 static bool
exit_branch_has_instructions(ir_if * term_if,bool lt_then_continue)365 exit_branch_has_instructions(ir_if *term_if, bool lt_then_continue)
366 {
367    if (lt_then_continue) {
368       if (term_if->else_instructions.get_head() ==
369           term_if->else_instructions.get_tail())
370          return false;
371    } else {
372       if (term_if->then_instructions.get_head() ==
373           term_if->then_instructions.get_tail())
374          return false;
375    }
376 
377    return true;
378 }
379 
380 ir_visitor_status
visit_leave(ir_loop * ir)381 loop_unroll_visitor::visit_leave(ir_loop *ir)
382 {
383    loop_variable_state *const ls = this->state->get(ir);
384 
385    /* If we've entered a loop that hasn't been analyzed, something really,
386     * really bad has happened.
387     */
388    if (ls == NULL) {
389       assert(ls != NULL);
390       return visit_continue;
391    }
392 
393    /* Limiting terminator may have iteration count of zero,
394     * this is a valid case because the loop may break during
395     * the first iteration.
396     */
397 
398    /* Remove the conditional break statements associated with all terminators
399     * that are associated with a fixed iteration count, except for the one
400     * associated with the limiting terminator--that one needs to stay, since
401     * it terminates the loop.  Exception: if the loop still has a normative
402     * bound, then that terminates the loop, so we don't even need the limiting
403     * terminator.
404     */
405    foreach_in_list_safe(loop_terminator, t, &ls->terminators) {
406       if (t->iterations < 0)
407          continue;
408 
409       exec_list *branch_instructions;
410       if (t != ls->limiting_terminator) {
411          ir_instruction *ir_if_last = (ir_instruction *)
412             t->ir->then_instructions.get_tail();
413          if (is_break(ir_if_last)) {
414             branch_instructions = &t->ir->else_instructions;
415          } else {
416             branch_instructions = &t->ir->then_instructions;
417             assert(is_break((ir_instruction *)
418                             t->ir->else_instructions.get_tail()));
419          }
420 
421          exec_list copy_list;
422          copy_list.make_empty();
423          clone_ir_list(ir, &copy_list, branch_instructions);
424 
425          t->ir->insert_before(&copy_list);
426          t->ir->remove();
427 
428          assert(ls->num_loop_jumps > 0);
429          ls->num_loop_jumps--;
430 
431          /* Also remove it from the terminator list */
432          t->remove();
433 
434          this->progress = true;
435       }
436    }
437 
438    if (ls->limiting_terminator == NULL) {
439       ir_instruction *last_ir =
440          (ir_instruction *) ir->body_instructions.get_tail();
441 
442       /* If a loop has no induction variable and the last instruction is
443        * a break, unroll the loop with a count of 1.  This is the classic
444        *
445        *    do {
446        *        // ...
447        *    } while (false)
448        *
449        * that is used to wrap multi-line macros.
450        *
451        * If num_loop_jumps is not zero, last_ir cannot be NULL... there has to
452        * be at least num_loop_jumps instructions in the loop.
453        */
454       if (ls->num_loop_jumps == 1 && is_break(last_ir)) {
455          last_ir->remove();
456 
457          simple_unroll(ir, 1);
458       }
459 
460       /* Don't try to unroll loops where the number of iterations is not known
461        * at compile-time.
462        */
463       return visit_continue;
464    }
465 
466    int iterations = ls->limiting_terminator->iterations;
467 
468    const int max_iterations = options->MaxUnrollIterations;
469 
470    /* Don't try to unroll loops that have zillions of iterations either.
471     */
472    if (iterations > max_iterations)
473       return visit_continue;
474 
475    /* Don't try to unroll nested loops and loops with a huge body.
476     */
477    loop_unroll_count count(&ir->body_instructions, ls, options);
478 
479    bool loop_too_large =
480       count.nested_loop || count.nodes * iterations > max_iterations * 5;
481 
482    if (loop_too_large && !count.unsupported_variable_indexing &&
483        !count.array_indexed_by_induction_var_with_exact_iterations)
484       return visit_continue;
485 
486    /* Note: the limiting terminator contributes 1 to ls->num_loop_jumps.
487     * We'll be removing the limiting terminator before we unroll.
488     */
489    assert(ls->num_loop_jumps > 0);
490    unsigned predicted_num_loop_jumps = ls->num_loop_jumps - 1;
491 
492    if (predicted_num_loop_jumps > 1)
493       return visit_continue;
494 
495    if (predicted_num_loop_jumps == 0) {
496       simple_unroll(ir, iterations);
497       return visit_continue;
498    }
499 
500    ir_instruction *last_ir = (ir_instruction *) ir->body_instructions.get_tail();
501    assert(last_ir != NULL);
502 
503    if (is_break(last_ir)) {
504       /* If the only loop-jump is a break at the end of the loop, the loop
505        * will execute exactly once.  Remove the break and use the simple
506        * unroller with an iteration count of 1.
507        */
508       last_ir->remove();
509 
510       simple_unroll(ir, 1);
511       return visit_continue;
512    }
513 
514    /* Complex unrolling can only handle two terminators. One with an unknown
515     * iteration count and one with a known iteration count. We have already
516     * made sure we have a known iteration count above and removed any
517     * unreachable terminators with a known count. Here we make sure there
518     * isn't any additional unknown terminators, or any other jumps nested
519     * inside futher ifs.
520     */
521    if (ls->num_loop_jumps != 2 || ls->terminators.length() != 2)
522       return visit_continue;
523 
524    ir_instruction *first_ir =
525       (ir_instruction *) ir->body_instructions.get_head();
526 
527    unsigned term_count = 0;
528    bool first_term_then_continue = false;
529    foreach_in_list(loop_terminator, t, &ls->terminators) {
530       ir_if *ir_if = t->ir->as_if();
531       assert(ir_if != NULL);
532 
533       ir_instruction *ir_if_last =
534          (ir_instruction *) ir_if->then_instructions.get_tail();
535 
536       if (is_break(ir_if_last)) {
537          splice_post_if_instructions(ir_if, &ir_if->else_instructions);
538          ir_if_last->remove();
539          if (term_count == 1) {
540             bool ebi =
541                exit_branch_has_instructions(ls->limiting_terminator->ir,
542                                             first_term_then_continue);
543             complex_unroll(ir, iterations, false,
544                            first_ir->as_if() != ls->limiting_terminator->ir ||
545                            ebi,
546                            first_term_then_continue);
547             return visit_continue;
548          }
549       } else {
550          ir_if_last =
551             (ir_instruction *) ir_if->else_instructions.get_tail();
552 
553          assert(is_break(ir_if_last));
554          if (is_break(ir_if_last)) {
555             splice_post_if_instructions(ir_if, &ir_if->then_instructions);
556             ir_if_last->remove();
557             if (term_count == 1) {
558                bool ebi =
559                   exit_branch_has_instructions(ls->limiting_terminator->ir,
560                                                first_term_then_continue);
561                complex_unroll(ir, iterations, true,
562                               first_ir->as_if() != ls->limiting_terminator->ir ||
563                               ebi,
564                               first_term_then_continue);
565                return visit_continue;
566             } else {
567                first_term_then_continue = true;
568             }
569          }
570       }
571 
572       term_count++;
573    }
574 
575    /* Did not find the break statement.  It must be in a complex if-nesting,
576     * so don't try to unroll.
577     */
578    return visit_continue;
579 }
580 
581 
582 bool
unroll_loops(exec_list * instructions,loop_state * ls,const struct gl_shader_compiler_options * options)583 unroll_loops(exec_list *instructions, loop_state *ls,
584              const struct gl_shader_compiler_options *options)
585 {
586    loop_unroll_visitor v(ls, options);
587 
588    v.run(instructions);
589 
590    return v.progress;
591 }
592