1 /*
2  * Copyright (c) 2019 Connor Abbott
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sub license,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the
12  * next paragraph) shall be included in all copies or substantial portions
13  * of the Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
21  * DEALINGS IN THE SOFTWARE.
22  *
23  */
24 
25 #include "gpir.h"
26 
27 /* Here we perform a few optimizations that can't currently be done in NIR:
28  *
29  * - Optimize the result of a conditional break/continue. In NIR something
30  *   like:
31  *
32  * loop {
33  *    ...
34  *    if (cond)
35  *       continue;
36  *
37  * would get lowered to:
38  *
39  * block_0:
40  * ...
41  * block_1:
42  * branch_cond !cond block_3
43  * block_2:
44  * branch_uncond block_0
45  * block_3:
46  * ...
47  *
48  *   We recognize the conditional branch skipping over the unconditional
49  *   branch, and turn it into:
50  *
51  * block_0:
52  * ...
53  * block_1:
54  * branch_cond cond block_0
55  * block_2:
56  * block_3:
57  * ...
58  *
59  * - Optimize away nots of comparisons as produced by lowering ifs to
60  *   branches, and nots of nots produced by the above optimization.
61  *
62  * - DCE
63  */
64 
65 static void
optimize_branches(gpir_compiler * comp)66 optimize_branches(gpir_compiler *comp)
67 {
68    list_for_each_entry(gpir_block, block, &comp->block_list, list) {
69       /* Look for a block with a single unconditional branch. */
70       if (!list_is_singular(&block->node_list))
71          continue;
72 
73       gpir_node *node = list_first_entry(&block->node_list, gpir_node, list);
74       if (node->op != gpir_op_branch_uncond)
75          continue;
76 
77       gpir_block *target = gpir_node_to_branch(node)->dest;
78 
79       /* Find the previous block */
80       if (block->list.prev == &comp->block_list)
81          continue;
82 
83       gpir_block *prev_block = LIST_ENTRY(gpir_block, block->list.prev, list);
84       if (list_is_empty(&prev_block->node_list))
85          continue;
86 
87       /* Previous block must end with a conditional branch */
88       gpir_node *prev_block_last =
89          list_last_entry(&prev_block->node_list, gpir_node, list);
90       if (prev_block_last->op != gpir_op_branch_cond)
91          continue;
92 
93       /* That branch must branch to the block after this */
94       gpir_branch_node *prev_branch = gpir_node_to_branch(prev_block_last);
95       gpir_block *prev_target = prev_branch->dest;
96 
97       if (&prev_target->list != block->list.next)
98          continue;
99 
100       /* Hooray! Invert the conditional branch and change the target */
101       gpir_alu_node *cond = gpir_node_create(prev_block, gpir_op_not);
102       cond->children[0] = prev_branch->cond;
103       cond->num_child = 1;
104       gpir_node_add_dep(&cond->node, cond->children[0], GPIR_DEP_INPUT);
105       list_addtail(&cond->node.list, &prev_block_last->list);
106       gpir_node_insert_child(prev_block_last, prev_branch->cond, &cond->node);
107       prev_branch->dest = target;
108       prev_block->successors[1] = target;
109 
110       /* Delete the branch */
111       list_del(&node->list);
112       block->successors[0] = LIST_ENTRY(gpir_block, block->list.next, list);
113    }
114 }
115 
116 static void
optimize_not(gpir_compiler * comp)117 optimize_not(gpir_compiler *comp)
118 {
119    list_for_each_entry(gpir_block, block, &comp->block_list, list) {
120       list_for_each_entry_rev(gpir_node, node, &block->node_list, list) {
121          if (node->op != gpir_op_not)
122             continue;
123 
124          gpir_alu_node *alu = gpir_node_to_alu(node);
125 
126          gpir_node *replace = NULL;
127          if (alu->children[0]->op == gpir_op_not) {
128             /* (not (not a)) -> a */
129             gpir_alu_node *child = gpir_node_to_alu(alu->children[0]);
130             replace = child->children[0];
131          } else if (alu->children[0]->op == gpir_op_ge ||
132                     alu->children[0]->op == gpir_op_lt) {
133             /* (not (ge a, b)) -> (lt a, b) and
134              * (not (lt a, b)) -> (ge a, b)
135              */
136             gpir_alu_node *child = gpir_node_to_alu(alu->children[0]);
137             gpir_op op = alu->children[0]->op == gpir_op_ge ?
138                gpir_op_lt : gpir_op_ge;
139             gpir_alu_node *new = gpir_node_create(block, op);
140             new->children[0] = child->children[0];
141             new->children[1] = child->children[1];
142             new->num_child = 2;
143             gpir_node_add_dep(&new->node, new->children[0], GPIR_DEP_INPUT);
144             gpir_node_add_dep(&new->node, new->children[1], GPIR_DEP_INPUT);
145             list_addtail(&new->node.list, &alu->children[0]->list);
146             replace = &new->node;
147          }
148 
149          if (replace) {
150             gpir_node_replace_succ(replace, node);
151          }
152       }
153    }
154 }
155 
156 /* Does what it says. In addition to removing unused nodes from the not
157  * optimization above, we need this to remove unused load_const nodes which
158  * were created from store_output intrinsics in NIR, since we ignore the
159  * offset.
160  */
161 
162 static void
dead_code_eliminate(gpir_compiler * comp)163 dead_code_eliminate(gpir_compiler *comp)
164 {
165    list_for_each_entry(gpir_block, block, &comp->block_list, list) {
166       list_for_each_entry_safe_rev(gpir_node, node, &block->node_list, list) {
167          if (node->type != gpir_node_type_store &&
168              node->type != gpir_node_type_branch &&
169              list_is_empty(&node->succ_list)) {
170             gpir_node_delete(node);
171          }
172       }
173    }
174 
175    /* Kill all the writes to regs that are never read. All the known
176     * instances of these are coming from the cycle-breaking register
177     * created in out-of-SSA. See resolve_parallel_copy() in nir_from_ssa.c
178     * Since we kill redundant movs when we translate nir into gpir, it
179     * results in this reg being written, but never read.
180     */
181    BITSET_WORD *regs = rzalloc_array(comp, BITSET_WORD, comp->cur_reg);
182    list_for_each_entry(gpir_block, block, &comp->block_list, list) {
183       list_for_each_entry(gpir_node, node, &block->node_list, list) {
184          if (node->op != gpir_op_load_reg)
185             continue;
186          gpir_load_node *load = gpir_node_to_load(node);
187          BITSET_SET(regs, load->reg->index);
188       }
189    }
190 
191    list_for_each_entry(gpir_block, block, &comp->block_list, list) {
192       list_for_each_entry_safe(gpir_node, node, &block->node_list, list) {
193          if (node->op != gpir_op_store_reg)
194             continue;
195          gpir_store_node *store = gpir_node_to_store(node);
196          if (!BITSET_TEST(regs, store->reg->index))
197             gpir_node_delete(node);
198       }
199    }
200 
201    ralloc_free(regs);
202 }
203 
204 bool
gpir_optimize(gpir_compiler * comp)205 gpir_optimize(gpir_compiler *comp)
206 {
207    optimize_branches(comp);
208    optimize_not(comp);
209    dead_code_eliminate(comp);
210 
211    gpir_debug("after optimization\n");
212    gpir_node_print_prog_seq(comp);
213 
214    return true;
215 }
216 
217