1 /*
2  * Copyright (C) 2021 Valve Corporation
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21  * SOFTWARE.
22  */
23 
24 /* This pass lowers array accesses to SSA.
25  *
26  * After this pass, instructions writing arrays implicitly read the contents of
27  * the array defined in instr->dsts[0]->def (possibly a phi node), perform the
28  * operation, and store to instr->dsts[0].
29  *
30  * This makes arrays appear like "normal" SSA values, even if the false
31  * dependencies mean that they always stay in CSSA form (i.e. able to removed
32  * out-of-SSA with no copies.) While hopefully they shouldn't induce copies in
33  * most cases, we can't make that guarantee while also splitting spilling from
34  * RA and guaranteeing a certain number of registers are used, so we have to
35  * insert the phi nodes to be able to know when copying should happen.
36  *
37  * The implementation is based on the idea in "Simple and Efficient Construction
38  * of Static Single Assignment Form" of scanning backwards to find the
39  * definition. However, since we're not doing this on-the-fly we can simplify
40  * things a little by doing a pre-pass to get the last definition of each array
41  * in each block. Then we optimize trivial phis in a separate pass, "on the fly"
42  * so that we don't have to rewrite (and keep track of) users.
43  */
44 
45 #include <stdlib.h>
46 #include "ir3.h"
47 
48 struct array_state {
49    struct ir3_register *live_in_definition;
50    struct ir3_register *live_out_definition;
51    bool constructed;
52    bool optimized;
53 };
54 
55 struct array_ctx {
56    struct array_state *states;
57    struct ir3 *ir;
58    unsigned array_count;
59 };
60 
61 static struct array_state *
get_state(struct array_ctx * ctx,struct ir3_block * block,unsigned id)62 get_state(struct array_ctx *ctx, struct ir3_block *block, unsigned id)
63 {
64    return &ctx->states[ctx->array_count * block->index + id];
65 }
66 
67 static struct ir3_register *read_value_beginning(struct array_ctx *ctx,
68                                                  struct ir3_block *block,
69                                                  struct ir3_array *arr);
70 
71 static struct ir3_register *
read_value_end(struct array_ctx * ctx,struct ir3_block * block,struct ir3_array * arr)72 read_value_end(struct array_ctx *ctx, struct ir3_block *block,
73                struct ir3_array *arr)
74 {
75    struct array_state *state = get_state(ctx, block, arr->id);
76    if (state->live_out_definition)
77       return state->live_out_definition;
78 
79    state->live_out_definition = read_value_beginning(ctx, block, arr);
80    return state->live_out_definition;
81 }
82 
83 /* Roughly equivalent to readValueRecursive from the paper: */
84 static struct ir3_register *
read_value_beginning(struct array_ctx * ctx,struct ir3_block * block,struct ir3_array * arr)85 read_value_beginning(struct array_ctx *ctx, struct ir3_block *block,
86                      struct ir3_array *arr)
87 {
88    struct array_state *state = get_state(ctx, block, arr->id);
89 
90    if (state->constructed)
91       return state->live_in_definition;
92 
93    if (block->predecessors_count == 0) {
94       state->constructed = true;
95       return NULL;
96    }
97 
98    if (block->predecessors_count == 1) {
99       state->live_in_definition =
100          read_value_end(ctx, block->predecessors[0], arr);
101       state->constructed = true;
102       return state->live_in_definition;
103    }
104 
105    unsigned flags = IR3_REG_ARRAY | (arr->half ? IR3_REG_HALF : 0);
106    struct ir3_instruction *phi =
107       ir3_instr_create(block, OPC_META_PHI, 1, block->predecessors_count);
108    list_del(&phi->node);
109    list_add(&phi->node, &block->instr_list);
110 
111    struct ir3_register *dst = __ssa_dst(phi);
112    dst->flags |= flags;
113    dst->array.id = arr->id;
114    dst->size = arr->length;
115 
116    state->live_in_definition = phi->dsts[0];
117    state->constructed = true;
118 
119    for (unsigned i = 0; i < block->predecessors_count; i++) {
120       struct ir3_register *src =
121          read_value_end(ctx, block->predecessors[i], arr);
122       struct ir3_register *src_reg;
123       if (src) {
124          src_reg = __ssa_src(phi, src->instr, flags);
125       } else {
126          src_reg = ir3_src_create(phi, INVALID_REG, flags | IR3_REG_SSA);
127       }
128       src_reg->array.id = arr->id;
129       src_reg->size = arr->length;
130    }
131    return phi->dsts[0];
132 }
133 
134 static struct ir3_register *
remove_trivial_phi(struct ir3_instruction * phi)135 remove_trivial_phi(struct ir3_instruction *phi)
136 {
137    /* Break cycles */
138    if (phi->data)
139       return phi->data;
140 
141    phi->data = phi->dsts[0];
142 
143    struct ir3_register *unique_def = NULL;
144    bool unique = true;
145    for (unsigned i = 0; i < phi->block->predecessors_count; i++) {
146       struct ir3_register *src = phi->srcs[i];
147 
148       /* If there are any undef sources, then the remaining sources may not
149        * dominate the phi node, even if they are all equal. So we need to
150        * bail out in this case.
151        *
152        * This seems to be a bug in the original paper.
153        */
154       if (!src->def) {
155          unique = false;
156          break;
157       }
158 
159       struct ir3_instruction *src_instr = src->def->instr;
160 
161       /* phi sources which point to the phi itself don't count for
162        * figuring out if the phi is trivial
163        */
164       if (src_instr == phi)
165          continue;
166 
167       if (src_instr->opc == OPC_META_PHI) {
168          src->def = remove_trivial_phi(src->def->instr);
169       }
170 
171       if (unique_def) {
172          if (unique_def != src->def) {
173             unique = false;
174             break;
175          }
176       } else {
177          unique_def = src->def;
178       }
179    }
180 
181    if (unique) {
182       phi->data = unique_def;
183       return unique_def;
184    } else {
185       return phi->dsts[0];
186    }
187 }
188 
189 static struct ir3_register *
lookup_value(struct ir3_register * reg)190 lookup_value(struct ir3_register *reg)
191 {
192    if (reg->instr->opc == OPC_META_PHI)
193       return reg->instr->data;
194    return reg;
195 }
196 
197 static struct ir3_register *
lookup_live_in(struct array_ctx * ctx,struct ir3_block * block,unsigned id)198 lookup_live_in(struct array_ctx *ctx, struct ir3_block *block, unsigned id)
199 {
200    struct array_state *state = get_state(ctx, block, id);
201    if (state->live_in_definition)
202       return lookup_value(state->live_in_definition);
203 
204    return NULL;
205 }
206 
207 bool
ir3_array_to_ssa(struct ir3 * ir)208 ir3_array_to_ssa(struct ir3 *ir)
209 {
210    struct array_ctx ctx = {};
211 
212    foreach_array (array, &ir->array_list) {
213       ctx.array_count = MAX2(ctx.array_count, array->id + 1);
214    }
215 
216    if (ctx.array_count == 0)
217       return false;
218 
219    unsigned i = 0;
220    foreach_block (block, &ir->block_list) {
221       block->index = i++;
222    }
223 
224    ctx.ir = ir;
225    ctx.states = calloc(ctx.array_count * i, sizeof(struct array_state));
226 
227    foreach_block (block, &ir->block_list) {
228       foreach_instr (instr, &block->instr_list) {
229          foreach_dst (dst, instr) {
230             if (dst->flags & IR3_REG_ARRAY) {
231                struct array_state *state =
232                   get_state(&ctx, block, dst->array.id);
233                state->live_out_definition = dst;
234             }
235          }
236       }
237    }
238 
239    foreach_block (block, &ir->block_list) {
240       foreach_instr (instr, &block->instr_list) {
241          if (instr->opc == OPC_META_PHI)
242             continue;
243 
244          foreach_dst (reg, instr) {
245             if ((reg->flags & IR3_REG_ARRAY) && !reg->tied) {
246                struct ir3_array *arr = ir3_lookup_array(ir, reg->array.id);
247 
248                /* Construct any phi nodes necessary to read this value */
249                read_value_beginning(&ctx, block, arr);
250             }
251          }
252          foreach_src (reg, instr) {
253             if ((reg->flags & IR3_REG_ARRAY) && !reg->def) {
254                struct ir3_array *arr = ir3_lookup_array(ir, reg->array.id);
255 
256                /* Construct any phi nodes necessary to read this value */
257                read_value_beginning(&ctx, block, arr);
258             }
259          }
260       }
261    }
262 
263    foreach_block (block, &ir->block_list) {
264       foreach_instr_safe (instr, &block->instr_list) {
265          if (instr->opc == OPC_META_PHI)
266             remove_trivial_phi(instr);
267          else
268             break;
269       }
270    }
271 
272    foreach_block (block, &ir->block_list) {
273       foreach_instr_safe (instr, &block->instr_list) {
274          if (instr->opc == OPC_META_PHI) {
275             if (!(instr->flags & IR3_REG_ARRAY))
276                continue;
277             if (instr->data != instr->dsts[0]) {
278                list_del(&instr->node);
279                continue;
280             }
281             for (unsigned i = 0; i < instr->srcs_count; i++) {
282                instr->srcs[i] = lookup_value(instr->srcs[i]);
283             }
284          } else {
285             foreach_dst (reg, instr) {
286                if ((reg->flags & IR3_REG_ARRAY)) {
287                   if (!reg->tied) {
288                      struct ir3_register *def =
289                         lookup_live_in(&ctx, block, reg->array.id);
290                      if (def)
291                         ir3_reg_set_last_array(instr, reg, def);
292                   }
293                   reg->flags |= IR3_REG_SSA;
294                }
295             }
296             foreach_src (reg, instr) {
297                if ((reg->flags & IR3_REG_ARRAY)) {
298                   /* It is assumed that before calling
299                    * ir3_array_to_ssa(), reg->def was set to the
300                    * previous writer of the array within the current
301                    * block or NULL if none.
302                    */
303                   if (!reg->def) {
304                      reg->def = lookup_live_in(&ctx, block, reg->array.id);
305                   }
306                   reg->flags |= IR3_REG_SSA;
307                }
308             }
309          }
310       }
311    }
312 
313    free(ctx.states);
314    return true;
315 }
316