/*
 * Copyright © 2014 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 *
 * Authors:
 *    Connor Abbott (cwabbott0@gmail.com)
 *
 */

#include "nir.h"
#include "nir_builder.h"
#include "nir_phi_builder.h"
#include "nir_vla.h"

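/* Per-pass state.  "values" maps each register index (assigned by
 * nir_index_local_regs) to the nir_phi_builder value tracking that register;
 * a NULL entry means the register is skipped by this pass.  "shader" is only
 * needed to allocate the vecN instructions emitted for partial ALU writes.
 */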
struct regs_to_ssa_state {
   nir_shader *shader;

   struct nir_phi_builder_value **values;
};

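/* Rewrites a single register source to the SSA def that reaches the use.
 * For phi sources the def is looked up in the predecessor block, since that
 * is where the value is actually consumed.
 */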
static bool
rewrite_src(nir_src *src, void *_state)
{
   struct regs_to_ssa_state *state = _state;

   if (src->is_ssa)
      return true;

   nir_instr *instr = src->parent_instr;
   nir_register *reg = src->reg.reg;
   struct nir_phi_builder_value *value = state->values[reg->index];
   if (!value)
      return true;

   nir_block *block;
   if (instr->type == nir_instr_type_phi) {
      nir_phi_src *phi_src = exec_node_data(nir_phi_src, src, src);
      block = phi_src->pred;
   } else {
      block = instr->block;
   }

   nir_ssa_def *def = nir_phi_builder_value_get_block_def(value, block);
   nir_instr_rewrite_src(instr, src, nir_src_for_ssa(def));

   return true;
}

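/* Rewrites a register-based if condition.  The condition is treated as a
 * use at the end of the block immediately preceding the if.
 */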
static void
rewrite_if_condition(nir_if *nif, struct regs_to_ssa_state *state)
{
   if (nif->condition.is_ssa)
      return;

   nir_block *block = nir_cf_node_as_block(nir_cf_node_prev(&nif->cf_node));
   nir_register *reg = nif->condition.reg.reg;
   struct nir_phi_builder_value *value = state->values[reg->index];
   if (!value)
      return;

   nir_ssa_def *def = nir_phi_builder_value_get_block_def(value, block);
   nir_if_rewrite_condition(nif, nir_src_for_ssa(def));
}

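/* Rewrites a whole-register destination into a fresh SSA destination and
 * records the new def with the phi builder for the instruction's block.
 */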
static bool
rewrite_dest(nir_dest *dest, void *_state)
{
   struct regs_to_ssa_state *state = _state;

   if (dest->is_ssa)
      return true;

   nir_instr *instr = dest->reg.parent_instr;
   nir_register *reg = dest->reg.reg;
   struct nir_phi_builder_value *value = state->values[reg->index];
   if (!value)
      return true;

   list_del(&dest->reg.def_link);
   nir_ssa_dest_init(instr, dest, reg->num_components,
                     reg->bit_size, NULL);

   nir_phi_builder_value_set_block_def(value, instr->block, &dest->ssa);

   return true;
}

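/* Rewrites an ALU instruction.  ALU instructions need special handling
 * because their write mask may cover only part of the register.  In that
 * case the instruction is shrunk to the enabled channels and a vecN is
 * emitted afterwards to merge the new channels with the register's previous
 * value.
 *
 * For illustration only (made-up names, not exact NIR print syntax), a
 * partial write such as
 *
 *    r0.xz = fadd a, b            (write_mask = 0x5, r0 is a vec3)
 *
 * becomes roughly
 *
 *    ssa_1 = fadd a, b            (2 components)
 *    ssa_2 = vec3 ssa_1.x, ssa_0.y, ssa_1.y
 *
 * where ssa_0 is whatever def the phi builder reports for r0 in this block.
 */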
static void
rewrite_alu_instr(nir_alu_instr *alu, struct regs_to_ssa_state *state)
{
   nir_foreach_src(&alu->instr, rewrite_src, state);

   if (alu->dest.dest.is_ssa)
      return;

   nir_register *reg = alu->dest.dest.reg.reg;
   struct nir_phi_builder_value *value = state->values[reg->index];
   if (!value)
      return;

   unsigned write_mask = alu->dest.write_mask;
   if (write_mask == (1 << reg->num_components) - 1) {
      /* This is the simple case where the instruction writes all the
       * components.  We can handle that the same as any other destination.
       */
      rewrite_dest(&alu->dest.dest, state);
      return;
   }

   /* Calculate the number of components of the final instruction: for
    * per-component operations this is the number of enabled channels in the
    * write mask, and for non-per-component operations it is the
    * instruction's output size.
    */
   unsigned num_components;
   uint8_t vec_swizzle[NIR_MAX_VEC_COMPONENTS];
   for (unsigned i = 0; i < NIR_MAX_VEC_COMPONENTS; i++)
      vec_swizzle[i] = i;

   if (nir_op_infos[alu->op].output_size == 0) {
      /* Figure out the swizzle we need on the vecN operation and compute
       * the number of components in the SSA def at the same time.
       */
      num_components = 0;
      for (unsigned index = 0; index < 4; index++) {
         if (write_mask & (1 << index))
            vec_swizzle[index] = num_components++;
      }

      /* When we change the output writemask, we need to change the
       * swizzles for per-component inputs too.
       */
      for (unsigned i = 0; i < nir_op_infos[alu->op].num_inputs; i++) {
         if (nir_op_infos[alu->op].input_sizes[i] != 0)
            continue;

         /*
          * We keep two indices:
          * 1. The index of the original (non-SSA) component
          * 2. The index of the post-SSA, compacted, component
          *
          * We need to map the swizzle component at index 1 to the swizzle
          * component at index 2.  Since index 1 is never smaller than
          * index 2, we can do it in a single loop.
          */

         unsigned ssa_index = 0;
         for (unsigned index = 0; index < 4; index++) {
            if (!((write_mask >> index) & 1))
               continue;

            alu->src[i].swizzle[ssa_index++] = alu->src[i].swizzle[index];
         }
         assert(ssa_index == num_components);
      }
   } else {
      num_components = nir_op_infos[alu->op].output_size;
   }
   assert(num_components <= 4);

   alu->dest.write_mask = (1 << num_components) - 1;
   list_del(&alu->dest.dest.reg.def_link);
   nir_ssa_dest_init(&alu->instr, &alu->dest.dest, num_components,
                     reg->bit_size, NULL);

   nir_op vecN_op = nir_op_vec(reg->num_components);

   nir_alu_instr *vec = nir_alu_instr_create(state->shader, vecN_op);

   nir_ssa_def *old_src =
      nir_phi_builder_value_get_block_def(value, alu->instr.block);
   nir_ssa_def *new_src = &alu->dest.dest.ssa;

   for (unsigned i = 0; i < reg->num_components; i++) {
      if (write_mask & (1 << i)) {
         vec->src[i].src = nir_src_for_ssa(new_src);
         vec->src[i].swizzle[0] = vec_swizzle[i];
      } else {
         vec->src[i].src = nir_src_for_ssa(old_src);
         vec->src[i].swizzle[0] = i;
      }
   }

   nir_ssa_dest_init(&vec->instr, &vec->dest.dest, reg->num_components,
                     reg->bit_size, NULL);
   nir_instr_insert(nir_after_instr(&alu->instr), &vec->instr);

   nir_phi_builder_value_set_block_def(value, alu->instr.block,
                                       &vec->dest.dest.ssa);
}

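/* Converts every non-array register in the impl to SSA form.  The pass
 * walks the blocks in an order that respects dominance, replacing register
 * defs with fresh SSA defs and register uses with whatever def the phi
 * builder reports as live at that point; the phi builder then inserts any
 * phi nodes that are needed.  Fully converted registers are removed from
 * the impl at the end.
 */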
bool
nir_lower_regs_to_ssa_impl(nir_function_impl *impl)
{
   if (exec_list_is_empty(&impl->registers))
      return false;

   nir_metadata_require(impl, nir_metadata_block_index |
                              nir_metadata_dominance);
   nir_index_local_regs(impl);

   void *dead_ctx = ralloc_context(NULL);
   struct regs_to_ssa_state state;
   state.shader = impl->function->shader;
   state.values = ralloc_array(dead_ctx, struct nir_phi_builder_value *,
                               impl->reg_alloc);

   struct nir_phi_builder *phi_build = nir_phi_builder_create(impl);

   const unsigned block_set_words = BITSET_WORDS(impl->num_blocks);
   BITSET_WORD *defs = ralloc_array(dead_ctx, BITSET_WORD, block_set_words);

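   /* Register one phi-builder value per plain register, seeded with the set
    * of blocks that contain a def of that register.
    */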
   nir_foreach_register(reg, &impl->registers) {
      if (reg->num_array_elems != 0) {
         /* This pass only really works on "plain" registers.  If it's an
          * array register, just set the value to NULL so that the rewrite
          * portion of the pass will know to ignore it.
          */
         state.values[reg->index] = NULL;
         continue;
      }

      memset(defs, 0, block_set_words * sizeof(*defs));

      nir_foreach_def(dest, reg)
         BITSET_SET(defs, dest->reg.parent_instr->block->index);

      state.values[reg->index] =
         nir_phi_builder_add_value(phi_build, reg->num_components,
                                   reg->bit_size, defs);
   }

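   /* Now rewrite all defs and uses.  ALU instructions get special treatment
    * because of partial writes, and phi sources are rewritten from their
    * predecessor blocks so that back-edges are handled correctly.
    */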
   nir_foreach_block(block, impl) {
      nir_foreach_instr(instr, block) {
         switch (instr->type) {
         case nir_instr_type_alu:
            rewrite_alu_instr(nir_instr_as_alu(instr), &state);
            break;

         case nir_instr_type_phi:
            /* We rewrite sources as a separate pass */
            nir_foreach_dest(instr, rewrite_dest, &state);
            break;

         default:
            nir_foreach_src(instr, rewrite_src, &state);
            nir_foreach_dest(instr, rewrite_dest, &state);
         }
      }

      nir_if *following_if = nir_block_get_following_if(block);
      if (following_if)
         rewrite_if_condition(following_if, &state);

      /* Handle phi sources that source from this block.  We have to do this
       * as a separate pass because the phi builder assumes that uses and
       * defs are processed in an order that respects dominance.  When we have
       * loops, a phi source may be a back-edge so we have to handle it as if
       * it were one of the last instructions in the predecessor block.
       */
      nir_foreach_phi_src_leaving_block(block, rewrite_src, &state);
   }

   nir_phi_builder_finish(phi_build);

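   /* Every register whose value was tracked has had all of its defs and
    * uses rewritten to SSA, so it can be removed.  Array registers (NULL
    * values) are left alone.
    */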
   nir_foreach_register_safe(reg, &impl->registers) {
      if (state.values[reg->index]) {
         assert(list_is_empty(&reg->uses));
         assert(list_is_empty(&reg->if_uses));
         assert(list_is_empty(&reg->defs));
         exec_node_remove(&reg->node);
      }
   }

   ralloc_free(dead_ctx);

   nir_metadata_preserve(impl, nir_metadata_block_index |
                               nir_metadata_dominance);
   return true;
}

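/* Shader-level entry point: lowers registers to SSA in every function that
 * has an implementation and reports whether anything changed.  Callers
 * typically run it through the usual pass machinery, e.g. (illustrative):
 *
 *    bool progress = false;
 *    NIR_PASS(progress, shader, nir_lower_regs_to_ssa);
 */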
bool
nir_lower_regs_to_ssa(nir_shader *shader)
{
   bool progress = false;

   nir_foreach_function(function, shader) {
      if (function->impl)
         progress |= nir_lower_regs_to_ssa_impl(function->impl);
   }

   return progress;
}