1 /*
2  * Copyright (C) 2020 Collabora Ltd.
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21  * SOFTWARE.
22  *
23  * Authors (Collabora):
24  *      Alyssa Rosenzweig <alyssa.rosenzweig@collabora.com>
25  */
26 
27 #include "compiler.h"
28 #include "bi_print.h"
29 #include "panfrost/util/lcra.h"
30 #include "util/u_memory.h"
31 
32 static void
bi_compute_interference(bi_context * ctx,struct lcra_state * l)33 bi_compute_interference(bi_context *ctx, struct lcra_state *l)
34 {
35         bi_compute_liveness(ctx);
36 
37         bi_foreach_block(ctx, _blk) {
38                 bi_block *blk = (bi_block *) _blk;
39                 uint16_t *live = mem_dup(_blk->live_out, l->node_count * sizeof(uint16_t));
40 
41                 bi_foreach_instr_in_block_rev(blk, ins) {
42                         /* Mark all registers live after the instruction as
43                          * interfering with the destination */
44 
45                         if (ins->dest && (ins->dest < l->node_count)) {
46                                 for (unsigned i = 1; i < l->node_count; ++i) {
47                                         if (live[i])
48                                                 lcra_add_node_interference(l, ins->dest, bi_writemask(ins), i, live[i]);
49                                 }
50                         }
51 
52                         /* Update live_in */
53                         bi_liveness_ins_update(live, ins, l->node_count);
54                 }
55 
56                 free(live);
57         }
58 }
59 
60 enum {
61         BI_REG_CLASS_WORK = 0,
62 } bi_reg_class;
63 
64 static struct lcra_state *
bi_allocate_registers(bi_context * ctx,bool * success)65 bi_allocate_registers(bi_context *ctx, bool *success)
66 {
67         unsigned node_count = bi_max_temp(ctx);
68 
69         struct lcra_state *l =
70                 lcra_alloc_equations(node_count, 1);
71 
72         l->class_start[BI_REG_CLASS_WORK] = 0;
73         l->class_size[BI_REG_CLASS_WORK] = 64 * 4; /* R0 - R63, all 32-bit */
74 
75         bi_foreach_instr_global(ctx, ins) {
76                 unsigned dest = ins->dest;
77 
78                 if (!dest || (dest >= node_count))
79                         continue;
80 
81                 l->class[dest] = BI_REG_CLASS_WORK;
82                 lcra_set_alignment(l, dest, 2, 16); /* 2^2 = 4 */
83                 lcra_restrict_range(l, dest, 4);
84         }
85 
86         bi_compute_interference(ctx, l);
87 
88         *success = lcra_solve(l);
89 
90         return l;
91 }
92 
93 static unsigned
bi_reg_from_index(struct lcra_state * l,unsigned index,unsigned offset)94 bi_reg_from_index(struct lcra_state *l, unsigned index, unsigned offset)
95 {
96         /* Did we run RA for this index at all */
97         if (index >= l->node_count)
98                 return index;
99 
100         /* LCRA didn't bother solving this index (how lazy!) */
101         signed solution = l->solutions[index];
102         if (solution < 0)
103                 return index;
104 
105         assert((solution & 0x3) == 0);
106         unsigned reg = solution / 4;
107         reg += offset;
108 
109         return BIR_INDEX_REGISTER | reg;
110 }
111 
112 static void
bi_adjust_src_ra(bi_instruction * ins,struct lcra_state * l,unsigned src)113 bi_adjust_src_ra(bi_instruction *ins, struct lcra_state *l, unsigned src)
114 {
115         if (ins->src[src] >= l->node_count)
116                 return;
117 
118         bool vector = (bi_class_props[ins->type] & BI_VECTOR) && src == 0;
119         unsigned offset = 0;
120 
121         if (vector) {
122                 /* TODO: Do we do anything here? */
123         } else {
124                 /* Use the swizzle as component select */
125                 unsigned components = bi_get_component_count(ins, src);
126 
127                 nir_alu_type T = ins->src_types[src];
128                 unsigned size = nir_alu_type_get_type_size(T);
129 
130                 /* TODO: 64-bit? */
131                 unsigned components_per_word = MAX2(32 / size, 1);
132 
133                 for (unsigned i = 0; i < components; ++i) {
134                         unsigned off = ins->swizzle[src][i] / components_per_word;
135 
136                         /* We can't cross register boundaries in a swizzle */
137                         if (i == 0)
138                                 offset = off;
139                         else
140                                 assert(off == offset);
141 
142                         ins->swizzle[src][i] %= components_per_word;
143                 }
144         }
145 
146         ins->src[src] = bi_reg_from_index(l, ins->src[src], offset);
147 }
148 
149 static void
bi_adjust_dest_ra(bi_instruction * ins,struct lcra_state * l)150 bi_adjust_dest_ra(bi_instruction *ins, struct lcra_state *l)
151 {
152         if (ins->dest >= l->node_count)
153                 return;
154 
155         ins->dest = bi_reg_from_index(l, ins->dest, ins->dest_offset);
156         ins->dest_offset = 0;
157 }
158 
159 static void
bi_install_registers(bi_context * ctx,struct lcra_state * l)160 bi_install_registers(bi_context *ctx, struct lcra_state *l)
161 {
162         bi_foreach_instr_global(ctx, ins) {
163                 bi_adjust_dest_ra(ins, l);
164 
165                 bi_foreach_src(ins, s)
166                         bi_adjust_src_ra(ins, l, s);
167         }
168 }
169 
170 void
bi_register_allocate(bi_context * ctx)171 bi_register_allocate(bi_context *ctx)
172 {
173         struct lcra_state *l = NULL;
174         bool success = false;
175 
176         do {
177                 if (l) {
178                         lcra_free(l);
179                         l = NULL;
180                 }
181 
182                 bi_invalidate_liveness(ctx);
183                 l = bi_allocate_registers(ctx, &success);
184 
185                 /* TODO: Spilling */
186                 assert(success);
187         } while(!success);
188 
189         bi_install_registers(ctx, l);
190 
191         lcra_free(l);
192 }
193