1 /*
2  * Copyright © 2012 Intel Corporation
3  * Copyright © 2016 Broadcom
4  *
5  * Permission is hereby granted, free of charge, to any person obtaining a
6  * copy of this software and associated documentation files (the "Software"),
7  * to deal in the Software without restriction, including without limitation
8  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
9  * and/or sell copies of the Software, and to permit persons to whom the
10  * Software is furnished to do so, subject to the following conditions:
11  *
12  * The above copyright notice and this permission notice (including the next
13  * paragraph) shall be included in all copies or substantial portions of the
14  * Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
19  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
21  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
22  * IN THE SOFTWARE.
23  */
24 
25 #define MAX_INSTRUCTION (1 << 30)
26 
27 #include "util/ralloc.h"
28 #include "util/register_allocate.h"
29 #include "vc4_context.h"
30 #include "vc4_qir.h"
31 
32 struct partial_update_state {
33         struct qinst *insts[4];
34         uint8_t channels;
35 };
36 
37 static int
qir_reg_to_var(struct qreg reg)38 qir_reg_to_var(struct qreg reg)
39 {
40         if (reg.file == QFILE_TEMP)
41                 return reg.index;
42 
43         return -1;
44 }
45 
46 static void
qir_setup_use(struct vc4_compile * c,struct qblock * block,int ip,struct qreg src)47 qir_setup_use(struct vc4_compile *c, struct qblock *block, int ip,
48               struct qreg src)
49 {
50         int var = qir_reg_to_var(src);
51         if (var == -1)
52                 return;
53 
54         c->temp_start[var] = MIN2(c->temp_start[var], ip);
55         c->temp_end[var] = MAX2(c->temp_end[var], ip);
56 
57         /* The use[] bitset marks when the block makes
58          * use of a variable without having completely
59          * defined that variable within the block.
60          */
61         if (!BITSET_TEST(block->def, var))
62                 BITSET_SET(block->use, var);
63 }
64 
65 static struct partial_update_state *
get_partial_update_state(struct hash_table * partial_update_ht,struct qinst * inst)66 get_partial_update_state(struct hash_table *partial_update_ht,
67                          struct qinst *inst)
68 {
69         struct hash_entry *entry =
70                 _mesa_hash_table_search(partial_update_ht,
71                                         &inst->dst.index);
72         if (entry)
73                 return entry->data;
74 
75         struct partial_update_state *state =
76                 rzalloc(partial_update_ht, struct partial_update_state);
77 
78         _mesa_hash_table_insert(partial_update_ht, &inst->dst.index, state);
79 
80         return state;
81 }
82 
83 static void
qir_setup_def(struct vc4_compile * c,struct qblock * block,int ip,struct hash_table * partial_update_ht,struct qinst * inst)84 qir_setup_def(struct vc4_compile *c, struct qblock *block, int ip,
85               struct hash_table *partial_update_ht, struct qinst *inst)
86 {
87         /* The def[] bitset marks when an initialization in a
88          * block completely screens off previous updates of
89          * that variable.
90          */
91         int var = qir_reg_to_var(inst->dst);
92         if (var == -1)
93                 return;
94 
95         c->temp_start[var] = MIN2(c->temp_start[var], ip);
96         c->temp_end[var] = MAX2(c->temp_end[var], ip);
97 
98         /* If we've already tracked this as a def, or already used it within
99          * the block, there's nothing to do.
100          */
101         if (BITSET_TEST(block->use, var) || BITSET_TEST(block->def, var))
102                 return;
103 
104         /* Easy, common case: unconditional full register update.
105          *
106          * We treat conditioning on the exec mask as the same as not being
107          * conditional.  This makes sure that if the register gets set on
108          * either side of an if, it is treated as being screened off before
109          * the if.  Otherwise, if there was no intervening def, its live
110          * interval doesn't extend back to the start of he program, and if too
111          * many registers did that we'd fail to register allocate.
112          */
113         if ((inst->cond == QPU_COND_ALWAYS ||
114              inst->cond_is_exec_mask) && !inst->dst.pack) {
115                 BITSET_SET(block->def, var);
116                 return;
117         }
118 
119         /* Finally, look at the condition code and packing and mark it as a
120          * def.  We need to make sure that we understand sequences
121          * instructions like:
122          *
123          *     mov.zs t0, t1
124          *     mov.zc t0, t2
125          *
126          * or:
127          *
128          *     mmov t0.8a, t1
129          *     mmov t0.8b, t2
130          *     mmov t0.8c, t3
131          *     mmov t0.8d, t4
132          *
133          * as defining the temp within the block, because otherwise dst's live
134          * range will get extended up the control flow to the top of the
135          * program.
136          */
137         struct partial_update_state *state =
138                 get_partial_update_state(partial_update_ht, inst);
139         uint8_t mask = qir_channels_written(inst);
140 
141         if (inst->cond == QPU_COND_ALWAYS) {
142                 state->channels |= mask;
143         } else {
144                 for (int i = 0; i < 4; i++) {
145                         if (!(mask & (1 << i)))
146                                 continue;
147 
148                         if (state->insts[i] &&
149                             state->insts[i]->cond ==
150                             qpu_cond_complement(inst->cond))
151                                 state->channels |= 1 << i;
152                         else
153                                 state->insts[i] = inst;
154                 }
155         }
156 
157         if (state->channels == 0xf)
158                 BITSET_SET(block->def, var);
159 }
160 
161 static void
sf_state_clear(struct hash_table * partial_update_ht)162 sf_state_clear(struct hash_table *partial_update_ht)
163 {
164         hash_table_foreach(partial_update_ht, entry) {
165                 struct partial_update_state *state = entry->data;
166 
167                 for (int i = 0; i < 4; i++) {
168                         if (state->insts[i] && state->insts[i]->cond)
169                                 state->insts[i] = NULL;
170                 }
171         }
172 }
173 
174 /* Sets up the def/use arrays for when variables are used-before-defined or
175  * defined-before-used in the block.
176  *
177  * Also initializes the temp_start/temp_end to cover just the instruction IPs
178  * where the variable is used, which will be extended later in
179  * qir_compute_start_end().
180  */
181 static void
qir_setup_def_use(struct vc4_compile * c)182 qir_setup_def_use(struct vc4_compile *c)
183 {
184         struct hash_table *partial_update_ht =
185                 _mesa_hash_table_create(c, _mesa_hash_int, _mesa_key_int_equal);
186         int ip = 0;
187 
188         qir_for_each_block(block, c) {
189                 block->start_ip = ip;
190 
191                 _mesa_hash_table_clear(partial_update_ht, NULL);
192 
193                 qir_for_each_inst(inst, block) {
194                         for (int i = 0; i < qir_get_nsrc(inst); i++)
195                                 qir_setup_use(c, block, ip, inst->src[i]);
196 
197                         qir_setup_def(c, block, ip, partial_update_ht, inst);
198 
199                         if (inst->sf)
200                                 sf_state_clear(partial_update_ht);
201 
202                         switch (inst->op) {
203                         case QOP_FRAG_Z:
204                         case QOP_FRAG_W:
205                                 /* The payload registers have values
206                                  * implicitly loaded at the start of the
207                                  * program.
208                                  */
209                                 if (inst->dst.file == QFILE_TEMP)
210                                         c->temp_start[inst->dst.index] = 0;
211                                 break;
212                         default:
213                                 break;
214                         }
215                         ip++;
216                 }
217                 block->end_ip = ip;
218         }
219 
220         _mesa_hash_table_destroy(partial_update_ht, NULL);
221 }
222 
223 static bool
qir_live_variables_dataflow(struct vc4_compile * c,int bitset_words)224 qir_live_variables_dataflow(struct vc4_compile *c, int bitset_words)
225 {
226         bool cont = false;
227 
228         qir_for_each_block_rev(block, c) {
229                 /* Update live_out: Any successor using the variable
230                  * on entrance needs us to have the variable live on
231                  * exit.
232                  */
233                 qir_for_each_successor(succ, block) {
234                         for (int i = 0; i < bitset_words; i++) {
235                                 BITSET_WORD new_live_out = (succ->live_in[i] &
236                                                             ~block->live_out[i]);
237                                 if (new_live_out) {
238                                         block->live_out[i] |= new_live_out;
239                                         cont = true;
240                                 }
241                         }
242                 }
243 
244                 /* Update live_in */
245                 for (int i = 0; i < bitset_words; i++) {
246                         BITSET_WORD new_live_in = (block->use[i] |
247                                                    (block->live_out[i] &
248                                                     ~block->def[i]));
249                         if (new_live_in & ~block->live_in[i]) {
250                                 block->live_in[i] |= new_live_in;
251                                 cont = true;
252                         }
253                 }
254         }
255 
256         return cont;
257 }
258 
259 /**
260  * Extend the start/end ranges for each variable to account for the
261  * new information calculated from control flow.
262  */
263 static void
qir_compute_start_end(struct vc4_compile * c,int num_vars)264 qir_compute_start_end(struct vc4_compile *c, int num_vars)
265 {
266         qir_for_each_block(block, c) {
267                 for (int i = 0; i < num_vars; i++) {
268                         if (BITSET_TEST(block->live_in, i)) {
269                                 c->temp_start[i] = MIN2(c->temp_start[i],
270                                                         block->start_ip);
271                                 c->temp_end[i] = MAX2(c->temp_end[i],
272                                                       block->start_ip);
273                         }
274 
275                         if (BITSET_TEST(block->live_out, i)) {
276                                 c->temp_start[i] = MIN2(c->temp_start[i],
277                                                         block->end_ip);
278                                 c->temp_end[i] = MAX2(c->temp_end[i],
279                                                       block->end_ip);
280                         }
281                 }
282         }
283 }
284 
285 void
qir_calculate_live_intervals(struct vc4_compile * c)286 qir_calculate_live_intervals(struct vc4_compile *c)
287 {
288         int bitset_words = BITSET_WORDS(c->num_temps);
289 
290         /* If we called this function more than once, then we should be
291          * freeing the previous arrays.
292          */
293         assert(!c->temp_start);
294 
295         c->temp_start = rzalloc_array(c, int, c->num_temps);
296         c->temp_end = rzalloc_array(c, int, c->num_temps);
297 
298         for (int i = 0; i < c->num_temps; i++) {
299                 c->temp_start[i] = MAX_INSTRUCTION;
300                 c->temp_end[i] = -1;
301         }
302 
303         qir_for_each_block(block, c) {
304                 block->def = rzalloc_array(c, BITSET_WORD, bitset_words);
305                 block->use = rzalloc_array(c, BITSET_WORD, bitset_words);
306                 block->live_in = rzalloc_array(c, BITSET_WORD, bitset_words);
307                 block->live_out = rzalloc_array(c, BITSET_WORD, bitset_words);
308         }
309 
310         qir_setup_def_use(c);
311 
312         while (qir_live_variables_dataflow(c, bitset_words))
313                 ;
314 
315         qir_compute_start_end(c, c->num_temps);
316 
317         if (vc4_debug & VC4_DEBUG_SHADERDB) {
318                 int last_ip = 0;
319                 for (int i = 0; i < c->num_temps; i++)
320                         last_ip = MAX2(last_ip, c->temp_end[i]);
321 
322                 int reg_pressure = 0;
323                 int max_reg_pressure = 0;
324                 for (int i = 0; i < last_ip; i++) {
325                         for (int j = 0; j < c->num_temps; j++) {
326                                 if (c->temp_start[j] == i)
327                                         reg_pressure++;
328                                 if (c->temp_end[j] == i)
329                                         reg_pressure--;
330                         }
331                         max_reg_pressure = MAX2(max_reg_pressure, reg_pressure);
332                 }
333 
334                 fprintf(stderr, "SHADER-DB: %s prog %d/%d: %d max temps\n",
335                         qir_get_stage_name(c->stage),
336                         c->program_id, c->variant_id,
337                         max_reg_pressure);
338         }
339 }
340