1 /*
2  * Copyright © 2012 Intel Corporation
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21  * IN THE SOFTWARE.
22  *
23  * Authors:
24  *    Eric Anholt <eric@anholt.net>
25  *
26  */
27 
28 #include "brw_vec4.h"
29 #include "brw_vec4_live_variables.h"
30 
31 using namespace brw;
32 
33 #define MAX_INSTRUCTION (1 << 30)
34 
35 /** @file brw_vec4_live_variables.cpp
36  *
37  * Support for computing at the basic block level which variables
38  * (virtual GRFs in our case) are live at entry and exit.
39  *
40  * See Muchnick's Advanced Compiler Design and Implementation, section
41  * 14.1 (p444).
42  */
43 
44 /**
45  * Sets up the use/def arrays and block-local approximation of the live ranges.
46  *
47  * The basic-block-level live variable analysis needs to know which
48  * variables get used before they're completely defined, and which
49  * variables are completely defined before they're used.
50  *
51  * We independently track each channel of a vec4.  This is because we need to
52  * be able to recognize a sequence like:
53  *
54  * ...
55  * DP4 tmp.x a b;
56  * DP4 tmp.y c d;
57  * MUL result.xy tmp.xy e.xy
58  * ...
59  *
60  * as having tmp live only across that sequence (assuming it's used nowhere
61  * else), because it's a common pattern.  A more conservative approach that
62  * doesn't get tmp marked a deffed in this block will tend to result in
63  * spilling.
64  */
65 void
setup_def_use()66 vec4_live_variables::setup_def_use()
67 {
68    int ip = 0;
69 
70    foreach_block (block, cfg) {
71       assert(ip == block->start_ip);
72       if (block->num > 0)
73 	 assert(cfg->blocks[block->num - 1]->end_ip == ip - 1);
74 
75       foreach_inst_in_block(vec4_instruction, inst, block) {
76          struct block_data *bd = &block_data[block->num];
77 
78          /* Set up the instruction uses. */
79 	 for (unsigned int i = 0; i < 3; i++) {
80 	    if (inst->src[i].file == VGRF) {
81                for (unsigned j = 0; j < DIV_ROUND_UP(inst->size_read(i), 16); j++) {
82                   for (int c = 0; c < 4; c++) {
83                      const unsigned v = var_from_reg(alloc, inst->src[i], c, j);
84 
85                      start[v] = MIN2(start[v], ip);
86                      end[v] = ip;
87 
88                      if (!BITSET_TEST(bd->def, v))
89                         BITSET_SET(bd->use, v);
90                   }
91                }
92 	    }
93 	 }
94          for (unsigned c = 0; c < 4; c++) {
95             if (inst->reads_flag(c) &&
96                 !BITSET_TEST(bd->flag_def, c)) {
97                BITSET_SET(bd->flag_use, c);
98             }
99          }
100 
101          /* Set up the instruction defs. */
102          if (inst->dst.file == VGRF) {
103             for (unsigned i = 0; i < DIV_ROUND_UP(inst->size_written, 16); i++) {
104                for (int c = 0; c < 4; c++) {
105                   if (inst->dst.writemask & (1 << c)) {
106                      const unsigned v = var_from_reg(alloc, inst->dst, c, i);
107 
108                      start[v] = MIN2(start[v], ip);
109                      end[v] = ip;
110 
111                      /* Check for unconditional register writes, these are the
112                       * things that screen off preceding definitions of a
113                       * variable, and thus qualify for being in def[].
114                       */
115                      if ((!inst->predicate || inst->opcode == BRW_OPCODE_SEL) &&
116                          !BITSET_TEST(bd->use, v))
117                         BITSET_SET(bd->def, v);
118                   }
119                }
120             }
121          }
122          if (inst->writes_flag(devinfo)) {
123             for (unsigned c = 0; c < 4; c++) {
124                if ((inst->dst.writemask & (1 << c)) &&
125                    !BITSET_TEST(bd->flag_use, c)) {
126                   BITSET_SET(bd->flag_def, c);
127                }
128             }
129          }
130 
131 	 ip++;
132       }
133    }
134 }
135 
136 /**
137  * The algorithm incrementally sets bits in liveout and livein,
138  * propagating it through control flow.  It will eventually terminate
139  * because it only ever adds bits, and stops when no bits are added in
140  * a pass.
141  */
142 void
compute_live_variables()143 vec4_live_variables::compute_live_variables()
144 {
145    bool cont = true;
146 
147    while (cont) {
148       cont = false;
149 
150       foreach_block_reverse (block, cfg) {
151          struct block_data *bd = &block_data[block->num];
152 
153 	 /* Update liveout */
154 	 foreach_list_typed(bblock_link, child_link, link, &block->children) {
155        struct block_data *child_bd = &block_data[child_link->block->num];
156 
157 	    for (int i = 0; i < bitset_words; i++) {
158                BITSET_WORD new_liveout = (child_bd->livein[i] &
159                                           ~bd->liveout[i]);
160                if (new_liveout) {
161                   bd->liveout[i] |= new_liveout;
162 		  cont = true;
163 	       }
164 	    }
165             BITSET_WORD new_liveout = (child_bd->flag_livein[0] &
166                                        ~bd->flag_liveout[0]);
167             if (new_liveout) {
168                bd->flag_liveout[0] |= new_liveout;
169                cont = true;
170             }
171 	 }
172 
173          /* Update livein */
174          for (int i = 0; i < bitset_words; i++) {
175             BITSET_WORD new_livein = (bd->use[i] |
176                                       (bd->liveout[i] &
177                                        ~bd->def[i]));
178             if (new_livein & ~bd->livein[i]) {
179                bd->livein[i] |= new_livein;
180                cont = true;
181             }
182          }
183          BITSET_WORD new_livein = (bd->flag_use[0] |
184                                    (bd->flag_liveout[0] &
185                                     ~bd->flag_def[0]));
186          if (new_livein & ~bd->flag_livein[0]) {
187             bd->flag_livein[0] |= new_livein;
188             cont = true;
189          }
190       }
191    }
192 }
193 
194 /**
195  * Extend the start/end ranges for each variable to account for the
196  * new information calculated from control flow.
197  */
198 void
compute_start_end()199 vec4_live_variables::compute_start_end()
200 {
201    foreach_block (block, cfg) {
202       const struct block_data &bd = block_data[block->num];
203 
204       for (int i = 0; i < num_vars; i++) {
205          if (BITSET_TEST(bd.livein, i)) {
206             start[i] = MIN2(start[i], block->start_ip);
207             end[i] = MAX2(end[i], block->start_ip);
208          }
209 
210          if (BITSET_TEST(bd.liveout, i)) {
211             start[i] = MIN2(start[i], block->end_ip);
212             end[i] = MAX2(end[i], block->end_ip);
213          }
214       }
215    }
216 }
217 
vec4_live_variables(const backend_shader * s)218 vec4_live_variables::vec4_live_variables(const backend_shader *s)
219    : alloc(s->alloc), cfg(s->cfg)
220 {
221    mem_ctx = ralloc_context(NULL);
222 
223    num_vars = alloc.total_size * 8;
224    start = ralloc_array(mem_ctx, int, num_vars);
225    end = ralloc_array(mem_ctx, int, num_vars);
226 
227    for (int i = 0; i < num_vars; i++) {
228       start[i] = MAX_INSTRUCTION;
229       end[i] = -1;
230    }
231 
232    devinfo = s->compiler->devinfo;
233 
234    block_data = rzalloc_array(mem_ctx, struct block_data, cfg->num_blocks);
235 
236    bitset_words = BITSET_WORDS(num_vars);
237    for (int i = 0; i < cfg->num_blocks; i++) {
238       block_data[i].def = rzalloc_array(mem_ctx, BITSET_WORD, bitset_words);
239       block_data[i].use = rzalloc_array(mem_ctx, BITSET_WORD, bitset_words);
240       block_data[i].livein = rzalloc_array(mem_ctx, BITSET_WORD, bitset_words);
241       block_data[i].liveout = rzalloc_array(mem_ctx, BITSET_WORD, bitset_words);
242 
243       block_data[i].flag_def[0] = 0;
244       block_data[i].flag_use[0] = 0;
245       block_data[i].flag_livein[0] = 0;
246       block_data[i].flag_liveout[0] = 0;
247    }
248 
249    setup_def_use();
250    compute_live_variables();
251    compute_start_end();
252 }
253 
~vec4_live_variables()254 vec4_live_variables::~vec4_live_variables()
255 {
256    ralloc_free(mem_ctx);
257 }
258 
259 static bool
check_register_live_range(const vec4_live_variables * live,int ip,unsigned var,unsigned n)260 check_register_live_range(const vec4_live_variables *live, int ip,
261                           unsigned var, unsigned n)
262 {
263    for (unsigned j = 0; j < n; j += 4) {
264       if (var + j >= unsigned(live->num_vars) ||
265           live->start[var + j] > ip || live->end[var + j] < ip)
266          return false;
267    }
268 
269    return true;
270 }
271 
272 bool
validate(const backend_shader * s) const273 vec4_live_variables::validate(const backend_shader *s) const
274 {
275    unsigned ip = 0;
276 
277    foreach_block_and_inst(block, vec4_instruction, inst, s->cfg) {
278       for (unsigned c = 0; c < 4; c++) {
279          if (inst->dst.writemask & (1 << c)) {
280             for (unsigned i = 0; i < 3; i++) {
281                if (inst->src[i].file == VGRF &&
282                    !check_register_live_range(this, ip,
283                                               var_from_reg(alloc, inst->src[i], c),
284                                               regs_read(inst, i)))
285                   return false;
286             }
287 
288             if (inst->dst.file == VGRF &&
289                 !check_register_live_range(this, ip,
290                                            var_from_reg(alloc, inst->dst, c),
291                                            regs_written(inst)))
292                return false;
293          }
294       }
295 
296       ip++;
297    }
298 
299    return true;
300 }
301 
302 int
var_range_start(unsigned v,unsigned n) const303 vec4_live_variables::var_range_start(unsigned v, unsigned n) const
304 {
305    int ip = INT_MAX;
306 
307    for (unsigned i = 0; i < n; i++)
308       ip = MIN2(ip, start[v + i]);
309 
310    return ip;
311 }
312 
313 int
var_range_end(unsigned v,unsigned n) const314 vec4_live_variables::var_range_end(unsigned v, unsigned n) const
315 {
316    int ip = INT_MIN;
317 
318    for (unsigned i = 0; i < n; i++)
319       ip = MAX2(ip, end[v + i]);
320 
321    return ip;
322 }
323 
324 bool
vgrfs_interfere(int a,int b) const325 vec4_live_variables::vgrfs_interfere(int a, int b) const
326 {
327    return !((var_range_end(8 * alloc.offsets[a], 8 * alloc.sizes[a]) <=
328              var_range_start(8 * alloc.offsets[b], 8 * alloc.sizes[b])) ||
329             (var_range_end(8 * alloc.offsets[b], 8 * alloc.sizes[b]) <=
330              var_range_start(8 * alloc.offsets[a], 8 * alloc.sizes[a])));
331 }
332