1 /*
2  * Copyright 2013 Vadim Girlin <vadimgirlin@gmail.com>
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * on the rights to use, copy, modify, merge, publish, distribute, sub
8  * license, and/or sell copies of the Software, and to permit persons to whom
9  * the Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
18  * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
19  * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
20  * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
21  * USE OR OTHER DEALINGS IN THE SOFTWARE.
22  *
23  * Authors:
24  *      Vadim Girlin
25  */
26 
27 #define IFC_DEBUG 0
28 
29 #if IFC_DEBUG
30 #define IFC_DUMP(q) do { q } while (0)
31 #else
32 #define IFC_DUMP(q)
33 #endif
34 
35 #include "sb_shader.h"
36 #include "sb_pass.h"
37 
38 namespace r600_sb {
39 
run()40 int if_conversion::run() {
41 
42 	regions_vec &rv = sh.get_regions();
43 
44 	unsigned converted = 0;
45 	for (regions_vec::reverse_iterator I = rv.rbegin(); I != rv.rend(); ) {
46 		region_node *r = *I;
47 		if (run_on(r)) {
48 			I = regions_vec::reverse_iterator(rv.erase((++I).base()));
49 			++converted;
50 		} else
51 			++I;
52 	}
53 	return 0;
54 }
55 
convert_kill_instructions(region_node * r,value * em,bool branch,container_node * c)56 void if_conversion::convert_kill_instructions(region_node *r,
57                                               value *em, bool branch,
58                                               container_node *c) {
59 	value *cnd = NULL;
60 
61 	for (node_iterator I = c->begin(), E = c->end(), N; I != E; I = N) {
62 		N = I + 1;
63 
64 		if (!I->is_alu_inst())
65 			continue;
66 
67 		alu_node *a = static_cast<alu_node*>(*I);
68 		unsigned flags = a->bc.op_ptr->flags;
69 
70 		if (!(flags & AF_KILL))
71 			continue;
72 
73 		// ignore predicated or non-const kill instructions
74 		if (a->pred || !a->src[0]->is_const() || !a->src[1]->is_const())
75 			continue;
76 
77 		literal l0 = a->src[0]->literal_value;
78 		literal l1 = a->src[1]->literal_value;
79 
80 		expr_handler::apply_alu_src_mod(a->bc, 0, l0);
81 		expr_handler::apply_alu_src_mod(a->bc, 1, l1);
82 
83 		if (expr_handler::evaluate_condition(flags, l0, l1)) {
84 			// kill with constant 'true' condition, we'll convert it to the
85 			// conditional kill outside of the if-then-else block
86 
87 			a->remove();
88 
89 			if (!cnd) {
90 				cnd = get_select_value_for_em(sh, em);
91 			} else {
92 				// more than one kill with the same condition, just remove it
93 				continue;
94 			}
95 
96 			r->insert_before(a);
97 			a->bc.set_op(branch ? ALU_OP2_KILLE_INT : ALU_OP2_KILLNE_INT);
98 
99 			a->src[0] = cnd;
100 			a->src[1] = sh.get_const_value(0);
101 			// clear modifiers
102 			a->bc.src[0].clear();
103 			a->bc.src[1].clear();
104 		} else {
105 			// kill with constant 'false' condition, this shouldn't happen
106 			// but remove it anyway
107 			a->remove();
108 		}
109 	}
110 }
111 
check_and_convert(region_node * r)112 bool if_conversion::check_and_convert(region_node *r) {
113 
114 	depart_node *nd1 = static_cast<depart_node*>(r->first);
115 	if (!nd1->is_depart() || nd1->target != r)
116 		return false;
117 	if_node *nif = static_cast<if_node*>(nd1->first);
118 	if (!nif->is_if())
119 		return false;
120 	depart_node *nd2 = static_cast<depart_node*>(nif->first);
121 	if (!nd2->is_depart() || nd2->target != r)
122 		return false;
123 
124 	value* &em = nif->cond;
125 
126 	node_stats s;
127 
128 	r->collect_stats(s);
129 
130 	IFC_DUMP(
131 		sblog << "ifcvt: region " << r->region_id << " :\n";
132 		s.dump();
133 	);
134 
135 	if (s.region_count || s.fetch_count || s.alu_kill_count ||
136                        s.if_count != 1 || s.repeat_count || s.uses_ar)
137 		return false;
138 
139 	unsigned real_alu_count = s.alu_count - s.alu_copy_mov_count;
140 
141 	// if_conversion allows to eliminate JUMP-ALU_POP_AFTER or
142 	// JUMP-ALU-ELSE-ALU_POP_AFTER, for now let's assume that 3 CF instructions
143 	// are eliminated. According to the docs, cost of CF instruction is
144 	// equal to ~40 ALU VLIW instructions (instruction groups),
145 	// so we have eliminated cost equal to ~120 groups in total.
146 	// Let's also assume that we have avg 3 ALU instructions per group,
147 	// This means that potential eliminated cost is about 360 single alu inst.
148 	// On the other hand, we are speculatively executing conditional code now,
149 	// so we are increasing the cost in some cases. In the worst case, we'll
150 	// have to execute real_alu_count additional alu instructions instead of
151 	// jumping over them. Let's assume for now that average added cost is
152 	//
153 	// (0.9 * real_alu_count)
154 	//
155 	// So we should perform if_conversion if
156 	//
157 	// (0.9 * real_alu_count) < 360, or
158 	//
159 	// real_alu_count < 400
160 	//
161 	// So if real_alu_count is more than 400, than we think that if_conversion
162 	// doesn't make sense.
163 
164 	// FIXME: We can use more precise heuristic, taking into account sizes of
165 	// the branches and their probability instead of total size.
166 	// Another way to improve this is to consider the number of the groups
167 	// instead of the number of instructions (taking into account actual VLIW
168 	// packing).
169 	// (Currently we don't know anything about packing at this stage, but
170 	// probably we can make some more precise estimations anyway)
171 
172 	if (real_alu_count > 400)
173 		return false;
174 
175 	IFC_DUMP( sblog << "if_cvt: processing...\n"; );
176 
177 	value *select = get_select_value_for_em(sh, em);
178 
179 	if (!select)
180 		return false;
181 
182 	for (node_iterator I = r->phi->begin(), E = r->phi->end(); I != E; ++I) {
183 		node *n = *I;
184 
185 		alu_node *ns = convert_phi(select, n);
186 
187 		if (ns)
188 			r->insert_after(ns);
189 	}
190 
191 	nd2->expand();
192 	nif->expand();
193 	nd1->expand();
194 	r->expand();
195 
196 	return true;
197 }
198 
run_on(region_node * r)199 bool if_conversion::run_on(region_node* r) {
200 
201 	if (r->dep_count() != 2 || r->rep_count() != 1)
202 		return false;
203 
204 	depart_node *nd1 = static_cast<depart_node*>(r->first);
205 	if (!nd1->is_depart())
206 		return false;
207 	if_node *nif = static_cast<if_node*>(nd1->first);
208 	if (!nif->is_if())
209 		return false;
210 	depart_node *nd2 = static_cast<depart_node*>(nif->first);
211 	if (!nd2->is_depart())
212 		return false;
213 
214 	value* &em = nif->cond;
215 
216 	convert_kill_instructions(r, em, true, nd2);
217 	convert_kill_instructions(r, em, false, nd1);
218 
219 	if (check_and_convert(r))
220 		return true;
221 
222 	if (nd2->empty() && nif->next) {
223 		// empty true branch, non-empty false branch
224 		// we'll invert it to get rid of 'else'
225 
226 		assert(em && em->def);
227 
228 		alu_node *predset = static_cast<alu_node*>(em->def);
229 
230 		// create clone of PREDSET instruction with inverted condition.
231 		// PREDSET has 3 dst operands in our IR (value written to gpr,
232 		// predicate value and exec mask value), we'll split it such that
233 		// new PREDSET will define exec mask value only, and two others will
234 		// be defined in the old PREDSET (if they are not used then DCE will
235 		// simply remove old PREDSET).
236 
237 		alu_node *newpredset = sh.clone(predset);
238 		predset->insert_after(newpredset);
239 
240 		predset->dst[2] = NULL;
241 
242 		newpredset->dst[0] = NULL;
243 		newpredset->dst[1] = NULL;
244 
245 		em->def = newpredset;
246 
247 		unsigned cc = newpredset->bc.op_ptr->flags & AF_CC_MASK;
248 		unsigned cmptype = newpredset->bc.op_ptr->flags & AF_CMP_TYPE_MASK;
249 		bool swapargs = false;
250 
251 		cc = invert_setcc_condition(cc, swapargs);
252 
253 		if (swapargs) {
254 			std::swap(newpredset->src[0], newpredset->src[1]);
255 			std::swap(newpredset->bc.src[0], newpredset->bc.src[1]);
256 		}
257 
258 		unsigned newopcode = get_predsetcc_op(cc, cmptype);
259 		newpredset->bc.set_op(newopcode);
260 
261 		// move the code from the 'false' branch ('else') to the 'true' branch
262 		nd2->move(nif->next, NULL);
263 
264 		// swap phi operands
265 		for (node_iterator I = r->phi->begin(), E = r->phi->end(); I != E;
266 				++I) {
267 			node *p = *I;
268 			assert(p->src.size() == 2);
269 			std::swap(p->src[0], p->src[1]);
270 		}
271 	}
272 
273 	return false;
274 }
275 
convert_phi(value * select,node * phi)276 alu_node* if_conversion::convert_phi(value* select, node* phi) {
277 	assert(phi->dst.size() == 1 || phi->src.size() == 2);
278 
279 	value *d = phi->dst[0];
280 	value *v1 = phi->src[0];
281 	value *v2 = phi->src[1];
282 
283 	assert(d);
284 
285 	if (!d->is_any_gpr())
286 		return NULL;
287 
288 	if (v1->is_undef()) {
289 		if (v2->is_undef()) {
290 			return NULL;
291 		} else {
292 			return sh.create_mov(d, v2);
293 		}
294 	} else if (v2->is_undef())
295 		return sh.create_mov(d, v1);
296 
297 	alu_node* n = sh.create_alu();
298 
299 	n->bc.set_op(ALU_OP3_CNDE_INT);
300 	n->dst.push_back(d);
301 	n->src.push_back(select);
302 	n->src.push_back(v1);
303 	n->src.push_back(v2);
304 
305 	return n;
306 }
307 
308 } // namespace r600_sb
309