1 /*
2  * Copyright 2013 Vadim Girlin <vadimgirlin@gmail.com>
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * on the rights to use, copy, modify, merge, publish, distribute, sub
8  * license, and/or sell copies of the Software, and to permit persons to whom
9  * the Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
18  * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
19  * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
20  * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
21  * USE OR OTHER DEALINGS IN THE SOFTWARE.
22  *
23  * Authors:
24  *      Vadim Girlin
25  */
26 
27 #ifndef SB_SHADER_H_
28 #define SB_SHADER_H_
29 
#include <algorithm>
#include <list>
#include <map>
#include <set>
#include <string>
#include <utility>
#include <vector>

#include "sb_ir.h"
#include "sb_expr.h"
36 
37 namespace r600_sb {
38 
// Per-input record; shader keeps these in an inputs_vec.
struct shader_input {
	unsigned comp_mask; // presumably a per-component bit mask - TODO confirm
	unsigned preloaded; // presumably nonzero for preloaded inputs - TODO confirm
};
43 
44 struct error_info {
45 	node *n;
46 	unsigned arg_index;
47 	std::string message;
48 };
49 
50 typedef std::multimap<node*, error_info> error_map;
51 
52 class sb_context;
53 
54 typedef std::vector<shader_input> inputs_vec;
55 typedef std::vector<gpr_array*> gpr_array_vec;
56 
57 struct ra_edge {
58 	value *a, *b;
59 	unsigned cost;
60 
ra_edgera_edge61 	ra_edge(value *a, value *b, unsigned cost) : a(a), b(b), cost(cost) {}
62 };
63 
// Bit flags stored in ra_chunk::flags. Each enumerator is a distinct bit so
// the flags can be combined with the bitwise operators declared for this type.
enum chunk_flags {
	RCF_GLOBAL = (1 << 0),
	RCF_PIN_CHAN = (1 << 1),
	RCF_PIN_REG = (1 << 2),

	RCF_FIXED = (1 << 3),

	RCF_PREALLOC = (1 << 4)
};
73 
// Bit flags, one bit per enumerator, combinable with operator| below.
// Presumably these select dead-code-elimination behaviour - TODO confirm.
enum dce_flags {
	DF_REMOVE_DEAD  = (1 << 0),
	DF_REMOVE_UNUSED = (1 << 1),
	DF_EXPAND = (1 << 2)
};

// Bitwise OR of two dce_flags values, returned as dce_flags so that
// combinations keep the enum type instead of decaying to int.
inline dce_flags operator |(dce_flags l, dce_flags r) {
	return static_cast<dce_flags>(
			static_cast<unsigned>(l) | static_cast<unsigned>(r));
}
83 
84 inline chunk_flags operator |(chunk_flags l, chunk_flags r) {
85 	return (chunk_flags)((unsigned)l|(unsigned)r);
86 }
87 inline chunk_flags& operator |=(chunk_flags &l, chunk_flags r) {
88 	l = l | r;
89 	return l;
90 }
91 
92 inline chunk_flags& operator &=(chunk_flags &l, chunk_flags r) {
93 	l = (chunk_flags)((unsigned)l & (unsigned)r);
94 	return l;
95 }
96 
97 inline chunk_flags operator ~(chunk_flags r) {
98 	return (chunk_flags)~(unsigned)r;
99 }
100 
101 struct ra_chunk {
102 	vvec values;
103 	chunk_flags flags;
104 	unsigned cost;
105 	sel_chan pin;
106 
ra_chunkra_chunk107 	ra_chunk() : values(), flags(), cost(), pin() {}
108 
is_fixedra_chunk109 	bool is_fixed() { return flags & RCF_FIXED; }
fixra_chunk110 	void fix() { flags |= RCF_FIXED; }
111 
is_globalra_chunk112 	bool is_global() { return flags & RCF_GLOBAL; }
set_globalra_chunk113 	void set_global() {	flags |= RCF_GLOBAL; }
114 
is_reg_pinnedra_chunk115 	bool is_reg_pinned() { return flags & RCF_PIN_REG; }
is_chan_pinnedra_chunk116 	bool is_chan_pinned() { return flags & RCF_PIN_CHAN; }
117 
is_preallocra_chunk118 	bool is_prealloc() { return flags & RCF_PREALLOC; }
set_preallocra_chunk119 	void set_prealloc() { flags |= RCF_PREALLOC; }
120 };
121 
122 typedef std::vector<ra_chunk*> chunk_vector;
123 
124 class ra_constraint {
125 public:
ra_constraint(constraint_kind kind)126 	ra_constraint(constraint_kind kind) : kind(kind), cost(0) {}
127 
128 	constraint_kind kind;
129 	vvec values;
130 	unsigned cost;
131 
132 	void update_values();
133 	bool check();
134 };
135 
136 typedef std::vector<ra_constraint*> constraint_vec;
137 typedef std::vector<ra_chunk*> chunk_vec;
138 
// priority queue
// FIXME: use something more suitable or a custom class?
141 
// Ordering predicate for pointer-like T exposing a `cost` member:
// returns true when t1's cost is strictly greater than t2's, so sorting
// with it places higher-cost elements first.
template <class T>
struct cost_compare {
	// const, so the predicate can also be invoked through a const object.
	bool operator ()(const T& t1, const T& t2) const {
		return t1->cost > t2->cost;
	}
};
148 
// Sequence of T kept sorted according to Comp, stored in a std::vector.
// Comp must be default-constructible and usable as a strict weak ordering
// with std::upper_bound / std::equal_range.
template <class T, class Comp>
class queue {
	typedef std::vector<T> container;
	container cont;

public:
	queue() : cont() {}

	typedef typename container::iterator iterator;

	iterator begin() { return cont.begin(); }
	iterator end() { return cont.end(); }

	// Inserts t after all existing elements that are equivalent to it under
	// Comp and returns an iterator to the newly inserted element.
	iterator insert(const T& t) {
		iterator pos = std::upper_bound(begin(), end(), t, Comp());
		// vector::insert accepts end() as position and returns a valid
		// iterator to the new element; `pos` itself is invalidated by the
		// insertion and must not be handed back to the caller.
		return cont.insert(pos, t);
	}

	// Removes the first element equal to t (operator==) among the elements
	// equivalent to t under Comp; does nothing when there is no such element.
	void erase(const T& t) {
		std::pair<iterator, iterator> range =
				std::equal_range(begin(), end(), t, Comp());
		iterator found = std::find(range.first, range.second, t);
		if (found != range.second)
			cont.erase(found);
	}
};
180 
181 typedef queue<ra_chunk*, cost_compare<ra_chunk*> > chunk_queue;
182 typedef queue<ra_edge*, cost_compare<ra_edge*> > edge_queue;
183 typedef queue<ra_constraint*, cost_compare<ra_constraint*> > constraint_queue;
184 
185 typedef std::set<ra_chunk*> chunk_set;
186 
187 class shader;
188 
189 class coalescer {
190 
191 	shader &sh;
192 
193 	edge_queue edges;
194 	chunk_queue chunks;
195 	constraint_queue constraints;
196 
197 	constraint_vec all_constraints;
198 	chunk_vec all_chunks;
199 
200 public:
201 
coalescer(shader & sh)202 	coalescer(shader &sh) : sh(sh), edges(), chunks(), constraints() {}
203 	~coalescer();
204 
205 	int run();
206 
207 	void add_edge(value *a, value *b, unsigned cost);
208 	void build_chunks();
209 	void build_constraint_queue();
210 	void build_chunk_queue();
211 	int color_constraints();
212 	void color_chunks();
213 
214 	ra_constraint* create_constraint(constraint_kind kind);
215 
216 	enum ac_cost {
217 		phi_cost = 10000,
218 		copy_cost = 1,
219 	};
220 
221 	void dump_edges();
222 	void dump_chunks();
223 	void dump_constraint_queue();
224 
225 	static void dump_chunk(ra_chunk *c);
226 	static void dump_constraint(ra_constraint* c);
227 
228 	void get_chunk_interferences(ra_chunk *c, val_set &s);
229 
230 private:
231 
232 	void create_chunk(value *v);
233 	void unify_chunks(ra_edge *e);
234 	bool chunks_interference(ra_chunk *c1, ra_chunk *c2);
235 
236 	int color_reg_constraint(ra_constraint *c);
237 	void color_phi_constraint(ra_constraint *c);
238 
239 
240 	void init_reg_bitset(sb_bitset &bs, val_set &vs);
241 
242 	void color_chunk(ra_chunk *c, sel_chan color);
243 
244 	ra_chunk* detach_value(value *v);
245 };
246 
247 
248 
249 class shader {
250 
251 	sb_context &ctx;
252 
253 	typedef sb_map<uint32_t, value*> value_map;
254 	value_map reg_values;
255 
256 	// read-only values
257 	value_map const_values; // immediate constants key -const  value (uint32_t)
258 	value_map special_ro_values; //  key - hw alu_sel & chan
259 	value_map kcache_values;
260 
261 	gpr_array_vec gpr_arrays;
262 
263 	unsigned next_temp_value_index;
264 
265 	unsigned prep_regs_count;
266 
267 	value* pred_sels[2];
268 
269 	regions_vec regions;
270 	inputs_vec inputs;
271 
272 	value *undef;
273 
274 	sb_value_pool val_pool;
275 	sb_pool pool;
276 
277 	std::vector<node*> all_nodes;
278 
279 public:
280 	shader_stats src_stats, opt_stats;
281 
282 	error_map errors;
283 
284 	bool optimized;
285 
286 	unsigned id;
287 
288 	coalescer coal;
289 
290 	static const unsigned temp_regid_offset = 512;
291 
292 	bbs_vec bbs;
293 
294 	const shader_target target;
295 
296 	value_table vt;
297 	expr_handler ex;
298 
299 	container_node *root;
300 
301 	bool compute_interferences;
302 
303 	bool has_alu_predication;
304 	bool uses_gradients;
305 
306 	bool safe_math;
307 
308 	unsigned ngpr, nstack;
309 
310 	unsigned dce_flags;
311 
312 	shader(sb_context &sctx, shader_target t, unsigned id);
313 
314 	~shader();
315 
get_ctx()316 	sb_context &get_ctx() const { return ctx; }
317 
318 	value* get_const_value(const literal & v);
319 	value* get_special_value(unsigned sv_id, unsigned version = 0);
320 	value* create_temp_value();
321 	value* get_gpr_value(bool src, unsigned reg, unsigned chan, bool rel,
322                          unsigned version = 0);
323 
324 
325 	value* get_special_ro_value(unsigned sel);
326 	value* get_kcache_value(unsigned bank, unsigned index, unsigned chan, alu_kcache_index_mode index_mode);
327 
328 	value* get_value_version(value* v, unsigned ver);
329 
330 	void init();
331 	void add_pinned_gpr_values(vvec& vec, unsigned gpr, unsigned comp_mask, bool src);
332 
333 	void dump_ir();
334 
335 	void add_gpr_array(unsigned gpr_start, unsigned gpr_count,
336 	                   unsigned comp_mask);
337 
338 	value* get_pred_sel(int sel);
339 	bool assign_slot(alu_node *n, alu_node *slots[5]);
340 
341 	gpr_array* get_gpr_array(unsigned reg, unsigned chan);
342 
343 	void add_input(unsigned gpr, bool preloaded = false,
344 	               unsigned comp_mask = 0xF);
345 
get_inputs()346 	const inputs_vec & get_inputs() {return inputs; }
347 
get_regions()348 	regions_vec & get_regions() { return regions; }
349 
350 	void init_call_fs(cf_node *cf);
351 
352 	value *get_undef_value();
353 	void set_undef(val_set &s);
354 
355 	node* create_node(node_type nt, node_subtype nst,
356 	                  node_flags flags = NF_EMPTY);
357 	alu_node* create_alu();
358 	alu_group_node* create_alu_group();
359 	alu_packed_node* create_alu_packed();
360 	cf_node* create_cf();
361 	cf_node* create_cf(unsigned op);
362 	fetch_node* create_fetch();
363 	region_node* create_region();
364 	depart_node* create_depart(region_node *target);
365 	repeat_node* create_repeat(region_node *target);
366 	container_node* create_container(node_type nt = NT_LIST,
367 	                                 node_subtype nst = NST_LIST,
368 	                                 node_flags flags = NF_EMPTY);
369 	if_node* create_if();
370 	bb_node* create_bb(unsigned id, unsigned loop_level);
371 
get_value_by_uid(unsigned id)372 	value* get_value_by_uid(unsigned id) { return val_pool[id - 1]; }
373 
374 	cf_node* create_clause(node_subtype nst);
375 
376 	void create_bbs();
377 	void expand_bbs();
378 
379 	alu_node* create_mov(value* dst, value* src);
380 	alu_node* create_copy_mov(value *dst, value *src, unsigned affcost = 1);
381 
382 	const char * get_shader_target_name();
383 
384 	std::string get_full_target_name();
385 
386 	void create_bbs(container_node* n, bbs_vec &bbs, int loop_level = 0);
387 	void expand_bbs(bbs_vec &bbs);
388 
389 	sched_queue_id get_queue_id(node* n);
390 
391 	void simplify_dep_rep(node *dr);
392 
393 	unsigned first_temp_gpr();
394 	unsigned num_nontemp_gpr();
395 
arrays()396 	gpr_array_vec& arrays() { return gpr_arrays; }
397 
398 	void set_uses_kill();
399 
400 	void fill_array_values(gpr_array *a, vvec &vv);
401 
402 	alu_node* clone(alu_node *n);
403 
get_value_pool()404 	sb_value_pool& get_value_pool() { return val_pool; }
405 
406 	void collect_stats(bool opt);
407 
408 private:
409 	value* create_value(value_kind k, sel_chan regid, unsigned ver);
410 	value* get_value(value_kind kind, sel_chan id,
411 	                         unsigned version = 0);
412 	value* get_ro_value(value_map &vm, value_kind vk, unsigned key);
413 };
414 
415 }
416 
#endif /* SB_SHADER_H_ */
418