1 /*
2  * Copyright 2013 Vadim Girlin <vadimgirlin@gmail.com>
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * on the rights to use, copy, modify, merge, publish, distribute, sub
8  * license, and/or sell copies of the Software, and to permit persons to whom
9  * the Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
18  * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
19  * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
20  * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
21  * USE OR OTHER DEALINGS IN THE SOFTWARE.
22  *
23  * Authors:
24  *      Vadim Girlin
25  */
26 
27 #ifndef SB_BC_H_
28 #define SB_BC_H_
29 
30 #include <stdint.h>
31 #include "r600_isa.h"
32 
33 #include <cstdio>
34 #include <string>
35 #include <vector>
36 #include <stack>
37 
38 struct r600_bytecode;
39 struct r600_shader;
40 
41 namespace r600_sb {
42 
43 class hw_encoding_format;
44 class node;
45 class alu_node;
46 class cf_node;
47 class fetch_node;
48 class alu_group_node;
49 class region_node;
50 class shader;
51 class value;
52 
/**
 * Minimal abstract text output stream.
 *
 * Derived classes implement write(); every helper below renders its argument
 * into a local buffer (or forwards a string) and passes the text to write().
 */
class sb_ostream {
public:
	sb_ostream() {}

	// Polymorphic base: keep destruction through a base pointer well defined.
	virtual ~sb_ostream() {}

	/** Emit the NUL-terminated string @p s to the underlying sink. */
	virtual void write(const char *s) = 0;

	sb_ostream& operator <<(const char *s) {
		write(s);
		return *this;
	}

	sb_ostream& operator <<(const std::string& s) {
		return *this << s.c_str();
	}

	/** Print a pointer using the platform's "%p" representation. */
	sb_ostream& operator <<(void *p) {
		char b[32];
		snprintf(b, sizeof(b), "%p", p);
		return *this << b;
	}

	/** Print a single character ('\0' yields an empty string). */
	sb_ostream& operator <<(char c) {
		const char b[2] = { c, '\0' };
		return *this << b;
	}

	sb_ostream& operator <<(int n) {
		char b[32];
		snprintf(b, sizeof(b), "%d", n);
		return *this << b;
	}

	sb_ostream& operator <<(unsigned n) {
		char b[32];
		snprintf(b, sizeof(b), "%u", n);
		return *this << b;
	}

	sb_ostream& operator <<(double d) {
		char b[32];
		snprintf(b, sizeof(b), "%g", d);
		return *this << b;
	}

	// The print_* helpers pass the width through the '*' specifier instead of
	// assembling a format string at run time, so no width value can overflow
	// an intermediate buffer. Output longer than 255 characters is truncated.

	// print as field of specified width, right aligned
	void print_w(int n, int width) {
		char b[256];
		snprintf(b, sizeof(b), "%*d", width, n);
		write(b);
	}

	// print as field of specified width, left aligned
	void print_wl(int n, int width) {
		char b[256];
		snprintf(b, sizeof(b), "%-*d", width, n);
		write(b);
	}

	// print as field of specified width, left aligned
	// (the string is written in full, then padded with spaces up to width)
	void print_wl(const std::string &s, int width) {
		write(s.c_str());
		int l = s.length();
		while (l++ < width) {
			write(" ");
		}
	}

	// print int as field of specified width, right aligned, zero-padded
	void print_zw(int n, int width) {
		char b[256];
		snprintf(b, sizeof(b), "%0*d", width, n);
		write(b);
	}

	// print int as field of specified width, right aligned, zero-padded, hex
	void print_zw_hex(int n, int width) {
		char b[256];
		// "%x" expects an unsigned argument
		snprintf(b, sizeof(b), "%0*x", width, static_cast<unsigned>(n));
		write(b);
	}
};
139 
140 class sb_ostringstream : public sb_ostream {
141 	std::string data;
142 public:
sb_ostringstream()143 	sb_ostringstream() : data() {}
144 
write(const char * s)145 	virtual void write(const char *s) {
146 		data += s;
147 	}
148 
clear()149 	void clear() { data.clear(); }
150 
c_str()151 	const char* c_str() { return data.c_str(); }
str()152 	std::string& str() { return data; }
153 };
154 
155 class sb_log : public sb_ostream {
156 	FILE *o;
157 public:
sb_log()158 	sb_log() : o(stderr) {}
159 
write(const char * s)160 	virtual void write(const char *s) {
161 		fputs(s, o);
162 	}
163 };
164 
165 extern sb_log sblog;
166 
// Shader target identifiers. Enumerators are numbered implicitly from 0, so
// TARGET_NUM (listed last) equals the number of targets declared before it.
enum shader_target
{
	TARGET_UNKNOWN,
	TARGET_VS,
	TARGET_ES,
	TARGET_PS,
	TARGET_GS,
	TARGET_GS_COPY,
	TARGET_COMPUTE,
	TARGET_FETCH,
	TARGET_HS,
	TARGET_LS,

	TARGET_NUM
};
182 
// Hardware class bit flags: one bit per class plus named unions of them.
// hw_encoding_format stores such a set and checks requested classes against
// it; sb_context::hw_class_bit() maps an sb_hw_class value to its single bit.
enum sb_hw_class_bits
{
	HB_R6	= (1<<0),
	HB_R7	= (1<<1),
	HB_EG	= (1<<2),
	HB_CM	= (1<<3),

	// combined sets
	HB_R6R7 = (HB_R6 | HB_R7),
	HB_EGCM = (HB_EG | HB_CM),
	HB_R6R7EG = (HB_R6 | HB_R7 | HB_EG),
	HB_R7EGCM = (HB_R7 | HB_EG | HB_CM),

	// every class
	HB_ALL = (HB_R6 | HB_R7 | HB_EG | HB_CM)
};
197 
// Chip identifiers, numbered implicitly from 0 (HW_CHIP_UNKNOWN).
// sb_context::needs_8xx_stack_workaround() treats HEMLOCK, CYPRESS and
// JUNIPER differently from the other chips.
enum sb_hw_chip
{
	HW_CHIP_UNKNOWN,
	HW_CHIP_R600,
	HW_CHIP_RV610,
	HW_CHIP_RV630,
	HW_CHIP_RV670,
	HW_CHIP_RV620,
	HW_CHIP_RV635,
	HW_CHIP_RS780,
	HW_CHIP_RS880,
	HW_CHIP_RV770,
	HW_CHIP_RV730,
	HW_CHIP_RV710,
	HW_CHIP_RV740,
	HW_CHIP_CEDAR,
	HW_CHIP_REDWOOD,
	HW_CHIP_JUNIPER,
	HW_CHIP_CYPRESS,
	HW_CHIP_HEMLOCK,
	HW_CHIP_PALM,
	HW_CHIP_SUMO,
	HW_CHIP_SUMO2,
	HW_CHIP_BARTS,
	HW_CHIP_TURKS,
	HW_CHIP_CAICOS,
	HW_CHIP_CAYMAN,
	HW_CHIP_ARUBA
};
227 
// Hardware class identifiers.
// Declaration order is significant: sb_context::is_egcm() tests
// hw_class >= HW_CLASS_EVERGREEN, so EVERGREEN and CAYMAN must stay last.
enum sb_hw_class
{
	HW_CLASS_UNKNOWN,
	HW_CLASS_R600,
	HW_CLASS_R700,
	HW_CLASS_EVERGREEN,
	HW_CLASS_CAYMAN
};
236 
// ALU slot indices: four channel slots (X..W = 0..3) followed by SLOT_TRANS.
// NOTE(review): sb_context::alu_slots_mask() returns masks 0x0F / 0x10 that
// presumably use these indices as bit positions - confirm against callers.
enum alu_slots {
	SLOT_X = 0,
	SLOT_Y = 1,
	SLOT_Z = 2,
	SLOT_W = 3,
	SLOT_TRANS = 4
};
244 
// Assorted named integer constants (MAX_* bounds), grouped in an enum so
// they are usable as compile-time constants.
enum misc_consts {
	MAX_ALU_LITERALS = 4,
	MAX_ALU_SLOTS = 128,
	MAX_GPR = 128,
	MAX_CHAN = 4

};
252 
// Named values for an ALU source selector (bc_alu_src::sel is a 9-bit
// field, so every value here fits). The numbering has gaps: 225, 226 and
// 239 are not named. sb_context::is_lds_oq() tests the range 219..222,
// i.e. ALU_SRC_LDS_OQ_A through ALU_SRC_LDS_OQ_B_POP.
enum alu_src_sel {

	ALU_SRC_LDS_OQ_A = 219,
	ALU_SRC_LDS_OQ_B = 220,
	ALU_SRC_LDS_OQ_A_POP = 221,
	ALU_SRC_LDS_OQ_B_POP = 222,
	ALU_SRC_LDS_DIRECT_A = 223,
	ALU_SRC_LDS_DIRECT_B = 224,
	ALU_SRC_TIME_HI = 227,
	ALU_SRC_TIME_LO = 228,
	ALU_SRC_MASK_HI = 229,
	ALU_SRC_MASK_LO = 230,
	ALU_SRC_HW_WAVE_ID = 231,
	ALU_SRC_SIMD_ID = 232,
	ALU_SRC_SE_ID = 233,
	ALU_SRC_HW_THREADGRP_ID = 234,
	ALU_SRC_WAVE_ID_IN_GRP = 235,
	ALU_SRC_NUM_THREADGRP_WAVES = 236,
	ALU_SRC_HW_ALU_ODD = 237,
	ALU_SRC_LOOP_IDX = 238,
	ALU_SRC_PARAM_BASE_ADDR = 240,
	ALU_SRC_NEW_PRIM_MASK = 241,
	ALU_SRC_PRIM_MASK_HI = 242,
	ALU_SRC_PRIM_MASK_LO = 243,
	ALU_SRC_1_DBL_L = 244,
	ALU_SRC_1_DBL_M = 245,
	ALU_SRC_0_5_DBL_L = 246,
	ALU_SRC_0_5_DBL_M = 247,
	ALU_SRC_0 = 248,
	ALU_SRC_1 = 249,
	ALU_SRC_1_INT = 250,
	ALU_SRC_M_1_INT = 251,
	ALU_SRC_0_5 = 252,
	ALU_SRC_LITERAL = 253,
	ALU_SRC_PV = 254,
	ALU_SRC_PS = 255,

	ALU_SRC_PARAM_OFFSET = 448
};
292 
// Values for a 2-bit predicate select (bc_alu::pred_sel is 2 bits wide).
// Value 1 is intentionally left without a name (marked reserved below).
enum alu_predicate_select
{
	PRED_SEL_OFF	= 0,
//	RESERVED		= 1,
	PRED_SEL_0		= 2,
	PRED_SEL_1		= 3
};
300 
301 
// Values for the 2-bit bc_alu::omod field.
// NOTE(review): M2/M4/D2 presumably mean multiply by 2, by 4 and divide by
// 2 - confirm against the ISA documentation.
enum alu_omod {
	OMOD_OFF  = 0,
	OMOD_M2   = 1,
	OMOD_M4   = 2,
	OMOD_D2   = 3
};
308 
// Values for the 3-bit bc_alu::index_mode field. Values 1..3 carry an
// _R600 suffix; 4..6 are listed as a separate group.
// NOTE(review): the suffix suggests values 1..3 apply to R600-class
// hardware only - confirm against the ISA documentation.
enum alu_index_mode {
	INDEX_AR_X        = 0,
	INDEX_AR_Y_R600   = 1,
	INDEX_AR_Z_R600   = 2,
	INDEX_AR_W_R600   = 3,

	INDEX_LOOP        = 4,
	INDEX_GLOBAL      = 5,
	INDEX_GLOBAL_AR_X = 6
};
319 
// Destination selectors for MOVA, numbered implicitly from 0 to 7.
// NOTE(review): the CM_ prefix and the enum name indicate these apply to
// Cayman-class hardware - confirm against users of this enum.
enum alu_cayman_mova_dst {
	CM_MOVADST_AR_X,
	CM_MOVADST_PC,
	CM_MOVADST_IDX0,
	CM_MOVADST_IDX1,
	CM_MOVADST_CG0,		// clause-global byte 0
	CM_MOVADST_CG1,
	CM_MOVADST_CG2,
	CM_MOVADST_CG3
};
330 
// Execute-mask operation codes, numbered implicitly from 0 to 3.
// NOTE(review): Cayman-specific judging by the name - confirm.
enum alu_cayman_exec_mask_op {
	CM_EMO_DEACTIVATE,
	CM_EMO_BREAK,
	CM_EMO_CONTINUE,
	CM_EMO_KILL
};
337 
338 
// Export type codes, numbered implicitly from 0; EXP_TYPE_COUNT is listed
// last and therefore equals the number of export types (3).
enum cf_exp_type {
	EXP_PIXEL,
	EXP_POS,
	EXP_PARAM,

	EXP_TYPE_COUNT
};
346 
// Memory write type codes, numbered implicitly from 0 to 3. The names
// combine two independent properties: _IND and _ACK.
enum cf_mem_type {
	MEM_WRITE,
	MEM_WRITE_IND,
	MEM_WRITE_ACK,
	MEM_WRITE_IND_ACK
};
353 
354 
// Kcache lock modes stored in bc_kcache::mode. KC_LOCK_NONE (0) is the
// value bc_cf::is_alu_extended() compares against.
enum alu_kcache_mode {
	KC_LOCK_NONE,
	KC_LOCK_1,
	KC_LOCK_2,
	KC_LOCK_LOOP
};
361 
// Kcache index modes stored in bc_kcache::index_mode. KC_INDEX_NONE (0) is
// the value bc_cf::is_alu_extended() compares against.
enum alu_kcache_index_mode {
	KC_INDEX_NONE,
	KC_INDEX_0,
	KC_INDEX_1,
	KC_INDEX_INVALID
};
368 
// Channel select codes: the four channels X..W (0..3), two selectors named
// SEL_0 / SEL_1 (4, 5), and SEL_MASK (7). Value 6 is left unnamed.
enum chan_select {
	SEL_X	= 0,
	SEL_Y	= 1,
	SEL_Z	= 2,
	SEL_W	= 3,
	SEL_0	= 4,
	SEL_1	= 5,
//	RESERVED = 6,
	SEL_MASK = 7
};
379 
// Bank swizzle codes. Two independent numberings share this enum: the
// VEC_* values (0..5, count VEC_NUM) and the SCL_* values (0..3, count
// SCL_NUM). The numeric values therefore overlap (e.g. VEC_012 == SCL_210).
enum bank_swizzle {
	VEC_012 = 0,
	VEC_021 = 1,
	VEC_120 = 2,
	VEC_102 = 3,
	VEC_201 = 4,
	VEC_210 = 5,

	VEC_NUM = 6,

	SCL_210 = 0,
	SCL_122 = 1,
	SCL_212 = 2,
	SCL_221 = 3,

	SCL_NUM = 4

};
398 
// Scheduling queue identifiers, numbered implicitly from 0; SQ_NUM is
// listed last and therefore equals the number of queues (5).
enum sched_queue_id {
	SQ_CF,
	SQ_ALU,
	SQ_TEX,
	SQ_VTX,
	SQ_GDS,

	SQ_NUM
};
408 
/**
 * One 32-bit constant that can be viewed as a signed integer, an unsigned
 * integer or a float; all three views share the same storage.
 *
 * Construction is implicit from int32_t (default 0), uint32_t, float and
 * double (narrowed to float). Conversion to uint32_t yields the raw bits.
 */
struct literal {
	union {
		int32_t i;
		uint32_t u;
		float f;
	};

	literal(int32_t i = 0) { this->i = i; }
	literal(uint32_t u) { this->u = u; }
	literal(float f) { this->f = f; }
	literal(double f) { this->f = static_cast<float>(f); }

	// Raw 32-bit pattern.
	operator uint32_t() const {
		return u;
	}

	// Literals are equal when their bit patterns are equal.
	bool operator ==(literal l) {
		return l.u == u;
	}
	// Compare the signed view with an int.
	bool operator ==(int v_int) {
		return v_int == i;
	}
	// Compare the unsigned view with an unsigned.
	bool operator ==(unsigned v_uns) {
		return v_uns == u;
	}
};
425 
// Description of one kcache set; bc_cf holds four of these (kc[4]).
// bc_cf::is_alu_extended() compares `mode` against KC_LOCK_NONE
// (alu_kcache_mode) and `index_mode` against KC_INDEX_NONE
// (alu_kcache_index_mode).
struct bc_kcache {
	unsigned mode;
	unsigned bank;
	unsigned addr;
	unsigned index_mode;
} ;
432 
433 // TODO optimize bc structures
434 
435 struct bc_cf {
436 
437 	bc_kcache kc[4];
438 
439 	unsigned id;
440 
441 
442 	const cf_op_info * op_ptr;
443 	unsigned op;
444 
445 	unsigned addr:32;
446 
447 	unsigned alt_const:1;
448 	unsigned uses_waterfall:1;
449 
450 	unsigned barrier:1;
451 	unsigned count:7;
452 	unsigned pop_count:3;
453 	unsigned call_count:6;
454 	unsigned whole_quad_mode:1;
455 	unsigned valid_pixel_mode:1;
456 
457 	unsigned jumptable_sel:3;
458 	unsigned cf_const:5;
459 	unsigned cond:2;
460 	unsigned end_of_program:1;
461 
462 	unsigned array_base:13;
463 	unsigned elem_size:2;
464 	unsigned index_gpr:7;
465 	unsigned rw_gpr:7;
466 	unsigned rw_rel:1;
467 	unsigned type:2;
468 
469 	unsigned burst_count:4;
470 	unsigned mark:1;
471 	unsigned sel[4];
472 
473 	unsigned array_size:12;
474 	unsigned comp_mask:4;
475 
476 	unsigned rat_id:4;
477 	unsigned rat_inst:6;
478 	unsigned rat_index_mode:2;
479 
set_opbc_cf480 	void set_op(unsigned op) { this->op = op; op_ptr = r600_isa_cf(op); }
481 
is_alu_extendedbc_cf482 	bool is_alu_extended() {
483 		assert(op_ptr->flags & CF_ALU);
484 		return kc[2].mode != KC_LOCK_NONE || kc[3].mode != KC_LOCK_NONE ||
485 			kc[0].index_mode != KC_INDEX_NONE || kc[1].index_mode != KC_INDEX_NONE ||
486 			kc[2].index_mode != KC_INDEX_NONE || kc[3].index_mode != KC_INDEX_NONE;
487 	}
488 
489 };
490 
491 struct bc_alu_src {
492 	unsigned sel:9;
493 	unsigned chan:2;
494 	unsigned neg:1;
495 	unsigned abs:1;
496 	unsigned rel:1;
497 	literal value;
498 
clearbc_alu_src499 	void clear() {
500 		sel = 0;
501 		chan = 0;
502 		neg = 0;
503 		abs = 0;
504 		rel = 0;
505 		value = 0;
506 	}
507 };
508 
509 struct bc_alu {
510 	const alu_op_info * op_ptr;
511 	unsigned op;
512 
513 	bc_alu_src src[3];
514 
515 	unsigned dst_gpr:7;
516 	unsigned dst_chan:2;
517 	unsigned dst_rel:1;
518 	unsigned clamp:1;
519 	unsigned omod:2;
520 	unsigned bank_swizzle:3;
521 
522 	unsigned index_mode:3;
523 	unsigned last:1;
524 	unsigned pred_sel:2;
525 
526 	unsigned fog_merge:1;
527 	unsigned write_mask:1;
528 	unsigned update_exec_mask:1;
529 	unsigned update_pred:1;
530 
531 	unsigned slot:3;
532 
533 	unsigned lds_idx_offset:6;
534 
535 	alu_op_flags slot_flags;
536 
set_opbc_alu537 	void set_op(unsigned op) {
538 		this->op = op;
539 		op_ptr = r600_isa_alu(op);
540 	}
clearbc_alu541 	void clear() {
542 		op_ptr = nullptr;
543 		op = 0;
544 		for (int i = 0; i < 3; ++i)
545 			src[i].clear();
546 		dst_gpr = 0;
547 		dst_chan = 0;
548 		dst_rel = 0;
549 		clamp = 0;
550 		omod = 0;
551 		bank_swizzle = 0;
552 		index_mode = 0;
553 		last = 0;
554 		pred_sel = 0;
555 		fog_merge = 0;
556 		write_mask = 0;
557 		update_exec_mask = 0;
558 		update_pred = 0;
559 		slot = 0;
560 		lds_idx_offset = 0;
561 		slot_flags = AF_NONE;
562 	}
bc_alubc_alu563 	bc_alu() {
564 		clear();
565 	}
566 };
567 
// Field set of one fetch instruction. bc_decoder::decode_fetch() takes an
// instance by reference. `op` and `op_ptr` belong together and should be
// changed through set_op(). The struct has no constructor, so members are
// only initialized if the object is value-initialized or filled explicitly.
struct bc_fetch {
	const fetch_op_info * op_ptr;
	unsigned op;

	unsigned bc_frac_mode:1;
	unsigned fetch_whole_quad:1;
	unsigned resource_id:8;

	// source register and per-component source selects
	unsigned src_gpr:7;
	unsigned src_rel:1;
	unsigned src_rel_global:1; /* for GDS ops */
	unsigned src_sel[4];

	// destination register and per-component destination selects
	unsigned dst_gpr:7;
	unsigned dst_rel:1;
	unsigned dst_rel_global:1; /* for GDS ops */
	unsigned dst_sel[4];

	unsigned alt_const:1;

	unsigned inst_mod:2;
	unsigned resource_index_mode:2;
	unsigned sampler_index_mode:2;

	unsigned coord_type[4];
	unsigned lod_bias:7;

	unsigned offset[3];

	unsigned sampler_id:5;


	unsigned fetch_type:2;
	unsigned mega_fetch_count:6;
	unsigned coalesced_read:1;
	unsigned structured_read:2;
	unsigned lds_req:1;

	unsigned data_format:6;
	unsigned format_comp_all:1;
	unsigned num_format_all:2;
	unsigned semantic_id:8;
	unsigned srf_mode_all:1;
	unsigned use_const_fields:1;

	unsigned const_buf_no_stride:1;
	unsigned endian_swap:2;
	unsigned mega_fetch:1;

	unsigned src2_gpr:7; /* for GDS */
	unsigned alloc_consume:1;
	unsigned uav_id:4;
	unsigned uav_index_mode:2;
	unsigned bcast_first_req:1;

	/* for MEM ops */
	unsigned elem_size:2;
	unsigned uncached:1;
	unsigned indexed:1;
	unsigned burst_count:4;
	unsigned array_base:13;
	unsigned array_size:12;

	// Store the opcode and look up its description via r600_isa_fetch().
	void set_op(unsigned op) { this->op = op; op_ptr = r600_isa_fetch(op); }
};
633 
634 struct shader_stats {
635 	unsigned	ndw;
636 	unsigned	ngpr;
637 	unsigned	nstack;
638 
639 	unsigned	cf; // clause instructions not included
640 	unsigned	alu;
641 	unsigned	alu_clauses;
642 	unsigned	fetch_clauses;
643 	unsigned	fetch;
644 	unsigned	alu_groups;
645 
646 	unsigned	shaders;		// number of shaders (for accumulated stats)
647 
shader_statsshader_stats648 	shader_stats() : ndw(), ngpr(), nstack(), cf(), alu(), alu_clauses(),
649 			fetch_clauses(), fetch(), alu_groups(), shaders() {}
650 
651 	void collect(node *n);
652 	void accumulate(shader_stats &s);
653 	void dump();
654 	void dump_diff(shader_stats &s);
655 };
656 
657 class sb_context {
658 
659 public:
660 
661 	shader_stats src_stats, opt_stats;
662 
663 	r600_isa *isa;
664 
665 	sb_hw_chip hw_chip;
666 	sb_hw_class hw_class;
667 
668 	unsigned alu_temp_gprs;
669 	unsigned max_fetch;
670 	bool has_trans;
671 	unsigned vtx_src_num;
672 	unsigned num_slots;
673 	bool uses_mova_gpr;
674 
675 	bool r6xx_gpr_index_workaround;
676 
677 	bool stack_workaround_8xx;
678 	bool stack_workaround_9xx;
679 
680 	unsigned wavefront_size;
681 	unsigned stack_entry_size;
682 
683 	static unsigned dump_pass;
684 	static unsigned dump_stat;
685 
686 	static unsigned dry_run;
687 	static unsigned no_fallback;
688 	static unsigned safe_math;
689 
690 	static unsigned dskip_start;
691 	static unsigned dskip_end;
692 	static unsigned dskip_mode;
693 
sb_context()694 	sb_context() : src_stats(), opt_stats(), isa(0),
695 			hw_chip(HW_CHIP_UNKNOWN), hw_class(HW_CLASS_UNKNOWN),
696 			alu_temp_gprs(0), max_fetch(0), has_trans(false), vtx_src_num(0),
697 			num_slots(0), uses_mova_gpr(false),
698 			r6xx_gpr_index_workaround(false), stack_workaround_8xx(false),
699 			stack_workaround_9xx(false), wavefront_size(0),
700 			stack_entry_size(0) {}
701 
702 	int init(r600_isa *isa, sb_hw_chip chip, sb_hw_class cclass);
703 
is_r600()704 	bool is_r600() {return hw_class == HW_CLASS_R600;}
is_r700()705 	bool is_r700() {return hw_class == HW_CLASS_R700;}
is_evergreen()706 	bool is_evergreen() {return hw_class == HW_CLASS_EVERGREEN;}
is_cayman()707 	bool is_cayman() {return hw_class == HW_CLASS_CAYMAN;}
is_egcm()708 	bool is_egcm() {return hw_class >= HW_CLASS_EVERGREEN;}
709 
needs_8xx_stack_workaround()710 	bool needs_8xx_stack_workaround() {
711 		if (!is_evergreen())
712 			return false;
713 
714 		switch (hw_chip) {
715 		case HW_CHIP_HEMLOCK:
716 		case HW_CHIP_CYPRESS:
717 		case HW_CHIP_JUNIPER:
718 			return false;
719 		default:
720 			return true;
721 		}
722 	}
723 
needs_9xx_stack_workaround()724 	bool needs_9xx_stack_workaround() {
725 		return is_cayman();
726 	}
727 
hw_class_bit()728 	sb_hw_class_bits hw_class_bit() {
729 		switch (hw_class) {
730 		case HW_CLASS_R600:return HB_R6;
731 		case HW_CLASS_R700:return HB_R7;
732 		case HW_CLASS_EVERGREEN:return HB_EG;
733 		case HW_CLASS_CAYMAN:return HB_CM;
734 		default: assert(!"unknown hw class"); return (sb_hw_class_bits)0;
735 
736 		}
737 	}
738 
cf_opcode(unsigned op)739 	unsigned cf_opcode(unsigned op) {
740 		return r600_isa_cf_opcode(isa->hw_class, op);
741 	}
742 
alu_opcode(unsigned op)743 	unsigned alu_opcode(unsigned op) {
744 		return r600_isa_alu_opcode(isa->hw_class, op);
745 	}
746 
alu_slots(unsigned op)747 	unsigned alu_slots(unsigned op) {
748 		return r600_isa_alu_slots(isa->hw_class, op);
749 	}
750 
alu_slots(const alu_op_info * op_ptr)751 	unsigned alu_slots(const alu_op_info * op_ptr) {
752 		return op_ptr->slots[isa->hw_class];
753 	}
754 
alu_slots_mask(const alu_op_info * op_ptr)755 	unsigned alu_slots_mask(const alu_op_info * op_ptr) {
756 		unsigned mask = 0;
757 		unsigned slot_flags = alu_slots(op_ptr);
758 		if (slot_flags & AF_V)
759 			mask = 0x0F;
760 		if (!is_cayman() && (slot_flags & AF_S))
761 			mask |= 0x10;
762 		/* Force LDS_IDX ops into SLOT_X */
763 		if (op_ptr->opcode[0] == -1 && ((op_ptr->opcode[1] & 0xFF) == 0x11))
764 			mask = 0x01;
765 		return mask;
766 	}
767 
fetch_opcode(unsigned op)768 	unsigned fetch_opcode(unsigned op) {
769 		return r600_isa_fetch_opcode(isa->hw_class, op);
770 	}
771 
is_kcache_sel(unsigned sel)772 	bool is_kcache_sel(unsigned sel) {
773 		return ((sel >= 128 && sel < 192) || (sel >= 256 && sel < 320));
774 	}
775 
is_lds_oq(unsigned sel)776 	bool is_lds_oq(unsigned sel) {
777 		return (sel >= 0xdb && sel <= 0xde);
778 	}
779 
780 	const char * get_hw_class_name();
781 	const char * get_hw_chip_name();
782 
783 };
784 
// Run the statement(s) `a` only when sb_context::dump_stat (respectively
// sb_context::dump_pass) is non-zero. The do { } while (0) wrapper makes
// each macro behave as a single statement, e.g. inside an unbraced if/else.
#define SB_DUMP_STAT(a) do { if (sb_context::dump_stat) { a } } while (0)
#define SB_DUMP_PASS(a) do { if (sb_context::dump_pass) { a } } while (0)
787 
// Decoder over an array of 32-bit bytecode words. The constructor only
// stores the context reference, the word pointer and the word count; the
// decode_* members are declared here and defined in another file.
class bc_decoder {

	sb_context &ctx;

	// bytecode words and their count; not owned (no destructor frees them)
	uint32_t* dw;
	unsigned ndw;

public:

	bc_decoder(sb_context &sctx, uint32_t *data, unsigned size)
		: ctx(sctx), dw(data), ndw(size) {}

	// Each takes the word index `i` and the output structure by reference.
	// NOTE(review): presumably `i` is advanced past the decoded words and
	// the return value is an error code - confirm in the definitions.
	int decode_cf(unsigned &i, bc_cf &bc);
	int decode_alu(unsigned &i, bc_alu &bc);
	int decode_fetch(unsigned &i, bc_fetch &bc);

private:
	int decode_cf_alu(unsigned &i, bc_cf &bc);
	int decode_cf_exp(unsigned &i, bc_cf &bc);
	int decode_cf_mem(unsigned &i, bc_cf &bc);

	int decode_fetch_vtx(unsigned &i, bc_fetch &bc);
	int decode_fetch_gds(unsigned &i, bc_fetch &bc);
	int decode_fetch_mem(unsigned &i, bc_fetch &bc);
};
813 
814 // bytecode format definition
815 
// Base class for one encoded 32-bit bytecode word, tagged with the set of
// hardware classes (sb_hw_class_bits) the encoding is valid for.
class hw_encoding_format {
	const sb_hw_class_bits hw_target; //FIXME: debug - remove after testing
	// Declared private with no definition in this header, so outside code
	// cannot default-construct an instance.
	hw_encoding_format();
protected:
	// the encoded word; derived format classes set its bit fields
	uint32_t value;
public:
	// Start from an all-zero word valid for the hardware set `hw`.
	hw_encoding_format(sb_hw_class_bits hw)
		: hw_target(hw), value(0) {}
	// Wrap an existing word `v` valid for the hardware set `hw`.
	hw_encoding_format(uint32_t v, sb_hw_class_bits hw)
		: hw_target(hw), value(v) {}
	// Return the word; asserts that every class bit in `hw` is part of the
	// set this encoding was created for.
	uint32_t get_value(sb_hw_class_bits hw) const {
		assert((hw & hw_target) == hw);
		return value;
	}
};
831 
// BC_FORMAT_BEGIN_HW(fmt, hwset) opens the definition of a class named
// <fmt>_<hwset> derived from hw_encoding_format and tagged with HB_<hwset>.
// It provides a default constructor (zero word) and a constructor from an
// existing 32-bit word. The class body stays open until BC_FORMAT_END.
#define BC_FORMAT_BEGIN_HW(fmt, hwset) \
class fmt##_##hwset : public hw_encoding_format {\
	typedef fmt##_##hwset thistype; \
public: \
	fmt##_##hwset() : hw_encoding_format(HB_##hwset) {}; \
	fmt##_##hwset(uint32_t v) : hw_encoding_format(v, HB_##hwset) {};

// Same as above for a format valid on every hardware class (HB_ALL).
#define BC_FORMAT_BEGIN(fmt) BC_FORMAT_BEGIN_HW(fmt, ALL)

// Closes the class opened by BC_FORMAT_BEGIN / BC_FORMAT_BEGIN_HW.
#define BC_FORMAT_END(fmt) };

// bytecode format field definition
//
// BC_FIELD generates two members for the bit range [first_bit, last_bit]:
//  - name(v): masks v to the field width, shifts it into place and ORs it
//    into `value` (existing bits of the field are not cleared first);
//    returns *this so setters can be chained.
//  - get_<name>(): extracts the field from `value`.
// The mask is computed in 64-bit arithmetic (1ull) so a full 32-bit wide
// field does not overflow the shift.

#define BC_FIELD(fmt, name, shortname, last_bit, first_bit) \
	thistype & name(unsigned v) { \
		value |= ((v&((1ull<<((last_bit)-(first_bit)+1))-1))<<(first_bit)); \
		return *this; \
	} \
	unsigned get_##name() const { \
		return (value>>(first_bit))&((1ull<<((last_bit)-(first_bit)+1))-1); \
	}

// Reserved bit ranges generate no code.
#define BC_RSRVD(fmt, last_bit, first_bit)

// CLAMP macro defined elsewhere interferes with bytecode field name
#undef CLAMP
// The included file is expanded with the macros above in effect.
#include "sb_bc_fmt_def.inc"

// NOTE: BC_FORMAT_BEGIN_HW is not #undef'd here and stays defined.
#undef BC_FORMAT_BEGIN
#undef BC_FORMAT_END
#undef BC_FIELD
#undef BC_RSRVD
864 
// Parser state for turning an r600_bytecode / r600_shader pair into a
// `shader` object. Only the constructor and get_shader() are defined in
// this header; all other members are declared here and defined elsewhere.
class bc_parser {
	sb_context & ctx;

	bc_decoder *dec;

	// inputs handed to the constructor
	r600_bytecode *bc;
	r600_shader *pshader;

	uint32_t *dw;
	unsigned bc_ndw;

	unsigned max_cf;

	// result returned by get_shader()
	shader *sh;

	// checked by get_shader(), which asserts that it is zero
	int error;

	alu_node *slots[2][5];
	unsigned cgroup;

	typedef std::vector<cf_node*> id_cf_map;
	id_cf_map cf_map;

	typedef std::stack<region_node*> region_stack;
	region_stack loop_stack;

	bool gpr_reladdr;

	// Note: currently relies on input emitting SET_CF in same basic block as uses
	value *cf_index_value[2];
	alu_node *mova;
public:

	// Stores the context and input pointers; every other member is
	// value-initialized (null pointers, zero counters, empty containers).
	bc_parser(sb_context &sctx, r600_bytecode *bc, r600_shader* pshader) :
		ctx(sctx), dec(), bc(bc), pshader(pshader),
		dw(), bc_ndw(), max_cf(),
		sh(), error(), slots(), cgroup(),
		cf_map(), loop_stack(), gpr_reladdr(), cf_index_value(), mova() { }

	int decode();
	int prepare();

	// Returns the shader pointer; asserts that no error has been recorded.
	shader* get_shader() { assert(!error); return sh; }

private:

	int decode_shader();

	int parse_decls();

	int decode_cf(unsigned &i, bool &eop);

	int decode_alu_clause(cf_node *cf);
	int decode_alu_group(cf_node* cf, unsigned &i, unsigned &gcnt);

	int decode_fetch_clause(cf_node *cf);

	int prepare_ir();
	int prepare_alu_clause(cf_node *cf);
	int prepare_alu_group(cf_node* cf, alu_group_node *g);
	int prepare_fetch_clause(cf_node *cf);

	int prepare_loop(cf_node *c);
	int prepare_if(cf_node *c);

	// accessors for cf_index_value[] and mova (defined elsewhere)
	void save_set_cf_index(value *val, unsigned idx);
	value *get_cf_index_value(unsigned idx);
	void save_mova(alu_node *mova);
	alu_node *get_mova();
};
935 
936 
937 
938 
939 class bytecode {
940 	typedef std::vector<uint32_t> bc_vector;
941 	sb_hw_class_bits hw_class_bit;
942 
943 	bc_vector bc;
944 
945 	unsigned pos;
946 
947 public:
948 
949 	bytecode(sb_hw_class_bits hw, unsigned rdw = 256)
hw_class_bit(hw)950 		: hw_class_bit(hw), pos(0) { bc.reserve(rdw); }
951 
ndw()952 	unsigned ndw() { return bc.size(); }
953 
write_data(uint32_t * dst)954 	void write_data(uint32_t* dst) {
955 		std::copy(bc.begin(), bc.end(), dst);
956 	}
957 
align(unsigned a)958 	void align(unsigned a) {
959 		unsigned size = bc.size();
960 		size = (size + a - 1) & ~(a-1);
961 		bc.resize(size);
962 	}
963 
set_size(unsigned sz)964 	void set_size(unsigned sz) {
965 		assert(sz >= bc.size());
966 		bc.resize(sz);
967 	}
968 
seek(unsigned p)969 	void seek(unsigned p) {
970 		if (p != pos) {
971 			if (p > bc.size()) {
972 				bc.resize(p);
973 			}
974 			pos = p;
975 		}
976 	}
977 
get_pos()978 	unsigned get_pos() { return pos; }
data()979 	uint32_t *data() { return &bc[0]; }
980 
981 	bytecode & operator <<(uint32_t v) {
982 		if (pos == ndw()) {
983 			bc.push_back(v);
984 		} else
985 			bc.at(pos) = v;
986 		++pos;
987 		return *this;
988 	}
989 
990 	bytecode & operator <<(const hw_encoding_format &e) {
991 		*this << e.get_value(hw_class_bit);
992 		return *this;
993 	}
994 
995 	bytecode & operator <<(const bytecode &b) {
996 		bc.insert(bc.end(), b.bc.begin(), b.bc.end());
997 		return *this;
998 	}
999 
at(unsigned dw_id)1000 	uint32_t at(unsigned dw_id) { return bc.at(dw_id); }
1001 };
1002 
1003 
// Builds bytecode for a shader into the owned `bytecode` buffer `bb`.
// Only get_bytecode() is defined in this header; the constructor, build()
// and the private build_* helpers are defined in another file.
class bc_builder {
	shader &sh;
	sb_context &ctx;
	bytecode bb;
	// checked by get_bytecode(), which asserts that it is zero
	int error;

public:

	bc_builder(shader &s);
	int build();
	// Returns the bytecode buffer; asserts that no error has been recorded.
	bytecode& get_bytecode() { assert(!error); return bb; }

private:

	int build_cf(cf_node *n);

	int build_cf_alu(cf_node *n);
	int build_cf_mem(cf_node *n);
	int build_cf_exp(cf_node *n);

	int build_alu_clause(cf_node *n);
	int build_alu_group(alu_group_node *n);
	int build_alu(alu_node *n);

	int build_fetch_clause(cf_node *n);
	int build_fetch_tex(fetch_node *n);
	int build_fetch_vtx(fetch_node *n);
	int build_fetch_gds(fetch_node *n);
	int build_fetch_mem(fetch_node* n);
};
1034 
1035 } // namespace r600_sb
1036 
1037 #endif /* SB_BC_H_ */
1038