1 /*
2  * Copyright (C) 2005 Ben Skeggs.
3  *
4  * Copyright 2008 Corbin Simpson <MostAwesomeDude@gmail.com>
5  * Adaptation and modification for ATI/AMD Radeon R500 GPU chipsets.
6  *
7  * All Rights Reserved.
8  *
9  * Permission is hereby granted, free of charge, to any person obtaining
10  * a copy of this software and associated documentation files (the
11  * "Software"), to deal in the Software without restriction, including
12  * without limitation the rights to use, copy, modify, merge, publish,
13  * distribute, sublicense, and/or sell copies of the Software, and to
14  * permit persons to whom the Software is furnished to do so, subject to
15  * the following conditions:
16  *
17  * The above copyright notice and this permission notice (including the
18  * next paragraph) shall be included in all copies or substantial
19  * portions of the Software.
20  *
21  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
22  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
23  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
24  * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
25  * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
26  * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
27  * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
28  *
29  */
30 
31 /**
32  * \file
33  *
34  * \author Ben Skeggs <darktama@iinet.net.au>
35  *
36  * \author Jerome Glisse <j.glisse@gmail.com>
37  *
38  * \author Corbin Simpson <MostAwesomeDude@gmail.com>
39  *
40  */
41 
42 #include "r500_fragprog.h"
43 
44 #include "r300_reg.h"
45 
46 #include "radeon_program_pair.h"
47 
48 #include "util/compiler.h"
49 
/* Declares `code`, a pointer to the R500 hardware program that is being
 * built. A fragment program compiler named `c` must be in scope where this
 * macro is expanded. */
#define PROG_CODE \
	struct r500_fragment_program_code *code = &c->code->code.r500

/* Reports a compile error through rc_error() on the compiler `c` in scope.
 * The message is prefixed with the current file and function name and a
 * newline is appended, so callers do not need to add either themselves. */
#define error(fmt, ...) do {					\
		rc_error(&c->Base, "%s::%s(): " fmt "\n",	\
			__FILE__, __FUNCTION__, ##__VA_ARGS__);	\
	} while (0)
57 
58 
/**
 * Instruction slots belonging to one open IF block.
 *
 * Else and Endif are set to -1 when the IF is emitted and are overwritten
 * with the slot of the matching instruction once that instruction is emitted.
 */
struct branch_info {
	int If;    /* slot of the IF instruction */
	int Else;  /* slot of the ELSE instruction, -1 if none was seen */
	int Endif; /* slot of the ENDIF instruction, -1 until it is emitted */
};
64 
/**
 * Bookkeeping for one loop whose body is currently being emitted.
 */
struct r500_loop_info {
	/* Slot of the BGNLOOP instruction. */
	int BgnLoop;

	/* Value of the branch depth when the loop was opened. */
	int BranchDepth;

	/* Slots of the BRK instructions seen inside the loop; their jump
	 * addresses are filled in when ENDLOOP is emitted. */
	int *Brks;
	int BrkCount;
	int BrkReserved;

	/* Slots of the CONT instructions seen inside the loop; their jump
	 * addresses are filled in when ENDLOOP is emitted. */
	int *Conts;
	int ContCount;
	int ContReserved;
};
77 
/**
 * State carried from one flow-control instruction to the next while the
 * hardware program is being emitted.
 */
struct emit_state {
	struct radeon_compiler *C;
	struct r500_fragment_program_code *Code;

	/* Stack of open IF blocks; CurrentBranchDepth entries are in use. */
	struct branch_info *Branches;
	unsigned int CurrentBranchDepth;
	unsigned int BranchesReserved;

	/* Stack of open loops; CurrentLoopDepth entries are in use. */
	struct r500_loop_info *Loops;
	unsigned int CurrentLoopDepth;
	unsigned int LoopsReserved;

	/* Largest value CurrentBranchDepth has reached so far. */
	unsigned int MaxBranchDepth;
};
93 
/**
 * Map a compiler opcode to the opcode value of the RGB half of an R500 ALU
 * instruction.
 *
 * RC_OPCODE_NOP is encoded as MAD. An opcode that is not listed is reported
 * as an error on \p c and is encoded as MAD as well.
 *
 * \param c      compiler that receives the error, if any
 * \param opcode opcode of the RGB half of a paired instruction
 * \return the R500_ALU_RGBA_OP_* value for \p opcode
 */
static unsigned int translate_rgb_op(struct r300_fragment_program_compiler *c, rc_opcode opcode)
{
	unsigned int hw_op;

	switch (opcode) {
	case RC_OPCODE_CMP:
		hw_op = R500_ALU_RGBA_OP_CMP;
		break;
	case RC_OPCODE_CND:
		hw_op = R500_ALU_RGBA_OP_CND;
		break;
	case RC_OPCODE_DDX:
		hw_op = R500_ALU_RGBA_OP_MDH;
		break;
	case RC_OPCODE_DDY:
		hw_op = R500_ALU_RGBA_OP_MDV;
		break;
	case RC_OPCODE_DP3:
		hw_op = R500_ALU_RGBA_OP_DP3;
		break;
	case RC_OPCODE_DP4:
		hw_op = R500_ALU_RGBA_OP_DP4;
		break;
	case RC_OPCODE_FRC:
		hw_op = R500_ALU_RGBA_OP_FRC;
		break;
	case RC_OPCODE_NOP:
	case RC_OPCODE_MAD:
		hw_op = R500_ALU_RGBA_OP_MAD;
		break;
	case RC_OPCODE_MAX:
		hw_op = R500_ALU_RGBA_OP_MAX;
		break;
	case RC_OPCODE_MIN:
		hw_op = R500_ALU_RGBA_OP_MIN;
		break;
	case RC_OPCODE_REPL_ALPHA:
		hw_op = R500_ALU_RGBA_OP_SOP;
		break;
	default:
		error("translate_rgb_op: unknown opcode %s\n", rc_get_opcode_info(opcode)->Name);
		/* Still hand back a valid encoding after reporting the error. */
		hw_op = R500_ALU_RGBA_OP_MAD;
		break;
	}

	return hw_op;
}
115 
/**
 * Map a compiler opcode to the opcode value of the alpha half of an R500 ALU
 * instruction.
 *
 * DP3 and DP4 both map to the single alpha DP opcode and RC_OPCODE_NOP is
 * encoded as MAD. An opcode that is not listed is reported as an error on
 * \p c and is encoded as MAD as well.
 *
 * \param c      compiler that receives the error, if any
 * \param opcode opcode of the alpha half of a paired instruction
 * \return the R500_ALPHA_OP_* value for \p opcode
 */
static unsigned int translate_alpha_op(struct r300_fragment_program_compiler *c, rc_opcode opcode)
{
	unsigned int hw_op;

	switch (opcode) {
	case RC_OPCODE_CMP:
		hw_op = R500_ALPHA_OP_CMP;
		break;
	case RC_OPCODE_CND:
		hw_op = R500_ALPHA_OP_CND;
		break;
	case RC_OPCODE_COS:
		hw_op = R500_ALPHA_OP_COS;
		break;
	case RC_OPCODE_DDX:
		hw_op = R500_ALPHA_OP_MDH;
		break;
	case RC_OPCODE_DDY:
		hw_op = R500_ALPHA_OP_MDV;
		break;
	case RC_OPCODE_DP3:
	case RC_OPCODE_DP4:
		hw_op = R500_ALPHA_OP_DP;
		break;
	case RC_OPCODE_EX2:
		hw_op = R500_ALPHA_OP_EX2;
		break;
	case RC_OPCODE_FRC:
		hw_op = R500_ALPHA_OP_FRC;
		break;
	case RC_OPCODE_LG2:
		hw_op = R500_ALPHA_OP_LN2;
		break;
	case RC_OPCODE_NOP:
	case RC_OPCODE_MAD:
		hw_op = R500_ALPHA_OP_MAD;
		break;
	case RC_OPCODE_MAX:
		hw_op = R500_ALPHA_OP_MAX;
		break;
	case RC_OPCODE_MIN:
		hw_op = R500_ALPHA_OP_MIN;
		break;
	case RC_OPCODE_RCP:
		hw_op = R500_ALPHA_OP_RCP;
		break;
	case RC_OPCODE_RSQ:
		hw_op = R500_ALPHA_OP_RSQ;
		break;
	case RC_OPCODE_SIN:
		hw_op = R500_ALPHA_OP_SIN;
		break;
	default:
		error("translate_alpha_op: unknown opcode %s\n", rc_get_opcode_info(opcode)->Name);
		/* Still hand back a valid encoding after reporting the error. */
		hw_op = R500_ALPHA_OP_MAD;
		break;
	}

	return hw_op;
}
142 
fix_hw_swizzle(unsigned int swz)143 static unsigned int fix_hw_swizzle(unsigned int swz)
144 {
145     switch (swz) {
146         case RC_SWIZZLE_ZERO:
147         case RC_SWIZZLE_UNUSED:
148             swz = 4;
149             break;
150         case RC_SWIZZLE_HALF:
151             swz = 5;
152             break;
153         case RC_SWIZZLE_ONE:
154             swz = 6;
155             break;
156     }
157 
158 	return swz;
159 }
160 
translate_arg_rgb(struct rc_pair_instruction * inst,int arg)161 static unsigned int translate_arg_rgb(struct rc_pair_instruction *inst, int arg)
162 {
163 	unsigned int t = inst->RGB.Arg[arg].Source;
164 	int comp;
165 	t |= inst->RGB.Arg[arg].Negate << 11;
166 	t |= inst->RGB.Arg[arg].Abs << 12;
167 
168 	for(comp = 0; comp < 3; ++comp)
169 		t |= fix_hw_swizzle(GET_SWZ(inst->RGB.Arg[arg].Swizzle, comp)) << (3*comp + 2);
170 
171 	return t;
172 }
173 
translate_arg_alpha(struct rc_pair_instruction * inst,int i)174 static unsigned int translate_arg_alpha(struct rc_pair_instruction *inst, int i)
175 {
176 	unsigned int t = inst->Alpha.Arg[i].Source;
177 	t |= fix_hw_swizzle(GET_SWZ(inst->Alpha.Arg[i].Swizzle, 0)) << 2;
178 	t |= inst->Alpha.Arg[i].Negate << 5;
179 	t |= inst->Alpha.Arg[i].Abs << 6;
180 	return t;
181 }
182 
/**
 * Map a compare function to the ALU result operation bits of inst0.
 *
 * Only EQUAL, LESS, GEQUAL and NOTEQUAL are handled. Any other function is
 * reported as an error on \p c and 0 is returned.
 */
static uint32_t translate_alu_result_op(struct r300_fragment_program_compiler * c, rc_compare_func func)
{
	uint32_t result_op = 0;

	switch (func) {
	case RC_COMPARE_FUNC_EQUAL:
		result_op = R500_INST_ALU_RESULT_OP_EQ;
		break;
	case RC_COMPARE_FUNC_LESS:
		result_op = R500_INST_ALU_RESULT_OP_LT;
		break;
	case RC_COMPARE_FUNC_GEQUAL:
		result_op = R500_INST_ALU_RESULT_OP_GE;
		break;
	case RC_COMPARE_FUNC_NOTEQUAL:
		result_op = R500_INST_ALU_RESULT_OP_NE;
		break;
	default:
		rc_error(&c->Base, "%s: unsupported compare func %i\n", __FUNCTION__, func);
		break;
	}

	return result_op;
}
195 
use_temporary(struct r500_fragment_program_code * code,unsigned int index)196 static void use_temporary(struct r500_fragment_program_code* code, unsigned int index)
197 {
198 	if (index > code->max_temp_idx)
199 		code->max_temp_idx = index;
200 }
201 
use_source(struct r500_fragment_program_code * code,struct rc_pair_instruction_source src)202 static unsigned int use_source(struct r500_fragment_program_code* code, struct rc_pair_instruction_source src)
203 {
204 	/* From docs:
205 	 *   Note that inline constants set the MSB of ADDR0 and clear ADDR0_CONST.
206 	 * MSB = 1 << 7 */
207 	if (!src.Used)
208 		return 1 << 7;
209 
210 	if (src.File == RC_FILE_CONSTANT) {
211 		return src.Index | R500_RGB_ADDR0_CONST;
212 	} else if (src.File == RC_FILE_TEMPORARY || src.File == RC_FILE_INPUT) {
213 		use_temporary(code, src.Index);
214 		return src.Index;
215 	} else if (src.File == RC_FILE_INLINE) {
216 		return src.Index | (1 << 7);
217 	}
218 
219 	return 0;
220 }
221 
222 /**
223  * NOP the specified instruction if it is not a texture lookup.
224  */
alu_nop(struct r300_fragment_program_compiler * c,int ip)225 static void alu_nop(struct r300_fragment_program_compiler *c, int ip)
226 {
227 	PROG_CODE;
228 
229 	if ((code->inst[ip].inst0 & 0x3) != R500_INST_TYPE_TEX) {
230 		code->inst[ip].inst0 |= R500_INST_NOP;
231 	}
232 }
233 
234 /**
235  * Emit a paired ALU instruction.
236  */
emit_paired(struct r300_fragment_program_compiler * c,struct rc_pair_instruction * inst)237 static void emit_paired(struct r300_fragment_program_compiler *c, struct rc_pair_instruction *inst)
238 {
239 	int ip;
240 	PROG_CODE;
241 
242 	if (code->inst_end >= c->Base.max_alu_insts-1) {
243 		error("emit_alu: Too many instructions");
244 		return;
245 	}
246 
247 	ip = ++code->inst_end;
248 
249 	/* Quirk: MDH/MDV (DDX/DDY) need a NOP on previous non-TEX instructions. */
250 	if (inst->RGB.Opcode == RC_OPCODE_DDX || inst->Alpha.Opcode == RC_OPCODE_DDX ||
251 		inst->RGB.Opcode == RC_OPCODE_DDY || inst->Alpha.Opcode == RC_OPCODE_DDY) {
252 		if (ip > 0) {
253 			alu_nop(c, ip - 1);
254 		}
255 	}
256 
257 	code->inst[ip].inst5 = translate_rgb_op(c, inst->RGB.Opcode);
258 	code->inst[ip].inst4 = translate_alpha_op(c, inst->Alpha.Opcode);
259 
260 	if (inst->RGB.OutputWriteMask || inst->Alpha.OutputWriteMask || inst->Alpha.DepthWriteMask) {
261 		code->inst[ip].inst0 = R500_INST_TYPE_OUT;
262 		if (inst->WriteALUResult) {
263 			error("Cannot write output and ALU result at the same time");
264 			return;
265 		}
266 	} else {
267 		code->inst[ip].inst0 = R500_INST_TYPE_ALU;
268 	}
269 	code->inst[ip].inst0 |= (inst->SemWait << R500_INST_TEX_SEM_WAIT_SHIFT);
270 
271 	code->inst[ip].inst0 |= (inst->RGB.WriteMask << 11);
272 	code->inst[ip].inst0 |= inst->Alpha.WriteMask ? 1 << 14 : 0;
273 	code->inst[ip].inst0 |= (inst->RGB.OutputWriteMask << 15) | (inst->Alpha.OutputWriteMask << 18);
274 	if (inst->Nop) {
275 		code->inst[ip].inst0 |= R500_INST_NOP;
276 	}
277 	if (inst->Alpha.DepthWriteMask) {
278 		code->inst[ip].inst4 |= R500_ALPHA_W_OMASK;
279 		c->code->writes_depth = 1;
280 	}
281 
282 	code->inst[ip].inst4 |= R500_ALPHA_ADDRD(inst->Alpha.DestIndex);
283 	code->inst[ip].inst5 |= R500_ALU_RGBA_ADDRD(inst->RGB.DestIndex);
284 	use_temporary(code, inst->Alpha.DestIndex);
285 	use_temporary(code, inst->RGB.DestIndex);
286 
287 	if (inst->RGB.Saturate)
288 		code->inst[ip].inst0 |= R500_INST_RGB_CLAMP;
289 	if (inst->Alpha.Saturate)
290 		code->inst[ip].inst0 |= R500_INST_ALPHA_CLAMP;
291 
292 	/* Set the presubtract operation. */
293 	switch(inst->RGB.Src[RC_PAIR_PRESUB_SRC].Index) {
294 		case RC_PRESUB_BIAS:
295 			code->inst[ip].inst1 |= R500_RGB_SRCP_OP_1_MINUS_2RGB0;
296 			break;
297 		case RC_PRESUB_SUB:
298 			code->inst[ip].inst1 |= R500_RGB_SRCP_OP_RGB1_MINUS_RGB0;
299 			break;
300 		case RC_PRESUB_ADD:
301 			code->inst[ip].inst1 |= R500_RGB_SRCP_OP_RGB1_PLUS_RGB0;
302 			break;
303 		case RC_PRESUB_INV:
304 			code->inst[ip].inst1 |= R500_RGB_SRCP_OP_1_MINUS_RGB0;
305 			break;
306 		default:
307 			break;
308 	}
309 	switch(inst->Alpha.Src[RC_PAIR_PRESUB_SRC].Index) {
310 		case RC_PRESUB_BIAS:
311 			code->inst[ip].inst2 |= R500_ALPHA_SRCP_OP_1_MINUS_2A0;
312 			break;
313 		case RC_PRESUB_SUB:
314 			code->inst[ip].inst2 |= R500_ALPHA_SRCP_OP_A1_MINUS_A0;
315 			break;
316 		case RC_PRESUB_ADD:
317 			code->inst[ip].inst2 |= R500_ALPHA_SRCP_OP_A1_PLUS_A0;
318 			break;
319 		case RC_PRESUB_INV:
320 			code->inst[ip].inst2 |= R500_ALPHA_SRCP_OP_1_MINUS_A0;
321 			break;
322 		default:
323 			break;
324 	}
325 
326 	/* Set the output modifier */
327 	code->inst[ip].inst3 |= inst->RGB.Omod << R500_ALU_RGB_OMOD_SHIFT;
328 	code->inst[ip].inst4 |= inst->Alpha.Omod << R500_ALPHA_OMOD_SHIFT;
329 
330 	code->inst[ip].inst1 |= R500_RGB_ADDR0(use_source(code, inst->RGB.Src[0]));
331 	code->inst[ip].inst1 |= R500_RGB_ADDR1(use_source(code, inst->RGB.Src[1]));
332 	code->inst[ip].inst1 |= R500_RGB_ADDR2(use_source(code, inst->RGB.Src[2]));
333 
334 	code->inst[ip].inst2 |= R500_ALPHA_ADDR0(use_source(code, inst->Alpha.Src[0]));
335 	code->inst[ip].inst2 |= R500_ALPHA_ADDR1(use_source(code, inst->Alpha.Src[1]));
336 	code->inst[ip].inst2 |= R500_ALPHA_ADDR2(use_source(code, inst->Alpha.Src[2]));
337 
338 	code->inst[ip].inst3 |= translate_arg_rgb(inst, 0) << R500_ALU_RGB_SEL_A_SHIFT;
339 	code->inst[ip].inst3 |= translate_arg_rgb(inst, 1) << R500_ALU_RGB_SEL_B_SHIFT;
340 	code->inst[ip].inst5 |= translate_arg_rgb(inst, 2) << R500_ALU_RGBA_SEL_C_SHIFT;
341 
342 	code->inst[ip].inst4 |= translate_arg_alpha(inst, 0) << R500_ALPHA_SEL_A_SHIFT;
343 	code->inst[ip].inst4 |= translate_arg_alpha(inst, 1) << R500_ALPHA_SEL_B_SHIFT;
344 	code->inst[ip].inst5 |= translate_arg_alpha(inst, 2) << R500_ALU_RGBA_ALPHA_SEL_C_SHIFT;
345 
346 	code->inst[ip].inst3 |= R500_ALU_RGB_TARGET(inst->RGB.Target);
347 	code->inst[ip].inst4 |= R500_ALPHA_TARGET(inst->Alpha.Target);
348 
349 	if (inst->WriteALUResult) {
350 		code->inst[ip].inst3 |= R500_ALU_RGB_WMASK;
351 
352 		if (inst->WriteALUResult == RC_ALURESULT_X)
353 			code->inst[ip].inst0 |= R500_INST_ALU_RESULT_SEL_RED;
354 		else
355 			code->inst[ip].inst0 |= R500_INST_ALU_RESULT_SEL_ALPHA;
356 
357 		code->inst[ip].inst0 |= translate_alu_result_op(c, inst->ALUResultCompare);
358 	}
359 }
360 
/**
 * Pack the four channel selectors of \p swizzle into two bits each.
 *
 * Only the low two bits of every selector are kept; channel i ends up at
 * bits 2*i and 2*i+1 of the result.
 */
static unsigned int translate_strq_swizzle(unsigned int swizzle)
{
	unsigned int packed = 0;

	/* Walk from the last channel down so channel 0 lands in the low bits. */
	for (int chan = 3; chan >= 0; chan--)
		packed = (packed << 2) | (GET_SWZ(swizzle, chan) & 0x3);

	return packed;
}
369 
370 /**
371  * Emit a single TEX instruction
372  */
emit_tex(struct r300_fragment_program_compiler * c,struct rc_sub_instruction * inst)373 static int emit_tex(struct r300_fragment_program_compiler *c, struct rc_sub_instruction *inst)
374 {
375 	int ip;
376 	PROG_CODE;
377 
378 	if (code->inst_end >= c->Base.max_alu_insts-1) {
379 		error("emit_tex: Too many instructions");
380 		return 0;
381 	}
382 
383 	ip = ++code->inst_end;
384 
385 	code->inst[ip].inst0 = R500_INST_TYPE_TEX
386 		| (inst->DstReg.WriteMask << 11)
387 		| (inst->TexSemWait << R500_INST_TEX_SEM_WAIT_SHIFT);
388 	code->inst[ip].inst1 = R500_TEX_ID(inst->TexSrcUnit)
389 		| (inst->TexSemAcquire << R500_TEX_SEM_ACQUIRE_SHIFT);
390 
391 	if (inst->TexSrcTarget == RC_TEXTURE_RECT)
392 		code->inst[ip].inst1 |= R500_TEX_UNSCALED;
393 
394 	switch (inst->Opcode) {
395 	case RC_OPCODE_KIL:
396 		code->inst[ip].inst1 |= R500_TEX_INST_TEXKILL;
397 		break;
398 	case RC_OPCODE_TEX:
399 		code->inst[ip].inst1 |= R500_TEX_INST_LD;
400 		break;
401 	case RC_OPCODE_TXB:
402 		code->inst[ip].inst1 |= R500_TEX_INST_LODBIAS;
403 		break;
404 	case RC_OPCODE_TXP:
405 		code->inst[ip].inst1 |= R500_TEX_INST_PROJ;
406 		break;
407 	case RC_OPCODE_TXD:
408 		code->inst[ip].inst1 |= R500_TEX_INST_DXDY;
409 		break;
410 	case RC_OPCODE_TXL:
411 		code->inst[ip].inst1 |= R500_TEX_INST_LOD;
412 		break;
413 	default:
414 		error("emit_tex can't handle opcode %s\n", rc_get_opcode_info(inst->Opcode)->Name);
415 	}
416 
417 	use_temporary(code, inst->SrcReg[0].Index);
418 	if (inst->Opcode != RC_OPCODE_KIL)
419 		use_temporary(code, inst->DstReg.Index);
420 
421 	code->inst[ip].inst2 = R500_TEX_SRC_ADDR(inst->SrcReg[0].Index)
422 		| (translate_strq_swizzle(inst->SrcReg[0].Swizzle) << 8)
423 		| R500_TEX_DST_ADDR(inst->DstReg.Index)
424 		| (GET_SWZ(inst->TexSwizzle, 0) << 24)
425 		| (GET_SWZ(inst->TexSwizzle, 1) << 26)
426 		| (GET_SWZ(inst->TexSwizzle, 2) << 28)
427 		| (GET_SWZ(inst->TexSwizzle, 3) << 30)
428 		;
429 
430 	if (inst->Opcode == RC_OPCODE_TXD) {
431 		use_temporary(code, inst->SrcReg[1].Index);
432 		use_temporary(code, inst->SrcReg[2].Index);
433 
434 		/* DX and DY parameters are specified in a separate register. */
435 		code->inst[ip].inst3 =
436 			R500_DX_ADDR(inst->SrcReg[1].Index) |
437 			(translate_strq_swizzle(inst->SrcReg[1].Swizzle) << 8) |
438 			R500_DY_ADDR(inst->SrcReg[2].Index) |
439 			(translate_strq_swizzle(inst->SrcReg[2].Swizzle) << 24);
440 	}
441 
442 	return 1;
443 }
444 
emit_flowcontrol(struct emit_state * s,struct rc_instruction * inst)445 static void emit_flowcontrol(struct emit_state * s, struct rc_instruction * inst)
446 {
447 	unsigned int newip;
448 
449 	if (s->Code->inst_end >= s->C->max_alu_insts-1) {
450 		rc_error(s->C, "emit_tex: Too many instructions");
451 		return;
452 	}
453 
454 	newip = ++s->Code->inst_end;
455 
456 	/* Currently all loops use the same integer constant to initialize
457 	 * the loop variables. */
458 	if(!s->Code->int_constants[0]) {
459 		s->Code->int_constants[0] = R500_FC_INT_CONST_KR(0xff);
460 		s->Code->int_constant_count = 1;
461 	}
462 	s->Code->inst[newip].inst0 = R500_INST_TYPE_FC | R500_INST_ALU_WAIT;
463 
464 	switch(inst->U.I.Opcode){
465 	struct branch_info * branch;
466 	struct r500_loop_info * loop;
467 	case RC_OPCODE_BGNLOOP:
468 		memory_pool_array_reserve(&s->C->Pool, struct r500_loop_info,
469 			s->Loops, s->CurrentLoopDepth, s->LoopsReserved, 1);
470 
471 		loop = &s->Loops[s->CurrentLoopDepth++];
472 		memset(loop, 0, sizeof(struct r500_loop_info));
473 		loop->BranchDepth = s->CurrentBranchDepth;
474 		loop->BgnLoop = newip;
475 
476 		s->Code->inst[newip].inst2 = R500_FC_OP_LOOP
477 			| R500_FC_JUMP_FUNC(0x00)
478 			| R500_FC_IGNORE_UNCOVERED
479 			;
480 		break;
481 	case RC_OPCODE_BRK:
482 		loop = &s->Loops[s->CurrentLoopDepth - 1];
483 		memory_pool_array_reserve(&s->C->Pool, int, loop->Brks,
484 					loop->BrkCount, loop->BrkReserved, 1);
485 
486 		loop->Brks[loop->BrkCount++] = newip;
487 		s->Code->inst[newip].inst2 = R500_FC_OP_BREAKLOOP
488 			| R500_FC_JUMP_FUNC(0xff)
489 			| R500_FC_B_OP1_DECR
490 			| R500_FC_B_POP_CNT(
491 				s->CurrentBranchDepth - loop->BranchDepth)
492 			| R500_FC_IGNORE_UNCOVERED
493 			;
494 		break;
495 
496 	case RC_OPCODE_CONT:
497 		loop = &s->Loops[s->CurrentLoopDepth - 1];
498 		memory_pool_array_reserve(&s->C->Pool, int, loop->Conts,
499 					loop->ContCount, loop->ContReserved, 1);
500 		loop->Conts[loop->ContCount++] = newip;
501 		s->Code->inst[newip].inst2 = R500_FC_OP_CONTINUE
502 			| R500_FC_JUMP_FUNC(0xff)
503 			| R500_FC_B_OP1_DECR
504 			| R500_FC_B_POP_CNT(
505 				s->CurrentBranchDepth -	loop->BranchDepth)
506 			| R500_FC_IGNORE_UNCOVERED
507 			;
508 		break;
509 
510 	case RC_OPCODE_ENDLOOP:
511 	{
512 		loop = &s->Loops[s->CurrentLoopDepth - 1];
513 		/* Emit ENDLOOP */
514 		s->Code->inst[newip].inst2 = R500_FC_OP_ENDLOOP
515 			| R500_FC_JUMP_FUNC(0xff)
516 			| R500_FC_JUMP_ANY
517 			| R500_FC_IGNORE_UNCOVERED
518 			;
519 		/* The constant integer at index 0 is used by all loops. */
520 		s->Code->inst[newip].inst3 = R500_FC_INT_ADDR(0)
521 			| R500_FC_JUMP_ADDR(loop->BgnLoop + 1)
522 			;
523 
524 		/* Set jump address and int constant for BGNLOOP */
525 		s->Code->inst[loop->BgnLoop].inst3 = R500_FC_INT_ADDR(0)
526 			| R500_FC_JUMP_ADDR(newip)
527 			;
528 
529 		/* Set jump address for the BRK instructions. */
530 		while(loop->BrkCount--) {
531 			s->Code->inst[loop->Brks[loop->BrkCount]].inst3 =
532 						R500_FC_JUMP_ADDR(newip + 1);
533 		}
534 
535 		/* Set jump address for CONT instructions. */
536 		while(loop->ContCount--) {
537 			s->Code->inst[loop->Conts[loop->ContCount]].inst3 =
538 						R500_FC_JUMP_ADDR(newip);
539 		}
540 		s->CurrentLoopDepth--;
541 		break;
542 	}
543 	case RC_OPCODE_IF:
544 		if ( s->CurrentBranchDepth >= R500_PFS_MAX_BRANCH_DEPTH_FULL) {
545 			rc_error(s->C, "Branch depth exceeds hardware limit");
546 			return;
547 		}
548 		memory_pool_array_reserve(&s->C->Pool, struct branch_info,
549 				s->Branches, s->CurrentBranchDepth, s->BranchesReserved, 1);
550 
551 		branch = &s->Branches[s->CurrentBranchDepth++];
552 		branch->If = newip;
553 		branch->Else = -1;
554 		branch->Endif = -1;
555 
556 		if (s->CurrentBranchDepth > s->MaxBranchDepth)
557 			s->MaxBranchDepth = s->CurrentBranchDepth;
558 
559 		/* actual instruction is filled in at ENDIF time */
560 		break;
561 
562 	case RC_OPCODE_ELSE:
563 		if (!s->CurrentBranchDepth) {
564 			rc_error(s->C, "%s: got ELSE outside a branch", __FUNCTION__);
565 			return;
566 		}
567 
568 		branch = &s->Branches[s->CurrentBranchDepth - 1];
569 		branch->Else = newip;
570 
571 		/* actual instruction is filled in at ENDIF time */
572 		break;
573 
574 	case RC_OPCODE_ENDIF:
575 		if (!s->CurrentBranchDepth) {
576 			rc_error(s->C, "%s: got ELSE outside a branch", __FUNCTION__);
577 			return;
578 		}
579 
580 		branch = &s->Branches[s->CurrentBranchDepth - 1];
581 		branch->Endif = newip;
582 
583 		s->Code->inst[branch->Endif].inst2 = R500_FC_OP_JUMP
584 			| R500_FC_A_OP_NONE /* no address stack */
585 			| R500_FC_JUMP_ANY /* docs says set this, but I don't understand why */
586 			| R500_FC_B_OP0_DECR /* decrement branch counter if stay */
587 			| R500_FC_B_OP1_NONE /* no branch counter if stay */
588 			| R500_FC_B_POP_CNT(1)
589 			;
590 		s->Code->inst[branch->Endif].inst3 = R500_FC_JUMP_ADDR(branch->Endif + 1);
591 		s->Code->inst[branch->If].inst2 = R500_FC_OP_JUMP
592 			| R500_FC_A_OP_NONE /* no address stack */
593 			| R500_FC_JUMP_FUNC(0x0f) /* jump if ALU result is false */
594 			| R500_FC_B_OP0_INCR /* increment branch counter if stay */
595 			| R500_FC_IGNORE_UNCOVERED
596 		;
597 
598 		if (branch->Else >= 0) {
599 			/* increment branch counter also if jump */
600 			s->Code->inst[branch->If].inst2 |= R500_FC_B_OP1_INCR;
601 			s->Code->inst[branch->If].inst3 = R500_FC_JUMP_ADDR(branch->Else + 1);
602 
603 			s->Code->inst[branch->Else].inst2 = R500_FC_OP_JUMP
604 				| R500_FC_A_OP_NONE /* no address stack */
605 				| R500_FC_B_ELSE /* all active pixels want to jump */
606 				| R500_FC_B_OP0_NONE /* no counter op if stay */
607 				| R500_FC_B_OP1_DECR /* decrement branch counter if jump */
608 				| R500_FC_B_POP_CNT(1)
609 			;
610 			s->Code->inst[branch->Else].inst3 = R500_FC_JUMP_ADDR(branch->Endif + 1);
611 		} else {
612 			/* don't touch branch counter on jump */
613 			s->Code->inst[branch->If].inst2 |= R500_FC_B_OP1_NONE;
614 			s->Code->inst[branch->If].inst3 = R500_FC_JUMP_ADDR(branch->Endif + 1);
615 		}
616 
617 
618 		s->CurrentBranchDepth--;
619 		break;
620 	default:
621 		rc_error(s->C, "%s: unknown opcode %s\n", __FUNCTION__, rc_get_opcode_info(inst->U.I.Opcode)->Name);
622 	}
623 }
624 
r500BuildFragmentProgramHwCode(struct radeon_compiler * c,void * user)625 void r500BuildFragmentProgramHwCode(struct radeon_compiler *c, void *user)
626 {
627 	struct r300_fragment_program_compiler *compiler = (struct r300_fragment_program_compiler*)c;
628 	struct emit_state s;
629 	struct r500_fragment_program_code *code = &compiler->code->code.r500;
630 
631 	memset(&s, 0, sizeof(s));
632 	s.C = &compiler->Base;
633 	s.Code = code;
634 
635 	memset(code, 0, sizeof(*code));
636 	code->max_temp_idx = 1;
637 	code->inst_end = -1;
638 
639 	for(struct rc_instruction * inst = compiler->Base.Program.Instructions.Next;
640 	    inst != &compiler->Base.Program.Instructions && !compiler->Base.Error;
641 	    inst = inst->Next) {
642 		if (inst->Type == RC_INSTRUCTION_NORMAL) {
643 			const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode);
644 
645 			if (opcode->IsFlowControl) {
646 				emit_flowcontrol(&s, inst);
647 			} else if (inst->U.I.Opcode == RC_OPCODE_BEGIN_TEX) {
648 				continue;
649 			} else {
650 				emit_tex(compiler, &inst->U.I);
651 			}
652 		} else {
653 			emit_paired(compiler, &inst->U.P);
654 		}
655 	}
656 
657 	if (code->max_temp_idx >= compiler->Base.max_temp_regs)
658 		rc_error(&compiler->Base, "Too many hardware temporaries used");
659 
660 	if (compiler->Base.Error)
661 		return;
662 
663 	if (code->inst_end == -1 ||
664 	    (code->inst[code->inst_end].inst0 & R500_INST_TYPE_MASK) != R500_INST_TYPE_OUT) {
665 		int ip;
666 
667 		/* This may happen when dead-code elimination is disabled or
668 		 * when most of the fragment program logic is leading to a KIL */
669 		if (code->inst_end >= compiler->Base.max_alu_insts-1) {
670 			rc_error(&compiler->Base, "Introducing fake OUT: Too many instructions");
671 			return;
672 		}
673 
674 		ip = ++code->inst_end;
675 		code->inst[ip].inst0 = R500_INST_TYPE_OUT | R500_INST_TEX_SEM_WAIT;
676 	}
677 
678 	/* Make sure TEX_SEM_WAIT is set on the last instruction */
679 	code->inst[code->inst_end].inst0 |= R500_INST_TEX_SEM_WAIT;
680 
681 	/* Enable full flow control mode if we are using loops or have if
682 	 * statements nested at least four deep. */
683 	if (s.MaxBranchDepth >= 4 || s.LoopsReserved > 0) {
684 		if (code->max_temp_idx < 1)
685 			code->max_temp_idx = 1;
686 
687 		code->us_fc_ctrl |= R500_FC_FULL_FC_EN;
688 	}
689 }
690