1 /*
2  * Copyright (C) 2005 Ben Skeggs.
3  *
4  * Copyright 2008 Corbin Simpson <MostAwesomeDude@gmail.com>
5  * Adaptation and modification for ATI/AMD Radeon R500 GPU chipsets.
6  *
7  * All Rights Reserved.
8  *
9  * Permission is hereby granted, free of charge, to any person obtaining
10  * a copy of this software and associated documentation files (the
11  * "Software"), to deal in the Software without restriction, including
12  * without limitation the rights to use, copy, modify, merge, publish,
13  * distribute, sublicense, and/or sell copies of the Software, and to
14  * permit persons to whom the Software is furnished to do so, subject to
15  * the following conditions:
16  *
17  * The above copyright notice and this permission notice (including the
18  * next paragraph) shall be included in all copies or substantial
19  * portions of the Software.
20  *
21  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
22  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
23  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
24  * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
25  * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
26  * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
27  * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
28  *
29  */
30 
31 /**
32  * \file
33  *
34  * \author Ben Skeggs <darktama@iinet.net.au>
35  *
36  * \author Jerome Glisse <j.glisse@gmail.com>
37  *
38  * \author Corbin Simpson <MostAwesomeDude@gmail.com>
39  *
40  */
41 
42 #include "r500_fragprog.h"
43 
44 #include "r300_reg.h"
45 
46 #include "radeon_program_pair.h"
47 
/**
 * Declare a local pointer named `code` that refers to the R500 hardware
 * program being filled in.
 *
 * The expansion is a declaration and dereferences a compiler pointer named
 * `c`, so the macro must be used at a point where declarations are allowed
 * and such a `c` is in scope.  The caller supplies the trailing semicolon.
 */
#define PROG_CODE \
	struct r500_fragment_program_code *code = &c->code->code.r500
50 
/**
 * Report a compiler error through rc_error().
 *
 * The message is prefixed with "<file>::<function>(): " and a newline is
 * appended, so callers pass only the bare message and its arguments.
 * Requires a compiler pointer named `c` with a `Base` member in scope.
 */
#define error(fmt, ...) \
	do { \
		rc_error(&c->Base, "%s::%s(): " fmt "\n", \
			 __FILE__, __FUNCTION__, ##__VA_ARGS__); \
	} while (0)
55 
56 
/**
 * Bookkeeping for one open IF / ELSE / ENDIF construct.
 *
 * Each member holds the instruction slot of the corresponding flow-control
 * instruction.  Else and Endif are initialised to -1 when the IF is
 * recorded and stay negative until that instruction has been emitted.
 */
struct branch_info {
	int If;    /* slot of the IF instruction */
	int Else;  /* slot of the ELSE instruction, or -1 if none seen */
	int Endif; /* slot of the ENDIF instruction, or -1 if not yet seen */
};
62 
/**
 * Bookkeeping for one open BGNLOOP / ENDLOOP construct.
 *
 * The slots of BRK and CONT instructions are collected here so that their
 * jump addresses can be filled in once the matching ENDLOOP is emitted.
 */
struct r500_loop_info {
	/* Slot of the BGNLOOP instruction. */
	int BgnLoop;

	/* Branch depth at the point the loop was opened; BRK/CONT use the
	 * difference to the current depth as their pop count. */
	int BranchDepth;

	/* Slots of the BRK instructions seen inside this loop. */
	int *Brks;
	int BrkCount;
	/* Passed to memory_pool_array_reserve() as the reserved count
	 * (presumably the allocated capacity of Brks). */
	int BrkReserved;

	/* Slots of the CONT instructions seen inside this loop. */
	int *Conts;
	int ContCount;
	/* Passed to memory_pool_array_reserve() as the reserved count
	 * (presumably the allocated capacity of Conts). */
	int ContReserved;
};
75 
/**
 * State carried across flow-control emission for one program.
 */
struct emit_state {
	/* Compiler the program belongs to; errors are reported on it. */
	struct radeon_compiler *C;
	/* Hardware code being filled in. */
	struct r500_fragment_program_code *Code;

	/* Stack of currently open IF constructs. */
	struct branch_info *Branches;
	unsigned int CurrentBranchDepth;
	/* Passed to memory_pool_array_reserve() as the reserved count
	 * (presumably the allocated capacity of Branches). */
	unsigned int BranchesReserved;

	/* Stack of currently open loops. */
	struct r500_loop_info *Loops;
	unsigned int CurrentLoopDepth;
	/* Passed to memory_pool_array_reserve() as the reserved count
	 * (presumably the allocated capacity of Loops). */
	unsigned int LoopsReserved;

	/* Largest value CurrentBranchDepth has reached so far. */
	unsigned int MaxBranchDepth;
};
91 
/**
 * Map a compiler opcode onto the hardware opcode of the RGB ALU.
 *
 * NOP is encoded as MAD.  An opcode without an RGB mapping is reported as
 * an error on \p c and MAD is returned as well.
 */
static unsigned int translate_rgb_op(struct r300_fragment_program_compiler *c, rc_opcode opcode)
{
	unsigned int hw_op = R500_ALU_RGBA_OP_MAD;

	switch (opcode) {
	case RC_OPCODE_NOP:
	case RC_OPCODE_MAD:
		/* hw_op already holds MAD */
		break;
	case RC_OPCODE_CMP:
		hw_op = R500_ALU_RGBA_OP_CMP;
		break;
	case RC_OPCODE_CND:
		hw_op = R500_ALU_RGBA_OP_CND;
		break;
	case RC_OPCODE_DDX:
		hw_op = R500_ALU_RGBA_OP_MDH;
		break;
	case RC_OPCODE_DDY:
		hw_op = R500_ALU_RGBA_OP_MDV;
		break;
	case RC_OPCODE_DP3:
		hw_op = R500_ALU_RGBA_OP_DP3;
		break;
	case RC_OPCODE_DP4:
		hw_op = R500_ALU_RGBA_OP_DP4;
		break;
	case RC_OPCODE_FRC:
		hw_op = R500_ALU_RGBA_OP_FRC;
		break;
	case RC_OPCODE_MAX:
		hw_op = R500_ALU_RGBA_OP_MAX;
		break;
	case RC_OPCODE_MIN:
		hw_op = R500_ALU_RGBA_OP_MIN;
		break;
	case RC_OPCODE_REPL_ALPHA:
		hw_op = R500_ALU_RGBA_OP_SOP;
		break;
	default:
		error("translate_rgb_op: unknown opcode %s\n", rc_get_opcode_info(opcode)->Name);
		break;
	}

	return hw_op;
}
113 
/**
 * Map a compiler opcode onto the hardware opcode of the alpha ALU.
 *
 * NOP is encoded as MAD, and both DP3 and DP4 map to the single alpha DP
 * opcode.  An opcode without an alpha mapping is reported as an error on
 * \p c and MAD is returned.
 */
static unsigned int translate_alpha_op(struct r300_fragment_program_compiler *c, rc_opcode opcode)
{
	unsigned int hw_op = R500_ALPHA_OP_MAD;

	switch (opcode) {
	case RC_OPCODE_NOP:
	case RC_OPCODE_MAD:
		/* hw_op already holds MAD */
		break;
	case RC_OPCODE_CMP:
		hw_op = R500_ALPHA_OP_CMP;
		break;
	case RC_OPCODE_CND:
		hw_op = R500_ALPHA_OP_CND;
		break;
	case RC_OPCODE_COS:
		hw_op = R500_ALPHA_OP_COS;
		break;
	case RC_OPCODE_DDX:
		hw_op = R500_ALPHA_OP_MDH;
		break;
	case RC_OPCODE_DDY:
		hw_op = R500_ALPHA_OP_MDV;
		break;
	case RC_OPCODE_DP3:
	case RC_OPCODE_DP4:
		hw_op = R500_ALPHA_OP_DP;
		break;
	case RC_OPCODE_EX2:
		hw_op = R500_ALPHA_OP_EX2;
		break;
	case RC_OPCODE_FRC:
		hw_op = R500_ALPHA_OP_FRC;
		break;
	case RC_OPCODE_LG2:
		hw_op = R500_ALPHA_OP_LN2;
		break;
	case RC_OPCODE_MAX:
		hw_op = R500_ALPHA_OP_MAX;
		break;
	case RC_OPCODE_MIN:
		hw_op = R500_ALPHA_OP_MIN;
		break;
	case RC_OPCODE_RCP:
		hw_op = R500_ALPHA_OP_RCP;
		break;
	case RC_OPCODE_RSQ:
		hw_op = R500_ALPHA_OP_RSQ;
		break;
	case RC_OPCODE_SIN:
		hw_op = R500_ALPHA_OP_SIN;
		break;
	default:
		error("translate_alpha_op: unknown opcode %s\n", rc_get_opcode_info(opcode)->Name);
		break;
	}

	return hw_op;
}
140 
fix_hw_swizzle(unsigned int swz)141 static unsigned int fix_hw_swizzle(unsigned int swz)
142 {
143     switch (swz) {
144         case RC_SWIZZLE_ZERO:
145         case RC_SWIZZLE_UNUSED:
146             swz = 4;
147             break;
148         case RC_SWIZZLE_HALF:
149             swz = 5;
150             break;
151         case RC_SWIZZLE_ONE:
152             swz = 6;
153             break;
154     }
155 
156 	return swz;
157 }
158 
translate_arg_rgb(struct rc_pair_instruction * inst,int arg)159 static unsigned int translate_arg_rgb(struct rc_pair_instruction *inst, int arg)
160 {
161 	unsigned int t = inst->RGB.Arg[arg].Source;
162 	int comp;
163 	t |= inst->RGB.Arg[arg].Negate << 11;
164 	t |= inst->RGB.Arg[arg].Abs << 12;
165 
166 	for(comp = 0; comp < 3; ++comp)
167 		t |= fix_hw_swizzle(GET_SWZ(inst->RGB.Arg[arg].Swizzle, comp)) << (3*comp + 2);
168 
169 	return t;
170 }
171 
translate_arg_alpha(struct rc_pair_instruction * inst,int i)172 static unsigned int translate_arg_alpha(struct rc_pair_instruction *inst, int i)
173 {
174 	unsigned int t = inst->Alpha.Arg[i].Source;
175 	t |= fix_hw_swizzle(GET_SWZ(inst->Alpha.Arg[i].Swizzle, 0)) << 2;
176 	t |= inst->Alpha.Arg[i].Negate << 5;
177 	t |= inst->Alpha.Arg[i].Abs << 6;
178 	return t;
179 }
180 
/**
 * Map a compare function onto the ALU_RESULT_OP bits of inst0.
 *
 * Only EQUAL, LESS, GEQUAL and NOTEQUAL are handled; any other function is
 * reported as an error on \p c and 0 is returned.
 */
static uint32_t translate_alu_result_op(struct r300_fragment_program_compiler * c, rc_compare_func func)
{
	uint32_t result_op = 0;

	switch (func) {
	case RC_COMPARE_FUNC_EQUAL:
		result_op = R500_INST_ALU_RESULT_OP_EQ;
		break;
	case RC_COMPARE_FUNC_LESS:
		result_op = R500_INST_ALU_RESULT_OP_LT;
		break;
	case RC_COMPARE_FUNC_GEQUAL:
		result_op = R500_INST_ALU_RESULT_OP_GE;
		break;
	case RC_COMPARE_FUNC_NOTEQUAL:
		result_op = R500_INST_ALU_RESULT_OP_NE;
		break;
	default:
		rc_error(&c->Base, "%s: unsupported compare func %i\n", __FUNCTION__, func);
		break;
	}

	return result_op;
}
193 
use_temporary(struct r500_fragment_program_code * code,unsigned int index)194 static void use_temporary(struct r500_fragment_program_code* code, unsigned int index)
195 {
196 	if (index > code->max_temp_idx)
197 		code->max_temp_idx = index;
198 }
199 
use_source(struct r500_fragment_program_code * code,struct rc_pair_instruction_source src)200 static unsigned int use_source(struct r500_fragment_program_code* code, struct rc_pair_instruction_source src)
201 {
202 	/* From docs:
203 	 *   Note that inline constants set the MSB of ADDR0 and clear ADDR0_CONST.
204 	 * MSB = 1 << 7 */
205 	if (!src.Used)
206 		return 1 << 7;
207 
208 	if (src.File == RC_FILE_CONSTANT) {
209 		return src.Index | R500_RGB_ADDR0_CONST;
210 	} else if (src.File == RC_FILE_TEMPORARY || src.File == RC_FILE_INPUT) {
211 		use_temporary(code, src.Index);
212 		return src.Index;
213 	} else if (src.File == RC_FILE_INLINE) {
214 		return src.Index | (1 << 7);
215 	}
216 
217 	return 0;
218 }
219 
220 /**
221  * NOP the specified instruction if it is not a texture lookup.
222  */
alu_nop(struct r300_fragment_program_compiler * c,int ip)223 static void alu_nop(struct r300_fragment_program_compiler *c, int ip)
224 {
225 	PROG_CODE;
226 
227 	if ((code->inst[ip].inst0 & 0x3) != R500_INST_TYPE_TEX) {
228 		code->inst[ip].inst0 |= R500_INST_NOP;
229 	}
230 }
231 
232 /**
233  * Emit a paired ALU instruction.
234  */
emit_paired(struct r300_fragment_program_compiler * c,struct rc_pair_instruction * inst)235 static void emit_paired(struct r300_fragment_program_compiler *c, struct rc_pair_instruction *inst)
236 {
237 	int ip;
238 	PROG_CODE;
239 
240 	if (code->inst_end >= c->Base.max_alu_insts-1) {
241 		error("emit_alu: Too many instructions");
242 		return;
243 	}
244 
245 	ip = ++code->inst_end;
246 
247 	/* Quirk: MDH/MDV (DDX/DDY) need a NOP on previous non-TEX instructions. */
248 	if (inst->RGB.Opcode == RC_OPCODE_DDX || inst->Alpha.Opcode == RC_OPCODE_DDX ||
249 		inst->RGB.Opcode == RC_OPCODE_DDY || inst->Alpha.Opcode == RC_OPCODE_DDY) {
250 		if (ip > 0) {
251 			alu_nop(c, ip - 1);
252 		}
253 	}
254 
255 	code->inst[ip].inst5 = translate_rgb_op(c, inst->RGB.Opcode);
256 	code->inst[ip].inst4 = translate_alpha_op(c, inst->Alpha.Opcode);
257 
258 	if (inst->RGB.OutputWriteMask || inst->Alpha.OutputWriteMask || inst->Alpha.DepthWriteMask) {
259 		code->inst[ip].inst0 = R500_INST_TYPE_OUT;
260 		if (inst->WriteALUResult) {
261 			error("Cannot write output and ALU result at the same time");
262 			return;
263 		}
264 	} else {
265 		code->inst[ip].inst0 = R500_INST_TYPE_ALU;
266 	}
267 	code->inst[ip].inst0 |= (inst->SemWait << R500_INST_TEX_SEM_WAIT_SHIFT);
268 
269 	code->inst[ip].inst0 |= (inst->RGB.WriteMask << 11);
270 	code->inst[ip].inst0 |= inst->Alpha.WriteMask ? 1 << 14 : 0;
271 	code->inst[ip].inst0 |= (inst->RGB.OutputWriteMask << 15) | (inst->Alpha.OutputWriteMask << 18);
272 	if (inst->Nop) {
273 		code->inst[ip].inst0 |= R500_INST_NOP;
274 	}
275 	if (inst->Alpha.DepthWriteMask) {
276 		code->inst[ip].inst4 |= R500_ALPHA_W_OMASK;
277 		c->code->writes_depth = 1;
278 	}
279 
280 	code->inst[ip].inst4 |= R500_ALPHA_ADDRD(inst->Alpha.DestIndex);
281 	code->inst[ip].inst5 |= R500_ALU_RGBA_ADDRD(inst->RGB.DestIndex);
282 	use_temporary(code, inst->Alpha.DestIndex);
283 	use_temporary(code, inst->RGB.DestIndex);
284 
285 	if (inst->RGB.Saturate)
286 		code->inst[ip].inst0 |= R500_INST_RGB_CLAMP;
287 	if (inst->Alpha.Saturate)
288 		code->inst[ip].inst0 |= R500_INST_ALPHA_CLAMP;
289 
290 	/* Set the presubtract operation. */
291 	switch(inst->RGB.Src[RC_PAIR_PRESUB_SRC].Index) {
292 		case RC_PRESUB_BIAS:
293 			code->inst[ip].inst1 |= R500_RGB_SRCP_OP_1_MINUS_2RGB0;
294 			break;
295 		case RC_PRESUB_SUB:
296 			code->inst[ip].inst1 |= R500_RGB_SRCP_OP_RGB1_MINUS_RGB0;
297 			break;
298 		case RC_PRESUB_ADD:
299 			code->inst[ip].inst1 |= R500_RGB_SRCP_OP_RGB1_PLUS_RGB0;
300 			break;
301 		case RC_PRESUB_INV:
302 			code->inst[ip].inst1 |= R500_RGB_SRCP_OP_1_MINUS_RGB0;
303 			break;
304 		default:
305 			break;
306 	}
307 	switch(inst->Alpha.Src[RC_PAIR_PRESUB_SRC].Index) {
308 		case RC_PRESUB_BIAS:
309 			code->inst[ip].inst2 |= R500_ALPHA_SRCP_OP_1_MINUS_2A0;
310 			break;
311 		case RC_PRESUB_SUB:
312 			code->inst[ip].inst2 |= R500_ALPHA_SRCP_OP_A1_MINUS_A0;
313 			break;
314 		case RC_PRESUB_ADD:
315 			code->inst[ip].inst2 |= R500_ALPHA_SRCP_OP_A1_PLUS_A0;
316 			break;
317 		case RC_PRESUB_INV:
318 			code->inst[ip].inst2 |= R500_ALPHA_SRCP_OP_1_MINUS_A0;
319 			break;
320 		default:
321 			break;
322 	}
323 
324 	/* Set the output modifier */
325 	code->inst[ip].inst3 |= inst->RGB.Omod << R500_ALU_RGB_OMOD_SHIFT;
326 	code->inst[ip].inst4 |= inst->Alpha.Omod << R500_ALPHA_OMOD_SHIFT;
327 
328 	code->inst[ip].inst1 |= R500_RGB_ADDR0(use_source(code, inst->RGB.Src[0]));
329 	code->inst[ip].inst1 |= R500_RGB_ADDR1(use_source(code, inst->RGB.Src[1]));
330 	code->inst[ip].inst1 |= R500_RGB_ADDR2(use_source(code, inst->RGB.Src[2]));
331 
332 	code->inst[ip].inst2 |= R500_ALPHA_ADDR0(use_source(code, inst->Alpha.Src[0]));
333 	code->inst[ip].inst2 |= R500_ALPHA_ADDR1(use_source(code, inst->Alpha.Src[1]));
334 	code->inst[ip].inst2 |= R500_ALPHA_ADDR2(use_source(code, inst->Alpha.Src[2]));
335 
336 	code->inst[ip].inst3 |= translate_arg_rgb(inst, 0) << R500_ALU_RGB_SEL_A_SHIFT;
337 	code->inst[ip].inst3 |= translate_arg_rgb(inst, 1) << R500_ALU_RGB_SEL_B_SHIFT;
338 	code->inst[ip].inst5 |= translate_arg_rgb(inst, 2) << R500_ALU_RGBA_SEL_C_SHIFT;
339 
340 	code->inst[ip].inst4 |= translate_arg_alpha(inst, 0) << R500_ALPHA_SEL_A_SHIFT;
341 	code->inst[ip].inst4 |= translate_arg_alpha(inst, 1) << R500_ALPHA_SEL_B_SHIFT;
342 	code->inst[ip].inst5 |= translate_arg_alpha(inst, 2) << R500_ALU_RGBA_ALPHA_SEL_C_SHIFT;
343 
344 	code->inst[ip].inst3 |= R500_ALU_RGB_TARGET(inst->RGB.Target);
345 	code->inst[ip].inst4 |= R500_ALPHA_TARGET(inst->Alpha.Target);
346 
347 	if (inst->WriteALUResult) {
348 		code->inst[ip].inst3 |= R500_ALU_RGB_WMASK;
349 
350 		if (inst->WriteALUResult == RC_ALURESULT_X)
351 			code->inst[ip].inst0 |= R500_INST_ALU_RESULT_SEL_RED;
352 		else
353 			code->inst[ip].inst0 |= R500_INST_ALU_RESULT_SEL_ALPHA;
354 
355 		code->inst[ip].inst0 |= translate_alu_result_op(c, inst->ALUResultCompare);
356 	}
357 }
358 
/**
 * Pack a four-component swizzle into two bits per component.
 *
 * Component i ends up at bits [2*i, 2*i+1]; only the low two bits of each
 * selector are kept.
 */
static unsigned int translate_strq_swizzle(unsigned int swizzle)
{
	unsigned int packed = 0;
	int chan;

	for (chan = 3; chan >= 0; --chan)
		packed |= (GET_SWZ(swizzle, chan) & 0x3) << (chan * 2);

	return packed;
}
367 
368 /**
369  * Emit a single TEX instruction
370  */
emit_tex(struct r300_fragment_program_compiler * c,struct rc_sub_instruction * inst)371 static int emit_tex(struct r300_fragment_program_compiler *c, struct rc_sub_instruction *inst)
372 {
373 	int ip;
374 	PROG_CODE;
375 
376 	if (code->inst_end >= c->Base.max_alu_insts-1) {
377 		error("emit_tex: Too many instructions");
378 		return 0;
379 	}
380 
381 	ip = ++code->inst_end;
382 
383 	code->inst[ip].inst0 = R500_INST_TYPE_TEX
384 		| (inst->DstReg.WriteMask << 11)
385 		| (inst->TexSemWait << R500_INST_TEX_SEM_WAIT_SHIFT);
386 	code->inst[ip].inst1 = R500_TEX_ID(inst->TexSrcUnit)
387 		| (inst->TexSemAcquire << R500_TEX_SEM_ACQUIRE_SHIFT);
388 
389 	if (inst->TexSrcTarget == RC_TEXTURE_RECT)
390 		code->inst[ip].inst1 |= R500_TEX_UNSCALED;
391 
392 	switch (inst->Opcode) {
393 	case RC_OPCODE_KIL:
394 		code->inst[ip].inst1 |= R500_TEX_INST_TEXKILL;
395 		break;
396 	case RC_OPCODE_TEX:
397 		code->inst[ip].inst1 |= R500_TEX_INST_LD;
398 		break;
399 	case RC_OPCODE_TXB:
400 		code->inst[ip].inst1 |= R500_TEX_INST_LODBIAS;
401 		break;
402 	case RC_OPCODE_TXP:
403 		code->inst[ip].inst1 |= R500_TEX_INST_PROJ;
404 		break;
405 	case RC_OPCODE_TXD:
406 		code->inst[ip].inst1 |= R500_TEX_INST_DXDY;
407 		break;
408 	case RC_OPCODE_TXL:
409 		code->inst[ip].inst1 |= R500_TEX_INST_LOD;
410 		break;
411 	default:
412 		error("emit_tex can't handle opcode %s\n", rc_get_opcode_info(inst->Opcode)->Name);
413 	}
414 
415 	use_temporary(code, inst->SrcReg[0].Index);
416 	if (inst->Opcode != RC_OPCODE_KIL)
417 		use_temporary(code, inst->DstReg.Index);
418 
419 	code->inst[ip].inst2 = R500_TEX_SRC_ADDR(inst->SrcReg[0].Index)
420 		| (translate_strq_swizzle(inst->SrcReg[0].Swizzle) << 8)
421 		| R500_TEX_DST_ADDR(inst->DstReg.Index)
422 		| (GET_SWZ(inst->TexSwizzle, 0) << 24)
423 		| (GET_SWZ(inst->TexSwizzle, 1) << 26)
424 		| (GET_SWZ(inst->TexSwizzle, 2) << 28)
425 		| (GET_SWZ(inst->TexSwizzle, 3) << 30)
426 		;
427 
428 	if (inst->Opcode == RC_OPCODE_TXD) {
429 		use_temporary(code, inst->SrcReg[1].Index);
430 		use_temporary(code, inst->SrcReg[2].Index);
431 
432 		/* DX and DY parameters are specified in a separate register. */
433 		code->inst[ip].inst3 =
434 			R500_DX_ADDR(inst->SrcReg[1].Index) |
435 			(translate_strq_swizzle(inst->SrcReg[1].Swizzle) << 8) |
436 			R500_DY_ADDR(inst->SrcReg[2].Index) |
437 			(translate_strq_swizzle(inst->SrcReg[2].Swizzle) << 24);
438 	}
439 
440 	return 1;
441 }
442 
emit_flowcontrol(struct emit_state * s,struct rc_instruction * inst)443 static void emit_flowcontrol(struct emit_state * s, struct rc_instruction * inst)
444 {
445 	unsigned int newip;
446 
447 	if (s->Code->inst_end >= s->C->max_alu_insts-1) {
448 		rc_error(s->C, "emit_tex: Too many instructions");
449 		return;
450 	}
451 
452 	newip = ++s->Code->inst_end;
453 
454 	/* Currently all loops use the same integer constant to intialize
455 	 * the loop variables. */
456 	if(!s->Code->int_constants[0]) {
457 		s->Code->int_constants[0] = R500_FC_INT_CONST_KR(0xff);
458 		s->Code->int_constant_count = 1;
459 	}
460 	s->Code->inst[newip].inst0 = R500_INST_TYPE_FC | R500_INST_ALU_WAIT;
461 
462 	switch(inst->U.I.Opcode){
463 	struct branch_info * branch;
464 	struct r500_loop_info * loop;
465 	case RC_OPCODE_BGNLOOP:
466 		memory_pool_array_reserve(&s->C->Pool, struct r500_loop_info,
467 			s->Loops, s->CurrentLoopDepth, s->LoopsReserved, 1);
468 
469 		loop = &s->Loops[s->CurrentLoopDepth++];
470 		memset(loop, 0, sizeof(struct r500_loop_info));
471 		loop->BranchDepth = s->CurrentBranchDepth;
472 		loop->BgnLoop = newip;
473 
474 		s->Code->inst[newip].inst2 = R500_FC_OP_LOOP
475 			| R500_FC_JUMP_FUNC(0x00)
476 			| R500_FC_IGNORE_UNCOVERED
477 			;
478 		break;
479 	case RC_OPCODE_BRK:
480 		loop = &s->Loops[s->CurrentLoopDepth - 1];
481 		memory_pool_array_reserve(&s->C->Pool, int, loop->Brks,
482 					loop->BrkCount, loop->BrkReserved, 1);
483 
484 		loop->Brks[loop->BrkCount++] = newip;
485 		s->Code->inst[newip].inst2 = R500_FC_OP_BREAKLOOP
486 			| R500_FC_JUMP_FUNC(0xff)
487 			| R500_FC_B_OP1_DECR
488 			| R500_FC_B_POP_CNT(
489 				s->CurrentBranchDepth - loop->BranchDepth)
490 			| R500_FC_IGNORE_UNCOVERED
491 			;
492 		break;
493 
494 	case RC_OPCODE_CONT:
495 		loop = &s->Loops[s->CurrentLoopDepth - 1];
496 		memory_pool_array_reserve(&s->C->Pool, int, loop->Conts,
497 					loop->ContCount, loop->ContReserved, 1);
498 		loop->Conts[loop->ContCount++] = newip;
499 		s->Code->inst[newip].inst2 = R500_FC_OP_CONTINUE
500 			| R500_FC_JUMP_FUNC(0xff)
501 			| R500_FC_B_OP1_DECR
502 			| R500_FC_B_POP_CNT(
503 				s->CurrentBranchDepth -	loop->BranchDepth)
504 			| R500_FC_IGNORE_UNCOVERED
505 			;
506 		break;
507 
508 	case RC_OPCODE_ENDLOOP:
509 	{
510 		loop = &s->Loops[s->CurrentLoopDepth - 1];
511 		/* Emit ENDLOOP */
512 		s->Code->inst[newip].inst2 = R500_FC_OP_ENDLOOP
513 			| R500_FC_JUMP_FUNC(0xff)
514 			| R500_FC_JUMP_ANY
515 			| R500_FC_IGNORE_UNCOVERED
516 			;
517 		/* The constant integer at index 0 is used by all loops. */
518 		s->Code->inst[newip].inst3 = R500_FC_INT_ADDR(0)
519 			| R500_FC_JUMP_ADDR(loop->BgnLoop + 1)
520 			;
521 
522 		/* Set jump address and int constant for BGNLOOP */
523 		s->Code->inst[loop->BgnLoop].inst3 = R500_FC_INT_ADDR(0)
524 			| R500_FC_JUMP_ADDR(newip)
525 			;
526 
527 		/* Set jump address for the BRK instructions. */
528 		while(loop->BrkCount--) {
529 			s->Code->inst[loop->Brks[loop->BrkCount]].inst3 =
530 						R500_FC_JUMP_ADDR(newip + 1);
531 		}
532 
533 		/* Set jump address for CONT instructions. */
534 		while(loop->ContCount--) {
535 			s->Code->inst[loop->Conts[loop->ContCount]].inst3 =
536 						R500_FC_JUMP_ADDR(newip);
537 		}
538 		s->CurrentLoopDepth--;
539 		break;
540 	}
541 	case RC_OPCODE_IF:
542 		if ( s->CurrentBranchDepth >= R500_PFS_MAX_BRANCH_DEPTH_FULL) {
543 			rc_error(s->C, "Branch depth exceeds hardware limit");
544 			return;
545 		}
546 		memory_pool_array_reserve(&s->C->Pool, struct branch_info,
547 				s->Branches, s->CurrentBranchDepth, s->BranchesReserved, 1);
548 
549 		branch = &s->Branches[s->CurrentBranchDepth++];
550 		branch->If = newip;
551 		branch->Else = -1;
552 		branch->Endif = -1;
553 
554 		if (s->CurrentBranchDepth > s->MaxBranchDepth)
555 			s->MaxBranchDepth = s->CurrentBranchDepth;
556 
557 		/* actual instruction is filled in at ENDIF time */
558 		break;
559 
560 	case RC_OPCODE_ELSE:
561 		if (!s->CurrentBranchDepth) {
562 			rc_error(s->C, "%s: got ELSE outside a branch", __FUNCTION__);
563 			return;
564 		}
565 
566 		branch = &s->Branches[s->CurrentBranchDepth - 1];
567 		branch->Else = newip;
568 
569 		/* actual instruction is filled in at ENDIF time */
570 		break;
571 
572 	case RC_OPCODE_ENDIF:
573 		if (!s->CurrentBranchDepth) {
574 			rc_error(s->C, "%s: got ELSE outside a branch", __FUNCTION__);
575 			return;
576 		}
577 
578 		branch = &s->Branches[s->CurrentBranchDepth - 1];
579 		branch->Endif = newip;
580 
581 		s->Code->inst[branch->Endif].inst2 = R500_FC_OP_JUMP
582 			| R500_FC_A_OP_NONE /* no address stack */
583 			| R500_FC_JUMP_ANY /* docs says set this, but I don't understand why */
584 			| R500_FC_B_OP0_DECR /* decrement branch counter if stay */
585 			| R500_FC_B_OP1_NONE /* no branch counter if stay */
586 			| R500_FC_B_POP_CNT(1)
587 			;
588 		s->Code->inst[branch->Endif].inst3 = R500_FC_JUMP_ADDR(branch->Endif + 1);
589 		s->Code->inst[branch->If].inst2 = R500_FC_OP_JUMP
590 			| R500_FC_A_OP_NONE /* no address stack */
591 			| R500_FC_JUMP_FUNC(0x0f) /* jump if ALU result is false */
592 			| R500_FC_B_OP0_INCR /* increment branch counter if stay */
593 			| R500_FC_IGNORE_UNCOVERED
594 		;
595 
596 		if (branch->Else >= 0) {
597 			/* increment branch counter also if jump */
598 			s->Code->inst[branch->If].inst2 |= R500_FC_B_OP1_INCR;
599 			s->Code->inst[branch->If].inst3 = R500_FC_JUMP_ADDR(branch->Else + 1);
600 
601 			s->Code->inst[branch->Else].inst2 = R500_FC_OP_JUMP
602 				| R500_FC_A_OP_NONE /* no address stack */
603 				| R500_FC_B_ELSE /* all active pixels want to jump */
604 				| R500_FC_B_OP0_NONE /* no counter op if stay */
605 				| R500_FC_B_OP1_DECR /* decrement branch counter if jump */
606 				| R500_FC_B_POP_CNT(1)
607 			;
608 			s->Code->inst[branch->Else].inst3 = R500_FC_JUMP_ADDR(branch->Endif + 1);
609 		} else {
610 			/* don't touch branch counter on jump */
611 			s->Code->inst[branch->If].inst2 |= R500_FC_B_OP1_NONE;
612 			s->Code->inst[branch->If].inst3 = R500_FC_JUMP_ADDR(branch->Endif + 1);
613 		}
614 
615 
616 		s->CurrentBranchDepth--;
617 		break;
618 	default:
619 		rc_error(s->C, "%s: unknown opcode %s\n", __FUNCTION__, rc_get_opcode_info(inst->U.I.Opcode)->Name);
620 	}
621 }
622 
r500BuildFragmentProgramHwCode(struct radeon_compiler * c,void * user)623 void r500BuildFragmentProgramHwCode(struct radeon_compiler *c, void *user)
624 {
625 	struct r300_fragment_program_compiler *compiler = (struct r300_fragment_program_compiler*)c;
626 	struct emit_state s;
627 	struct r500_fragment_program_code *code = &compiler->code->code.r500;
628 
629 	memset(&s, 0, sizeof(s));
630 	s.C = &compiler->Base;
631 	s.Code = code;
632 
633 	memset(code, 0, sizeof(*code));
634 	code->max_temp_idx = 1;
635 	code->inst_end = -1;
636 
637 	for(struct rc_instruction * inst = compiler->Base.Program.Instructions.Next;
638 	    inst != &compiler->Base.Program.Instructions && !compiler->Base.Error;
639 	    inst = inst->Next) {
640 		if (inst->Type == RC_INSTRUCTION_NORMAL) {
641 			const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode);
642 
643 			if (opcode->IsFlowControl) {
644 				emit_flowcontrol(&s, inst);
645 			} else if (inst->U.I.Opcode == RC_OPCODE_BEGIN_TEX) {
646 				continue;
647 			} else {
648 				emit_tex(compiler, &inst->U.I);
649 			}
650 		} else {
651 			emit_paired(compiler, &inst->U.P);
652 		}
653 	}
654 
655 	if (code->max_temp_idx >= compiler->Base.max_temp_regs)
656 		rc_error(&compiler->Base, "Too many hardware temporaries used");
657 
658 	if (compiler->Base.Error)
659 		return;
660 
661 	if (code->inst_end == -1 ||
662 	    (code->inst[code->inst_end].inst0 & R500_INST_TYPE_MASK) != R500_INST_TYPE_OUT) {
663 		int ip;
664 
665 		/* This may happen when dead-code elimination is disabled or
666 		 * when most of the fragment program logic is leading to a KIL */
667 		if (code->inst_end >= compiler->Base.max_alu_insts-1) {
668 			rc_error(&compiler->Base, "Introducing fake OUT: Too many instructions");
669 			return;
670 		}
671 
672 		ip = ++code->inst_end;
673 		code->inst[ip].inst0 = R500_INST_TYPE_OUT | R500_INST_TEX_SEM_WAIT;
674 	}
675 
676 	/* Make sure TEX_SEM_WAIT is set on the last instruction */
677 	code->inst[code->inst_end].inst0 |= R500_INST_TEX_SEM_WAIT;
678 
679 	/* Enable full flow control mode if we are using loops or have if
680 	 * statements nested at least four deep. */
681 	if (s.MaxBranchDepth >= 4 || s.LoopsReserved > 0) {
682 		if (code->max_temp_idx < 1)
683 			code->max_temp_idx = 1;
684 
685 		code->us_fc_ctrl |= R500_FC_FULL_FC_EN;
686 	}
687 }
688